Compare commits

...

35 Commits

Author SHA1 Message Date
ShenJunkun
afac885c10 refactor: add schema column to the scripts table (#868) 2023-02-07 11:07:32 +08:00
Lei, HUANG
5d62e193bd feat: support multi regions on datanode (#653)
* wip: fix compile errors

* chore: move splitter to partition crate

* fix: remove useless variants in frontend errors

* chore: move more partition related code to partition manager

* fix: license header

* wip: move WriteSplitter to PartitionRuleManager

* fix: clippy warnings

* chore: remove useless error variant and format toml

* fix: cr comments

* chore: resolve conflicts

* chore: rebase develop

* fix: cr comments

* feat: support multi regions on datanode

* chore: rebase onto develop

* chore: rebase develop

* chore: rebase develop

* wip

* fix: compile errors

* feat: multi region

* fix: CR comments

* feat: allow stat existing regions without actually open it

* fix: use table meta in manifest to recover region info
2023-02-07 10:46:18 +08:00
elijah
7d77913e88 chore: fix rfc typo (#952) 2023-02-07 08:47:06 +08:00
Lei, HUANG
3f45a0d337 docs: rfc for table compaction (#939)
* doc: rfc for table compaction

* docs: update compaction rfc
2023-02-06 22:15:53 +08:00
Zhizhen He
a1e97c990f chore: fix typo (#949) 2023-02-06 22:13:56 +08:00
Ning Sun
4ae63b7089 feat: Initial prepare statement support for Postgres protocol (#925)
* feat: add describe statement to query_engine

* feat: add ability to describe statement for sql handler

* refactor: return schema instead of wrapped ref

* test: resolve tests

* feat: add initial support for prepared statements

* feat: add parameter types to query statement

* test: fix parser test

* chore: add todo task

* fix: turn on integer_datetime for binary timestamp

* fix: format string using single quote

* test: add tests for prepared statement

* Apply suggestions from code review

Co-authored-by: LFC <bayinamine@gmail.com>

* refactor: use stream api from recordbatches

---------

Co-authored-by: LFC <bayinamine@gmail.com>
2023-02-06 22:06:00 +08:00
Yingwen
b0925d94ed feat: Implement lock component for ProcedureManager (#937)
* feat: Add procedure meta

* feat: Implement lock for procedures

* chore: Allow dead code

* docs: Fix comment

* docs: Update docs of acquire_lock
2023-02-03 18:42:03 +08:00
Ruihang Xia
fc9276c79d feat: export promql service in server (#924)
* chore: some tiny typo/style fix

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: add promql server

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* works for mocked query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* integration test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* expose promql api to our http server

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust router structure

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-02-03 08:28:56 +00:00
LFC
184ca78a4d revert: removed all "USE"s in sqlness tests introduced in #922 (#938) 2023-02-03 15:44:58 +08:00
discord9
ebbf1e43b5 feat: Query using sql inside python script (#884)
* feat: add weakref to QueryEngine in copr

* feat: sql query in python

* fix: make_class for Query Engine

* fix: use `Handle::try_current` instead

* fix: cache `Runtime`

* fix: lock file conflict

* fix: dedicated thread for blocking&fix test

* test: remove unnecessary print
2023-02-03 15:05:27 +08:00
dennis zhuang
54fe81dad9 docs: add dashboard to resources in README (#934) 2023-02-03 13:47:19 +08:00
LFC
af935671b2 feat: support "use" in GRPC requests (#922)
* feat: support "use catalog and schema"(behave like the "use" in MySQL) in GRPC requests

* fix: rebase develop
2023-02-02 20:02:56 +08:00
Yingwen
74adb077bc feat: Implement ProcedureStore (#927)
* test: Add more tests for ProcedureId

* feat: Add ObjectStore based state store

* feat: Implement ProcedureStore

* test: Add tests for ParsedKey

* refactor: Rename list to walk_top_down

* fix: Test ProcedureStore and handles unordered key values.

* style: Fix clippy

* docs: Update comment

* chore: Adjust log level for printing invalid key
2023-02-02 17:49:31 +08:00
Ruihang Xia
54c7a8be02 docs: document sqlness-runner usage (#931)
docs: paste doc from greptime-doc

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-02-02 15:56:51 +08:00
Ruihang Xia
ea5146762a chore(deps): bump promql-parser (#929)
* fix promql crate

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* migrate to new api

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix aggregator test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-02-02 07:31:41 +00:00
Yingwen
788b5362a1 docs: Add procedure framework RFC (#836)
* docs: Add procedure framework RFC

* docs: Add dump, rollback and locking to procedure framework

* docs: Change ProcedureBuilder to ProcedureLoader

* docs: Add sub-procedures section

* docs: Add a link to explain idempotent

* docs: Add link to the tracking issue

* docs: Fix ProcedureLoader type alias

* docs: Update procedure API

* docs: Address CR comments

* docs: Update path and make the docs more clear
2023-02-02 11:28:56 +08:00
Lei, HUANG
028a69e349 refactor: move partition related code to partition manager (#906)
* wip: fix compile errors

* chore: move splitter to partition crate

* fix: remove useless variants in frontend errors

* chore: move more partition related code to partition manager

* fix: license header

* wip: move WriteSplitter to PartitionRuleManager

* fix: clippy warnings

* chore: remove useless error variant and format toml

* fix: cr comments

* chore: resolve conflicts

* chore: rebase develop

* fix: cr comments
2023-02-01 19:24:49 +08:00
elijah
9a30ba00c4 test: run sqlness test in distributed mode (#916)
* test: run sqlness test in distributed mode

* chore: fix ci test

* chore: improve the ci yaml

* chore: improve the code

* chore: fix conflicts
2023-01-31 15:00:11 +08:00
LFC
8149932bad feat: local catalog drop table (#913)
* feat: local catalog drop table

* Update src/catalog/src/local/manager.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* Update src/catalog/src/local/manager.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* fix: resolve PR comments

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2023-01-31 14:44:03 +08:00
Ruihang Xia
89e4084af4 build(ci): upload sqlness log files (#920)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-01-31 14:31:27 +08:00
Ning Sun
39df25a8f6 refactor: make postgres handler stateful (#914)
* feat: update pgwire to 0.8 and unify postgres handler

* fix: correct password message matching
2023-01-31 14:19:18 +08:00
Yingwen
b2ad0e972b feat: Define procedure related traits (#904)
* chore: Move uuid to workspace.dependencies

* feat: Define procedure related traits

* test: Add tests

* chore: Update imports

* feat: Submit ProcedureWithId to manager

* chore: pub ProcedureId::parse_str

* refactor: ProcedureId::parse_str returns Result

* chore: Address CR comments

Also implements FromStr for ProcedureId
2023-01-31 14:17:28 +08:00
shuiyisong
18e6740ac9 chore: add interceptor err in frontend::error::Error (#917)
* chore: add interceptor boxed err

* chore: rename

* chore: update err msg

Co-authored-by: fys <40801205+Fengys123@users.noreply.github.com>

---------

Co-authored-by: fys <40801205+Fengys123@users.noreply.github.com>
2023-01-30 03:12:03 +00:00
Yun Chen
a7dc86ffe5 feat: oss storage support (#911)
* feat: add oss storage support

* fix: ci build format check

* fix: align OSS to Oss

* fix: cr comments

* fix: rename OSS to Oss in integration tests

* fix: clippy fix
2023-01-29 20:09:38 +08:00
Ruihang Xia
71482b38d7 feat: PromQL binary expr planner (#889)
* feat: PromQL binary expr planner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* column & column test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* column & literal test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* mark literal-literal unsupported

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-01-29 17:02:11 +08:00
Ruihang Xia
dc9b5339bf feat: impl increase and irate/idelta in PromQL (#880)
* feat: impl increase and irate/idelta in PromQL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add license header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add counter reset test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-01-29 14:21:13 +08:00
Lei, HUANG
5e05c8f884 fix: TimestampRange::new_inclusive and strum dependency (#910)
fix: TimestampRange::new_inclusive; also fix strum dependency in common-error
2023-01-29 13:09:05 +08:00
shuiyisong
aafc26c788 feat: add mysql reject_no_database (#896)
* chore: update opensrv-mysql to main

* refactor: change mysql server struct

* feat: add option to reject no database mysql connection request

* chore: remove unused condition

* chore: rebase develop

* chore: make reject_no_database optional
2023-01-29 04:09:47 +00:00
LFC
64243e3a7d refactor: accommodate java flight client (#886)
* refactor: change how AffectedRows is carried in flight stream to accommodate Java Flight client

* fix: clippy
2023-01-29 11:27:13 +08:00
Ruihang Xia
36a13dafb7 build(deps): bump tokio to 1.24.2 (#900)
deps: bump tokio to 1.24.2

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-01-29 11:13:37 +08:00
shuiyisong
637837ae44 chore: return authorize err msg to mysql client (#905)
chore: refine authorize err msg to client
2023-01-29 10:53:36 +08:00
dependabot[bot]
ae8afd3711 build(deps): bump bzip2 from 0.4.3 to 0.4.4 (#898)
Bumps [bzip2](https://github.com/alexcrichton/bzip2-rs) from 0.4.3 to 0.4.4.
- [Release notes](https://github.com/alexcrichton/bzip2-rs/releases)
- [Commits](https://github.com/alexcrichton/bzip2-rs/commits/0.4.4)

---
updated-dependencies:
- dependency-name: bzip2
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-28 21:08:03 +08:00
Yingwen
3db8f95169 ci: Skip status check on docs changed (#903)
* ci: Pass status check on docs changed

* ci: Remove coverage.yml
2023-01-28 16:37:47 +08:00
Lei, HUANG
43aefc5d74 feat: prunine sst files according to time range in filters (#887)
* 1. Reimplement Eq for Timestamp
2. Add and/or for GenericRange

* feat: extract time range from filters

* feat: select sst files according to time range

* fix: clippy

* fix: empty value in range

* fix: some cr comments

* fix: return optional timestamp range

* fix: cr comments
2023-01-28 15:16:41 +08:00
Ruihang Xia
b33937f48e test: sqlness test for alter table rename (#891)
* test: sqlness test for alter table rename

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update test result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change show create table to desc table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-01-28 11:35:38 +08:00
206 changed files with 8383 additions and 2934 deletions

View File

@@ -2,3 +2,9 @@
GT_S3_BUCKET=S3 bucket
GT_S3_ACCESS_KEY_ID=S3 access key id
GT_S3_ACCESS_KEY=S3 secret access key
# Settings for oss test
GT_OSS_BUCKET=OSS bucket
GT_OSS_ACCESS_KEY_ID=OSS access key id
GT_OSS_ACCESS_KEY=OSS access key
GT_OSS_ENDPOINT=OSS endpoint

View File

@@ -1,70 +0,0 @@
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
paths-ignore:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
push:
branches:
- "main"
- "develop"
paths-ignore:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
workflow_dispatch:
name: Code coverage
env:
RUST_TOOLCHAIN: nightly-2022-12-20
jobs:
coverage:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
- name: Install toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Collect coverage data
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Codecov upload
uses: codecov/codecov-action@v2
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./lcov.info
flags: rust
fail_ci_if_error: true
verbose: true

View File

@@ -7,6 +7,7 @@ on:
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
push:
branches:
- develop
@@ -125,8 +126,25 @@ jobs:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Run etcd
run: |
ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
nohup etcd >/tmp/etcd.log 2>&1 &
- name: Run sqlness
run: cargo run --bin sqlness-runner
run: cargo run --bin sqlness-runner && ls /tmp
- name: Upload sqlness logs
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: /tmp/greptime-*.log
retention-days: 3
fmt:
name: Rustfmt
@@ -165,3 +183,45 @@ jobs:
uses: Swatinem/rust-cache@v2
- name: Run cargo clippy
run: cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
coverage:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
- name: Install toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Collect coverage data
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Codecov upload
uses: codecov/codecov-action@v2
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./lcov.info
flags: rust
fail_ci_if_error: true
verbose: true

.github/workflows/docs.yml vendored Normal file (55 lines)
View File

@@ -0,0 +1,55 @@
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
paths:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
push:
branches:
- develop
- main
paths:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
workflow_dispatch:
name: CI
# To pass the required status check, see:
# https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks
jobs:
check:
name: Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'
fmt:
name: Rustfmt
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'
clippy:
name: Clippy
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'
coverage:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'

View File

@@ -69,6 +69,25 @@ jobs:
run: |
brew install protobuf
- name: Install etcd for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: |
ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
nohup etcd >/tmp/etcd.log 2>&1 &
- name: Install etcd for macos
if: contains(matrix.arch, 'darwin')
run: |
brew install etcd
brew services start etcd
- name: Install dependencies for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: |

Cargo.lock generated (929 lines changed)

File diff suppressed because it is too large.

View File

@@ -12,6 +12,7 @@ members = [
"src/common/function-macro",
"src/common/grpc",
"src/common/grpc-expr",
"src/common/procedure",
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
@@ -26,6 +27,7 @@ members = [
"src/meta-srv",
"src/mito",
"src/object-store",
"src/partition",
"src/promql",
"src/query",
"src/script",
@@ -65,8 +67,9 @@ prost = "0.11"
serde = { version = "1.0", features = ["derive"] }
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.28"
tokio = { version = "1", features = ["full"] }
tokio = { version = "1.24.2", features = ["full"] }
tonic = "0.8"
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
[profile.release]
debug = true

View File

@@ -153,6 +153,9 @@ You can always cleanup test database by removing `/tmp/greptimedb`.
- GreptimeDB [Developer
Guide](https://docs.greptime.com/developer-guide/overview.html)
### Dashboard
- [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)
### SDK
- [GreptimeDB Java
@@ -169,7 +172,7 @@ For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam
## Community
Our core team is thrilled too see you participate in any ways you like. When you are stuck, try to
Our core team is thrilled to see you participate in any ways you like. When you are stuck, try to
ask for help by filling an issue with a detailed description of what you were trying to do
and what went wrong. If you have any questions or if you would like to get involved in our
community, please check out:

View File

@@ -32,7 +32,6 @@ use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::task::JoinSet;
const DATABASE_NAME: &str = "greptime";
const CATALOG_NAME: &str = "greptime";
const SCHEMA_NAME: &str = "public";
const TABLE_NAME: &str = "nyc_taxi";
@@ -100,7 +99,6 @@ async fn write_data(
let record_batch = record_batch.unwrap();
let (columns, row_count) = convert_record_batch(record_batch);
let request = InsertRequest {
schema_name: "public".to_string(),
table_name: TABLE_NAME.to_string(),
region_number: 0,
columns,
@@ -424,7 +422,7 @@ fn main() {
.unwrap()
.block_on(async {
let client = Client::with_urls(vec![&args.endpoint]);
let db = Database::new(DATABASE_NAME, client);
let db = Database::with_client(client);
if !args.skip_write {
do_write(&args, &db).await;

View File

@@ -0,0 +1,153 @@
---
Feature Name: "procedure-framework"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/286
Date: 2023-01-03
Author: "Yingwen <realevenyag@gmail.com>"
---
Procedure Framework
----------------------
# Summary
A framework for executing operations in a fault-tolerant manner.
# Motivation
Some operations in GreptimeDB require multiple steps to implement. For example, creating a table needs:
1. Check whether the table exists
2. Create the table in the table engine
    1. Create a region for the table in the storage engine
    2. Persist the metadata of the table to the table manifest
3. Add the table to the catalog manager
If the node dies or restarts in the middle of creating a table, it could leave the system in an inconsistent state. The procedure framework, inspired by [Apache HBase's ProcedureV2 framework](https://github.com/apache/hbase/blob/bfc9fc9605de638785435e404430a9408b99a8d0/src/main/asciidoc/_chapters/pv2.adoc) and [Apache Accumulo's FATE framework](https://accumulo.apache.org/docs/2.x/administration/fate), aims to provide a unified way to implement multi-step operations that are tolerant to failure.
# Details
## Overview
The procedure framework consists of the following primary components:
- A `Procedure` represents an operation or a set of operations to be performed step-by-step
- `ProcedureManager`, the runtime that runs `Procedure`s. It executes the submitted procedures, stores their states to the `ProcedureStore`, and restores procedures from the `ProcedureStore` when the database restarts.
- `ProcedureStore` is a storage layer for persisting the procedure state
## Procedures
The `ProcedureManager` keeps calling `Procedure::execute()` until the Procedure is done, so the operation of the Procedure should be [idempotent](https://developer.mozilla.org/en-US/docs/Glossary/Idempotent): it needs to be able to undo or replay a partial execution of itself.
```rust
trait Procedure {
fn execute(&mut self, ctx: &Context) -> Result<Status>;
fn dump(&self) -> Result<String>;
fn rollback(&self) -> Result<()>;
// other methods...
}
```
The `Status` is an enum that has the following variants:
```rust
enum Status {
Executing {
persist: bool,
},
Suspended {
subprocedures: Vec<ProcedureWithId>,
persist: bool,
},
Done,
}
```
A call to `execute()` can result in the following possibilities:
- `Ok(Status::Done)`: we are done
- `Ok(Status::Executing { .. })`: there are remaining steps to do
- `Ok(Status::Suspended { subprocedures, .. })`: execution is suspended and can be resumed later, after the sub-procedures are done.
- `Err(e)`: error occurs during execution and the procedure is unable to proceed anymore.
Users need to assign a unique `ProcedureId` to the procedure and the procedure can get this id via the `Context`. The `ProcedureId` is typically a UUID.
```rust
struct Context {
id: ProcedureId,
// other fields ...
}
```
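The RFC notes that a `ProcedureId` is typically a UUID, and the commits above mention `ProcedureId::parse_str` and a `FromStr` implementation. A minimal, hedged sketch of such an id type follows; only that surface mirrors the commit messages, and the internals are assumptions rather than the actual code in `src/common/procedure`:
```rust
// Sketch only: a ProcedureId newtype over a UUID. Internal details are assumed.
use std::str::FromStr;
use uuid::Uuid;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ProcedureId(Uuid);

impl ProcedureId {
    /// Generates a new random id for a procedure.
    pub fn random() -> Self {
        ProcedureId(Uuid::new_v4())
    }

    /// Parses an id from its string form.
    pub fn parse_str(s: &str) -> Result<Self, uuid::Error> {
        Uuid::parse_str(s).map(ProcedureId)
    }
}

impl FromStr for ProcedureId {
    type Err = uuid::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        ProcedureId::parse_str(s)
    }
}
```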
The `ProcedureManager` calls `Procedure::dump()` to serialize the internal state of the procedure and writes it to the `ProcedureStore`. The `Status` has a `persist` field that tells the `ProcedureManager` whether the state needs to be persisted.
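To make `dump()` and the `persist` flag concrete, here is a hedged sketch of a hypothetical two-step procedure, written in the same loose style as the trait above; the state machine, the serde-based `dump`, and the error handling are illustrative assumptions, not the actual create-table procedure:
```rust
// Sketch only: a hypothetical two-step procedure whose state survives restarts.
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
enum CreateTableState {
    Prepare,
    EngineCreated,
}

struct CreateTableProcedure {
    state: CreateTableState,
}

impl Procedure for CreateTableProcedure {
    fn execute(&mut self, _ctx: &Context) -> Result<Status> {
        match self.state {
            CreateTableState::Prepare => {
                // ... check the table does not exist and create it in the table engine ...
                self.state = CreateTableState::EngineCreated;
                // Ask the manager to persist the new state before the next step runs.
                Ok(Status::Executing { persist: true })
            }
            CreateTableState::EngineCreated => {
                // ... register the table in the catalog manager ...
                Ok(Status::Done)
            }
        }
    }

    fn dump(&self) -> Result<String> {
        // The manager writes whatever string we return here into a `.step` file.
        Ok(serde_json::to_string(&self.state).expect("state is serializable"))
    }

    fn rollback(&self) -> Result<()> {
        // Best effort: drop the half-created table from the table engine.
        Ok(())
    }
}
```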
## Sub-procedures
A procedure may need to create some sub-procedures to process its subtasks. For example, creating a distributed table with multiple regions (partitions) needs to set up the regions on each node, so the parent procedure should instantiate a sub-procedure for each region. The `ProcedureManager` makes sure that the parent procedure does not proceed until all sub-procedures have finished successfully.
The procedure submits sub-procedures to the `ProcedureManager` by returning `Status::Suspended`. It needs to assign a procedure id to each sub-procedure manually so that it can track their status.
```rust
struct ProcedureWithId {
id: ProcedureId,
procedure: BoxedProcedure,
}
```
## ProcedureStore
We might need to provide two different ProcedureStore implementations:
- In standalone mode, it stores data on the local disk.
- In distributed mode, it stores data on the meta server or the object store service.
These implementations should share the same storage structure. They store each procedure's state in a unique path based on the procedure id:
```
Sample paths:
/procedures/{PROCEDURE_ID}/000001.step
/procedures/{PROCEDURE_ID}/000002.step
/procedures/{PROCEDURE_ID}/000003.commit
```
`ProcedureStore` behaves like a WAL. Before performing each step, the `ProcedureManager` can write the procedure's current state to the `ProcedureStore`, which stores it in a `.step` file. The `000001` in the path is a monotonically increasing sequence number of the step. After the procedure is done, the `ProcedureManager` writes a `.commit` file to indicate that the procedure is finished (committed).
The `ProcedureManager` can remove the procedure's files once the procedure is done, but it needs to leave the `.commit` file as the last one to remove, in case the removal fails partway.
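For illustration, here is a small self-contained sketch of how such step keys could be formatted and parsed back; the helper names are hypothetical, and the real store may lay keys out differently:
```rust
// Sketch only: format and parse `.step` keys under /procedures/{PROCEDURE_ID}/.
fn step_key(procedure_id: &str, seq: u32) -> String {
    format!("/procedures/{procedure_id}/{seq:06}.step")
}

fn parse_step_key(key: &str) -> Option<(String, u32)> {
    let rest = key.strip_prefix("/procedures/")?;
    let (procedure_id, file) = rest.split_once('/')?;
    let seq = file.strip_suffix(".step")?.parse().ok()?;
    Some((procedure_id.to_string(), seq))
}

fn main() {
    let key = step_key("9f2b9b0e-0000-0000-0000-000000000001", 1);
    assert_eq!(
        key,
        "/procedures/9f2b9b0e-0000-0000-0000-000000000001/000001.step"
    );
    assert_eq!(
        parse_step_key(&key),
        Some(("9f2b9b0e-0000-0000-0000-000000000001".to_string(), 1))
    );
}
```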
## ProcedureManager
`ProcedureManager` executes procedures submitted to it.
```rust
trait ProcedureManager {
fn register_loader(&self, name: &str, loader: BoxedProcedureLoader) -> Result<()>;
async fn submit(&self, procedure: ProcedureWithId) -> Result<()>;
}
```
It supports the following operations:
- Register a `ProcedureLoader` by the type name of the `Procedure`.
- Submit a `Procedure` to the manager and execute it.
When the `ProcedureManager` starts, it loads procedures from the `ProcedureStore` and restores them via their `ProcedureLoader`s. The manager stores the type name returned by `Procedure::type_name()` together with the data from `Procedure::dump()` in the `.step` file, and uses the type name to find a `ProcedureLoader` that can recover the procedure from its data.
```rust
type BoxedProcedureLoader = Box<dyn Fn(&str) -> Result<BoxedProcedure> + Send>;
```
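A hedged usage sketch that ties the pieces together, reusing the hypothetical `CreateTableProcedure`, `CreateTableState`, and `ProcedureId` from the earlier sketches; the loader name and construction details are assumptions:
```rust
// Sketch only: register a loader for recovery, then submit a new procedure.
async fn submit_create_table(manager: &impl ProcedureManager) -> Result<()> {
    // The loader turns the string produced by `dump()` back into a procedure,
    // so the manager can restore it after a restart.
    manager.register_loader(
        "CreateTableProcedure",
        Box::new(|data: &str| {
            let state: CreateTableState =
                serde_json::from_str(data).expect("state produced by dump() is valid");
            Ok(Box::new(CreateTableProcedure { state }) as BoxedProcedure)
        }),
    )?;

    // Submit a fresh procedure instance with a newly assigned id.
    manager
        .submit(ProcedureWithId {
            id: ProcedureId::random(),
            procedure: Box::new(CreateTableProcedure {
                state: CreateTableState::Prepare,
            }),
        })
        .await
}
```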
## Rollback
The rollback step is supposed to clean up the resources created during the `execute()` step. When a procedure fails, the `ProcedureManager` writes a `.rollback` file and calls the `Procedure::rollback()` method.
```text
/procedures/{PROCEDURE_ID}/000001.step
/procedures/{PROCEDURE_ID}/000002.rollback
```
Rollback is complicated to implement, so some procedures might not support rollback or only provide a best-effort approach.
## Locking
The `ProcedureManager` can provide a locking mechanism that gives a procedure read/write access to a database object, such as a table, so that other procedures cannot modify the same table while the current one is executing.
Sub-procedures always inherit their parents' locks. The `ProcedureManager` only acquires locks for a procedure if its parent doesn't already hold them.
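As a rough illustration of the inherited-lock rule (all names here are hypothetical and reuse the `ProcedureId` sketch above):
```rust
// Sketch only: a lock is owned per procedure tree; sub-procedures reuse the
// lock already held by their root procedure instead of acquiring it again.
use std::collections::HashMap;

#[derive(Clone, PartialEq, Eq, Hash)]
struct LockKey(String); // e.g. "greptime.public.my_table"

#[derive(Default)]
struct LockManager {
    // The root procedure that currently owns each lock.
    owners: HashMap<LockKey, ProcedureId>,
}

impl LockManager {
    /// Acquires `key` for the procedure tree rooted at `root`, unless that tree
    /// already holds it (the inherited case). Returns false when another tree
    /// owns the lock.
    fn try_acquire(&mut self, key: LockKey, root: ProcedureId) -> bool {
        match self.owners.get(&key) {
            Some(owner) => *owner == root,
            None => {
                self.owners.insert(key, root);
                true
            }
        }
    }
}
```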
# Drawbacks
The `Procedure` framework introduces additional complexity and overhead to our database.
- To execute a `Procedure`, we need to write to the `ProcedureStore` multiple times, which may slow down the server
- We need to rewrite the logic of creating/dropping/altering a table using the procedure framework
# Alternatives
Another approach is to tolerate failure during execution and allow users to retry the operation until it succeeds. But we still need to:
- Make each step idempotent
- Record the status in some place to check whether we are done

View File

@@ -0,0 +1,92 @@
---
Feature Name: "table-compaction"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/930
Date: 2023-02-01
Author: "Lei, HUANG <mrsatangel@gmail.com>"
---
# Table Compaction
---
## Background
GreptimeDB uses an LSM-tree based storage engine that flushes memtables to SSTs for persistence.
Currently, however, it only supports level 0, and SST files in level 0 are not guaranteed to contain only rows with disjoint time ranges.
That is to say, different SST files in level 0 may contain overlapping timestamps.
As a consequence, retrieving rows in a given time range requires scanning all files, which incurs a lot of I/O overhead.
Also, just like in other LSM-tree engines, deletes/updates to existing primary keys are converted to new rows with a delete/update marker and appended to SSTs on flushing.
We need to merge the operations on the same primary keys so that we don't have to go through all SST files to find their final state.
## Goal
Implement a compaction framework to:
- maintain SSTs in timestamp order to accelerate queries with timestamp conditions;
- merge rows with the same primary key;
- purge expired SSTs;
- accommodate other tasks like data rollup/indexing.
## Overview
Table compaction involves the following components:
- Compaction scheduler: runs compaction tasks and limits the resources they consume;
- Compaction strategy: finds the SSTs to compact and determines the output files of the compaction;
- Compaction task: reads rows from the input SSTs and writes them to the output files.
## Implementation
### Compaction scheduler
`CompactionScheduler` is an executor that continuously polls and executes compaction requests from a task queue.
```rust
#[async_trait]
pub trait CompactionScheduler {
/// Schedules a compaction task.
async fn schedule(&self, task: CompactionRequest) -> Result<()>;
/// Stops compaction scheduler.
async fn stop(&self) -> Result<()>;
}
```
### Compaction triggering
Currently, we can check whether to compact a table when a memtable is flushed to an SST.
https://github.com/GreptimeTeam/greptimedb/blob/4015dd80752e1e6aaa3d7cacc3203cb67ed9be6d/src/storage/src/flush.rs#L245
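As a rough illustration of this hand-off (the request fields and the call site are assumptions, not the actual flush code):
```rust
// Sketch only: once a flush produces a new level-0 SST, queue a compaction
// request and let the strategy pick the input files when the task runs.
async fn on_flush_finished(
    scheduler: &impl CompactionScheduler,
    region_id: u64,
) -> Result<()> {
    let request = CompactionRequest { region_id };
    scheduler.schedule(request).await
}
```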
### Compaction strategy
`CompactionStrategy` defines how to pick SSTs in all levels for compaction.
```rust
pub trait CompactionStrategy {
fn pick(
&self,
ctx: CompactionContext,
levels: &LevelMetas,
) -> Result<CompactionTask>;
}
```
The most suitable compaction strategy for time-series scenarios would be
a hybrid strategy that combines time-window compaction with size-tiered compaction, just like [Cassandra](https://cassandra.apache.org/doc/latest/cassandra/operating/compaction/twcs.html) and [ScyllaDB](https://docs.scylladb.com/stable/architecture/compaction/compaction-strategies.html#time-window-compaction-strategy-twcs) do.
We can first group SSTs in level n into buckets according to some predefined time window. Within a window,
SSTs are compacted in a size-tiered manner (find SSTs with similar sizes and compact them into level n+1).
SSTs from different time windows are never compacted together.
That strategy guarantees SSTs in each level are largely sorted in timestamp order, which boosts queries with
explicit timestamp conditions, while size-tiered compaction minimizes the impact on foreground writes.
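A small sketch of the first step, grouping level-0 SSTs into fixed-duration time-window buckets (`FileMeta` and its fields are assumptions for illustration):
```rust
// Sketch only: files falling into the same time window become candidates for
// size-tiered compaction into the next level; different windows never mix.
use std::collections::BTreeMap;

struct FileMeta {
    file_id: String,
    /// Start of the file's time range, in seconds since the epoch.
    start_sec: i64,
    size_bytes: u64,
}

fn bucket_by_time_window(files: &[FileMeta], window_sec: i64) -> BTreeMap<i64, Vec<&FileMeta>> {
    let mut buckets: BTreeMap<i64, Vec<&FileMeta>> = BTreeMap::new();
    for file in files {
        // Align the file's start timestamp to the beginning of its window.
        let bucket = file.start_sec.div_euclid(window_sec) * window_sec;
        buckets.entry(bucket).or_default().push(file);
    }
    buckets
}
```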
### Alternatives
Currently, GreptimeDB's storage engine [only supports two levels](https://github.com/GreptimeTeam/greptimedb/blob/43aefc5d74dfa73b7819cae77b7eb546d8534a41/src/storage/src/sst.rs#L32).
For level 0, we can start with a simple time-window based leveled compaction, which reads all SSTs in level 0,
aligns them to time windows with a fixed duration, and merges them with the level-1 SSTs in the same time window
to ensure there is only one sorted run in level 1.

View File

@@ -5,11 +5,19 @@ package greptime.v1;
import "greptime/v1/ddl.proto";
import "greptime/v1/column.proto";
message RequestHeader {
// The `catalog` that is selected to be used in this request.
string catalog = 1;
// The `schema` that is selected to be used in this request.
string schema = 2;
}
message GreptimeRequest {
RequestHeader header = 1;
oneof request {
InsertRequest insert = 1;
QueryRequest query = 2;
DdlRequest ddl = 3;
InsertRequest insert = 2;
QueryRequest query = 3;
DdlRequest ddl = 4;
}
}
@@ -21,8 +29,7 @@ message QueryRequest {
}
message InsertRequest {
string schema_name = 1;
string table_name = 2;
string table_name = 1;
// Data is represented here.
repeated Column columns = 3;
@@ -36,6 +43,10 @@ message InsertRequest {
uint32 region_number = 5;
}
message FlightDataExt {
uint32 affected_rows = 1;
message AffectedRows {
uint32 value = 1;
}
message FlightMetadata {
AffectedRows affected_rows = 1;
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::fmt::Debug;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
@@ -21,6 +22,8 @@ use datatypes::prelude::ConcreteDataType;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};
use crate::DeregisterTableRequest;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
@@ -96,18 +99,15 @@ pub enum Error {
#[snafu(display("Table `{}` already exists", table))]
TableExists { table: String, backtrace: Backtrace },
#[snafu(display("Table `{}` not exist", table))]
TableNotExist { table: String, backtrace: Backtrace },
#[snafu(display("Schema {} already exists", schema))]
SchemaExists {
schema: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to register table"))]
RegisterTable {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Operation {} not implemented yet", operation))]
Unimplemented {
operation: String,
@@ -142,6 +142,17 @@ pub enum Error {
source: table::error::Error,
},
#[snafu(display(
"Failed to deregister table, request: {:?}, source: {}",
request,
source
))]
DeregisterTable {
request: DeregisterTableRequest,
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display("Illegal catalog manager state: {}", msg))]
IllegalManagerState { backtrace: Backtrace, msg: String },
@@ -165,7 +176,10 @@ pub enum Error {
},
#[snafu(display("Failure during SchemaProvider operation, source: {}", source))]
SchemaProviderOperation { source: BoxedError },
SchemaProviderOperation {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec {
@@ -178,15 +192,6 @@ pub enum Error {
source: common_catalog::error::Error,
},
#[snafu(display("IO error occurred while fetching catalog info, source: {}", source))]
Io {
backtrace: Backtrace,
source: std::io::Error,
},
#[snafu(display("Local and remote catalog data are inconsistent, msg: {}", msg))]
CatalogStateInconsistent { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to perform metasrv operation, source: {}", source))]
MetaSrv {
#[snafu(backtrace)]
@@ -198,12 +203,6 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Catalog internal error: {}", source))]
Internal {
#[snafu(backtrace)]
source: BoxedError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -216,37 +215,34 @@ impl ErrorExt for Error {
| Error::TableNotFound { .. }
| Error::IllegalManagerState { .. }
| Error::CatalogNotFound { .. }
| Error::InvalidEntryType { .. }
| Error::CatalogStateInconsistent { .. } => StatusCode::Unexpected,
| Error::InvalidEntryType { .. } => StatusCode::Unexpected,
Error::SystemCatalog { .. }
| Error::EmptyValue { .. }
| Error::ValueDeserialize { .. }
| Error::Io { .. } => StatusCode::StorageUnavailable,
| Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,
Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
StatusCode::Internal
}
Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,
Error::ReadSystemCatalog { source, .. } => source.status_code(),
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
Error::TableNotExist { .. } => StatusCode::TableNotFound,
Error::SchemaExists { .. } => StatusCode::InvalidArguments,
Error::OpenSystemCatalog { source, .. }
| Error::CreateSystemCatalog { source, .. }
| Error::InsertCatalogRecord { source, .. }
| Error::OpenTable { source, .. }
| Error::CreateTable { source, .. } => source.status_code(),
| Error::CreateTable { source, .. }
| Error::DeregisterTable { source, .. } => source.status_code(),
Error::MetaSrv { source, .. } => source.status_code(),
Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
Error::InvalidTableSchema { source, .. } => source.status_code(),
Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
Error::Internal { source, .. } | Error::SchemaProviderOperation { source } => {
source.status_code()
}
Error::SchemaProviderOperation { source } => source.status_code(),
Error::Unimplemented { .. } => StatusCode::Unsupported,
}

View File

@@ -154,7 +154,7 @@ pub struct RenameTableRequest {
pub table_id: TableId,
}
#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct DeregisterTableRequest {
pub catalog: String,
pub schema: String,
@@ -167,11 +167,6 @@ pub struct RegisterSchemaRequest {
pub schema: String,
}
/// Formats table fully-qualified name
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
format!("{catalog}.{schema}.{table}")
}
pub trait CatalogProviderFactory {
fn create(&self, catalog_name: String) -> CatalogProviderRef;
}
@@ -198,8 +193,10 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{catalog_name}.{schema_name}.{table_name}, id: {table_id}",
table_info: common_catalog::format_full_table_name(
catalog_name,
schema_name,
table_name,
),
})?;
manager

View File

@@ -20,6 +20,7 @@ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_catalog::format_full_table_name;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::{error, info};
use datatypes::prelude::ScalarVector;
@@ -34,9 +35,9 @@ use table::table::TableIdProvider;
use table::TableRef;
use crate::error::{
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
};
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
@@ -45,10 +46,9 @@ use crate::system::{
};
use crate::tables::SystemCatalog;
use crate::{
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest, SchemaProvider,
SchemaProviderRef,
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
RegisterTableRequest, RenameTableRequest, SchemaProvider, SchemaProviderRef,
};
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -252,7 +252,6 @@ impl LocalCatalogManager {
schema_name: t.schema_name.clone(),
table_name: t.table_name.clone(),
table_id: t.table_id,
region_numbers: vec![0],
};
let option = self
@@ -419,11 +418,36 @@ impl CatalogManager for LocalCatalogManager {
.is_ok())
}
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
UnimplementedSnafu {
operation: "deregister table",
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
{
let started = *self.init_lock.lock().await;
ensure!(started, IllegalManagerStateSnafu { msg: "not started" });
}
{
let _ = self.register_lock.lock().await;
let DeregisterTableRequest {
catalog,
schema,
table_name,
} = &request;
let table_id = self
.catalogs
.table(catalog, schema, table_name)?
.with_context(|| error::TableNotExistSnafu {
table: format!("{catalog}.{schema}.{table_name}"),
})?
.table_info()
.ident
.table_id;
if !self.system.deregister_table(&request, table_id).await? {
return Ok(false);
}
self.catalogs.deregister_table(request).await
}
.fail()
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {

View File

@@ -20,13 +20,13 @@ use std::sync::{Arc, RwLock};
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::OptionExt;
use snafu::{ensure, OptionExt};
use table::metadata::TableId;
use table::table::TableIdProvider;
use table::TableRef;
use crate::error::{
CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
self, CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
};
use crate::schema::SchemaProvider;
use crate::{
@@ -250,6 +250,10 @@ impl CatalogProvider for MemoryCatalogProvider {
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>> {
let mut schemas = self.schemas.write().unwrap();
ensure!(
!schemas.contains_key(&name),
error::SchemaExistsSnafu { schema: &name }
);
Ok(schemas.insert(name, schema))
}

View File

@@ -324,7 +324,6 @@ impl RemoteCatalogManager {
schema_name: schema_name.clone(),
table_name: table_name.clone(),
table_id,
region_numbers: region_numbers.clone(),
};
match self
.engine

View File

@@ -25,29 +25,27 @@ use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::util;
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::prelude::{ConcreteDataType, ScalarVector, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableInfoRef};
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest};
use table::requests::{CreateTableRequest, DeleteRequest, InsertRequest, OpenTableRequest};
use table::{Table, TableRef};
use crate::error::{
self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
};
use crate::DeregisterTableRequest;
pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable {
table_info: TableInfoRef,
pub table: TableRef,
}
pub struct SystemCatalogTable(TableRef);
#[async_trait::async_trait]
impl Table for SystemCatalogTable {
@@ -56,25 +54,29 @@ impl Table for SystemCatalogTable {
}
fn schema(&self) -> SchemaRef {
self.table_info.meta.schema.clone()
self.0.schema()
}
async fn scan(
&self,
_projection: Option<&Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
projection: Option<&Vec<usize>>,
filters: &[Expr],
limit: Option<usize>,
) -> table::Result<PhysicalPlanRef> {
panic!("System catalog table does not support scan!")
self.0.scan(projection, filters, limit).await
}
/// Insert values into table.
async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
self.table.insert(request).await
self.0.insert(request).await
}
fn table_info(&self) -> TableInfoRef {
self.table_info.clone()
self.0.table_info()
}
async fn delete(&self, request: DeleteRequest) -> table::Result<usize> {
self.0.delete(request).await
}
}
@@ -85,7 +87,6 @@ impl SystemCatalogTable {
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
region_numbers: vec![0],
};
let schema = Arc::new(build_system_catalog_schema());
let ctx = EngineContext::default();
@@ -95,10 +96,7 @@ impl SystemCatalogTable {
.await
.context(OpenSystemCatalogSnafu)?
{
Ok(Self {
table_info: table.table_info(),
table,
})
Ok(Self(table))
} else {
// system catalog table is not yet created, try to create
let request = CreateTableRequest {
@@ -118,8 +116,7 @@ impl SystemCatalogTable {
.create_table(&ctx, request)
.await
.context(CreateSystemCatalogSnafu)?;
let table_info = table.table_info();
Ok(Self { table, table_info })
Ok(Self(table))
}
}
@@ -128,7 +125,6 @@ impl SystemCatalogTable {
let full_projection = None;
let ctx = SessionContext::new();
let scan = self
.table
.scan(full_projection, &[], None)
.await
.context(error::SystemCatalogTableScanSnafu)?;
@@ -208,6 +204,34 @@ pub fn build_table_insert_request(
)
}
pub(crate) fn build_table_deletion_request(
request: &DeregisterTableRequest,
table_id: TableId,
) -> DeleteRequest {
let table_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
DeleteRequest {
key_column_values: build_primary_key_columns(EntryType::Table, table_key.as_bytes()),
}
}
fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> {
let mut m = HashMap::with_capacity(3);
m.insert(
"entry_type".to_string(),
Arc::new(UInt8Vector::from_slice(&[entry_type as u8])) as _,
);
m.insert(
"key".to_string(),
Arc::new(BinaryVector::from_slice(&[key])) as _,
);
// Timestamp in key part is intentionally left to 0
m.insert(
"timestamp".to_string(),
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
);
m
}
pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
let full_schema_name = format!("{catalog_name}.{schema_name}");
build_insert_request(
@@ -220,22 +244,10 @@ pub fn build_schema_insert_request(catalog_name: String, schema_name: String) ->
}
pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) -> InsertRequest {
let primary_key_columns = build_primary_key_columns(entry_type, key);
let mut columns_values = HashMap::with_capacity(6);
columns_values.insert(
"entry_type".to_string(),
Arc::new(UInt8Vector::from_slice(&[entry_type as u8])) as _,
);
columns_values.insert(
"key".to_string(),
Arc::new(BinaryVector::from_slice(&[key])) as _,
);
// Timestamp in key part is intentionally left to 0
columns_values.insert(
"timestamp".to_string(),
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
);
columns_values.extend(primary_key_columns.into_iter());
columns_values.insert(
"value".to_string(),
@@ -258,6 +270,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
columns_values,
region_number: 0, // system catalog table has only one region
}
}
@@ -380,6 +393,8 @@ pub struct TableEntryValue {
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use datatypes::value::Value;
use log_store::NoopLogStore;
use mito::config::EngineConfig;
use mito::engine::MitoEngine;
@@ -500,4 +515,53 @@ mod tests {
assert_eq!(SYSTEM_CATALOG_NAME, info.catalog_name);
assert_eq!(INFORMATION_SCHEMA_NAME, info.schema_name);
}
#[tokio::test]
async fn test_system_catalog_table_records() {
let (_, table_engine) = prepare_table_engine().await;
let catalog_table = SystemCatalogTable::new(table_engine).await.unwrap();
let table_insertion = build_table_insert_request(
DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(),
"my_table".to_string(),
1,
);
let result = catalog_table.insert(table_insertion).await.unwrap();
assert_eq!(result, 1);
let records = catalog_table.records().await.unwrap();
let mut batches = RecordBatches::try_collect(records).await.unwrap().take();
assert_eq!(batches.len(), 1);
let batch = batches.remove(0);
assert_eq!(batch.num_rows(), 1);
let row = batch.rows().next().unwrap();
let Value::UInt8(entry_type) = row[0] else { unreachable!() };
let Value::Binary(key) = row[1].clone() else { unreachable!() };
let Value::Binary(value) = row[3].clone() else { unreachable!() };
let entry = decode_system_catalog(Some(entry_type), Some(&*key), Some(&*value)).unwrap();
let expected = Entry::Table(TableEntry {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "my_table".to_string(),
table_id: 1,
});
assert_eq!(entry, expected);
let table_deletion = build_table_deletion_request(
&DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "my_table".to_string(),
},
1,
);
let result = catalog_table.delete(table_deletion).await.unwrap();
assert_eq!(result, 1);
let records = catalog_table.records().await.unwrap();
let batches = RecordBatches::try_collect(records).await.unwrap().take();
assert_eq!(batches.len(), 0);
}
}

View File

@@ -38,9 +38,14 @@ use table::metadata::{TableId, TableInfoRef};
use table::table::scan::SimpleTableScan;
use table::{Table, TableRef};
use crate::error::{Error, InsertCatalogRecordSnafu};
use crate::system::{build_schema_insert_request, build_table_insert_request, SystemCatalogTable};
use crate::{CatalogListRef, CatalogProvider, SchemaProvider, SchemaProviderRef};
use crate::error::{self, Error, InsertCatalogRecordSnafu, Result as CatalogResult};
use crate::system::{
build_schema_insert_request, build_table_deletion_request, build_table_insert_request,
SystemCatalogTable,
};
use crate::{
CatalogListRef, CatalogProvider, DeregisterTableRequest, SchemaProvider, SchemaProviderRef,
};
/// Tables holds all tables created by user.
pub struct Tables {
@@ -279,6 +284,21 @@ impl SystemCatalog {
.context(InsertCatalogRecordSnafu)
}
pub(crate) async fn deregister_table(
&self,
request: &DeregisterTableRequest,
table_id: TableId,
) -> CatalogResult<bool> {
self.information_schema
.system
.delete(build_table_deletion_request(request, table_id))
.await
.map(|x| x == 1)
.with_context(|_| error::DeregisterTableSnafu {
request: request.clone(),
})
}
pub async fn register_schema(
&self,
catalog: String,

View File

@@ -9,6 +9,7 @@ api = { path = "../api" }
arrow-flight.workspace = true
async-stream.workspace = true
common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }

View File

@@ -65,13 +65,12 @@ async fn run() {
region_ids: vec![0],
};
let db = Database::new("create table", client.clone());
let db = Database::with_client(client);
let result = db.create(create_table_expr).await.unwrap();
event!(Level::INFO, "create table result: {:#?}", result);
let logical = mock_logical_plan();
event!(Level::INFO, "plan size: {:#?}", logical.len());
let db = Database::new("greptime", client);
let result = db.logical_plan(logical).await.unwrap();
event!(Level::INFO, "result: {:#?}", result);

View File

@@ -19,9 +19,10 @@ use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::{
AlterExpr, CreateTableExpr, DdlRequest, DropTableExpr, GreptimeRequest, InsertRequest,
QueryRequest,
QueryRequest, RequestHeader,
};
use arrow_flight::{FlightData, Ticket};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::*;
use common_grpc::flight::{flight_messages_to_recordbatches, FlightDecoder, FlightMessage};
use common_query::Output;
@@ -34,83 +35,89 @@ use crate::{error, Client, Result};
#[derive(Clone, Debug)]
pub struct Database {
name: String,
// The "catalog" and "schema" to be used in processing the requests at the server side.
// They are the "hint" or "context", just like how the "database" in "USE" statement is treated in MySQL.
// They will be carried in the request header.
catalog: String,
schema: String,
client: Client,
}
impl Database {
pub fn new(name: impl Into<String>, client: Client) -> Self {
pub fn new(catalog: impl Into<String>, schema: impl Into<String>, client: Client) -> Self {
Self {
name: name.into(),
catalog: catalog.into(),
schema: schema.into(),
client,
}
}
pub fn name(&self) -> &str {
&self.name
pub fn with_client(client: Client) -> Self {
Self::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client)
}
pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into();
}
pub async fn insert(&self, request: InsertRequest) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Insert(request)),
})
.await
self.do_get(Request::Insert(request)).await
}
pub async fn sql(&self, sql: &str) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql(sql.to_string())),
})),
})
self.do_get(Request::Query(QueryRequest {
query: Some(Query::Sql(sql.to_string())),
}))
.await
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)),
})),
})
self.do_get(Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)),
}))
.await
}
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)),
})),
})
self.do_get(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)),
}))
.await
}
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)),
})),
})
self.do_get(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)),
}))
.await
}
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
self.do_get(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::DropTable(expr)),
})),
})
self.do_get(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::DropTable(expr)),
}))
.await
}
async fn do_get(&self, request: GreptimeRequest) -> Result<Output> {
async fn do_get(&self, request: Request) -> Result<Output> {
let request = GreptimeRequest {
header: Some(RequestHeader {
catalog: self.catalog.clone(),
schema: self.schema.clone(),
}),
request: Some(request),
};
let request = Ticket {
ticket: request.encode_to_vec(),
};
let mut client = self.client.make_client()?;
// TODO(LFC): Streaming get flight data.
let flight_data: Vec<FlightData> = client
.mut_inner()
.do_get(Ticket {
ticket: request.encode_to_vec(),
})
.do_get(request)
.and_then(|response| response.into_inner().try_collect())
.await
.map_err(|e| {

View File

@@ -24,7 +24,7 @@ meta-srv = { path = "../meta-srv" }
serde.workspace = true
servers = { path = "../servers" }
snafu.workspace = true
tokio = { version = "1.18", features = ["full"] }
tokio.workspace = true
toml = "0.5"
[dev-dependencies]

View File

@@ -14,7 +14,7 @@
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
use datanode::datanode::{Datanode, DatanodeOptions, FileConfig, ObjectStoreConfig};
use meta_client::MetaClientOpts;
use servers::Mode;
use snafu::ResultExt;
@@ -128,7 +128,7 @@ impl TryFrom<StartCommand> for DatanodeOptions {
}
if let Some(data_dir) = cmd.data_dir {
opts.storage = ObjectStoreConfig::File { data_dir };
opts.storage = ObjectStoreConfig::File(FileConfig { data_dir });
}
if let Some(wal_dir) = cmd.wal_dir {
@@ -175,10 +175,11 @@ mod tests {
assert!(!tcp_nodelay);
match options.storage {
ObjectStoreConfig::File { data_dir } => {
ObjectStoreConfig::File(FileConfig { data_dir }) => {
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
}
ObjectStoreConfig::S3 { .. } => unreachable!(),
ObjectStoreConfig::Oss { .. } => unreachable!(),
};
}

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use clap::Parser;
use common_telemetry::{info, logging};
use common_telemetry::{info, logging, warn};
use meta_srv::bootstrap;
use meta_srv::metasrv::MetaSrvOptions;
use snafu::ResultExt;
@@ -58,6 +58,8 @@ struct StartCommand {
config_file: Option<String>,
#[clap(short, long)]
selector: Option<String>,
#[clap(long)]
use_memory_store: bool,
}
impl StartCommand {
@@ -100,6 +102,11 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
info!("Using {} selector", selector_type);
}
if cmd.use_memory_store {
warn!("Using memory store for Meta. Make sure you are in running tests.");
opts.use_memory_store = true;
}
Ok(opts)
}
}
@@ -118,6 +125,7 @@ mod tests {
store_addr: Some("127.0.0.1:2380".to_string()),
config_file: None,
selector: Some("LoadBased".to_string()),
use_memory_store: false,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -137,6 +145,7 @@ mod tests {
"{}/../../config/metasrv.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
use_memory_store: false,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);

View File

@@ -26,6 +26,7 @@ use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prometheus::PrometheusOptions;
use frontend::promql::PromqlOptions;
use frontend::Plugins;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
@@ -72,6 +73,7 @@ pub struct StandaloneOptions {
pub opentsdb_options: Option<OpentsdbOptions>,
pub influxdb_options: Option<InfluxdbOptions>,
pub prometheus_options: Option<PrometheusOptions>,
pub promql_options: Option<PromqlOptions>,
pub mode: Mode,
pub wal: WalConfig,
pub storage: ObjectStoreConfig,
@@ -88,6 +90,7 @@ impl Default for StandaloneOptions {
opentsdb_options: Some(OpentsdbOptions::default()),
influxdb_options: Some(InfluxdbOptions::default()),
prometheus_options: Some(PrometheusOptions::default()),
promql_options: Some(PromqlOptions::default()),
mode: Mode::Standalone,
wal: WalConfig::default(),
storage: ObjectStoreConfig::default(),
@@ -106,6 +109,7 @@ impl StandaloneOptions {
opentsdb_options: self.opentsdb_options,
influxdb_options: self.influxdb_options,
prometheus_options: self.prometheus_options,
promql_options: self.promql_options,
mode: self.mode,
meta_client_opts: None,
}
@@ -323,6 +327,10 @@ mod tests {
fe_opts.mysql_options.as_ref().unwrap().addr
);
assert_eq!(2, fe_opts.mysql_options.as_ref().unwrap().runtime_size);
assert_eq!(
None,
fe_opts.mysql_options.as_ref().unwrap().reject_no_database
);
assert!(fe_opts.influxdb_options.as_ref().unwrap().enable);
}

View File

@@ -18,4 +18,4 @@ snafu = { version = "0.7", features = ["backtraces"] }
[dev-dependencies]
chrono = "0.4"
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true

View File

@@ -14,3 +14,9 @@
pub mod consts;
pub mod error;
/// Formats the fully-qualified name of a table.
#[inline]
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
format!("{catalog}.{schema}.{table}")
}
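// Editor's note: a tiny usage sketch, not part of the original change; the catalog,
// schema and table names below are placeholders.
#[cfg(test)]
mod format_name_example {
    use super::format_full_table_name;

    #[test]
    fn formats_three_parts() {
        assert_eq!(
            "greptime.public.demo",
            format_full_table_name("greptime", "public", "demo")
        );
    }
}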

View File

@@ -5,5 +5,5 @@ edition.workspace = true
license.workspace = true
[dependencies]
strum = "0.24.1"
snafu = { version = "0.7", features = ["backtraces"] }
strum = { version = "0.24", features = ["std", "derive"] }

View File

@@ -88,6 +88,45 @@ impl crate::snafu::ErrorCompat for BoxedError {
}
}
/// Error type with plain error message
#[derive(Debug)]
pub struct PlainError {
msg: String,
status_code: StatusCode,
}
impl PlainError {
pub fn new(msg: String, status_code: StatusCode) -> Self {
Self { msg, status_code }
}
}
impl std::fmt::Display for PlainError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.msg)
}
}
impl std::error::Error for PlainError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
None
}
}
impl crate::ext::ErrorExt for PlainError {
fn status_code(&self) -> crate::status_code::StatusCode {
self.status_code
}
fn backtrace_opt(&self) -> Option<&crate::snafu::Backtrace> {
None
}
fn as_any(&self) -> &dyn std::any::Any {
self as _
}
}
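// Editor's note: a minimal usage sketch, not part of the original change. `PlainError`
// carries an ad-hoc message plus status code and is typically moved across crate
// boundaries via `BoxedError`; the message below is illustrative only.
fn plain_error_example() -> BoxedError {
    BoxedError::new(PlainError::new(
        "something went wrong".to_string(),
        StatusCode::Internal,
    ))
}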
#[cfg(test)]
mod tests {
use std::error::Error;

View File

@@ -29,16 +29,8 @@ use crate::error::{
/// Convert an [`AlterExpr`] to an [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<AlterTableRequest> {
let catalog_name = if expr.catalog_name.is_empty() {
None
} else {
Some(expr.catalog_name)
};
let schema_name = if expr.schema_name.is_empty() {
None
} else {
Some(expr.schema_name)
};
let catalog_name = expr.catalog_name;
let schema_name = expr.schema_name;
let kind = expr.kind.context(MissingFieldSnafu { field: "kind" })?;
match kind {
Kind::AddColumns(add_columns) => {
@@ -219,8 +211,8 @@ mod tests {
};
let alter_request = alter_expr_to_request(expr).unwrap();
assert_eq!(None, alter_request.catalog_name);
assert_eq!(None, alter_request.schema_name);
assert_eq!(alter_request.catalog_name, "");
assert_eq!(alter_request.schema_name, "");
assert_eq!("monitor".to_string(), alter_request.table_name);
let add_column = match alter_request.alter_kind {
AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
@@ -250,8 +242,8 @@ mod tests {
};
let alter_request = alter_expr_to_request(expr).unwrap();
assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
assert_eq!(alter_request.catalog_name, "test_catalog");
assert_eq!(alter_request.schema_name, "test_schema");
assert_eq!("monitor".to_string(), alter_request.table_name);
let mut drop_names = match alter_request.alter_kind {

View File

@@ -21,7 +21,6 @@ use api::v1::{
InsertRequest as GrpcInsertRequest,
};
use common_base::BitVec;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_time::timestamp::Timestamp;
use common_time::{Date, DateTime};
use datatypes::data_type::{ConcreteDataType, DataType};
@@ -31,7 +30,7 @@ use datatypes::value::Value;
use datatypes::vectors::MutableVector;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
use table::requests::InsertRequest;
use crate::error::{
ColumnDataTypeSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu,
@@ -81,20 +80,6 @@ pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option
}
}
/// Build an alter table request that adds new columns.
#[inline]
pub fn build_alter_table_request(
table_name: &str,
columns: Vec<AddColumnRequest>,
) -> AlterTableRequest {
AlterTableRequest {
catalog_name: None,
schema_name: None,
table_name: table_name.to_string(),
alter_kind: AlterKind::AddColumns { columns },
}
}
pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
let wrapper = ColumnDataTypeWrapper::try_new(column.datatype).context(ColumnDataTypeSnafu)?;
let column_datatype = wrapper.datatype();
@@ -281,9 +266,11 @@ pub fn build_create_expr_from_insertion(
Ok(expr)
}
pub fn to_table_insert_request(request: GrpcInsertRequest) -> Result<InsertRequest> {
let catalog_name = DEFAULT_CATALOG_NAME;
let schema_name = &request.schema_name;
pub fn to_table_insert_request(
catalog_name: &str,
schema_name: &str,
request: GrpcInsertRequest,
) -> Result<InsertRequest> {
let table_name = &request.table_name;
let row_count = request.row_count as usize;
@@ -319,6 +306,7 @@ pub fn to_table_insert_request(request: GrpcInsertRequest) -> Result<InsertReque
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
columns_values,
region_number: request.region_number,
})
}
@@ -452,6 +440,7 @@ fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {
mod tests {
use std::any::Any;
use std::sync::Arc;
use std::{assert_eq, unimplemented, vec};
use api::helper::ColumnDataTypeWrapper;
use api::v1::column::{self, SemanticType, Values};
@@ -617,13 +606,12 @@ mod tests {
fn test_to_table_insert_request() {
let (columns, row_count) = mock_insert_batch();
let request = GrpcInsertRequest {
schema_name: "public".to_string(),
table_name: "demo".to_string(),
columns,
row_count,
region_number: 0,
};
let insert_req = to_table_insert_request(request).unwrap();
let insert_req = to_table_insert_request("greptime", "public", request).unwrap();
assert_eq!("greptime", insert_req.catalog_name);
assert_eq!("public", insert_req.schema_name);

View File

@@ -17,6 +17,4 @@ pub mod error;
pub mod insert;
pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
pub use insert::{
build_alter_table_request, build_create_expr_from_insertion, column_to_vector, find_new_columns,
};
pub use insert::{build_create_expr_from_insertion, column_to_vector, find_new_columns};

View File

@@ -15,7 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::FlightDataExt;
use api::v1::{AffectedRows, FlightMetadata};
use arrow_flight::utils::{flight_data_from_arrow_batch, flight_data_to_arrow_batch};
use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
use common_recordbatch::{RecordBatch, RecordBatches};
@@ -66,11 +66,11 @@ impl FlightEncoder {
flight_batch
}
FlightMessage::AffectedRows(rows) => {
let ext_data = FlightDataExt {
affected_rows: rows as _,
let metadata = FlightMetadata {
affected_rows: Some(AffectedRows { value: rows as _ }),
}
.encode_to_vec();
FlightData::new(None, IpcMessage(build_none_flight_msg()), vec![], ext_data)
FlightData::new(None, IpcMessage(build_none_flight_msg()), metadata, vec![])
}
}
}
@@ -91,9 +91,15 @@ impl FlightDecoder {
})?;
match message.header_type() {
MessageHeader::NONE => {
let ext_data = FlightDataExt::decode(flight_data.data_body.as_slice())
let metadata = FlightMetadata::decode(flight_data.app_metadata.as_slice())
.context(DecodeFlightDataSnafu)?;
Ok(FlightMessage::AffectedRows(ext_data.affected_rows as _))
if let Some(AffectedRows { value }) = metadata.affected_rows {
return Ok(FlightMessage::AffectedRows(value as _));
}
InvalidFlightDataSnafu {
reason: "Expecting FlightMetadata have some meaningful content.",
}
.fail()
}
MessageHeader::Schema => {
let arrow_schema = ArrowSchema::try_from(&flight_data).map_err(|e| {

View File

@@ -0,0 +1,21 @@
[package]
name = "common-procedure"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
async-trait.workspace = true
common-error = { path = "../error" }
common-runtime = { path = "../runtime" }
common-telemetry = { path = "../telemetry" }
futures.workspace = true
object-store = { path = "../../object-store" }
serde.workspace = true
serde_json = "1.0"
snafu.workspace = true
tokio.workspace = true
uuid.workspace = true
[dev-dependencies]
tempdir = "0.3"

View File

@@ -0,0 +1,107 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::prelude::*;
use crate::procedure::ProcedureId;
/// Procedure error.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display(
"Failed to execute procedure due to external error, source: {}",
source
))]
External {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Loader {} is already registered", name))]
LoaderConflict { name: String, backtrace: Backtrace },
#[snafu(display("Failed to serialize to json, source: {}", source))]
ToJson {
source: serde_json::Error,
backtrace: Backtrace,
},
#[snafu(display("Procedure {} already exists", procedure_id))]
DuplicateProcedure {
procedure_id: ProcedureId,
backtrace: Backtrace,
},
#[snafu(display("Failed to put {}, source: {}", key, source))]
PutState {
key: String,
source: object_store::Error,
},
#[snafu(display("Failed to delete {}, source: {}", key, source))]
DeleteState {
key: String,
source: object_store::Error,
},
#[snafu(display("Failed to list {}, source: {}", path, source))]
ListState {
path: String,
source: object_store::Error,
},
#[snafu(display("Failed to read {}, source: {}", key, source))]
ReadState {
key: String,
source: object_store::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::External { source } => source.status_code(),
Error::ToJson { .. }
| Error::PutState { .. }
| Error::DeleteState { .. }
| Error::ListState { .. }
| Error::ReadState { .. } => StatusCode::Internal,
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
StatusCode::InvalidArguments
}
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl Error {
/// Creates a new [Error::External] error from source `err`.
pub fn external<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
Error::External {
source: BoxedError::new(err),
}
}
}
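// Editor's note: a usage sketch, not part of the original change. Any error type that
// implements `ErrorExt` can be wrapped into `Error::External`; `PlainError` is used here
// purely for illustration, and its module path (`common_error::ext`) is assumed.
fn external_error_example() -> Error {
    let cause = common_error::ext::PlainError::new(
        "underlying failure".to_string(),
        StatusCode::Internal,
    );
    Error::external(cause)
}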

View File

@@ -0,0 +1,29 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Common traits and structures for the procedure framework.
pub mod error;
#[allow(dead_code)]
mod local;
mod procedure;
// TODO(yingwen): Remove this attribute once ProcedureManager is implemented.
#[allow(dead_code)]
mod store;
pub use crate::error::{Error, Result};
pub use crate::procedure::{
BoxedProcedure, Context, LockKey, Procedure, ProcedureId, ProcedureManager,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status,
};

View File

@@ -0,0 +1,114 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod lock;
use std::sync::{Arc, Mutex};
use tokio::sync::Notify;
use crate::{LockKey, ProcedureId, ProcedureState};
/// Mutable metadata of a procedure during execution.
#[derive(Debug)]
struct ExecMeta {
/// Current procedure state.
state: ProcedureState,
}
/// Shared metadata of a procedure.
///
/// # Note
/// [Notify] is not a condition variable, so we can't guarantee the waiters are notified
/// if they didn't call `notified()` before we signal the notify. Therefore we
/// 1. use a dedicated notify for each condition, such as waiting for a lock or waiting
/// for children;
/// 2. always use `notify_one` and ensure there is only one waiter.
#[derive(Debug)]
struct ProcedureMeta {
/// Id of this procedure.
id: ProcedureId,
/// Notify to wait for a lock.
lock_notify: Notify,
/// Parent procedure id.
parent_id: Option<ProcedureId>,
/// Notify to wait for subprocedures.
child_notify: Notify,
/// Locks inherited from the parent procedure.
parent_locks: Vec<LockKey>,
/// Lock not in `parent_locks` but required by this procedure.
///
/// If the parent procedure already owns the lock that this procedure
/// needs, we set this field to `None`.
lock_key: Option<LockKey>,
/// Mutable status during execution.
exec_meta: Mutex<ExecMeta>,
}
impl ProcedureMeta {
/// Return all locks the procedure needs.
fn locks_needed(&self) -> Vec<LockKey> {
let num_locks = self.parent_locks.len() + if self.lock_key.is_some() { 1 } else { 0 };
let mut locks = Vec::with_capacity(num_locks);
locks.extend_from_slice(&self.parent_locks);
if let Some(key) = &self.lock_key {
locks.push(key.clone());
}
locks
}
}
/// Reference counted pointer to [ProcedureMeta].
type ProcedureMetaRef = Arc<ProcedureMeta>;
/// Create a new [ProcedureMeta] for test purpose.
#[cfg(test)]
fn procedure_meta_for_test() -> ProcedureMeta {
ProcedureMeta {
id: ProcedureId::random(),
lock_notify: Notify::new(),
parent_id: None,
child_notify: Notify::new(),
parent_locks: Vec::new(),
lock_key: None,
exec_meta: Mutex::new(ExecMeta {
state: ProcedureState::Running,
}),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_locks_needed() {
let mut meta = procedure_meta_for_test();
let locks = meta.locks_needed();
assert!(locks.is_empty());
let parent_locks = vec![LockKey::new("a"), LockKey::new("b")];
meta.parent_locks = parent_locks.clone();
let locks = meta.locks_needed();
assert_eq!(parent_locks, locks);
meta.lock_key = Some(LockKey::new("c"));
let locks = meta.locks_needed();
assert_eq!(
vec![LockKey::new("a"), LockKey::new("b"), LockKey::new("c")],
locks
);
}
}

View File

@@ -0,0 +1,214 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{HashMap, VecDeque};
use std::sync::RwLock;
use crate::local::ProcedureMetaRef;
use crate::ProcedureId;
/// A lock entry.
#[derive(Debug)]
struct Lock {
/// Current lock owner.
owner: ProcedureMetaRef,
/// Waiter procedures.
waiters: VecDeque<ProcedureMetaRef>,
}
impl Lock {
/// Returns a [Lock] with specific `owner` procedure.
fn from_owner(owner: ProcedureMetaRef) -> Lock {
Lock {
owner,
waiters: VecDeque::new(),
}
}
/// Try to pop a waiter from the waiter list, set it as owner
/// and wake up the new owner.
///
/// Returns false if there is no waiter in the waiter list.
fn switch_owner(&mut self) -> bool {
if let Some(waiter) = self.waiters.pop_front() {
// Update owner.
self.owner = waiter.clone();
// We need to use notify_one() since the waiter may have not called `notified()` yet.
waiter.lock_notify.notify_one();
true
} else {
false
}
}
}
/// Manages lock entries for procedures.
struct LockMap {
locks: RwLock<HashMap<String, Lock>>,
}
impl LockMap {
/// Returns a new [LockMap].
fn new() -> LockMap {
LockMap {
locks: RwLock::new(HashMap::new()),
}
}
/// Acquire lock by `key` for procedure with specific `meta`.
///
/// Though `meta` is cloneable, callers must ensure that only one `meta`
/// is acquiring and holding the lock at the same time.
///
/// # Panics
/// Panics if the procedure acquires the lock recursively.
async fn acquire_lock(&self, key: &str, meta: ProcedureMetaRef) {
assert!(!self.hold_lock(key, meta.id));
{
let mut locks = self.locks.write().unwrap();
if let Some(lock) = locks.get_mut(key) {
// Lock already exists, but we don't expect that a procedure acquires
// the same lock again.
assert_ne!(lock.owner.id, meta.id);
// Add this procedure to the waiter list. Here we don't check
// whether the procedure is already in the waiter list as we
// expect that a procedure should not wait for two locks simultaneously.
lock.waiters.push_back(meta.clone());
} else {
locks.insert(key.to_string(), Lock::from_owner(meta));
return;
}
}
// Wait for notify.
meta.lock_notify.notified().await;
assert!(self.hold_lock(key, meta.id));
}
/// Release lock by `key`.
fn release_lock(&self, key: &str, procedure_id: ProcedureId) {
let mut locks = self.locks.write().unwrap();
if let Some(lock) = locks.get_mut(key) {
if lock.owner.id != procedure_id {
// This is not the lock owner.
return;
}
if !lock.switch_owner() {
// Nobody waits for this lock, so we can remove the lock entry.
locks.remove(key);
}
}
}
/// Returns true if the procedure with specific `procedure_id` holds the
/// lock of `key`.
fn hold_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
let locks = self.locks.read().unwrap();
locks
.get(key)
.map(|lock| lock.owner.id == procedure_id)
.unwrap_or(false)
}
/// Returns true if the procedure is waiting for the lock `key`.
#[cfg(test)]
fn waiting_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
let locks = self.locks.read().unwrap();
locks
.get(key)
.map(|lock| lock.waiters.iter().any(|meta| meta.id == procedure_id))
.unwrap_or(false)
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::local;
#[test]
fn test_lock_no_waiter() {
let meta = Arc::new(local::procedure_meta_for_test());
let mut lock = Lock::from_owner(meta);
assert!(!lock.switch_owner());
}
#[tokio::test]
async fn test_lock_with_waiter() {
let owner = Arc::new(local::procedure_meta_for_test());
let mut lock = Lock::from_owner(owner);
let waiter = Arc::new(local::procedure_meta_for_test());
lock.waiters.push_back(waiter.clone());
assert!(lock.switch_owner());
assert!(lock.waiters.is_empty());
waiter.lock_notify.notified().await;
assert_eq!(lock.owner.id, waiter.id);
}
#[tokio::test]
async fn test_lock_map() {
let key = "hello";
let owner = Arc::new(local::procedure_meta_for_test());
let lock_map = Arc::new(LockMap::new());
lock_map.acquire_lock(key, owner.clone()).await;
let waiter = Arc::new(local::procedure_meta_for_test());
let waiter_id = waiter.id;
// The waiter releases the lock; this should not take effect.
lock_map.release_lock(key, waiter_id);
let lock_map2 = lock_map.clone();
let owner_id = owner.id;
let handle = tokio::spawn(async move {
assert!(lock_map2.hold_lock(key, owner_id));
assert!(!lock_map2.hold_lock(key, waiter_id));
// Waiter wait for lock.
lock_map2.acquire_lock(key, waiter.clone()).await;
assert!(lock_map2.hold_lock(key, waiter_id));
});
// Owner still holds the lock.
assert!(lock_map.hold_lock(key, owner_id));
// Wait until the waiter is waiting for the lock.
while !lock_map.waiting_lock(key, waiter_id) {
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
}
// Release lock
lock_map.release_lock(key, owner_id);
assert!(!lock_map.hold_lock(key, owner_id));
// Wait for task.
handle.await.unwrap();
// The waiter should hold the lock now.
assert!(lock_map.hold_lock(key, waiter_id));
lock_map.release_lock(key, waiter_id);
}
}

View File

@@ -0,0 +1,256 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::str::FromStr;
use std::sync::Arc;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use snafu::{ResultExt, Snafu};
use uuid::Uuid;
use crate::error::Result;
/// Procedure execution status.
pub enum Status {
/// The procedure is still executing.
Executing {
/// Whether the framework needs to persist the procedure.
persist: bool,
},
/// The procedure has suspended itself and is waiting for subprocedures.
Suspended {
subprocedures: Vec<ProcedureWithId>,
/// Whether the framework needs to persist the procedure.
persist: bool,
},
/// The procedure is done.
Done,
}
impl Status {
/// Returns a [Status::Executing] with given `persist` flag.
pub fn executing(persist: bool) -> Status {
Status::Executing { persist }
}
/// Returns `true` if the procedure needs the framework to persist its intermediate state.
pub fn need_persist(&self) -> bool {
// If the procedure is done, the framework doesn't need to persist the procedure
// anymore. It only needs to mark the procedure as committed.
match self {
Status::Executing { persist } | Status::Suspended { persist, .. } => *persist,
Status::Done => false,
}
}
}
/// Procedure execution context.
#[derive(Debug)]
pub struct Context {
/// Id of the procedure.
pub procedure_id: ProcedureId,
}
/// A `Procedure` represents an operation or a set of operations to be performed step-by-step.
#[async_trait]
pub trait Procedure: Send + Sync {
/// Type name of the procedure.
fn type_name(&self) -> &str;
/// Execute the procedure.
///
/// The implementation must be idempotent.
async fn execute(&mut self, ctx: &Context) -> Result<Status>;
/// Dump the state of the procedure to a string.
fn dump(&self) -> Result<String>;
/// Returns the [LockKey] if this procedure needs to acquire lock.
fn lock_key(&self) -> Option<LockKey>;
}
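// Editor's note: a hypothetical implementation of the trait above, not part of the
// original change, sketched only to illustrate the intended usage. The table-creation
// payload and lock key format are invented for illustration.
#[derive(Serialize, Deserialize)]
struct CreateTableData {
    table_name: String,
}

struct CreateTableProcedure {
    data: CreateTableData,
}

#[async_trait]
impl Procedure for CreateTableProcedure {
    fn type_name(&self) -> &str {
        "CreateTableProcedure"
    }

    async fn execute(&mut self, _ctx: &Context) -> Result<Status> {
        // A real procedure performs one idempotent step per call and reports whether more
        // steps follow; this sketch finishes in a single step.
        Ok(Status::Done)
    }

    fn dump(&self) -> Result<String> {
        // Persist the procedure's own state as JSON so a loader can rebuild it later.
        serde_json::to_string(&self.data).context(crate::error::ToJsonSnafu)
    }

    fn lock_key(&self) -> Option<LockKey> {
        Some(LockKey::new(format!("catalog.schema.{}", self.data.table_name)))
    }
}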
/// A key to identify the lock.
// We might hold multiple keys in this struct. When there are multiple keys, we need to sort
// the keys and lock them in order to avoid deadlock.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LockKey(String);
impl LockKey {
/// Returns a new [LockKey].
pub fn new(key: impl Into<String>) -> LockKey {
LockKey(key.into())
}
/// Returns the lock key.
pub fn key(&self) -> &str {
&self.0
}
}
/// Boxed [Procedure].
pub type BoxedProcedure = Box<dyn Procedure>;
/// A procedure with specific id.
pub struct ProcedureWithId {
/// Id of the procedure.
pub id: ProcedureId,
pub procedure: BoxedProcedure,
}
impl ProcedureWithId {
/// Returns a new [ProcedureWithId] that holds specific `procedure`
/// and a random [ProcedureId].
pub fn with_random_id(procedure: BoxedProcedure) -> ProcedureWithId {
ProcedureWithId {
id: ProcedureId::random(),
procedure,
}
}
}
#[derive(Debug, Snafu)]
pub struct ParseIdError {
source: uuid::Error,
}
/// Unique id for [Procedure].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ProcedureId(Uuid);
impl ProcedureId {
/// Returns a new unique [ProcedureId] randomly.
pub fn random() -> ProcedureId {
ProcedureId(Uuid::new_v4())
}
/// Parses id from string.
pub fn parse_str(input: &str) -> std::result::Result<ProcedureId, ParseIdError> {
Uuid::parse_str(input)
.map(ProcedureId)
.context(ParseIdSnafu)
}
}
impl fmt::Display for ProcedureId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl FromStr for ProcedureId {
type Err = ParseIdError;
fn from_str(s: &str) -> std::result::Result<ProcedureId, ParseIdError> {
ProcedureId::parse_str(s)
}
}
/// Loader to recover the [Procedure] instance from serialized data.
pub type BoxedProcedureLoader = Box<dyn Fn(&str) -> Result<BoxedProcedure> + Send>;
// TODO(yingwen): Find a way to return the error message if the procedure has failed.
/// State of a submitted procedure.
#[derive(Debug)]
pub enum ProcedureState {
/// The procedure is running.
Running,
/// The procedure is finished.
Done,
/// The procedure has failed and cannot proceed anymore.
Failed,
}
// TODO(yingwen): Shutdown
/// `ProcedureManager` executes [Procedure] submitted to it.
#[async_trait]
pub trait ProcedureManager: Send + Sync + 'static {
/// Registers loader for specific procedure type `name`.
fn register_loader(&self, name: &str, loader: BoxedProcedureLoader) -> Result<()>;
/// Submits a procedure to execute.
async fn submit(&self, procedure: ProcedureWithId) -> Result<()>;
/// Recovers unfinished procedures and reruns them.
///
/// Callers should ensure all loaders are registered.
async fn recover(&self) -> Result<()>;
/// Query the procedure state.
///
/// Returns `Ok(None)` if the procedure doesn't exist.
async fn procedure_state(&self, procedure_id: ProcedureId) -> Result<Option<ProcedureState>>;
}
/// Ref-counted pointer to the [ProcedureManager].
pub type ProcedureManagerRef = Arc<dyn ProcedureManager>;
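// Editor's note: a minimal usage sketch, not part of the original change, tying the
// pieces above together. `manager` is assumed to come from a concrete `ProcedureManager`
// implementation, and `CreateTableProcedure` / `CreateTableData` refer to the hypothetical
// procedure sketched after the `Procedure` trait.
fn load_create_table(json: &str) -> Result<BoxedProcedure> {
    // Reusing ToJsonSnafu for brevity; a real loader would likely add a dedicated
    // deserialization error variant.
    let data: CreateTableData =
        serde_json::from_str(json).context(crate::error::ToJsonSnafu)?;
    Ok(Box::new(CreateTableProcedure { data }))
}

async fn run_create_table(manager: ProcedureManagerRef) -> Result<()> {
    // Register a loader so unfinished procedures of this type can be recovered on restart.
    manager.register_loader("CreateTableProcedure", Box::new(load_create_table))?;

    // Submit a procedure with a freshly generated id, then query its state.
    let procedure = ProcedureWithId::with_random_id(Box::new(CreateTableProcedure {
        data: CreateTableData {
            table_name: "demo".to_string(),
        },
    }));
    let procedure_id = procedure.id;
    manager.submit(procedure).await?;
    let _state = manager.procedure_state(procedure_id).await?;
    Ok(())
}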
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_status() {
let status = Status::Executing { persist: false };
assert!(!status.need_persist());
let status = Status::Executing { persist: true };
assert!(status.need_persist());
let status = Status::Suspended {
subprocedures: Vec::new(),
persist: false,
};
assert!(!status.need_persist());
let status = Status::Suspended {
subprocedures: Vec::new(),
persist: true,
};
assert!(status.need_persist());
let status = Status::Done;
assert!(!status.need_persist());
}
#[test]
fn test_lock_key() {
let entity = "catalog.schema.my_table";
let key = LockKey::new(entity);
assert_eq!(entity, key.key());
}
#[test]
fn test_procedure_id() {
let id = ProcedureId::random();
let uuid_str = id.to_string();
assert_eq!(id.0.to_string(), uuid_str);
let parsed = ProcedureId::parse_str(&uuid_str).unwrap();
assert_eq!(id, parsed);
let parsed = uuid_str.parse().unwrap();
assert_eq!(id, parsed);
}
#[test]
fn test_procedure_id_serialization() {
let id = ProcedureId::random();
let json = serde_json::to_string(&id).unwrap();
assert_eq!(format!("\"{id}\""), json);
let parsed = serde_json::from_str(&json).unwrap();
assert_eq!(id, parsed);
}
}

View File

@@ -0,0 +1,399 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt;
use common_telemetry::logging;
use futures::TryStreamExt;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{Result, ToJsonSnafu};
use crate::store::state_store::StateStoreRef;
use crate::{BoxedProcedure, ProcedureId};
mod state_store;
/// Serialized data of a procedure.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
struct ProcedureMessage {
/// Type name of the procedure. The procedure framework also uses the type name to
/// find a loader to load the procedure.
type_name: String,
/// The data of the procedure.
data: String,
/// Parent procedure id.
parent_id: Option<ProcedureId>,
}
/// Procedure storage layer.
#[derive(Clone)]
struct ProcedureStore(StateStoreRef);
impl ProcedureStore {
/// Dump the `procedure` to the storage.
async fn store_procedure(
&self,
procedure_id: ProcedureId,
step: u32,
procedure: &BoxedProcedure,
parent_id: Option<ProcedureId>,
) -> Result<()> {
let type_name = procedure.type_name();
let data = procedure.dump()?;
let message = ProcedureMessage {
type_name: type_name.to_string(),
data,
parent_id,
};
let key = ParsedKey {
procedure_id,
step,
is_committed: false,
}
.to_string();
let value = serde_json::to_string(&message).context(ToJsonSnafu)?;
self.0.put(&key, value.into_bytes()).await?;
Ok(())
}
/// Write commit flag to the storage.
async fn commit_procedure(&self, procedure_id: ProcedureId, step: u32) -> Result<()> {
let key = ParsedKey {
procedure_id,
step,
is_committed: true,
}
.to_string();
self.0.put(&key, Vec::new()).await?;
Ok(())
}
/// Load uncommitted procedures from the storage.
async fn load_messages(&self) -> Result<HashMap<ProcedureId, ProcedureMessage>> {
let mut messages = HashMap::new();
// Track the key-value pair by procedure id.
let mut procedure_key_values: HashMap<_, (ParsedKey, Vec<u8>)> = HashMap::new();
// Scan all procedures.
let mut key_values = self.0.walk_top_down("/").await?;
while let Some((key, value)) = key_values.try_next().await? {
let Some(curr_key) = ParsedKey::parse_str(&key) else {
logging::warn!("Unknown key while loading procedures, key: {}", key);
continue;
};
if let Some(entry) = procedure_key_values.get_mut(&curr_key.procedure_id) {
if entry.0.step < curr_key.step {
entry.0 = curr_key;
entry.1 = value;
}
} else {
procedure_key_values.insert(curr_key.procedure_id, (curr_key, value));
}
}
for (procedure_id, (parsed_key, value)) in procedure_key_values {
if !parsed_key.is_committed {
let Some(message) = self.load_one_message(&parsed_key, &value) else {
// We don't abort the loading process and just ignore errors to ensure all remaining
// procedures are loaded.
continue;
};
messages.insert(procedure_id, message);
}
}
Ok(messages)
}
fn load_one_message(&self, key: &ParsedKey, value: &[u8]) -> Option<ProcedureMessage> {
serde_json::from_slice(value)
.map_err(|e| {
// `e` doesn't impl ErrorExt so we print it as a normal error.
logging::error!("Failed to parse value, key: {:?}, source: {}", key, e);
e
})
.ok()
}
}
/// Key used to refer to the procedure in the [ProcedureStore].
#[derive(Debug, PartialEq, Eq)]
struct ParsedKey {
procedure_id: ProcedureId,
step: u32,
is_committed: bool,
}
impl fmt::Display for ParsedKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}/{:010}.{}",
self.procedure_id,
self.step,
if self.is_committed { "commit" } else { "step" }
)
}
}
impl ParsedKey {
/// Try to parse the key from specific `input`.
fn parse_str(input: &str) -> Option<ParsedKey> {
let mut iter = input.rsplit('/');
let name = iter.next()?;
let id_str = iter.next()?;
let procedure_id = ProcedureId::parse_str(id_str).ok()?;
let mut parts = name.split('.');
let step_str = parts.next()?;
let suffix = parts.next()?;
let is_committed = match suffix {
"commit" => true,
"step" => false,
_ => return None,
};
let step = step_str.parse().ok()?;
Some(ParsedKey {
procedure_id,
step,
is_committed,
})
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use async_trait::async_trait;
use object_store::services::fs::Builder;
use object_store::ObjectStore;
use tempdir::TempDir;
use super::*;
use crate::store::state_store::ObjectStateStore;
use crate::{Context, LockKey, Procedure, Status};
#[test]
fn test_parsed_key() {
let procedure_id = ProcedureId::random();
let key = ParsedKey {
procedure_id,
step: 2,
is_committed: false,
};
assert_eq!(format!("{procedure_id}/0000000002.step"), key.to_string());
assert_eq!(key, ParsedKey::parse_str(&key.to_string()).unwrap());
let key = ParsedKey {
procedure_id,
step: 2,
is_committed: true,
};
assert_eq!(format!("{procedure_id}/0000000002.commit"), key.to_string());
assert_eq!(key, ParsedKey::parse_str(&key.to_string()).unwrap());
}
#[test]
fn test_parse_invalid_key() {
assert!(ParsedKey::parse_str("").is_none());
let procedure_id = ProcedureId::random();
let input = format!("{procedure_id}");
assert!(ParsedKey::parse_str(&input).is_none());
let input = format!("{procedure_id}/");
assert!(ParsedKey::parse_str(&input).is_none());
let input = format!("{procedure_id}/0000000003");
assert!(ParsedKey::parse_str(&input).is_none());
let input = format!("{procedure_id}/0000000003.");
assert!(ParsedKey::parse_str(&input).is_none());
let input = format!("{procedure_id}/0000000003.other");
assert!(ParsedKey::parse_str(&input).is_none());
assert!(ParsedKey::parse_str("12345/0000000003.step").is_none());
let input = format!("{procedure_id}-0000000003.commit");
assert!(ParsedKey::parse_str(&input).is_none());
}
#[test]
fn test_procedure_message() {
let mut message = ProcedureMessage {
type_name: "TestMessage".to_string(),
data: "no parent id".to_string(),
parent_id: None,
};
let json = serde_json::to_string(&message).unwrap();
assert_eq!(
json,
r#"{"type_name":"TestMessage","data":"no parent id","parent_id":null}"#
);
let procedure_id = ProcedureId::parse_str("9f805a1f-05f7-490c-9f91-bd56e3cc54c1").unwrap();
message.parent_id = Some(procedure_id);
let json = serde_json::to_string(&message).unwrap();
assert_eq!(
json,
r#"{"type_name":"TestMessage","data":"no parent id","parent_id":"9f805a1f-05f7-490c-9f91-bd56e3cc54c1"}"#
);
}
struct MockProcedure {
data: String,
}
impl MockProcedure {
fn new(data: impl Into<String>) -> MockProcedure {
MockProcedure { data: data.into() }
}
}
#[async_trait]
impl Procedure for MockProcedure {
fn type_name(&self) -> &str {
"MockProcedure"
}
async fn execute(&mut self, _ctx: &Context) -> Result<Status> {
unimplemented!()
}
fn dump(&self) -> Result<String> {
Ok(self.data.clone())
}
fn lock_key(&self) -> Option<LockKey> {
None
}
}
fn new_procedure_store(dir: &TempDir) -> ProcedureStore {
let store_dir = dir.path().to_str().unwrap();
let accessor = Builder::default().root(store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor);
let state_store = ObjectStateStore::new(object_store);
ProcedureStore(Arc::new(state_store))
}
#[tokio::test]
async fn test_store_procedure() {
let dir = TempDir::new("store_procedure").unwrap();
let store = new_procedure_store(&dir);
let procedure_id = ProcedureId::random();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("test store procedure"));
store
.store_procedure(procedure_id, 0, &procedure, None)
.await
.unwrap();
let messages = store.load_messages().await.unwrap();
assert_eq!(1, messages.len());
let msg = messages.get(&procedure_id).unwrap();
let expect = ProcedureMessage {
type_name: "MockProcedure".to_string(),
data: "test store procedure".to_string(),
parent_id: None,
};
assert_eq!(expect, *msg);
}
#[tokio::test]
async fn test_commit_procedure() {
let dir = TempDir::new("store_procedure").unwrap();
let store = new_procedure_store(&dir);
let procedure_id = ProcedureId::random();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("test store procedure"));
store
.store_procedure(procedure_id, 0, &procedure, None)
.await
.unwrap();
store.commit_procedure(procedure_id, 1).await.unwrap();
let messages = store.load_messages().await.unwrap();
assert!(messages.is_empty());
}
#[tokio::test]
async fn test_load_messages() {
let dir = TempDir::new("store_procedure").unwrap();
let store = new_procedure_store(&dir);
// store 3 steps
let id0 = ProcedureId::random();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id0-0"));
store
.store_procedure(id0, 0, &procedure, None)
.await
.unwrap();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id0-1"));
store
.store_procedure(id0, 1, &procedure, None)
.await
.unwrap();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id0-2"));
store
.store_procedure(id0, 2, &procedure, None)
.await
.unwrap();
// store 2 steps and then commit
let id1 = ProcedureId::random();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id1-0"));
store
.store_procedure(id1, 0, &procedure, None)
.await
.unwrap();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id1-1"));
store
.store_procedure(id1, 1, &procedure, None)
.await
.unwrap();
store.commit_procedure(id1, 2).await.unwrap();
// store 1 step
let id2 = ProcedureId::random();
let procedure: BoxedProcedure = Box::new(MockProcedure::new("id2-0"));
store
.store_procedure(id2, 0, &procedure, None)
.await
.unwrap();
let messages = store.load_messages().await.unwrap();
assert_eq!(2, messages.len());
let msg = messages.get(&id0).unwrap();
assert_eq!("id0-2", msg.data);
let msg = messages.get(&id2).unwrap();
assert_eq!("id2-0", msg.data);
}
}

View File

@@ -0,0 +1,188 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::pin::Pin;
use std::sync::Arc;
use async_trait::async_trait;
use futures::{Stream, TryStreamExt};
use object_store::{ObjectMode, ObjectStore};
use snafu::ResultExt;
use crate::error::{
DeleteStateSnafu, Error, ListStateSnafu, PutStateSnafu, ReadStateSnafu, Result,
};
/// Key value from state store.
type KeyValue = (String, Vec<u8>);
/// Stream that yields [KeyValue].
type KeyValueStream = Pin<Box<dyn Stream<Item = Result<KeyValue>> + Send>>;
/// Storage layer for persisting procedure's state.
#[async_trait]
pub(crate) trait StateStore: Send + Sync {
/// Puts `key` and `value` into the store.
async fn put(&self, key: &str, value: Vec<u8>) -> Result<()>;
/// Returns the key-value pairs under `path` in a top-down way.
///
/// # Note
/// - There is no guarantee about the order of the keys in the stream.
/// - The `path` must end with `/`.
async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream>;
/// Deletes key-value pairs by `keys`.
async fn delete(&self, keys: &[String]) -> Result<()>;
}
/// Reference counted pointer to [StateStore].
pub(crate) type StateStoreRef = Arc<dyn StateStore>;
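// Editor's note: a hypothetical in-memory `StateStore`, not part of the original change,
// sketched to show how the trait above can be satisfied for tests; it keeps everything in
// a `BTreeMap` guarded by a mutex.
struct MemStateStore {
    data: std::sync::Mutex<std::collections::BTreeMap<String, Vec<u8>>>,
}

#[async_trait]
impl StateStore for MemStateStore {
    async fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
        self.data.lock().unwrap().insert(key.to_string(), value);
        Ok(())
    }

    async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
        // Collect matching pairs eagerly, then expose them as a stream.
        let prefix = path.trim_start_matches('/').to_string();
        let pairs: Vec<Result<KeyValue>> = self
            .data
            .lock()
            .unwrap()
            .iter()
            .filter(|(key, _)| key.starts_with(prefix.as_str()))
            .map(|(key, value)| Ok((key.clone(), value.clone())))
            .collect();
        Ok(Box::pin(futures::stream::iter(pairs)))
    }

    async fn delete(&self, keys: &[String]) -> Result<()> {
        let mut data = self.data.lock().unwrap();
        for key in keys {
            data.remove(key);
        }
        Ok(())
    }
}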
/// [StateStore] based on [ObjectStore].
#[derive(Debug)]
pub(crate) struct ObjectStateStore {
store: ObjectStore,
}
impl ObjectStateStore {
/// Returns a new [ObjectStateStore] with specific `store`.
pub(crate) fn new(store: ObjectStore) -> ObjectStateStore {
ObjectStateStore { store }
}
}
#[async_trait]
impl StateStore for ObjectStateStore {
async fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
let object = self.store.object(key);
object.write(value).await.context(PutStateSnafu { key })
}
async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
let path_string = path.to_string();
let op = self.store.batch();
// Note that there is no guarantee about the order between files and dirs
// at the same level.
// See https://docs.rs/opendal/0.25.2/opendal/raw/struct.TopDownWalker.html#note
let stream = op
.walk_top_down(path)
.context(ListStateSnafu { path })?
.map_err(move |e| Error::ListState {
path: path_string.clone(),
source: e,
})
.try_filter_map(|entry| async move {
let key = entry.path();
let key_value = match entry.mode().await.context(ReadStateSnafu { key })? {
ObjectMode::FILE => {
let value = entry.read().await.context(ReadStateSnafu { key })?;
Some((key.to_string(), value))
}
ObjectMode::DIR | ObjectMode::Unknown => None,
};
Ok(key_value)
});
Ok(Box::pin(stream))
}
async fn delete(&self, keys: &[String]) -> Result<()> {
for key in keys {
let object = self.store.object(key);
object.delete().await.context(DeleteStateSnafu { key })?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use object_store::services::fs::Builder;
use tempdir::TempDir;
use super::*;
#[tokio::test]
async fn test_object_state_store() {
let dir = TempDir::new("state_store").unwrap();
let store_dir = dir.path().to_str().unwrap();
let accessor = Builder::default().root(store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor);
let state_store = ObjectStateStore::new(object_store);
let data: Vec<_> = state_store
.walk_top_down("/")
.await
.unwrap()
.try_collect()
.await
.unwrap();
assert!(data.is_empty());
state_store.put("a/1", b"v1".to_vec()).await.unwrap();
state_store.put("a/2", b"v2".to_vec()).await.unwrap();
state_store.put("b/1", b"v3".to_vec()).await.unwrap();
let mut data: Vec<_> = state_store
.walk_top_down("/")
.await
.unwrap()
.try_collect()
.await
.unwrap();
data.sort_unstable_by(|a, b| a.0.cmp(&b.0));
assert_eq!(
vec![
("a/1".to_string(), b"v1".to_vec()),
("a/2".to_string(), b"v2".to_vec()),
("b/1".to_string(), b"v3".to_vec())
],
data
);
let mut data: Vec<_> = state_store
.walk_top_down("a/")
.await
.unwrap()
.try_collect()
.await
.unwrap();
data.sort_unstable_by(|a, b| a.0.cmp(&b.0));
assert_eq!(
vec![
("a/1".to_string(), b"v1".to_vec()),
("a/2".to_string(), b"v2".to_vec()),
],
data
);
state_store
.delete(&["a/2".to_string(), "b/1".to_string()])
.await
.unwrap();
let mut data: Vec<_> = state_store
.walk_top_down("a/")
.await
.unwrap()
.try_collect()
.await
.unwrap();
data.sort_unstable_by(|a, b| a.0.cmp(&b.0));
assert_eq!(vec![("a/1".to_string(), b"v1".to_vec()),], data);
}
}

View File

@@ -18,4 +18,4 @@ statrs = "0.15"
[dev-dependencies]
common-base = { path = "../base" }
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true

View File

@@ -16,4 +16,4 @@ snafu = { version = "0.7", features = ["backtraces"] }
[dev-dependencies]
serde_json = "1.0"
tokio = { version = "1.18", features = ["full"] }
tokio.workspace = true

View File

@@ -25,4 +25,4 @@ version = "0.2"
[dev-dependencies]
datatypes = { path = "../../datatypes" }
table = { path = "../../table" }
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true

View File

@@ -6,6 +6,7 @@ license.workspace = true
[dependencies]
chrono = "0.4"
common-error = { path = "../error" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }

View File

@@ -12,8 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use chrono::ParseError;
use snafu::{Backtrace, Snafu};
use common_error::ext::ErrorExt;
use common_error::prelude::StatusCode;
use snafu::{Backtrace, ErrorCompat, Snafu};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
@@ -24,6 +28,24 @@ pub enum Error {
ParseTimestamp { raw: String, backtrace: Backtrace },
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ParseDateStr { .. } | Error::ParseTimestamp { .. } => {
StatusCode::InvalidArguments
}
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;
#[cfg(test)]

View File

@@ -18,8 +18,9 @@ use crate::Timestamp;
/// A half-open time range.
///
/// The time range contains all timestamp `ts` that `ts >= start` and `ts < end`. It is
/// empty if `start >= end`.
/// The range contains all values such that `value >= start` and `value < end`.
///
/// The range is empty iff `start == end == T::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct GenericRange<T> {
start: Option<T>,
@@ -28,8 +29,9 @@ pub struct GenericRange<T> {
impl<T> GenericRange<T>
where
T: Copy + PartialOrd,
T: Copy + PartialOrd + Default,
{
/// Computes the AND'ed range with other.
pub fn and(&self, other: &GenericRange<T>) -> GenericRange<T> {
let start = match (self.start(), other.start()) {
(Some(l), Some(r)) => {
@@ -57,7 +59,7 @@ where
(None, None) => None,
};
Self { start, end }
Self::from_optional(start, end)
}
/// Compute the OR'ed range of two ranges.
@@ -98,12 +100,44 @@ where
(None, None) => None,
};
Self { start, end }
Self::from_optional(start, end)
}
/// Checks if current range intersect with target.
pub fn intersects(&self, target: &GenericRange<T>) -> bool {
!self.and(target).is_empty()
}
/// Create an empty range.
pub fn empty() -> GenericRange<T> {
GenericRange {
start: Some(T::default()),
end: Some(T::default()),
}
}
/// Create a GenericRange from an optional start and end.
/// If both start and end are present and start >= end, an empty range (built from
/// `T`'s default value) is returned.
fn from_optional(start: Option<T>, end: Option<T>) -> GenericRange<T> {
match (start, end) {
(Some(start_val), Some(end_val)) => {
if start_val < end_val {
Self {
start: Some(start_val),
end: Some(end_val),
}
} else {
Self::empty()
}
}
(s, e) => Self { start: s, end: e },
}
}
}
impl<T> GenericRange<T> {
/// Creates a new range that contains timestamp in `[start, end)`.
/// Creates a new range that contains values in `[start, end)`.
///
/// Returns `None` if `start` > `end`.
pub fn new<U: PartialOrd + Into<T>>(start: U, end: U) -> Option<GenericRange<T>> {
@@ -115,14 +149,7 @@ impl<T> GenericRange<T> {
}
}
/// Given a value, creates an empty time range that `start == end == value`.
pub fn empty_with_value<U: Clone + Into<T>>(value: U) -> GenericRange<T> {
GenericRange {
start: Some(value.clone().into()),
end: Some(value.into()),
}
}
/// Return a range containing all possible values.
pub fn min_to_max() -> GenericRange<T> {
Self {
start: None,
@@ -143,11 +170,11 @@ impl<T> GenericRange<T> {
}
/// Returns true if `timestamp` is contained in the range.
pub fn contains<U: PartialOrd<T>>(&self, timestamp: &U) -> bool {
pub fn contains<U: PartialOrd<T>>(&self, target: &U) -> bool {
match (&self.start, &self.end) {
(Some(start), Some(end)) => *timestamp >= *start && *timestamp < *end,
(Some(start), None) => *timestamp >= *start,
(None, Some(end)) => *timestamp < *end,
(Some(start), Some(end)) => *target >= *start && *target < *end,
(Some(start), None) => *target >= *start,
(None, Some(end)) => *target < *end,
(None, None) => true,
}
}
@@ -158,19 +185,85 @@ impl<T: PartialOrd> GenericRange<T> {
#[inline]
pub fn is_empty(&self) -> bool {
match (&self.start, &self.end) {
(Some(start), Some(end)) => start >= end,
(Some(start), Some(end)) => start == end,
_ => false,
}
}
}
pub type TimestampRange = GenericRange<Timestamp>;
impl TimestampRange {
/// Create a TimestampRange with an optional inclusive end timestamp.
/// If the end timestamp is present and is not greater than the start timestamp, this method
/// returns an empty range.
/// ### Caveat
/// If the given end timestamp's value is `i64::MAX`, adding 1 to it (the end is inclusive)
/// would overflow. In that case this method does not try to convert the time unit of the end
/// timestamp; it simply returns `[start, INF)`. This exaggerates the range but does not
/// affect correctness.
pub fn new_inclusive(start: Option<Timestamp>, end: Option<Timestamp>) -> Self {
// check for emptiness
if let (Some(start_ts), Some(end_ts)) = (start, end) {
if start_ts >= end_ts {
return Self::empty();
}
}
let end = if let Some(end) = end {
end.value()
.checked_add(1)
.map(|v| Timestamp::new(v, end.unit()))
} else {
None
};
Self::from_optional(start, end)
}
/// Shortcut method to create a timestamp range with given start/end value and time unit.
pub fn with_unit(start: i64, end: i64, unit: TimeUnit) -> Option<Self> {
let start = Timestamp::new(start, unit);
let end = Timestamp::new(end, unit);
Self::new(start, end)
}
/// Create a range that contains only the given `ts`.
/// ### Notice
/// A left-closed right-open range cannot precisely represent a single-value range.
/// For simplicity, this implementation returns an approximate range `[ts, ts+1)` instead.
pub fn single(ts: Timestamp) -> Self {
let unit = ts.unit();
let start = Some(ts);
let end = ts.value().checked_add(1).map(|v| Timestamp::new(v, unit));
Self::from_optional(start, end)
}
/// Create a range `[start, INF)`.
/// ### Notice
/// A left-closed right-open range cannot properly represent a range with an exclusive start like `(start, ...)`.
/// You may resort to `[start+1, ...)` instead.
pub fn from_start(start: Timestamp) -> Self {
Self {
start: Some(start),
end: None,
}
}
/// Create a range `[-INF, end)`.
/// ### Notice
/// A left-closed right-open range cannot properly represent a range with an inclusive end like `[..., end]`.
/// If `inclusive` is true, this method returns `[-INF, end+1)` instead.
pub fn until_end(end: Timestamp, inclusive: bool) -> Self {
let end = if inclusive {
end.value()
.checked_add(1)
.map(|v| Timestamp::new(v, end.unit()))
} else {
Some(end)
};
Self { start: None, end }
}
}
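// Editor's note: a small usage sketch, not part of the original change, for the
// constructors above; `Timestamp` and `TimeUnit` are assumed to be in scope as in the
// tests below.
fn timestamp_range_examples() {
    // [0s, 10s), expressed in seconds.
    let r1 = TimestampRange::with_unit(0, 10, TimeUnit::Second).unwrap();
    // Inclusive end: [0s, 10s] is represented as [0s, 11s).
    let r2 = TimestampRange::new_inclusive(
        Some(Timestamp::new(0, TimeUnit::Second)),
        Some(Timestamp::new(10, TimeUnit::Second)),
    );
    assert!(r1.intersects(&r2));
    // A "single point" range is approximated as [5s, 6s).
    let single = TimestampRange::single(Timestamp::new(5, TimeUnit::Second));
    assert!(r1.contains(&Timestamp::new(5, TimeUnit::Second)));
    assert!(r1.intersects(&single));
}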
/// Time range in milliseconds.
@@ -197,9 +290,30 @@ mod tests {
assert_eq!(None, RangeMillis::new(1, 0));
let range = RangeMillis::empty_with_value(1024);
let range = RangeMillis::empty();
assert_eq!(range.start(), range.end());
assert_eq!(Some(TimestampMillis::new(1024)), *range.start());
assert_eq!(Some(TimestampMillis::new(0)), *range.start());
}
#[test]
fn test_timestamp_range_new_inclusive() {
let range = TimestampRange::new_inclusive(
Some(Timestamp::new(i64::MAX - 1, TimeUnit::Second)),
Some(Timestamp::new(i64::MAX, TimeUnit::Millisecond)),
);
assert!(range.is_empty());
let range = TimestampRange::new_inclusive(
Some(Timestamp::new(1, TimeUnit::Second)),
Some(Timestamp::new(1, TimeUnit::Millisecond)),
);
assert!(range.is_empty());
let range = TimestampRange::new_inclusive(
Some(Timestamp::new(1, TimeUnit::Second)),
Some(Timestamp::new(i64::MAX, TimeUnit::Millisecond)),
);
assert!(range.end.is_none());
}
#[test]
@@ -295,9 +409,7 @@ mod tests {
TimestampRange::min_to_max().or(&TimestampRange::min_to_max())
);
let empty = TimestampRange::empty_with_value(Timestamp::new_millisecond(1)).or(
&TimestampRange::empty_with_value(Timestamp::new_millisecond(2)),
);
let empty = TimestampRange::empty().or(&TimestampRange::empty());
assert!(empty.is_empty());
let t1 = TimestampRange::with_unit(-10, 0, TimeUnit::Second).unwrap();
@@ -310,4 +422,44 @@ mod tests {
let t1 = TimestampRange::with_unit(-10, 0, TimeUnit::Second).unwrap();
assert_eq!(t1, t1.or(&t1));
}
#[test]
fn test_intersect() {
let t1 = TimestampRange::with_unit(-10, 0, TimeUnit::Second).unwrap();
let t2 = TimestampRange::with_unit(-30, -20, TimeUnit::Second).unwrap();
assert!(!t1.intersects(&t2));
let t1 = TimestampRange::with_unit(10, 20, TimeUnit::Second).unwrap();
let t2 = TimestampRange::with_unit(0, 30, TimeUnit::Second).unwrap();
assert!(t1.intersects(&t2));
let t1 = TimestampRange::with_unit(-20, -10, TimeUnit::Second).unwrap();
let t2 = TimestampRange::with_unit(-10, 0, TimeUnit::Second).unwrap();
assert!(!t1.intersects(&t2));
let t1 = TimestampRange::with_unit(0, 1, TimeUnit::Second).unwrap();
let t2 = TimestampRange::with_unit(999, 1000, TimeUnit::Millisecond).unwrap();
assert!(t1.intersects(&t2));
let t1 = TimestampRange::with_unit(1, 2, TimeUnit::Second).unwrap();
let t2 = TimestampRange::with_unit(1000, 2000, TimeUnit::Millisecond).unwrap();
assert!(t1.intersects(&t2));
let t1 = TimestampRange::with_unit(0, 1, TimeUnit::Second).unwrap();
assert!(t1.intersects(&t1));
let t1 = TimestampRange::with_unit(0, 1, TimeUnit::Second).unwrap();
let t2 = TimestampRange::empty();
assert!(!t1.intersects(&t2));
// empty range does not intersect with empty range
let empty = TimestampRange::empty();
assert!(!empty.intersects(&empty));
// full range intersects with full range
let full = TimestampRange::min_to_max();
assert!(full.intersects(&full));
assert!(!full.intersects(&empty));
}
}

View File

@@ -352,7 +352,7 @@ mod tests {
}
}
/// Generate timestamp less than or equal to `threshold`
/// Generate timestamp less than or equal to `threshold`
fn gen_ts_le(threshold: &Timestamp) -> Timestamp {
let mut rng = rand::thread_rng();
let timestamp = rng.gen_range(i64::MIN..=threshold.value);

View File

@@ -17,7 +17,7 @@ use std::cmp::Ordering;
/// Unix timestamp in millisecond resolution.
///
/// Negative timestamps are allowed, representing instants before '1970-01-01T00:00:00'.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TimestampMillis(i64);
impl TimestampMillis {

View File

@@ -28,24 +28,43 @@ use crate::server::Services;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ObjectStoreConfig {
File {
data_dir: String,
},
S3 {
bucket: String,
root: String,
access_key_id: String,
secret_access_key: String,
endpoint: Option<String>,
region: Option<String>,
},
File(FileConfig),
S3(S3Config),
Oss(OssConfig),
}
#[derive(Debug, Clone, Serialize, Default, Deserialize)]
#[serde(default)]
pub struct FileConfig {
pub data_dir: String,
}
#[derive(Debug, Clone, Serialize, Default, Deserialize)]
#[serde(default)]
pub struct S3Config {
pub bucket: String,
pub root: String,
pub access_key_id: String,
pub secret_access_key: String,
pub endpoint: Option<String>,
pub region: Option<String>,
}
#[derive(Debug, Clone, Serialize, Default, Deserialize)]
#[serde(default)]
pub struct OssConfig {
pub bucket: String,
pub root: String,
pub access_key_id: String,
pub access_key_secret: String,
pub endpoint: String,
}
impl Default for ObjectStoreConfig {
fn default() -> Self {
ObjectStoreConfig::File {
ObjectStoreConfig::File(FileConfig {
data_dir: "/tmp/greptimedb/data/".to_string(),
}
})
}
}
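// Editor's note: a brief sketch, not part of the original change, of constructing the new
// tagged config variants; all values below are placeholders.
fn object_store_config_examples() -> (ObjectStoreConfig, ObjectStoreConfig) {
    let file = ObjectStoreConfig::File(FileConfig {
        data_dir: "/tmp/greptimedb/data/".to_string(),
    });
    let oss = ObjectStoreConfig::Oss(OssConfig {
        bucket: "my-bucket".to_string(),
        root: "/greptimedb".to_string(),
        access_key_id: "<access key id>".to_string(),
        access_key_secret: "<access key secret>".to_string(),
        endpoint: "<oss endpoint>".to_string(),
    });
    // With `#[serde(tag = "type")]`, serialized configs carry a `type` field ("File", "S3"
    // or "Oss") that selects the variant on deserialization.
    (file, oss)
}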

View File

@@ -80,7 +80,10 @@ pub enum Error {
},
#[snafu(display("Table not found: {}", table_name))]
TableNotFound { table_name: String },
TableNotFound {
table_name: String,
backtrace: Backtrace,
},
#[snafu(display("Column {} not found in table {}", column_name, table_name))]
ColumnNotFound {
@@ -315,6 +318,12 @@ pub enum Error {
column: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to describe schema for given statement, source: {}", source))]
DescribeStatement {
#[snafu(backtrace)]
source: query::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -322,7 +331,9 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ExecuteSql { source } => source.status_code(),
Error::ExecuteSql { source } | Error::DescribeStatement { source } => {
source.status_code()
}
Error::DecodeLogicalPlan { source } => source.status_code(),
Error::NewCatalog { source } => source.status_code(),
Error::FindTable { source, .. } => source.status_code(),
@@ -421,7 +432,7 @@ mod tests {
}
fn throw_catalog_error() -> catalog::error::Result<()> {
Err(catalog::error::Error::RegisterTable {
Err(catalog::error::Error::SchemaProviderOperation {
source: BoxedError::new(MockError::with_backtrace(StatusCode::Internal)),
})
}

View File

@@ -30,6 +30,7 @@ use mito::config::EngineConfig as TableEngineConfig;
use mito::engine::MitoEngine;
use object_store::layers::{LoggingLayer, MetricsLayer, RetryLayer, TracingLayer};
use object_store::services::fs::Builder as FsBuilder;
use object_store::services::oss::Builder as OSSBuilder;
use object_store::services::s3::Builder as S3Builder;
use object_store::{util, ObjectStore};
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
@@ -52,7 +53,7 @@ use crate::sql::SqlHandler;
mod grpc;
mod script;
mod sql;
pub mod sql;
pub(crate) type DefaultEngine = MitoEngine<EngineImpl<RaftEngineLogStore>>;
@@ -201,8 +202,9 @@ impl Instance {
pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
let object_store = match store_config {
ObjectStoreConfig::File { data_dir } => new_fs_object_store(data_dir).await,
ObjectStoreConfig::File { .. } => new_fs_object_store(store_config).await,
ObjectStoreConfig::S3 { .. } => new_s3_object_store(store_config).await,
ObjectStoreConfig::Oss { .. } => new_oss_object_store(store_config).await,
};
object_store.map(|object_store| {
@@ -214,41 +216,57 @@ pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result
})
}
pub(crate) async fn new_s3_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
let (root, secret_key, key_id, bucket, endpoint, region) = match store_config {
ObjectStoreConfig::S3 {
bucket,
root,
access_key_id,
secret_access_key,
endpoint,
region,
} => (
root,
secret_access_key,
access_key_id,
bucket,
endpoint,
region,
),
pub(crate) async fn new_oss_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
let oss_config = match store_config {
ObjectStoreConfig::Oss(config) => config,
_ => unreachable!(),
};
let root = util::normalize_dir(root);
info!("The s3 storage bucket is: {}, root is: {}", bucket, &root);
let root = util::normalize_dir(&oss_config.root);
info!(
"The oss storage bucket is: {}, root is: {}",
oss_config.bucket, &root
);
let mut builder = OSSBuilder::default();
let builder = builder
.root(&root)
.bucket(&oss_config.bucket)
.endpoint(&oss_config.endpoint)
.access_key_id(&oss_config.access_key_id)
.access_key_secret(&oss_config.access_key_secret);
let accessor = builder.build().with_context(|_| error::InitBackendSnafu {
config: store_config.clone(),
})?;
Ok(ObjectStore::new(accessor))
}
pub(crate) async fn new_s3_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
let s3_config = match store_config {
ObjectStoreConfig::S3(config) => config,
_ => unreachable!(),
};
let root = util::normalize_dir(&s3_config.root);
info!(
"The s3 storage bucket is: {}, root is: {}",
s3_config.bucket, &root
);
let mut builder = S3Builder::default();
let mut builder = builder
.root(&root)
.bucket(bucket)
.access_key_id(key_id)
.secret_access_key(secret_key);
.bucket(&s3_config.bucket)
.access_key_id(&s3_config.access_key_id)
.secret_access_key(&s3_config.secret_access_key);
if let Some(endpoint) = endpoint {
builder = builder.endpoint(endpoint);
if s3_config.endpoint.is_some() {
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
}
if let Some(region) = region {
builder = builder.region(region);
if s3_config.region.is_some() {
builder = builder.region(s3_config.region.as_ref().unwrap());
}
let accessor = builder.build().with_context(|_| error::InitBackendSnafu {
@@ -258,8 +276,12 @@ pub(crate) async fn new_s3_object_store(store_config: &ObjectStoreConfig) -> Res
Ok(ObjectStore::new(accessor))
}
pub(crate) async fn new_fs_object_store(data_dir: &str) -> Result<ObjectStore> {
let data_dir = util::normalize_dir(data_dir);
pub(crate) async fn new_fs_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
let file_config = match store_config {
ObjectStoreConfig::File(config) => config,
_ => unreachable!(),
};
let data_dir = util::normalize_dir(&file_config.data_dir);
fs::create_dir_all(path::Path::new(&data_dir))
.context(error::CreateDirSnafu { dir: &data_dir })?;
info!("The file storage directory is: {}", &data_dir);
@@ -271,7 +293,7 @@ pub(crate) async fn new_fs_object_store(data_dir: &str) -> Result<ObjectStore> {
.atomic_write_dir(&atomic_write_dir)
.build()
.context(error::InitBackendSnafu {
config: ObjectStoreConfig::File { data_dir },
config: store_config.clone(),
})?;
Ok(ObjectStore::new(accessor))

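A hedged usage sketch of the new dispatch; it assumes an async caller and reuses only the types and signatures shown above.

async fn build_default_store() -> Result<ObjectStore> {
    let config = ObjectStoreConfig::File(FileConfig {
        data_dir: "/tmp/greptimedb/data/".to_string(),
    });
    // new_object_store matches on the config variant and delegates to
    // new_fs_object_store / new_s3_object_store / new_oss_object_store.
    new_object_store(&config).await
}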
View File

@@ -15,14 +15,13 @@
use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_request::Request as GrpcRequest;
use api::v1::query_request::Query;
use api::v1::{CreateDatabaseExpr, DdlRequest, GreptimeRequest, InsertRequest};
use api::v1::{CreateDatabaseExpr, DdlRequest, InsertRequest};
use async_trait::async_trait;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_query::Output;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::QueryContext;
use session::context::QueryContextRef;
use snafu::prelude::*;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::requests::CreateDatabaseRequest;
@@ -50,26 +49,31 @@ impl Instance {
.context(ExecuteSqlSnafu)
}
async fn handle_query(&self, query: Query) -> Result<Output> {
async fn handle_query(&self, query: Query, ctx: QueryContextRef) -> Result<Output> {
Ok(match query {
Query::Sql(sql) => {
let stmt = QueryLanguageParser::parse_sql(&sql).context(ExecuteSqlSnafu)?;
self.execute_stmt(stmt, QueryContext::arc()).await?
self.execute_stmt(stmt, ctx).await?
}
Query::LogicalPlan(plan) => self.execute_logical(plan).await?,
})
}
pub async fn handle_insert(&self, request: InsertRequest) -> Result<Output> {
pub async fn handle_insert(
&self,
request: InsertRequest,
ctx: QueryContextRef,
) -> Result<Output> {
let catalog = &ctx.current_catalog();
let schema = &ctx.current_schema();
let table_name = &request.table_name.clone();
// TODO(LFC): InsertRequest should carry catalog name, too.
let table = self
.catalog_manager
.table(DEFAULT_CATALOG_NAME, &request.schema_name, table_name)
.table(catalog, schema, table_name)
.context(error::CatalogSnafu)?
.context(error::TableNotFoundSnafu { table_name })?;
let request = common_grpc_expr::insert::to_table_insert_request(request)
let request = common_grpc_expr::insert::to_table_insert_request(catalog, schema, request)
.context(error::InsertDataSnafu)?;
let affected_rows = table
@@ -96,19 +100,16 @@ impl Instance {
impl GrpcQueryHandler for Instance {
type Error = error::Error;
async fn do_query(&self, query: GreptimeRequest) -> Result<Output> {
let request = query.request.context(error::MissingRequiredFieldSnafu {
name: "GreptimeRequest.request",
})?;
async fn do_query(&self, request: GrpcRequest, ctx: QueryContextRef) -> Result<Output> {
match request {
GrpcRequest::Insert(request) => self.handle_insert(request).await,
GrpcRequest::Insert(request) => self.handle_insert(request, ctx).await,
GrpcRequest::Query(query_request) => {
let query = query_request
.query
.context(error::MissingRequiredFieldSnafu {
name: "QueryRequest.query",
})?;
self.handle_query(query).await
self.handle_query(query, ctx).await
}
GrpcRequest::Ddl(request) => self.handle_ddl(request).await,
}
@@ -124,6 +125,7 @@ mod test {
};
use common_recordbatch::RecordBatches;
use datatypes::prelude::*;
use session::context::QueryContext;
use super::*;
use crate::tests::test_util::{self, MockInstance};
@@ -133,67 +135,61 @@ mod test {
let instance = MockInstance::new("test_handle_ddl").await;
let instance = instance.inner();
let query = GreptimeRequest {
request: Some(GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
database_name: "my_database".to_string(),
create_if_not_exists: true,
})),
let query = GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
database_name: "my_database".to_string(),
create_if_not_exists: true,
})),
};
let output = instance.do_query(query).await.unwrap();
});
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
assert!(matches!(output, Output::AffectedRows(1)));
let query = GreptimeRequest {
request: Some(GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(CreateTableExpr {
catalog_name: "greptime".to_string(),
schema_name: "my_database".to_string(),
table_name: "my_table".to_string(),
desc: "blabla".to_string(),
column_defs: vec![
ColumnDef {
name: "a".to_string(),
datatype: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: vec![],
},
ColumnDef {
name: "ts".to_string(),
datatype: ColumnDataType::TimestampMillisecond as i32,
is_nullable: false,
default_constraint: vec![],
},
],
time_index: "ts".to_string(),
..Default::default()
})),
let query = GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(CreateTableExpr {
catalog_name: "greptime".to_string(),
schema_name: "my_database".to_string(),
table_name: "my_table".to_string(),
desc: "blabla".to_string(),
column_defs: vec![
ColumnDef {
name: "a".to_string(),
datatype: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: vec![],
},
ColumnDef {
name: "ts".to_string(),
datatype: ColumnDataType::TimestampMillisecond as i32,
is_nullable: false,
default_constraint: vec![],
},
],
time_index: "ts".to_string(),
..Default::default()
})),
};
let output = instance.do_query(query).await.unwrap();
});
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
let query = GreptimeRequest {
request: Some(GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(AlterExpr {
catalog_name: "greptime".to_string(),
schema_name: "my_database".to_string(),
table_name: "my_table".to_string(),
kind: Some(alter_expr::Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "b".to_string(),
datatype: ColumnDataType::Int32 as i32,
is_nullable: true,
default_constraint: vec![],
}),
is_key: true,
}],
})),
let query = GrpcRequest::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(AlterExpr {
catalog_name: "greptime".to_string(),
schema_name: "my_database".to_string(),
table_name: "my_table".to_string(),
kind: Some(alter_expr::Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "b".to_string(),
datatype: ColumnDataType::Int32 as i32,
is_nullable: true,
default_constraint: vec![],
}),
is_key: true,
}],
})),
})),
};
let output = instance.do_query(query).await.unwrap();
});
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
let output = instance
@@ -232,7 +228,6 @@ mod test {
.unwrap();
let insert = InsertRequest {
schema_name: "public".to_string(),
table_name: "demo".to_string(),
columns: vec![
Column {
@@ -274,10 +269,8 @@ mod test {
..Default::default()
};
let query = GreptimeRequest {
request: Some(GrpcRequest::Insert(insert)),
};
let output = instance.do_query(query).await.unwrap();
let query = GrpcRequest::Insert(insert);
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
assert!(matches!(output, Output::AffectedRows(3)));
let output = instance
@@ -305,27 +298,23 @@ mod test {
.await
.unwrap();
let query = GreptimeRequest {
request: Some(GrpcRequest::Query(QueryRequest {
query: Some(Query::Sql(
"INSERT INTO demo(host, cpu, memory, ts) VALUES \
let query = GrpcRequest::Query(QueryRequest {
query: Some(Query::Sql(
"INSERT INTO demo(host, cpu, memory, ts) VALUES \
('host1', 66.6, 1024, 1672201025000),\
('host2', 88.8, 333.3, 1672201026000)"
.to_string(),
)),
})),
};
let output = instance.do_query(query).await.unwrap();
.to_string(),
)),
});
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
assert!(matches!(output, Output::AffectedRows(2)));
let query = GreptimeRequest {
request: Some(GrpcRequest::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, host, cpu, memory FROM demo".to_string(),
)),
})),
};
let output = instance.do_query(query).await.unwrap();
let query = GrpcRequest::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, host, cpu, memory FROM demo".to_string(),
)),
});
let output = instance.do_query(query, QueryContext::arc()).await.unwrap();
let Output::Stream(stream) = output else { unreachable!() };
let recordbatch = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\

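The effect of threading `QueryContextRef` through `do_query` is that catalog and schema now come from the session rather than hard-coded defaults; a hedged sketch below, where `QueryContext::with` is the constructor shown in other hunks of this compare and `instance` is assumed to be the mock instance's inner handle as in the tests above.

let ctx = Arc::new(QueryContext::with("greptime", "my_database"));
let query = GrpcRequest::Query(QueryRequest {
    query: Some(Query::Sql("SELECT ts, host FROM my_table".to_string())),
});
// The same request executed with a different context targets a different schema.
let output = instance.do_query(query, ctx).await.unwrap();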
View File

@@ -22,13 +22,20 @@ use crate::metric;
#[async_trait]
impl ScriptHandler for Instance {
async fn insert_script(&self, name: &str, script: &str) -> servers::error::Result<()> {
async fn insert_script(
&self,
schema: &str,
name: &str,
script: &str,
) -> servers::error::Result<()> {
let _timer = timer!(metric::METRIC_HANDLE_SCRIPTS_ELAPSED);
self.script_executor.insert_script(name, script).await
self.script_executor
.insert_script(schema, name, script)
.await
}
async fn execute_script(&self, name: &str) -> servers::error::Result<Output> {
async fn execute_script(&self, schema: &str, name: &str) -> servers::error::Result<Output> {
let _timer = timer!(metric::METRIC_RUN_SCRIPT_ELAPSED);
self.script_executor.execute_script(name).await
self.script_executor.execute_script(schema, name).await
}
}

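A hedged call-shape sketch for the schema-aware script API; the script body is a placeholder and `instance` is assumed to implement `ScriptHandler`.

// Scripts are now namespaced by schema rather than being global.
instance
    .insert_script("my_database", "hello", "<python script body>")
    .await?;
let output = instance.execute_script("my_database", "hello").await?;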
View File

@@ -13,19 +13,22 @@
// limitations under the License.
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::BoxedError;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging::info;
use common_telemetry::timer;
use datatypes::schema::Schema;
use query::parser::{QueryLanguageParser, QueryStatement};
use servers::error as server_error;
use servers::promql::PromqlHandler;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::QueryContextRef;
use session::context::{QueryContext, QueryContextRef};
use snafu::prelude::*;
use sql::ast::ObjectName;
use sql::statements::statement::Statement;
use table::engine::TableReference;
use table::requests::CreateDatabaseRequest;
use table::requests::{CreateDatabaseRequest, DropTableRequest};
use crate::error::{self, BumpTableIdSnafu, ExecuteSqlSnafu, Result, TableIdProviderNotFoundSnafu};
use crate::instance::Instance;
@@ -89,12 +92,11 @@ impl Instance {
let name = c.name.clone();
let (catalog, schema, table) = table_idents_to_full_name(&name, query_ctx.clone())?;
let table_ref = TableReference::full(&catalog, &schema, &table);
let request = self.sql_handler.create_to_request(table_id, c, table_ref)?;
let request = self
.sql_handler
.create_to_request(table_id, c, &table_ref)?;
let table_id = request.id;
info!(
"Creating table, catalog: {:?}, schema: {:?}, table name: {:?}, table id: {}",
catalog, schema, table, table_id
);
info!("Creating table: {table_ref}, table id = {table_id}",);
self.sql_handler
.execute(SqlRequest::CreateTable(request), query_ctx)
@@ -110,7 +112,13 @@ impl Instance {
.await
}
QueryStatement::Sql(Statement::DropTable(drop_table)) => {
let req = self.sql_handler.drop_table_to_request(drop_table);
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(drop_table.table_name(), query_ctx.clone())?;
let req = DropTableRequest {
catalog_name,
schema_name,
table_name,
};
self.sql_handler
.execute(SqlRequest::DropTable(req), query_ctx)
.await
@@ -138,16 +146,14 @@ impl Instance {
QueryStatement::Sql(Statement::ShowCreateTable(_stmt)) => {
unimplemented!("SHOW CREATE TABLE is unimplemented yet");
}
QueryStatement::Sql(Statement::Use(schema)) => {
let catalog = query_ctx.current_catalog();
let catalog = catalog.as_deref().unwrap_or(DEFAULT_CATALOG_NAME);
QueryStatement::Sql(Statement::Use(ref schema)) => {
let catalog = &query_ctx.current_catalog();
ensure!(
self.is_valid_schema(catalog, &schema)?,
self.is_valid_schema(catalog, schema)?,
error::DatabaseNotFoundSnafu { catalog, schema }
);
query_ctx.set_current_schema(&schema);
query_ctx.set_current_schema(schema);
Ok(Output::RecordBatches(RecordBatches::empty()))
}
@@ -168,18 +174,18 @@ impl Instance {
// TODO(LFC): Refactor consideration: move this function to some helper mod,
// could be done together or after `TableReference`'s refactoring, when issue #559 is resolved.
/// Converts maybe fully-qualified table name (`<catalog>.<schema>.<table>`) to tuple.
fn table_idents_to_full_name(
pub fn table_idents_to_full_name(
obj_name: &ObjectName,
query_ctx: QueryContextRef,
) -> Result<(String, String, String)> {
match &obj_name.0[..] {
[table] => Ok((
DEFAULT_CATALOG_NAME.to_string(),
query_ctx.current_schema().unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string()),
query_ctx.current_catalog(),
query_ctx.current_schema(),
table.value.clone(),
)),
[schema, table] => Ok((
DEFAULT_CATALOG_NAME.to_string(),
query_ctx.current_catalog(),
schema.value.clone(),
table.value.clone(),
)),
@@ -207,6 +213,16 @@ impl SqlQueryHandler for Instance {
vec![result]
}
async fn do_promql_query(
&self,
query: &str,
query_ctx: QueryContextRef,
) -> Vec<Result<Output>> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);
let result = self.execute_promql(query, query_ctx).await;
vec![result]
}
async fn do_statement_query(
&self,
stmt: Statement,
@@ -217,6 +233,17 @@ impl SqlQueryHandler for Instance {
.await
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
self.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.map(Some)
.context(error::DescribeStatementSnafu)
} else {
Ok(None)
}
}
fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalog_manager
.schema(catalog, schema)
@@ -225,10 +252,22 @@ impl SqlQueryHandler for Instance {
}
}
#[async_trait]
impl PromqlHandler for Instance {
async fn do_query(&self, query: &str) -> server_error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);
self.execute_promql(query, QueryContext::arc())
.await
.map_err(BoxedError::new)
.with_context(|_| server_error::ExecuteQuerySnafu { query })
}
}
#[cfg(test)]
mod test {
use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use session::context::QueryContext;
use super::*;
@@ -244,10 +283,7 @@ mod test {
let bare = ObjectName(vec![my_table.into()]);
let using_schema = "foo";
let query_ctx = Arc::new(QueryContext::with(
DEFAULT_CATALOG_NAME.to_owned(),
using_schema.to_string(),
));
let query_ctx = Arc::new(QueryContext::with(DEFAULT_CATALOG_NAME, using_schema));
let empty_ctx = Arc::new(QueryContext::new());
assert_eq!(

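A hedged sketch of how the now-public `table_idents_to_full_name` resolves a bare table name from the session context, mirroring the test just above.

let query_ctx = Arc::new(QueryContext::with("greptime", "foo"));
let bare = ObjectName(vec!["my_table".into()]);
// A single identifier inherits catalog and schema from the context.
let (catalog, schema, table) = table_idents_to_full_name(&bare, query_ctx)?;
assert_eq!(
    (catalog.as_str(), schema.as_str(), table.as_str()),
    ("greptime", "foo", "my_table")
);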
View File

@@ -17,3 +17,4 @@
pub const METRIC_HANDLE_SQL_ELAPSED: &str = "datanode.handle_sql_elapsed";
pub const METRIC_HANDLE_SCRIPTS_ELAPSED: &str = "datanode.handle_scripts_elapsed";
pub const METRIC_RUN_SCRIPT_ELAPSED: &str = "datanode.run_script_elapsed";
pub const METRIC_HANDLE_PROMQL_ELAPSED: &str = "datanode.handle_promql_elapsed";

View File

@@ -71,10 +71,15 @@ mod python {
})
}
pub async fn insert_script(&self, name: &str, script: &str) -> servers::error::Result<()> {
pub async fn insert_script(
&self,
schema: &str,
name: &str,
script: &str,
) -> servers::error::Result<()> {
let _s = self
.script_manager
.insert_and_compile(name, script)
.insert_and_compile(schema, name, script)
.await
.map_err(|e| {
error!(e; "Instance failed to insert script");
@@ -85,9 +90,13 @@ mod python {
Ok(())
}
pub async fn execute_script(&self, name: &str) -> servers::error::Result<Output> {
pub async fn execute_script(
&self,
schema: &str,
name: &str,
) -> servers::error::Result<Output> {
self.script_manager
.execute(name)
.execute(schema, name)
.await
.map_err(|e| {
error!(e; "Instance failed to execute script");

View File

@@ -18,15 +18,18 @@ use std::sync::Arc;
use common_runtime::Builder as RuntimeBuilder;
use common_telemetry::tracing::log::info;
use servers::error::Error::InternalIo;
use servers::grpc::GrpcServer;
use servers::mysql::server::MysqlServer;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::query_handler::sql::ServerSqlQueryHandlerAdaptor;
use servers::server::Server;
use servers::tls::TlsOption;
use servers::Mode;
use snafu::ResultExt;
use crate::datanode::DatanodeOptions;
use crate::error::Error::StartServer;
use crate::error::{ParseAddrSnafu, Result, RuntimeResourceSnafu, StartServerSnafu};
use crate::instance::InstanceRef;
@@ -61,11 +64,24 @@ impl Services {
.build()
.context(RuntimeResourceSnafu)?,
);
let tls = TlsOption::default();
// The default TLS config yields None here;
// we should find a better way to handle this.
Some(MysqlServer::create_server(
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
mysql_io_runtime,
Default::default(),
None,
Arc::new(MysqlSpawnRef::new(
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
None,
)),
Arc::new(MysqlSpawnConfig::new(
tls.should_force_tls(),
tls.setup()
.map_err(|e| StartServer {
source: InternalIo { source: e },
})?
.map(Arc::new),
false,
)),
))
}
};

View File

@@ -26,7 +26,8 @@ use table::engine::{EngineContext, TableEngineRef, TableReference};
use table::requests::*;
use table::TableRef;
use crate::error::{ExecuteSqlSnafu, GetTableSnafu, Result, TableNotFoundSnafu};
use crate::error::{self, ExecuteSqlSnafu, GetTableSnafu, Result, TableNotFoundSnafu};
use crate::instance::sql::table_idents_to_full_name;
mod alter;
mod create;
@@ -81,17 +82,29 @@ impl SqlHandler {
show_databases(stmt, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
}
SqlRequest::ShowTables(stmt) => {
show_tables(stmt, self.catalog_manager.clone(), query_ctx).context(ExecuteSqlSnafu)
show_tables(stmt, self.catalog_manager.clone(), query_ctx.clone())
.context(ExecuteSqlSnafu)
}
SqlRequest::DescribeTable(stmt) => {
describe_table(stmt, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
let (catalog, schema, table) =
table_idents_to_full_name(stmt.name(), query_ctx.clone())?;
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.context(error::CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: stmt.name().to_string(),
})?;
describe_table(table).context(ExecuteSqlSnafu)
}
SqlRequest::Explain(stmt) => {
explain(stmt, self.query_engine.clone(), query_ctx.clone())
.await
.context(ExecuteSqlSnafu)
}
SqlRequest::Explain(stmt) => explain(stmt, self.query_engine.clone(), query_ctx)
.await
.context(ExecuteSqlSnafu),
};
if let Err(e) = &result {
error!("Datanode execution error: {:?}", e);
error!(e; "{query_ctx}");
}
result
}
@@ -117,7 +130,7 @@ mod tests {
use std::any::Any;
use std::sync::Arc;
use catalog::{CatalogList, SchemaProvider};
use catalog::{CatalogManager, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
@@ -137,7 +150,7 @@ mod tests {
use storage::EngineImpl;
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::{Table, TableRef};
use table::Table;
use tempdir::TempDir;
use super::*;
@@ -185,43 +198,6 @@ mod tests {
}
}
struct MockSchemaProvider;
impl SchemaProvider for MockSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn table_names(&self) -> catalog::error::Result<Vec<String>> {
Ok(vec!["demo".to_string()])
}
fn table(&self, name: &str) -> catalog::error::Result<Option<TableRef>> {
assert_eq!(name, "demo");
Ok(Some(Arc::new(DemoTable {})))
}
fn register_table(
&self,
_name: String,
_table: TableRef,
) -> catalog::error::Result<Option<TableRef>> {
unimplemented!();
}
fn rename_table(&self, _name: &str, _new_name: String) -> catalog::error::Result<TableRef> {
unimplemented!()
}
fn deregister_table(&self, _name: &str) -> catalog::error::Result<Option<TableRef>> {
unimplemented!();
}
fn table_exist(&self, name: &str) -> catalog::error::Result<bool> {
Ok(name == "demo")
}
}
#[tokio::test]
async fn test_statement_to_request() {
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
@@ -249,12 +225,16 @@ mod tests {
.await
.unwrap(),
);
let catalog_provider = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
catalog_provider
.register_schema(
DEFAULT_SCHEMA_NAME.to_string(),
Arc::new(MockSchemaProvider {}),
)
catalog_list.start().await.unwrap();
catalog_list
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "demo".to_string(),
table_id: 1,
table: Arc::new(DemoTable),
})
.await
.unwrap();
let factory = QueryEngineFactory::new(catalog_list.clone());

View File

@@ -13,7 +13,6 @@
// limitations under the License.
use catalog::RenameTableRequest;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use snafu::prelude::*;
use sql::statements::alter::{AlterTable, AlterTableOperation};
@@ -27,12 +26,10 @@ use crate::sql::SqlHandler;
impl SqlHandler {
pub(crate) async fn alter(&self, req: AlterTableRequest) -> Result<Output> {
let ctx = EngineContext {};
let catalog_name = req.catalog_name.as_deref().unwrap_or(DEFAULT_CATALOG_NAME);
let schema_name = req.schema_name.as_deref().unwrap_or(DEFAULT_SCHEMA_NAME);
let table_name = req.table_name.clone();
let table_ref = TableReference {
catalog: catalog_name,
schema: schema_name,
catalog: &req.catalog_name,
schema: &req.schema_name,
table: &table_name,
};
@@ -98,8 +95,8 @@ impl SqlHandler {
},
};
Ok(AlterTableRequest {
catalog_name: Some(table_ref.catalog.to_string()),
schema_name: Some(table_ref.schema.to_string()),
catalog_name: table_ref.catalog.to_string(),
schema_name: table_ref.schema.to_string(),
table_name: table_ref.table.to_string(),
alter_kind,
})
@@ -134,10 +131,13 @@ mod tests {
let handler = create_mock_sql_handler().await;
let alter_table = parse_sql("ALTER TABLE my_metric_1 ADD tagk_i STRING Null;");
let req = handler
.alter_to_request(alter_table, TableReference::bare("my_metric_1"))
.alter_to_request(
alter_table,
TableReference::full("greptime", "public", "my_metric_1"),
)
.unwrap();
assert_eq!(req.catalog_name, Some("greptime".to_string()));
assert_eq!(req.schema_name, Some("public".to_string()));
assert_eq!(req.catalog_name, "greptime");
assert_eq!(req.schema_name, "public");
assert_eq!(req.table_name, "my_metric_1");
let alter_kind = req.alter_kind;
@@ -159,10 +159,13 @@ mod tests {
let handler = create_mock_sql_handler().await;
let alter_table = parse_sql("ALTER TABLE test_table RENAME table_t;");
let req = handler
.alter_to_request(alter_table, TableReference::bare("test_table"))
.alter_to_request(
alter_table,
TableReference::full("greptime", "public", "test_table"),
)
.unwrap();
assert_eq!(req.catalog_name, Some("greptime".to_string()));
assert_eq!(req.schema_name, Some("public".to_string()));
assert_eq!(req.catalog_name, "greptime");
assert_eq!(req.schema_name, "public");
assert_eq!(req.table_name, "test_table");
let alter_kind = req.alter_kind;

View File

@@ -122,7 +122,7 @@ impl SqlHandler {
&self,
table_id: TableId,
stmt: CreateTable,
table_ref: TableReference,
table_ref: &TableReference,
) -> Result<CreateTableRequest> {
let mut ts_index = usize::MAX;
let mut primary_keys = vec![];
@@ -259,7 +259,7 @@ mod tests {
PRIMARY KEY(host)) engine=mito with(regions=1);"#,
);
let c = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.create_to_request(42, parsed_stmt, &TableReference::bare("demo_table"))
.unwrap();
assert_eq!("demo_table", c.table_name);
assert_eq!(42, c.id);
@@ -282,7 +282,7 @@ mod tests {
TIME INDEX (ts)) engine=mito with(regions=1);"#,
);
let c = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.create_to_request(42, parsed_stmt, &TableReference::bare("demo_table"))
.unwrap();
assert!(c.primary_key_indices.is_empty());
assert_eq!(c.schema.timestamp_index(), Some(1));
@@ -300,7 +300,7 @@ mod tests {
);
let error = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.create_to_request(42, parsed_stmt, &TableReference::bare("demo_table"))
.unwrap_err();
assert_matches!(error, Error::KeyColumnNotFound { .. });
}
@@ -322,7 +322,7 @@ mod tests {
let handler = create_mock_sql_handler().await;
let error = handler
.create_to_request(42, create_table, TableReference::full("c", "s", "demo"))
.create_to_request(42, create_table, &TableReference::full("c", "s", "demo"))
.unwrap_err();
assert_matches!(error, Error::InvalidPrimaryKey { .. });
}
@@ -344,7 +344,7 @@ mod tests {
let handler = create_mock_sql_handler().await;
let request = handler
.create_to_request(42, create_table, TableReference::full("c", "s", "demo"))
.create_to_request(42, create_table, &TableReference::full("c", "s", "demo"))
.unwrap();
assert_eq!(42, request.id);

View File

@@ -17,7 +17,6 @@ use common_error::prelude::BoxedError;
use common_query::Output;
use common_telemetry::info;
use snafu::ResultExt;
use sql::statements::drop::DropTable;
use table::engine::{EngineContext, TableReference};
use table::requests::DropTableRequest;
@@ -60,12 +59,4 @@ impl SqlHandler {
Ok(Output::AffectedRows(1))
}
pub fn drop_table_to_request(&self, drop_table: DropTable) -> DropTableRequest {
DropTableRequest {
catalog_name: drop_table.catalog_name,
schema_name: drop_table.schema_name,
table_name: drop_table.table_name,
}
}
}

View File

@@ -120,6 +120,7 @@ impl SqlHandler {
.into_iter()
.map(|(cs, mut b)| (cs.name.to_string(), b.to_vector()))
.collect(),
region_number: 0,
}))
}
}

View File

@@ -344,6 +344,7 @@ pub async fn test_execute_create() {
#[tokio::test]
async fn test_rename_table() {
common_telemetry::init_default_ut_logging();
let instance = MockInstance::new("test_rename_table_local").await;
let output = execute_sql(&instance, "create database db").await;
@@ -647,10 +648,7 @@ async fn try_execute_sql_in_db(
sql: &str,
db: &str,
) -> Result<Output, crate::error::Error> {
let query_ctx = Arc::new(QueryContext::with(
DEFAULT_CATALOG_NAME.to_owned(),
db.to_string(),
));
let query_ctx = Arc::new(QueryContext::with(DEFAULT_CATALOG_NAME, db));
instance.inner().execute_sql(sql, query_ctx).await
}

View File

@@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use session::context::QueryContext;
@@ -23,10 +20,7 @@ use crate::tests::test_util::{check_output_stream, setup_test_instance};
#[tokio::test(flavor = "multi_thread")]
async fn sql_insert_promql_query_ceil() {
let instance = setup_test_instance("test_execute_insert").await;
let query_ctx = Arc::new(QueryContext::with(
DEFAULT_CATALOG_NAME.to_owned(),
DEFAULT_SCHEMA_NAME.to_owned(),
));
let query_ctx = QueryContext::arc();
let put_output = instance
.inner()
.execute_sql(

View File

@@ -29,7 +29,7 @@ use table::engine::{EngineContext, TableEngineRef};
use table::requests::CreateTableRequest;
use tempdir::TempDir;
use crate::datanode::{DatanodeOptions, ObjectStoreConfig, WalConfig};
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
use crate::error::{CreateTableSnafu, Result};
use crate::instance::Instance;
use crate::sql::SqlHandler;
@@ -67,9 +67,9 @@ fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard)
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File {
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
},
}),
mode: Mode::Standalone,
..Default::default()
};

View File

@@ -14,6 +14,7 @@ arrow-schema.workspace = true
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
common-telemetry = { path = "../common/telemetry" }
datafusion-common.workspace = true
enum_dispatch = "0.3"
num = "0.4"

View File

@@ -15,7 +15,7 @@
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use common_base::bytes::StringBytes;
use common_base::bytes::Bytes;
use serde::{Deserialize, Serialize};
use crate::data_type::{DataType, DataTypeRef};
@@ -43,7 +43,7 @@ impl DataType for BinaryType {
}
fn default_value(&self) -> Value {
StringBytes::default().into()
Bytes::default().into()
}
fn as_arrow_type(&self) -> ArrowDataType {

View File

@@ -14,9 +14,11 @@
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use arrow::datatypes::{DataType as ArrowDataType, Field};
use common_base::bytes::{Bytes, StringBytes};
use common_telemetry::logging;
use common_time::date::Date;
use common_time::datetime::DateTime;
use common_time::timestamp::{TimeUnit, Timestamp};
@@ -25,7 +27,8 @@ pub use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use crate::error::{self, Result};
use crate::error;
use crate::error::Result;
use crate::prelude::*;
use crate::type_id::LogicalTypeId;
use crate::types::ListType;
@@ -286,6 +289,26 @@ fn timestamp_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue {
}
}
/// Convert [ScalarValue] to [Timestamp].
/// Return `None` if given scalar value cannot be converted to a valid timestamp.
pub fn scalar_value_to_timestamp(scalar: &ScalarValue) -> Option<Timestamp> {
match scalar {
ScalarValue::Int64(val) => val.map(Timestamp::new_millisecond),
ScalarValue::Utf8(Some(s)) => match Timestamp::from_str(s) {
Ok(t) => Some(t),
Err(e) => {
logging::error!(e;"Failed to convert string literal {s} to timestamp");
None
}
},
ScalarValue::TimestampSecond(v, _) => v.map(Timestamp::new_second),
ScalarValue::TimestampMillisecond(v, _) => v.map(Timestamp::new_millisecond),
ScalarValue::TimestampMicrosecond(v, _) => v.map(Timestamp::new_microsecond),
ScalarValue::TimestampNanosecond(v, _) => v.map(Timestamp::new_nanosecond),
_ => None,
}
}
macro_rules! impl_ord_for_value_like {
($Type: ident, $left: ident, $right: ident) => {
if $left.is_null() && !$right.is_null() {

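A hedged sketch of the new conversion helper in use; the `ScalarValue` variants come from DataFusion, as in the match arms above.

assert_eq!(
    scalar_value_to_timestamp(&ScalarValue::Int64(Some(1_000))),
    Some(Timestamp::new_millisecond(1_000))
);
// Strings go through Timestamp::from_str; unparsable strings and
// unsupported variants yield None.
assert_eq!(scalar_value_to_timestamp(&ScalarValue::Boolean(Some(true))), None);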
View File

@@ -32,6 +32,7 @@ itertools = "0.10"
meta-client = { path = "../meta-client" }
moka = { version = "0.9", features = ["future"] }
openmetrics-parser = "0.4"
partition = { path = "../partition" }
prost.workspace = true
query = { path = "../query" }
rustls = "0.20"

View File

@@ -29,29 +29,29 @@ use catalog::{
};
use futures::StreamExt;
use meta_client::rpc::TableName;
use partition::manager::PartitionRuleManagerRef;
use snafu::prelude::*;
use table::TableRef;
use crate::datanode::DatanodeClients;
use crate::table::route::TableRoutes;
use crate::table::DistTable;
#[derive(Clone)]
pub struct FrontendCatalogManager {
backend: KvBackendRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
}
impl FrontendCatalogManager {
pub(crate) fn new(
backend: KvBackendRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
) -> Self {
Self {
backend,
table_routes,
partition_manager,
datanode_clients,
}
}
@@ -61,8 +61,8 @@ impl FrontendCatalogManager {
}
#[cfg(test)]
pub(crate) fn table_routes(&self) -> Arc<TableRoutes> {
self.table_routes.clone()
pub(crate) fn partition_manager(&self) -> PartitionRuleManagerRef {
self.partition_manager.clone()
}
#[cfg(test)]
@@ -173,7 +173,7 @@ impl CatalogList for FrontendCatalogManager {
Ok(Some(Arc::new(FrontendCatalogProvider {
catalog_name: name.to_string(),
backend: self.backend.clone(),
table_routes: self.table_routes.clone(),
partition_manager: self.partition_manager.clone(),
datanode_clients: self.datanode_clients.clone(),
})))
} else {
@@ -185,7 +185,7 @@ impl CatalogList for FrontendCatalogManager {
pub struct FrontendCatalogProvider {
catalog_name: String,
backend: KvBackendRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
}
@@ -232,7 +232,7 @@ impl CatalogProvider for FrontendCatalogProvider {
catalog_name: self.catalog_name.clone(),
schema_name: name.to_string(),
backend: self.backend.clone(),
table_routes: self.table_routes.clone(),
partition_manager: self.partition_manager.clone(),
datanode_clients: self.datanode_clients.clone(),
})))
} else {
@@ -245,7 +245,7 @@ pub struct FrontendSchemaProvider {
catalog_name: String,
schema_name: String,
backend: KvBackendRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
}
@@ -286,7 +286,7 @@ impl SchemaProvider for FrontendSchemaProvider {
};
let backend = self.backend.clone();
let table_routes = self.table_routes.clone();
let partition_manager = self.partition_manager.clone();
let datanode_clients = self.datanode_clients.clone();
let table_name = TableName::new(&self.catalog_name, &self.schema_name, name);
let result: Result<Option<TableRef>, catalog::error::Error> = std::thread::spawn(|| {
@@ -306,7 +306,7 @@ impl SchemaProvider for FrontendSchemaProvider {
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
),
table_routes,
partition_manager,
datanode_clients,
backend,
));

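A hedged construction sketch of the new wiring; it mirrors the frontend `Instance` hunk later in this compare, and `meta_client`, `meta_backend`, and `datanode_clients` are assumed to be in scope.

let table_routes = Arc::new(TableRoutes::new(meta_client.clone()));
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
let catalog_manager = Arc::new(FrontendCatalogManager::new(
    meta_backend,
    partition_manager,
    datanode_clients.clone(),
));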
View File

@@ -15,14 +15,17 @@
use std::any::Any;
use common_error::prelude::*;
use common_query::logical_plan::Expr;
use datafusion_common::ScalarValue;
use datatypes::prelude::Value;
use store_api::storage::RegionId;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("{source}"))]
External {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to request Datanode, source: {}", source))]
RequestDatanode {
#[snafu(backtrace)]
@@ -99,35 +102,6 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display(
"Failed to convert DataFusion's ScalarValue: {:?}, source: {}",
value,
source
))]
ConvertScalarValue {
value: ScalarValue,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Failed to find partition column: {}", column_name))]
FindPartitionColumn {
column_name: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to find region, reason: {}", reason))]
FindRegion {
reason: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to find regions by filters: {:?}", filters))]
FindRegions {
filters: Vec<Expr>,
backtrace: Backtrace,
},
#[snafu(display("Failed to find Datanode by region: {:?}", region))]
FindDatanode {
region: RegionId,
@@ -140,13 +114,6 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Expect {} region keys, actual {}", expect, actual))]
RegionKeysSize {
expect: usize,
actual: usize,
backtrace: Backtrace,
},
#[snafu(display("Table not found: {}", table_name))]
TableNotFound {
table_name: String,
@@ -201,16 +168,17 @@ pub enum Error {
source: meta_client::error::Error,
},
#[snafu(display("Failed to get cache, error: {}", err_msg))]
GetCache {
err_msg: String,
#[snafu(display("Failed to create table route for table {}", table_name))]
CreateTableRoute {
table_name: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to find table routes for table {}", table_name))]
FindTableRoutes {
#[snafu(display("Failed to find table route for table {}", table_name))]
FindTableRoute {
table_name: String,
backtrace: Backtrace,
#[snafu(backtrace)]
source: partition::error::Error,
},
#[snafu(display("Failed to create AlterExpr from Alter statement, source: {}", source))]
@@ -252,24 +220,12 @@ pub enum Error {
source: table::error::Error,
},
#[snafu(display("Failed to find region routes for table {}", table_name))]
FindRegionRoutes {
#[snafu(display("Failed to find region route for table {}", table_name))]
FindRegionRoute {
table_name: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to serialize value to json, source: {}", source))]
SerializeJson {
source: serde_json::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to deserialize value from json, source: {}", source))]
DeserializeJson {
source: serde_json::Error,
backtrace: Backtrace,
},
#[snafu(display(
"Failed to find leader peer for region {} in table {}",
region,
@@ -281,28 +237,6 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display(
"Failed to find partition info for region {} in table {}",
region,
table_name
))]
FindRegionPartition {
region: u64,
table_name: String,
backtrace: Backtrace,
},
#[snafu(display(
"Illegal table routes data for table {}, error message: {}",
table_name,
err_msg
))]
IllegalTableRoutesData {
table_name: String,
err_msg: String,
backtrace: Backtrace,
},
#[snafu(display("Cannot find primary key column by name: {}", msg))]
PrimaryKeyNotFound { msg: String, backtrace: Backtrace },
@@ -345,17 +279,6 @@ pub enum Error {
source: substrait::error::Error,
},
#[snafu(display(
"Failed to build a vector from values, value: {}, source: {}",
value,
source
))]
BuildVector {
value: Value,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Failed to found context value: {}", key))]
ContextValueNotFound { key: String, backtrace: Backtrace },
@@ -398,6 +321,35 @@ pub enum Error {
column: String,
backtrace: Backtrace,
},
#[snafu(display("SQL execution intercepted, source: {}", source))]
SqlExecIntercepted {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display(
"Failed to deserialize partition in meta to partition def, source: {}",
source
))]
DeserializePartition {
#[snafu(backtrace)]
source: partition::error::Error,
},
// TODO(ruihang): merge all query execution error kinds
#[snafu(display("failed to execute PromQL query {}, source: {}", query, source))]
ExecutePromql {
query: String,
#[snafu(backtrace)]
source: servers::error::Error,
},
#[snafu(display("Failed to describe schema for given statement, source: {}", source))]
DescribeStatement {
#[snafu(backtrace)]
source: query::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -407,17 +359,15 @@ impl ErrorExt for Error {
match self {
Error::ParseAddr { .. }
| Error::InvalidSql { .. }
| Error::FindRegion { .. }
| Error::FindRegions { .. }
| Error::InvalidInsertRequest { .. }
| Error::FindPartitionColumn { .. }
| Error::ColumnValuesNumberMismatch { .. }
| Error::RegionKeysSize { .. } => StatusCode::InvalidArguments,
| Error::ColumnValuesNumberMismatch { .. } => StatusCode::InvalidArguments,
Error::NotSupported { .. } => StatusCode::Unsupported,
Error::RuntimeResource { source, .. } => source.status_code(),
Error::ExecutePromql { source, .. } => source.status_code(),
Error::SqlExecIntercepted { source, .. } => source.status_code(),
Error::StartServer { source, .. } => source.status_code(),
Error::ParseSql { source } | Error::AlterExprFromStmt { source } => {
@@ -426,8 +376,7 @@ impl ErrorExt for Error {
Error::Table { source } => source.status_code(),
Error::ConvertColumnDefaultConstraint { source, .. }
| Error::ConvertScalarValue { source, .. } => source.status_code(),
Error::ConvertColumnDefaultConstraint { source, .. } => source.status_code(),
Error::RequestDatanode { source } => source.status_code(),
@@ -436,14 +385,9 @@ impl ErrorExt for Error {
}
Error::FindDatanode { .. }
| Error::GetCache { .. }
| Error::FindTableRoutes { .. }
| Error::SerializeJson { .. }
| Error::DeserializeJson { .. }
| Error::FindRegionRoutes { .. }
| Error::CreateTableRoute { .. }
| Error::FindRegionRoute { .. }
| Error::FindLeaderPeer { .. }
| Error::FindRegionPartition { .. }
| Error::IllegalTableRoutesData { .. }
| Error::BuildDfLogicalPlan { .. }
| Error::BuildTableMeta { .. } => StatusCode::Internal,
@@ -469,16 +413,21 @@ impl ErrorExt for Error {
| Error::FindNewColumnsOnInsertion { source } => source.status_code(),
Error::PrimaryKeyNotFound { .. } => StatusCode::InvalidArguments,
Error::ExecuteStatement { source, .. } => source.status_code(),
Error::ExecuteStatement { source, .. } | Error::DescribeStatement { source } => {
source.status_code()
}
Error::MissingMetasrvOpts { .. } => StatusCode::InvalidArguments,
Error::AlterExprToRequest { source, .. } => source.status_code(),
Error::LeaderNotFound { .. } => StatusCode::StorageUnavailable,
Error::TableAlreadyExist { .. } => StatusCode::TableAlreadyExists,
Error::EncodeSubstraitLogicalPlan { source } => source.status_code(),
Error::BuildVector { source, .. } => source.status_code(),
Error::InvokeDatanode { source } => source.status_code(),
Error::ColumnDefaultValue { source, .. } => source.status_code(),
Error::ColumnNoneDefaultValue { .. } => StatusCode::InvalidArguments,
Error::External { source } => source.status_code(),
Error::DeserializePartition { source, .. } | Error::FindTableRoute { source, .. } => {
source.status_code()
}
}
}

View File

@@ -28,6 +28,7 @@ use crate::mysql::MysqlOptions;
use crate::opentsdb::OpentsdbOptions;
use crate::postgres::PostgresOptions;
use crate::prometheus::PrometheusOptions;
use crate::promql::PromqlOptions;
use crate::server::Services;
use crate::Plugins;
@@ -41,6 +42,7 @@ pub struct FrontendOptions {
pub opentsdb_options: Option<OpentsdbOptions>,
pub influxdb_options: Option<InfluxdbOptions>,
pub prometheus_options: Option<PrometheusOptions>,
pub promql_options: Option<PromqlOptions>,
pub mode: Mode,
pub meta_client_opts: Option<MetaClientOpts>,
}
@@ -55,6 +57,7 @@ impl Default for FrontendOptions {
opentsdb_options: Some(OpentsdbOptions::default()),
influxdb_options: Some(InfluxdbOptions::default()),
prometheus_options: Some(PrometheusOptions::default()),
promql_options: Some(PromqlOptions::default()),
mode: Mode::Standalone,
meta_client_opts: None,
}

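A hedged sketch of opting into the new PromQL endpoint through options; every other field falls back to its default.

let opts = FrontendOptions {
    promql_options: Some(PromqlOptions::default()),
    ..Default::default()
};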
View File

@@ -25,23 +25,26 @@ use std::time::Duration;
use api::v1::alter_expr::Kind;
use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_request::Request;
use api::v1::{
AddColumns, AlterExpr, Column, DdlRequest, DropTableExpr, GreptimeRequest, InsertRequest,
};
use api::v1::{AddColumns, AlterExpr, Column, DdlRequest, DropTableExpr, InsertRequest};
use async_trait::async_trait;
use catalog::remote::MetaKvBackend;
use catalog::CatalogManagerRef;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging::{debug, info};
use datanode::instance::sql::table_idents_to_full_name;
use datanode::instance::InstanceRef as DnInstanceRef;
use datatypes::schema::Schema;
use distributed::DistInstance;
use meta_client::client::{MetaClient, MetaClientBuilder};
use meta_client::MetaClientOpts;
use partition::manager::PartitionRuleManager;
use partition::route::TableRoutes;
use servers::error as server_error;
use servers::interceptor::{SqlQueryInterceptor, SqlQueryInterceptorRef};
use servers::promql::{PromqlHandler, PromqlHandlerRef};
use servers::query_handler::grpc::{GrpcQueryHandler, GrpcQueryHandlerRef};
use servers::query_handler::sql::{SqlQueryHandler, SqlQueryHandlerRef};
use servers::query_handler::{
@@ -56,11 +59,12 @@ use sql::statements::statement::Statement;
use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
use crate::error::{self, Error, MissingMetasrvOptsSnafu, Result};
use crate::error::{
self, Error, ExecutePromqlSnafu, MissingMetasrvOptsSnafu, NotSupportedSnafu, Result,
};
use crate::expr_factory::{CreateExprFactoryRef, DefaultCreateExprFactory};
use crate::frontend::FrontendOptions;
use crate::instance::standalone::{StandaloneGrpcQueryHandler, StandaloneSqlQueryHandler};
use crate::table::route::TableRoutes;
use crate::Plugins;
#[async_trait]
@@ -71,6 +75,7 @@ pub trait FrontendInstance:
+ InfluxdbLineProtocolHandler
+ PrometheusProtocolHandler
+ ScriptHandler
+ PromqlHandler
+ Send
+ Sync
+ 'static
@@ -88,6 +93,7 @@ pub struct Instance {
script_handler: Option<ScriptHandlerRef>,
sql_handler: SqlQueryHandlerRef<Error>,
grpc_query_handler: GrpcQueryHandlerRef<Error>,
promql_handler: Option<PromqlHandlerRef>,
create_expr_factory: CreateExprFactoryRef,
@@ -104,10 +110,12 @@ impl Instance {
client: meta_client.clone(),
});
let table_routes = Arc::new(TableRoutes::new(meta_client.clone()));
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
let datanode_clients = Arc::new(DatanodeClients::new());
let catalog_manager = Arc::new(FrontendCatalogManager::new(
meta_backend,
table_routes,
partition_manager,
datanode_clients.clone(),
));
@@ -121,6 +129,7 @@ impl Instance {
create_expr_factory: Arc::new(DefaultCreateExprFactory),
sql_handler: dist_instance.clone(),
grpc_query_handler: dist_instance,
promql_handler: None,
plugins: Default::default(),
})
}
@@ -162,6 +171,7 @@ impl Instance {
create_expr_factory: Arc::new(DefaultCreateExprFactory),
sql_handler: StandaloneSqlQueryHandler::arc(dn_instance.clone()),
grpc_query_handler: StandaloneGrpcQueryHandler::arc(dn_instance.clone()),
promql_handler: Some(dn_instance.clone()),
plugins: Default::default(),
}
}
@@ -174,6 +184,7 @@ impl Instance {
create_expr_factory: Arc::new(DefaultCreateExprFactory),
sql_handler: dist_instance.clone(),
grpc_query_handler: dist_instance,
promql_handler: None,
plugins: Default::default(),
}
}
@@ -191,10 +202,14 @@ impl Instance {
}
/// Handle batch inserts
pub async fn handle_inserts(&self, requests: Vec<InsertRequest>) -> Result<Output> {
pub async fn handle_inserts(
&self,
requests: Vec<InsertRequest>,
ctx: QueryContextRef,
) -> Result<Output> {
let mut success = 0;
for request in requests {
match self.handle_insert(request).await? {
match self.handle_insert(request, ctx.clone()).await? {
Output::AffectedRows(rows) => success += rows,
_ => unreachable!("Insert should not yield output other than AffectedRows"),
}
@@ -202,20 +217,12 @@ impl Instance {
Ok(Output::AffectedRows(success))
}
async fn handle_insert(&self, request: InsertRequest) -> Result<Output> {
let schema_name = &request.schema_name;
let table_name = &request.table_name;
let catalog_name = DEFAULT_CATALOG_NAME;
let columns = &request.columns;
self.create_or_alter_table_on_demand(catalog_name, schema_name, table_name, columns)
async fn handle_insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<Output> {
self.create_or_alter_table_on_demand(ctx.clone(), &request.table_name, &request.columns)
.await?;
let query = GreptimeRequest {
request: Some(Request::Insert(request)),
};
GrpcQueryHandler::do_query(&*self.grpc_query_handler, query).await
let query = Request::Insert(request);
GrpcQueryHandler::do_query(&*self.grpc_query_handler, query, ctx).await
}
// check if the table already exists:
@@ -223,11 +230,13 @@ impl Instance {
// - if the table exists, check if the schema matches. If any new column is found, alter the table by the inferred `AlterExpr`
async fn create_or_alter_table_on_demand(
&self,
catalog_name: &str,
schema_name: &str,
ctx: QueryContextRef,
table_name: &str,
columns: &[Column],
) -> Result<()> {
let catalog_name = &ctx.current_catalog();
let schema_name = &ctx.current_schema();
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
@@ -238,7 +247,7 @@ impl Instance {
"Table {}.{}.{} does not exist, try create table",
catalog_name, schema_name, table_name,
);
self.create_table_by_columns(catalog_name, schema_name, table_name, columns)
self.create_table_by_columns(ctx, table_name, columns)
.await?;
info!(
"Successfully created table on insertion: {}.{}.{}",
@@ -255,13 +264,8 @@ impl Instance {
"Find new columns {:?} on insertion, try to alter table: {}.{}.{}",
add_columns, catalog_name, schema_name, table_name
);
self.add_new_columns_to_table(
catalog_name,
schema_name,
table_name,
add_columns,
)
.await?;
self.add_new_columns_to_table(ctx, table_name, add_columns)
.await?;
info!(
"Successfully altered table on insertion: {}.{}.{}",
catalog_name, schema_name, table_name
@@ -275,11 +279,13 @@ impl Instance {
/// Infer create table expr from inserting data
async fn create_table_by_columns(
&self,
catalog_name: &str,
schema_name: &str,
ctx: QueryContextRef,
table_name: &str,
columns: &[Column],
) -> Result<Output> {
let catalog_name = &ctx.current_catalog();
let schema_name = &ctx.current_schema();
// Create table automatically, build schema from data.
let create_expr = self
.create_expr_factory
@@ -292,18 +298,18 @@ impl Instance {
);
self.grpc_query_handler
.do_query(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
.do_query(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(create_expr)),
})),
})
}),
ctx,
)
.await
}
async fn add_new_columns_to_table(
&self,
catalog_name: &str,
schema_name: &str,
ctx: QueryContextRef,
table_name: &str,
add_columns: AddColumns,
) -> Result<Output> {
@@ -312,25 +318,24 @@ impl Instance {
add_columns, table_name
);
let expr = AlterExpr {
catalog_name: ctx.current_catalog(),
schema_name: ctx.current_schema(),
table_name: table_name.to_string(),
schema_name: schema_name.to_string(),
catalog_name: catalog_name.to_string(),
kind: Some(Kind::AddColumns(add_columns)),
};
self.grpc_query_handler
.do_query(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
.do_query(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)),
})),
})
}),
ctx,
)
.await
}
fn handle_use(&self, db: String, query_ctx: QueryContextRef) -> Result<Output> {
let catalog = query_ctx.current_catalog();
let catalog = catalog.as_deref().unwrap_or(DEFAULT_CATALOG_NAME);
let catalog = &query_ctx.current_catalog();
ensure!(
self.catalog_manager
.schema(catalog, &db)
@@ -377,34 +382,28 @@ impl Instance {
| Statement::DescribeTable(_)
| Statement::Explain(_)
| Statement::Query(_)
| Statement::Insert(_) => {
| Statement::Insert(_)
| Statement::Alter(_) => {
return self.sql_handler.do_statement_query(stmt, query_ctx).await;
}
Statement::Alter(alter_stmt) => {
let expr =
AlterExpr::try_from(alter_stmt).context(error::AlterExprFromStmtSnafu)?;
return self
.grpc_query_handler
.do_query(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)),
})),
})
.await;
}
Statement::DropTable(drop_stmt) => {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(drop_stmt.table_name(), query_ctx.clone())
.map_err(BoxedError::new)
.context(error::ExternalSnafu)?;
let expr = DropTableExpr {
catalog_name: drop_stmt.catalog_name,
schema_name: drop_stmt.schema_name,
table_name: drop_stmt.table_name,
catalog_name,
schema_name,
table_name,
};
return self
.grpc_query_handler
.do_query(GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
.do_query(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::DropTable(expr)),
})),
})
}),
query_ctx,
)
.await;
}
Statement::ShowCreateTable(_) => error::NotSupportedSnafu { feat: query }.fail(),
@@ -457,6 +456,21 @@ impl SqlQueryHandler for Instance {
}
}
async fn do_promql_query(&self, query: &str, _: QueryContextRef) -> Vec<Result<Output>> {
if let Some(handler) = &self.promql_handler {
let result = handler
.do_query(query)
.await
.context(ExecutePromqlSnafu { query });
vec![result]
} else {
vec![Err(NotSupportedSnafu {
feat: "PromQL Query",
}
.build())]
}
}
async fn do_statement_query(
&self,
stmt: Statement,
@@ -473,6 +487,10 @@ impl SqlQueryHandler for Instance {
.and_then(|output| query_interceptor.post_execute(output, query_ctx.clone()))
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
self.sql_handler.do_describe(stmt, query_ctx)
}
fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalog_manager
.schema(catalog, schema)
@@ -483,9 +501,14 @@ impl SqlQueryHandler for Instance {
#[async_trait]
impl ScriptHandler for Instance {
async fn insert_script(&self, name: &str, script: &str) -> server_error::Result<()> {
async fn insert_script(
&self,
schema: &str,
name: &str,
script: &str,
) -> server_error::Result<()> {
if let Some(handler) = &self.script_handler {
handler.insert_script(name, script).await
handler.insert_script(schema, name, script).await
} else {
server_error::NotSupportedSnafu {
feat: "Script execution in Frontend",
@@ -494,9 +517,9 @@ impl ScriptHandler for Instance {
}
}
async fn execute_script(&self, script: &str) -> server_error::Result<Output> {
async fn execute_script(&self, schema: &str, script: &str) -> server_error::Result<Output> {
if let Some(handler) = &self.script_handler {
handler.execute_script(script).await
handler.execute_script(schema, script).await
} else {
server_error::NotSupportedSnafu {
feat: "Script execution in Frontend",
@@ -506,6 +529,20 @@ impl ScriptHandler for Instance {
}
}
#[async_trait]
impl PromqlHandler for Instance {
async fn do_query(&self, query: &str) -> server_error::Result<Output> {
if let Some(promql_handler) = &self.promql_handler {
promql_handler.do_query(query).await
} else {
server_error::NotSupportedSnafu {
feat: "PromQL query in Frontend",
}
.fail()
}
}
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;

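A hedged usage sketch of the context-aware batch insert path added above; the column payload is elided and `instance` is assumed to be a frontend `Instance`.

let ctx = Arc::new(QueryContext::with("greptime", "my_database"));
let requests = vec![InsertRequest {
    table_name: "demo".to_string(),
    columns: vec![/* Column payload elided */],
    ..Default::default()
}];
// Catalog and schema are taken from the session context, no longer from
// hard-coded defaults or the request itself.
let output = instance.handle_inserts(requests, ctx).await?;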
View File

@@ -25,15 +25,18 @@ use catalog::{CatalogList, CatalogManager};
use chrono::DateTime;
use client::Database;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::BoxedError;
use common_query::Output;
use common_telemetry::{debug, error, info};
use datanode::instance::sql::table_idents_to_full_name;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::RawSchema;
use datatypes::schema::{RawSchema, Schema};
use meta_client::client::MetaClient;
use meta_client::rpc::{
CreateRequest as MetaCreateRequest, Partition as MetaPartition, PutRequest, RouteResponse,
TableName, TableRoute,
};
use partition::partition::{PartitionBound, PartitionDef};
use query::parser::QueryStatement;
use query::sql::{describe_table, explain, show_databases, show_tables};
use query::{QueryEngineFactory, QueryEngineRef};
@@ -51,13 +54,12 @@ use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
use crate::error::{
self, AlterExprToRequestSnafu, CatalogEntrySerdeSnafu, CatalogNotFoundSnafu, CatalogSnafu,
ColumnDataTypeSnafu, ParseSqlSnafu, PrimaryKeyNotFoundSnafu, RequestDatanodeSnafu,
RequestMetaSnafu, Result, SchemaNotFoundSnafu, StartMetaClientSnafu, TableNotFoundSnafu,
TableSnafu, ToTableInsertRequestSnafu,
ColumnDataTypeSnafu, DeserializePartitionSnafu, ParseSqlSnafu, PrimaryKeyNotFoundSnafu,
RequestDatanodeSnafu, RequestMetaSnafu, Result, SchemaNotFoundSnafu, StartMetaClientSnafu,
TableNotFoundSnafu, TableSnafu, ToTableInsertRequestSnafu,
};
use crate::expr_factory::{CreateExprFactory, DefaultCreateExprFactory};
use crate::instance::parse_stmt;
use crate::partitioning::{PartitionBound, PartitionDef};
use crate::sql::insert_to_request;
#[derive(Clone)]
@@ -92,7 +94,7 @@ impl DistInstance {
let table_routes = response.table_routes;
ensure!(
table_routes.len() == 1,
error::FindTableRoutesSnafu {
error::CreateTableRouteSnafu {
table_name: create_table.table_name.to_string()
}
);
@@ -107,7 +109,7 @@ impl DistInstance {
let region_routes = &table_route.region_routes;
ensure!(
!region_routes.is_empty(),
error::FindRegionRoutesSnafu {
error::FindRegionRouteSnafu {
table_name: create_table.table_name.to_string()
}
);
@@ -119,7 +121,7 @@ impl DistInstance {
for datanode in table_route.find_leaders() {
let client = self.datanode_clients.get_client(&datanode).await;
let client = Database::new("greptime", client);
let client = Database::with_client(client);
let regions = table_route.find_leader_regions(&datanode);
let mut create_expr_for_region = create_table.clone();
@@ -168,7 +170,19 @@ impl DistInstance {
Statement::ShowTables(stmt) => {
show_tables(stmt, self.catalog_manager.clone(), query_ctx)
}
Statement::DescribeTable(stmt) => describe_table(stmt, self.catalog_manager.clone()),
Statement::DescribeTable(stmt) => {
let (catalog, schema, table) = table_idents_to_full_name(stmt.name(), query_ctx)
.map_err(BoxedError::new)
.context(error::ExternalSnafu)?;
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: stmt.name().to_string(),
})?;
describe_table(table)
}
Statement::Explain(stmt) => {
explain(Box::new(stmt), self.query_engine.clone(), query_ctx).await
}
@@ -346,16 +360,21 @@ impl DistInstance {
// Converting a GRPC InsertRequest to a Table InsertRequest, then splitting the Table InsertRequest, then assembling each GRPC InsertRequest again is rather inefficient;
// we should operate on the GRPC InsertRequest directly.
// Remember to also check the "region_number" carried in the InsertRequest.
async fn handle_dist_insert(&self, request: InsertRequest) -> Result<Output> {
async fn handle_dist_insert(
&self,
request: InsertRequest,
ctx: QueryContextRef,
) -> Result<Output> {
let catalog = &ctx.current_catalog();
let schema = &ctx.current_schema();
let table_name = &request.table_name;
// TODO(LFC): InsertRequest should carry catalog name, too.
let table = self
.catalog_manager
.table(DEFAULT_CATALOG_NAME, &request.schema_name, table_name)
.table(catalog, schema, table_name)
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name })?;
let request = common_grpc_expr::insert::to_table_insert_request(request)
let request = common_grpc_expr::insert::to_table_insert_request(catalog, schema, request)
.context(ToTableInsertRequestSnafu)?;
let affected_rows = table.insert(request).await.context(TableSnafu)?;
@@ -376,6 +395,14 @@ impl SqlQueryHandler for DistInstance {
self.handle_sql(query, query_ctx).await
}
async fn do_promql_query(
&self,
_: &str,
_: QueryContextRef,
) -> Vec<std::result::Result<Output, Self::Error>> {
unimplemented!()
}
async fn do_statement_query(
&self,
stmt: Statement,
@@ -384,6 +411,17 @@ impl SqlQueryHandler for DistInstance {
self.handle_statement(stmt, query_ctx).await
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
self.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.map(Some)
.context(error::DescribeStatementSnafu)
} else {
Ok(None)
}
}
fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalog_manager
.schema(catalog, schema)
@@ -509,8 +547,9 @@ fn parse_partitions(
partition_entries
.into_iter()
.map(|x| PartitionDef::new(partition_columns.clone(), x).try_into())
.collect::<Result<Vec<MetaPartition>>>()
.map(|x| MetaPartition::try_from(PartitionDef::new(partition_columns.clone(), x)))
.collect::<std::result::Result<_, _>>()
.context(DeserializePartitionSnafu)
}
fn find_partition_entries(
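For reference, a minimal standalone sketch of the collect-into-`Result` idiom that the `parse_partitions` hunk above relies on: a sequence of fallible conversions short-circuits at the first error, and only then is the `DeserializePartitionSnafu` context attached. Inputs and types here are made up for illustration; the real code converts `PartitionDef`s into `MetaPartition`s.

fn main() {
    // Collecting an iterator of Result<T, E> into Result<Vec<T>, E> stops at the first error.
    let inputs = vec!["1", "2", "oops", "4"];
    let parsed: Result<Vec<i32>, _> = inputs.iter().map(|s| s.parse::<i32>()).collect();
    assert!(parsed.is_err());

    // With only valid inputs, all converted values are gathered in order.
    let inputs = vec!["1", "2", "3"];
    let parsed: Result<Vec<i32>, _> = inputs.iter().map(|s| s.parse::<i32>()).collect();
    assert_eq!(parsed.unwrap(), vec![1, 2, 3]);
}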

View File

@@ -14,10 +14,10 @@
use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_request::Request;
use api::v1::GreptimeRequest;
use async_trait::async_trait;
use common_query::Output;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::QueryContextRef;
use snafu::OptionExt;
use crate::error::{self, Result};
@@ -27,12 +27,9 @@ use crate::instance::distributed::DistInstance;
impl GrpcQueryHandler for DistInstance {
type Error = error::Error;
async fn do_query(&self, query: GreptimeRequest) -> Result<Output> {
let request = query.request.context(error::IncompleteGrpcResultSnafu {
err_msg: "Missing 'request' in GreptimeRequest",
})?;
async fn do_query(&self, request: Request, ctx: QueryContextRef) -> Result<Output> {
match request {
Request::Insert(request) => self.handle_dist_insert(request).await,
Request::Insert(request) => self.handle_dist_insert(request, ctx).await,
Request::Query(_) => {
unreachable!("Query should have been handled directly in Frontend Instance!")
}

View File

@@ -14,12 +14,11 @@
use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::GreptimeRequest;
use async_trait::async_trait;
use common_query::Output;
use servers::query_handler::grpc::GrpcQueryHandler;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::QueryContext;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt};
use crate::error::{self, Result};
@@ -29,12 +28,9 @@ use crate::instance::Instance;
impl GrpcQueryHandler for Instance {
type Error = error::Error;
async fn do_query(&self, query: GreptimeRequest) -> Result<Output> {
let request = query.request.context(error::IncompleteGrpcResultSnafu {
err_msg: "Missing field 'GreptimeRequest.request'",
})?;
async fn do_query(&self, request: Request, ctx: QueryContextRef) -> Result<Output> {
let output = match request {
Request::Insert(request) => self.handle_insert(request).await?,
Request::Insert(request) => self.handle_insert(request, ctx).await?,
Request::Query(query_request) => {
let query = query_request
.query
@@ -43,8 +39,7 @@ impl GrpcQueryHandler for Instance {
})?;
match query {
Query::Sql(sql) => {
let mut result =
SqlQueryHandler::do_query(self, &sql, QueryContext::arc()).await;
let mut result = SqlQueryHandler::do_query(self, &sql, ctx).await;
ensure!(
result.len() == 1,
error::NotSupportedSnafu {
@@ -62,10 +57,8 @@ impl GrpcQueryHandler for Instance {
}
}
Request::Ddl(request) => {
let query = GreptimeRequest {
request: Some(Request::Ddl(request)),
};
GrpcQueryHandler::do_query(&*self.grpc_query_handler, query).await?
let query = Request::Ddl(request);
GrpcQueryHandler::do_query(&*self.grpc_query_handler, query, ctx).await?
}
};
Ok(output)
@@ -86,6 +79,7 @@ mod test {
use catalog::helper::{TableGlobalKey, TableGlobalValue};
use common_query::Output;
use common_recordbatch::RecordBatches;
use session::context::QueryContext;
use super::*;
use crate::table::DistTable;
@@ -111,93 +105,83 @@ mod test {
}
async fn test_handle_ddl_request(instance: &Arc<Instance>) {
let query = GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
database_name: "database_created_through_grpc".to_string(),
create_if_not_exists: true,
})),
let query = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateDatabase(CreateDatabaseExpr {
database_name: "database_created_through_grpc".to_string(),
create_if_not_exists: true,
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(1)));
let query = GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(CreateTableExpr {
catalog_name: "greptime".to_string(),
schema_name: "database_created_through_grpc".to_string(),
table_name: "table_created_through_grpc".to_string(),
column_defs: vec![
ColumnDef {
name: "a".to_string(),
datatype: ColumnDataType::String as _,
let query = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(CreateTableExpr {
catalog_name: "greptime".to_string(),
schema_name: "database_created_through_grpc".to_string(),
table_name: "table_created_through_grpc".to_string(),
column_defs: vec![
ColumnDef {
name: "a".to_string(),
datatype: ColumnDataType::String as _,
is_nullable: true,
default_constraint: vec![],
},
ColumnDef {
name: "ts".to_string(),
datatype: ColumnDataType::TimestampMillisecond as _,
is_nullable: false,
default_constraint: vec![],
},
],
time_index: "ts".to_string(),
..Default::default()
})),
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
let query = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(AlterExpr {
catalog_name: "greptime".to_string(),
schema_name: "database_created_through_grpc".to_string(),
table_name: "table_created_through_grpc".to_string(),
kind: Some(alter_expr::Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "b".to_string(),
datatype: ColumnDataType::Int32 as _,
is_nullable: true,
default_constraint: vec![],
},
ColumnDef {
name: "ts".to_string(),
datatype: ColumnDataType::TimestampMillisecond as _,
is_nullable: false,
default_constraint: vec![],
},
],
time_index: "ts".to_string(),
..Default::default()
}),
is_key: false,
}],
})),
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
let query = GreptimeRequest {
request: Some(Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(AlterExpr {
catalog_name: "greptime".to_string(),
schema_name: "database_created_through_grpc".to_string(),
table_name: "table_created_through_grpc".to_string(),
kind: Some(alter_expr::Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "b".to_string(),
datatype: ColumnDataType::Int32 as _,
is_nullable: true,
default_constraint: vec![],
}),
is_key: false,
}],
})),
})),
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql("INSERT INTO database_created_through_grpc.table_created_through_grpc (a, b, ts) VALUES ('s', 1, 1672816466000)".to_string()))
}))
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Query(QueryRequest {
query: Some(Query::Sql("INSERT INTO database_created_through_grpc.table_created_through_grpc (a, b, ts) VALUES ('s', 1, 1672816466000)".to_string()))
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(1)));
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, a, b FROM database_created_through_grpc.table_created_through_grpc"
.to_string(),
)),
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, a, b FROM database_created_through_grpc.table_created_through_grpc"
.to_string(),
)),
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
let Output::Stream(stream) = output else { unreachable!() };
@@ -327,12 +311,10 @@ CREATE TABLE {table_name} (
}
async fn create_table(frontend: &Arc<Instance>, sql: String) {
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql(sql)),
})),
};
let output = GrpcQueryHandler::do_query(frontend.as_ref(), query)
let query = Request::Query(QueryRequest {
query: Some(Query::Sql(sql)),
});
let output = GrpcQueryHandler::do_query(frontend.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
@@ -340,7 +322,6 @@ CREATE TABLE {table_name} (
async fn test_insert_and_query_on_existing_table(instance: &Arc<Instance>, table_name: &str) {
let insert = InsertRequest {
schema_name: "public".to_string(),
table_name: table_name.to_string(),
columns: vec![
Column {
@@ -377,22 +358,18 @@ CREATE TABLE {table_name} (
..Default::default()
};
let query = GreptimeRequest {
request: Some(Request::Insert(insert)),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Insert(insert);
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(8)));
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql(format!(
"SELECT ts, a FROM {table_name} ORDER BY ts"
))),
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Query(QueryRequest {
query: Some(Query::Sql(format!(
"SELECT ts, a FROM {table_name} ORDER BY ts"
))),
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
let Output::Stream(stream) = output else { unreachable!() };
@@ -461,7 +438,6 @@ CREATE TABLE {table_name} (
async fn test_insert_and_query_on_auto_created_table(instance: &Arc<Instance>) {
let insert = InsertRequest {
schema_name: "public".to_string(),
table_name: "auto_created_table".to_string(),
columns: vec![
Column {
@@ -490,16 +466,13 @@ CREATE TABLE {table_name} (
};
// Test auto-creating a table that does not exist upon insertion.
let query = GreptimeRequest {
request: Some(Request::Insert(insert)),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Insert(insert);
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(3)));
let insert = InsertRequest {
schema_name: "public".to_string(),
table_name: "auto_created_table".to_string(),
columns: vec![
Column {
@@ -528,22 +501,18 @@ CREATE TABLE {table_name} (
};
// Test auto-adding a column that does not exist upon insertion.
let query = GreptimeRequest {
request: Some(Request::Insert(insert)),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Insert(insert);
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(3)));
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, a, b FROM auto_created_table".to_string(),
)),
})),
};
let output = GrpcQueryHandler::do_query(instance.as_ref(), query)
let query = Request::Query(QueryRequest {
query: Some(Query::Sql(
"SELECT ts, a, b FROM auto_created_table".to_string(),
)),
});
let output = GrpcQueryHandler::do_query(instance.as_ref(), query, QueryContext::arc())
.await
.unwrap();
let Output::Stream(stream) = output else { unreachable!() };

View File

@@ -16,15 +16,20 @@ use async_trait::async_trait;
use common_error::prelude::BoxedError;
use servers::influxdb::InfluxdbRequest;
use servers::query_handler::InfluxdbLineProtocolHandler;
use session::context::QueryContextRef;
use snafu::ResultExt;
use crate::instance::Instance;
#[async_trait]
impl InfluxdbLineProtocolHandler for Instance {
async fn exec(&self, request: &InfluxdbRequest) -> servers::error::Result<()> {
async fn exec(
&self,
request: &InfluxdbRequest,
ctx: QueryContextRef,
) -> servers::error::Result<()> {
let requests = request.try_into()?;
self.handle_inserts(requests)
self.handle_inserts(requests, ctx)
.await
.map_err(BoxedError::new)
.context(servers::error::ExecuteGrpcQuerySnafu)?;
@@ -68,10 +73,9 @@ monitor1,host=host1 cpu=66.6,memory=1024 1663840496100023100
monitor1,host=host2 memory=1027 1663840496400340001";
let request = InfluxdbRequest {
precision: None,
db: "public".to_string(),
lines: lines.to_string(),
};
instance.exec(&request).await.unwrap();
instance.exec(&request, QueryContext::arc()).await.unwrap();
let mut output = instance
.do_query(

View File

@@ -17,6 +17,7 @@ use common_error::prelude::BoxedError;
use servers::error as server_error;
use servers::opentsdb::codec::DataPoint;
use servers::query_handler::OpentsdbProtocolHandler;
use session::context::QueryContext;
use snafu::prelude::*;
use crate::instance::Instance;
@@ -25,7 +26,7 @@ use crate::instance::Instance;
impl OpentsdbProtocolHandler for Instance {
async fn exec(&self, data_point: &DataPoint) -> server_error::Result<()> {
let request = data_point.as_grpc_insert();
self.handle_insert(request)
self.handle_insert(request, QueryContext::arc())
.await
.map_err(BoxedError::new)
.with_context(|_| server_error::ExecuteQuerySnafu {

View File

@@ -15,7 +15,7 @@
use api::prometheus::remote::read_request::ResponseType;
use api::prometheus::remote::{Query, QueryResult, ReadRequest, ReadResponse, WriteRequest};
use api::v1::greptime_request::Request;
use api::v1::{query_request, GreptimeRequest, QueryRequest};
use api::v1::{query_request, QueryRequest};
use async_trait::async_trait;
use common_error::prelude::BoxedError;
use common_query::Output;
@@ -26,6 +26,7 @@ use servers::error::{self, Result as ServerResult};
use servers::prometheus::{self, Metrics};
use servers::query_handler::grpc::GrpcQueryHandler;
use servers::query_handler::{PrometheusProtocolHandler, PrometheusResponse};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use crate::instance::Instance;
@@ -74,26 +75,24 @@ async fn to_query_result(table_name: &str, output: Output) -> ServerResult<Query
impl Instance {
async fn handle_remote_queries(
&self,
db: &str,
ctx: QueryContextRef,
queries: &[Query],
) -> ServerResult<Vec<(String, Output)>> {
let mut results = Vec::with_capacity(queries.len());
for query in queries {
let (table_name, sql) = prometheus::query_to_sql(db, query)?;
let (table_name, sql) = prometheus::query_to_sql(query)?;
logging::debug!(
"prometheus remote read, table: {}, sql: {}",
table_name,
sql
);
let query = GreptimeRequest {
request: Some(Request::Query(QueryRequest {
query: Some(query_request::Query::Sql(sql.to_string())),
})),
};
let query = Request::Query(QueryRequest {
query: Some(query_request::Query::Sql(sql.to_string())),
});
let output = self
.do_query(query)
.do_query(query, ctx.clone())
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
@@ -106,22 +105,24 @@ impl Instance {
#[async_trait]
impl PrometheusProtocolHandler for Instance {
async fn write(&self, database: &str, request: WriteRequest) -> ServerResult<()> {
let requests = prometheus::to_grpc_insert_requests(database, request.clone())?;
self.handle_inserts(requests)
async fn write(&self, request: WriteRequest, ctx: QueryContextRef) -> ServerResult<()> {
let requests = prometheus::to_grpc_insert_requests(request.clone())?;
self.handle_inserts(requests, ctx)
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
Ok(())
}
async fn read(&self, database: &str, request: ReadRequest) -> ServerResult<PrometheusResponse> {
async fn read(
&self,
request: ReadRequest,
ctx: QueryContextRef,
) -> ServerResult<PrometheusResponse> {
let response_type = negotiate_response_type(&request.accepted_response_types)?;
// TODO(dennis): use read_hints to speed up the query if possible
let results = self
.handle_remote_queries(database, &request.queries)
.await?;
let results = self.handle_remote_queries(ctx, &request.queries).await?;
match response_type {
ResponseType::Samples => {
@@ -159,6 +160,7 @@ mod tests {
use api::prometheus::remote::label_matcher::Type as MatcherType;
use api::prometheus::remote::{Label, LabelMatcher, Sample};
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::QueryContext;
@@ -190,18 +192,19 @@ mod tests {
};
let db = "prometheus";
let ctx = Arc::new(QueryContext::with(DEFAULT_CATALOG_NAME, db));
assert!(SqlQueryHandler::do_query(
instance.as_ref(),
"CREATE DATABASE IF NOT EXISTS prometheus",
QueryContext::arc()
ctx.clone(),
)
.await
.get(0)
.unwrap()
.is_ok());
instance.write(db, write_request).await.unwrap();
instance.write(write_request, ctx.clone()).await.unwrap();
let read_request = ReadRequest {
queries: vec![
@@ -236,7 +239,7 @@ mod tests {
..Default::default()
};
let resp = instance.read(db, read_request).await.unwrap();
let resp = instance.read(read_request, ctx).await.unwrap();
assert_eq!(resp.content_type, "application/x-protobuf");
assert_eq!(resp.content_encoding, "snappy");
let body = prometheus::snappy_decompress(&resp.body).unwrap();
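A hedged sketch of the pattern this file moves to: the target database is no longer passed around as a `db: &str` argument but travels inside the query context, as the test's `QueryContext::with(DEFAULT_CATALOG_NAME, db)` call suggests. The type below is a simplified stand-in for `session::context::QueryContext`, not its real API.

use std::sync::Arc;

struct QueryContextSketch {
    catalog: String,
    schema: String,
}

impl QueryContextSketch {
    fn with(catalog: &str, schema: &str) -> Arc<Self> {
        Arc::new(Self {
            catalog: catalog.to_string(),
            schema: schema.to_string(),
        })
    }
}

// A handler reads the target catalog/schema from the context instead of taking `db: &str`.
fn handle_write(ctx: &QueryContextSketch) -> (String, String) {
    (ctx.catalog.clone(), ctx.schema.clone())
}

fn main() {
    let ctx = QueryContextSketch::with("greptime", "prometheus");
    assert_eq!(
        handle_write(&ctx),
        ("greptime".to_string(), "prometheus".to_string())
    );
}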

View File

@@ -14,10 +14,11 @@
use std::sync::Arc;
use api::v1::GreptimeRequest;
use api::v1::greptime_request::Request as GreptimeRequest;
use async_trait::async_trait;
use common_query::Output;
use datanode::error::Error as DatanodeError;
use datatypes::schema::Schema;
use servers::query_handler::grpc::{GrpcQueryHandler, GrpcQueryHandlerRef};
use servers::query_handler::sql::{SqlQueryHandler, SqlQueryHandlerRef};
use session::context::QueryContextRef;
@@ -47,6 +48,14 @@ impl SqlQueryHandler for StandaloneSqlQueryHandler {
.collect()
}
async fn do_promql_query(
&self,
_: &str,
_: QueryContextRef,
) -> Vec<std::result::Result<Output, Self::Error>> {
unimplemented!()
}
async fn do_statement_query(
&self,
stmt: Statement,
@@ -58,6 +67,12 @@ impl SqlQueryHandler for StandaloneSqlQueryHandler {
.context(error::InvokeDatanodeSnafu)
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
self.0
.do_describe(stmt, query_ctx)
.context(error::InvokeDatanodeSnafu)
}
fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.0
.is_valid_schema(catalog, schema)
@@ -77,9 +92,9 @@ impl StandaloneGrpcQueryHandler {
impl GrpcQueryHandler for StandaloneGrpcQueryHandler {
type Error = error::Error;
async fn do_query(&self, query: GreptimeRequest) -> Result<Output> {
async fn do_query(&self, query: GreptimeRequest, ctx: QueryContextRef) -> Result<Output> {
self.0
.do_query(query)
.do_query(query, ctx)
.await
.context(error::InvokeDatanodeSnafu)
}

View File

@@ -26,11 +26,10 @@ pub mod influxdb;
pub mod instance;
pub mod mysql;
pub mod opentsdb;
pub mod partitioning;
pub mod postgres;
pub mod prometheus;
pub mod promql;
mod server;
pub mod spliter;
mod sql;
mod table;
#[cfg(test)]

View File

@@ -21,6 +21,7 @@ pub struct MysqlOptions {
pub runtime_size: usize,
#[serde(default = "Default::default")]
pub tls: TlsOption,
pub reject_no_database: Option<bool>,
}
impl Default for MysqlOptions {
@@ -29,6 +30,7 @@ impl Default for MysqlOptions {
addr: "127.0.0.1:4002".to_string(),
runtime_size: 2,
tls: TlsOption::default(),
reject_no_database: None,
}
}
}
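A small hedged sketch of how the new optional flag behaves when it is left out of the configuration: serde leaves an absent `Option<bool>` as `None`, and the frontend services setup further below falls back to `false` via `unwrap_or(false)`. The struct is a stripped-down stand-in, not the real `MysqlOptions`, and assumes the `serde` and `toml` crates.

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct MysqlOptionsSketch {
    reject_no_database: Option<bool>,
}

fn main() {
    // An empty TOML table has no `reject_no_database` key, so the field deserializes to None.
    let opts: MysqlOptionsSketch = toml::from_str("").unwrap();
    // Absent means "do not reject", matching `unwrap_or(false)` in the services code.
    assert!(!opts.reject_no_database.unwrap_or(false));
}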

View File

@@ -0,0 +1,39 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PromqlOptions {
pub addr: String,
}
impl Default for PromqlOptions {
fn default() -> Self {
Self {
addr: "127.0.0.1:4004".to_string(),
}
}
}
#[cfg(test)]
mod tests {
use super::PromqlOptions;
#[test]
fn test_prometheus_options() {
let default = PromqlOptions::default();
assert_eq!(default.addr, "127.0.0.1:4004".to_string());
}
}
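For context, a tiny sketch of what the default PromQL address amounts to, assuming the `parse_addr` helper used by the frontend services below ultimately produces a `std::net::SocketAddr` (an assumption; the real helper may differ). Port 4004 sits alongside the other frontend defaults, e.g. 4002 for MySQL above.

use std::net::SocketAddr;

fn main() {
    // The default from PromqlOptions; parsing validates both the IP and the port.
    let addr: SocketAddr = "127.0.0.1:4004".parse().expect("valid socket address");
    assert_eq!(addr.port(), 4004);
}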

View File

@@ -18,17 +18,20 @@ use std::sync::Arc;
use common_runtime::Builder as RuntimeBuilder;
use common_telemetry::info;
use servers::auth::UserProviderRef;
use servers::error::Error::InternalIo;
use servers::grpc::GrpcServer;
use servers::http::HttpServer;
use servers::mysql::server::MysqlServer;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
use servers::opentsdb::OpentsdbServer;
use servers::postgres::PostgresServer;
use servers::promql::PromqlServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::query_handler::sql::ServerSqlQueryHandlerAdaptor;
use servers::server::Server;
use snafu::ResultExt;
use tokio::try_join;
use crate::error::Error::StartServer;
use crate::error::{self, Result};
use crate::frontend::FrontendOptions;
use crate::influxdb::InfluxdbOptions;
@@ -81,12 +84,22 @@ impl Services {
.build()
.context(error::RuntimeResourceSnafu)?,
);
let mysql_server = MysqlServer::create_server(
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
mysql_io_runtime,
opts.tls.clone(),
user_provider.clone(),
Arc::new(MysqlSpawnRef::new(
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
user_provider.clone(),
)),
Arc::new(MysqlSpawnConfig::new(
opts.tls.should_force_tls(),
opts.tls
.setup()
.map_err(|e| StartServer {
source: InternalIo { source: e },
})?
.map(Arc::new),
opts.reject_no_database.unwrap_or(false),
)),
);
Some((mysql_server, mysql_addr))
@@ -142,7 +155,7 @@ impl Services {
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
http_options.clone(),
);
if let Some(user_provider) = user_provider {
if let Some(user_provider) = user_provider.clone() {
http_server.set_user_provider(user_provider);
}
@@ -169,12 +182,26 @@ impl Services {
None
};
let promql_server_and_addr = if let Some(promql_options) = &opts.promql_options {
let promql_addr = parse_addr(&promql_options.addr)?;
let mut promql_server = PromqlServer::create_server(instance.clone());
if let Some(user_provider) = user_provider {
promql_server.set_user_provider(user_provider);
}
Some((promql_server as _, promql_addr))
} else {
None
};
try_join!(
start_server(http_server_and_addr),
start_server(grpc_server_and_addr),
start_server(mysql_server_and_addr),
start_server(postgres_server_and_addr),
start_server(opentsdb_server_and_addr)
start_server(opentsdb_server_and_addr),
start_server(promql_server_and_addr),
)
.context(error::StartServerSnafu)?;
Ok(())

View File

@@ -28,7 +28,7 @@ use crate::error::{self, Result};
const DEFAULT_PLACEHOLDER_VALUE: &str = "default";
// TODO(fys): Extract the common logic in datanode and frontend in the future.
#[allow(dead_code)]
// This function converts an insert statement into an `InsertRequest` targeting region 0.
pub(crate) fn insert_to_request(table: &TableRef, stmt: Insert) -> Result<InsertRequest> {
let columns = stmt.columns();
let values = stmt.values().context(error::ParseSqlSnafu)?;
@@ -86,6 +86,7 @@ pub(crate) fn insert_to_request(table: &TableRef, stmt: Insert) -> Result<Insert
.into_iter()
.map(|(cs, mut b)| (cs.name.to_string(), b.to_vector()))
.collect(),
region_number: 0,
})
}

View File

@@ -12,10 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod route;
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use api::v1::AlterExpr;
@@ -23,7 +20,6 @@ use async_trait::async_trait;
use catalog::helper::{TableGlobalKey, TableGlobalValue};
use catalog::remote::KvBackendRef;
use client::Database;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_error::prelude::BoxedError;
use common_query::error::Result as QueryResult;
use common_query::logical_plan::Expr;
@@ -37,13 +33,10 @@ use datafusion::physical_plan::{
Partitioning, SendableRecordBatchStream as DfSendableRecordBatchStream,
};
use datafusion_common::DataFusionError;
use datafusion_expr::expr::Expr as DfExpr;
use datafusion_expr::BinaryExpr;
use datatypes::prelude::Value;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use meta_client::rpc::{Peer, TableName};
use meta_client::rpc::TableName;
use partition::manager::PartitionRuleManagerRef;
use snafu::prelude::*;
use store_api::storage::RegionNumber;
use table::error::TableOperationSnafu;
use table::metadata::{FilterPushDownType, TableInfo, TableInfoRef};
use table::requests::{AlterTableRequest, InsertRequest};
@@ -52,17 +45,7 @@ use table::Table;
use tokio::sync::RwLock;
use crate::datanode::DatanodeClients;
use crate::error::{
self, BuildTableMetaSnafu, CatalogEntrySerdeSnafu, CatalogSnafu, ContextValueNotFoundSnafu,
Error, LeaderNotFoundSnafu, RequestDatanodeSnafu, Result, TableNotFoundSnafu, TableSnafu,
};
use crate::partitioning::columns::RangeColumnsPartitionRule;
use crate::partitioning::range::RangePartitionRule;
use crate::partitioning::{
Operator, PartitionBound, PartitionDef, PartitionExpr, PartitionRuleRef,
};
use crate::spliter::WriteSpliter;
use crate::table::route::TableRoutes;
use crate::error::{self, Result};
use crate::table::scan::{DatanodeInstance, TableScanPlan};
pub mod insert;
@@ -72,7 +55,7 @@ pub(crate) mod scan;
pub struct DistTable {
table_name: TableName,
table_info: TableInfoRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
backend: KvBackendRef,
}
@@ -92,20 +75,15 @@ impl Table for DistTable {
}
async fn insert(&self, request: InsertRequest) -> table::Result<usize> {
let partition_rule = self
.find_partition_rule()
let splits = self
.partition_manager
.split_insert_request(&self.table_name, request)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
let spliter = WriteSpliter::with_partition_rule(partition_rule);
let inserts = spliter
.split(request)
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
let output = self
.dist_insert(inserts)
.dist_insert(splits)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
@@ -120,30 +98,33 @@ impl Table for DistTable {
limit: Option<usize>,
) -> table::Result<PhysicalPlanRef> {
let partition_rule = self
.find_partition_rule()
.partition_manager
.find_table_partition_rule(&self.table_name)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
let regions = self
.find_regions(partition_rule, filters)
.partition_manager
.find_regions_by_filters(partition_rule, filters)
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
let datanodes = self
.find_datanodes(regions)
.partition_manager
.find_region_datanodes(&self.table_name, regions)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
let table_name = &self.table_name;
let mut partition_execs = Vec::with_capacity(datanodes.len());
for (datanode, _regions) in datanodes.iter() {
let client = self.datanode_clients.get_client(datanode).await;
let db = Database::new(&self.table_name.schema_name, client);
let db = Database::new(&table_name.catalog_name, &table_name.schema_name, client);
let datanode_instance = DatanodeInstance::new(Arc::new(self.clone()) as _, db);
// TODO(LFC): Pass in "regions" when Datanode supports multiple regions for a table.
partition_execs.push(Arc::new(PartitionExec {
table_name: self.table_name.clone(),
table_name: table_name.clone(),
datanode_instance,
projection: projection.cloned(),
filters: filters.to_vec(),
@@ -175,217 +156,19 @@ impl DistTable {
pub(crate) fn new(
table_name: TableName,
table_info: TableInfoRef,
table_routes: Arc<TableRoutes>,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
backend: KvBackendRef,
) -> Self {
Self {
table_name,
table_info,
table_routes,
partition_manager,
datanode_clients,
backend,
}
}
// TODO(LFC): Finding regions now seems rather inefficient; it should be looked into further.
fn find_regions(
&self,
partition_rule: PartitionRuleRef<Error>,
filters: &[Expr],
) -> Result<Vec<RegionNumber>> {
let regions = if let Some((first, rest)) = filters.split_first() {
let mut target = Self::find_regions0(partition_rule.clone(), first)?;
for filter in rest {
let regions = Self::find_regions0(partition_rule.clone(), filter)?;
// When all filters are provided as a collection, it usually implies that
// "all filters must be satisfied". So we intersect all the results here.
target.retain(|x| regions.contains(x));
// Fail fast: an empty collection intersected with anything is still empty.
if target.is_empty() {
break;
}
}
target.into_iter().collect::<Vec<_>>()
} else {
partition_rule.find_regions(&[])?
};
ensure!(
!regions.is_empty(),
error::FindRegionsSnafu {
filters: filters.to_vec()
}
);
Ok(regions)
}
// TODO(LFC): Support other types of filter exprs:
// - BETWEEN and IN (maybe more)
// - exprs with arithmetic like "a + 1 < 10" (should have been optimized in the logical plan?)
// - exprs that are neither comparisons nor "AND"/"OR" operations, for example, "a LIKE x"
fn find_regions0(
partition_rule: PartitionRuleRef<Error>,
filter: &Expr,
) -> Result<HashSet<RegionNumber>> {
let expr = filter.df_expr();
match expr {
DfExpr::BinaryExpr(BinaryExpr { left, op, right }) if is_compare_op(op) => {
let column_op_value = match (left.as_ref(), right.as_ref()) {
(DfExpr::Column(c), DfExpr::Literal(v)) => Some((&c.name, *op, v)),
(DfExpr::Literal(v), DfExpr::Column(c)) => {
Some((&c.name, reverse_operator(op), v))
}
_ => None,
};
if let Some((column, op, sv)) = column_op_value {
let value = sv
.clone()
.try_into()
.with_context(|_| error::ConvertScalarValueSnafu { value: sv.clone() })?;
return Ok(partition_rule
.find_regions(&[PartitionExpr::new(column, op, value)])?
.into_iter()
.collect::<HashSet<RegionNumber>>());
}
}
DfExpr::BinaryExpr(BinaryExpr { left, op, right })
if matches!(op, Operator::And | Operator::Or) =>
{
let left_regions =
Self::find_regions0(partition_rule.clone(), &(*left.clone()).into())?;
let right_regions =
Self::find_regions0(partition_rule.clone(), &(*right.clone()).into())?;
let regions = match op {
Operator::And => left_regions
.intersection(&right_regions)
.cloned()
.collect::<HashSet<RegionNumber>>(),
Operator::Or => left_regions
.union(&right_regions)
.cloned()
.collect::<HashSet<RegionNumber>>(),
_ => unreachable!(),
};
return Ok(regions);
}
_ => (),
}
// Returns all regions for unsupported partition exprs as a safety hatch.
Ok(partition_rule
.find_regions(&[])?
.into_iter()
.collect::<HashSet<RegionNumber>>())
}
async fn find_datanodes(
&self,
regions: Vec<RegionNumber>,
) -> Result<HashMap<Peer, Vec<RegionNumber>>> {
let route = self.table_routes.get_route(&self.table_name).await?;
let mut datanodes = HashMap::new();
for region in regions.iter() {
let datanode = route
.region_routes
.iter()
.find_map(|x| {
if x.region.id == *region as u64 {
x.leader_peer.clone()
} else {
None
}
})
.context(error::FindDatanodeSnafu { region: *region })?;
datanodes
.entry(datanode)
.or_insert_with(Vec::new)
.push(*region);
}
Ok(datanodes)
}
async fn find_partition_rule(&self) -> Result<PartitionRuleRef<Error>> {
let route = self.table_routes.get_route(&self.table_name).await?;
ensure!(
!route.region_routes.is_empty(),
error::FindRegionRoutesSnafu {
table_name: self.table_name.to_string()
}
);
let mut partitions = Vec::with_capacity(route.region_routes.len());
for r in route.region_routes.iter() {
let partition =
r.region
.partition
.clone()
.context(error::FindRegionPartitionSnafu {
region: r.region.id,
table_name: self.table_name.to_string(),
})?;
let partition_def: PartitionDef = partition.try_into()?;
partitions.push((r.region.id, partition_def));
}
partitions.sort_by(|a, b| a.1.partition_bounds().cmp(b.1.partition_bounds()));
ensure!(
partitions
.windows(2)
.all(|w| w[0].1.partition_columns() == w[1].1.partition_columns()),
error::IllegalTableRoutesDataSnafu {
table_name: self.table_name.to_string(),
err_msg: "partition columns of all regions are not the same"
}
);
let partition_columns = partitions[0].1.partition_columns();
ensure!(
!partition_columns.is_empty(),
error::IllegalTableRoutesDataSnafu {
table_name: self.table_name.to_string(),
err_msg: "no partition columns found"
}
);
let regions = partitions
.iter()
.map(|x| x.0 as u32)
.collect::<Vec<RegionNumber>>();
// TODO(LFC): Serializing and deserializing the partition rule is ugly; we must find a more elegant way.
let partition_rule: PartitionRuleRef<Error> = match partition_columns.len() {
1 => {
// Omit the last "MAXVALUE".
let bounds = partitions
.iter()
.filter_map(|(_, p)| match &p.partition_bounds()[0] {
PartitionBound::Value(v) => Some(v.clone()),
PartitionBound::MaxValue => None,
})
.collect::<Vec<Value>>();
Arc::new(RangePartitionRule::new(
partition_columns[0].clone(),
bounds,
regions,
)) as _
}
_ => {
let bounds = partitions
.iter()
.map(|x| x.1.partition_bounds().clone())
.collect::<Vec<Vec<PartitionBound>>>();
Arc::new(RangeColumnsPartitionRule::new(
partition_columns.clone(),
bounds,
regions,
)) as _
}
};
Ok(partition_rule)
}
pub(crate) async fn table_global_value(
&self,
key: &TableGlobalKey,
@@ -394,9 +177,9 @@ impl DistTable {
.backend
.get(key.to_string().as_bytes())
.await
.context(CatalogSnafu)?;
.context(error::CatalogSnafu)?;
Ok(if let Some(raw) = raw {
Some(TableGlobalValue::from_bytes(raw.1).context(CatalogEntrySerdeSnafu)?)
Some(TableGlobalValue::from_bytes(raw.1).context(error::CatalogEntrySerdeSnafu)?)
} else {
None
})
@@ -407,17 +190,17 @@ impl DistTable {
key: TableGlobalKey,
value: TableGlobalValue,
) -> Result<()> {
let value = value.as_bytes().context(CatalogEntrySerdeSnafu)?;
let value = value.as_bytes().context(error::CatalogEntrySerdeSnafu)?;
self.backend
.set(key.to_string().as_bytes(), &value)
.await
.context(CatalogSnafu)
.context(error::CatalogSnafu)
}
async fn handle_alter(&self, context: AlterContext, request: &AlterTableRequest) -> Result<()> {
let alter_expr = context
.get::<AlterExpr>()
.context(ContextValueNotFoundSnafu { key: "AlterExpr" })?;
.context(error::ContextValueNotFoundSnafu { key: "AlterExpr" })?;
self.alter_by_expr(alter_expr).await?;
@@ -426,9 +209,9 @@ impl DistTable {
let new_meta = table_info
.meta
.builder_with_alter_kind(table_name, &request.alter_kind)
.context(TableSnafu)?
.context(error::TableSnafu)?
.build()
.context(BuildTableMetaSnafu {
.context(error::BuildTableMetaSnafu {
table_name: table_name.clone(),
})?;
@@ -441,12 +224,12 @@ impl DistTable {
schema_name: alter_expr.schema_name.clone(),
table_name: alter_expr.table_name.clone(),
};
let mut value = self
.table_global_value(&key)
.await?
.context(TableNotFoundSnafu {
table_name: alter_expr.table_name.clone(),
})?;
let mut value =
self.table_global_value(&key)
.await?
.context(error::TableNotFoundSnafu {
table_name: alter_expr.table_name.clone(),
})?;
value.table_info = new_info.into();
@@ -456,11 +239,17 @@ impl DistTable {
/// Define an `alter_by_expr` instead of implementing [`Table::alter`] to avoid redundant conversions between
/// [`table::requests::AlterTableRequest`] and [`AlterExpr`].
async fn alter_by_expr(&self, expr: &AlterExpr) -> Result<()> {
let table_routes = self.table_routes.get_route(&self.table_name).await?;
let table_routes = self
.partition_manager
.find_table_route(&self.table_name)
.await
.with_context(|_| error::FindTableRouteSnafu {
table_name: self.table_name.to_string(),
})?;
let leaders = table_routes.find_leaders();
ensure!(
!leaders.is_empty(),
LeaderNotFoundSnafu {
error::LeaderNotFoundSnafu {
table: format!(
"{:?}.{:?}.{}",
expr.catalog_name, expr.schema_name, expr.table_name
@@ -468,12 +257,13 @@ impl DistTable {
}
);
for datanode in leaders {
let db = Database::new(
DEFAULT_CATALOG_NAME,
self.datanode_clients.get_client(&datanode).await,
);
let client = self.datanode_clients.get_client(&datanode).await;
let db = Database::with_client(client);
debug!("Sending {:?} to {:?}", expr, db);
let result = db.alter(expr.clone()).await.context(RequestDatanodeSnafu)?;
let result = db
.alter(expr.clone())
.await
.context(error::RequestDatanodeSnafu)?;
debug!("Alter table result: {:?}", result);
// TODO(hl): We should further check and track alter result in some global DDL task tracker
}
@@ -494,28 +284,6 @@ fn project_schema(table_schema: SchemaRef, projection: Option<&Vec<usize>>) -> S
}
}
fn is_compare_op(op: &Operator) -> bool {
matches!(
*op,
Operator::Eq
| Operator::NotEq
| Operator::Lt
| Operator::LtEq
| Operator::Gt
| Operator::GtEq
)
}
fn reverse_operator(op: &Operator) -> Operator {
match *op {
Operator::Lt => Operator::Gt,
Operator::Gt => Operator::Lt,
Operator::LtEq => Operator::GtEq,
Operator::GtEq => Operator::LtEq,
_ => *op,
}
}
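For reference, a tiny illustration of why the removed `reverse_operator` existed (the logic now lives with the partition code): when a filter is written literal-first, e.g. `10 > a`, normalizing it to column-first form requires flipping the comparison into `a < 10`. The enum below is a local stand-in, not DataFusion's `Operator`.

#[derive(Clone, Copy, Debug, PartialEq)]
enum Cmp {
    Lt,
    Gt,
    LtEq,
    GtEq,
    Eq,
}

fn reverse(op: Cmp) -> Cmp {
    match op {
        Cmp::Lt => Cmp::Gt,
        Cmp::Gt => Cmp::Lt,
        Cmp::LtEq => Cmp::GtEq,
        Cmp::GtEq => Cmp::LtEq,
        other => other,
    }
}

fn main() {
    // "10 > a" in column-first form is "a < 10".
    assert_eq!(reverse(Cmp::Gt), Cmp::Lt);
    // Symmetric operators like "=" stay unchanged.
    assert_eq!(reverse(Cmp::Eq), Cmp::Eq);
}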
#[derive(Debug)]
struct DistTableScan {
schema: SchemaRef,
@@ -604,6 +372,8 @@ impl PartitionExec {
#[cfg(test)]
mod test {
use std::collections::HashMap;
use api::v1::column::SemanticType;
use api::v1::{column, Column, ColumnDataType, InsertRequest};
use catalog::error::Result;
@@ -617,7 +387,7 @@ mod test {
use datafusion::prelude::SessionContext;
use datafusion::sql::sqlparser;
use datafusion_expr::expr_fn::{and, binary_expr, col, or};
use datafusion_expr::lit;
use datafusion_expr::{lit, Operator};
use datanode::instance::Instance;
use datatypes::arrow::compute::SortOptions;
use datatypes::prelude::ConcreteDataType;
@@ -626,14 +396,21 @@ mod test {
use meta_client::client::MetaClient;
use meta_client::rpc::router::RegionRoute;
use meta_client::rpc::{Region, Table, TableRoute};
use partition::columns::RangeColumnsPartitionRule;
use partition::manager::PartitionRuleManager;
use partition::partition::{PartitionBound, PartitionDef};
use partition::range::RangePartitionRule;
use partition::route::TableRoutes;
use partition::PartitionRuleRef;
use session::context::QueryContext;
use sql::parser::ParserContext;
use sql::statements::statement::Statement;
use store_api::storage::RegionNumber;
use table::metadata::{TableInfoBuilder, TableMetaBuilder};
use table::TableRef;
use super::*;
use crate::expr_factory::{CreateExprFactory, DefaultCreateExprFactory};
use crate::partitioning::range::RangePartitionRule;
struct DummyKvBackend;
@@ -667,33 +444,8 @@ mod test {
#[tokio::test(flavor = "multi_thread")]
async fn test_find_partition_rule() {
let table_name = TableName::new("greptime", "public", "foo");
let column_schemas = vec![
ColumnSchema::new("ts", ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new("a", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new("b", ConcreteDataType::string_datatype(), true),
];
let schema = Arc::new(Schema::new(column_schemas.clone()));
let meta = TableMetaBuilder::default()
.schema(schema)
.primary_key_indices(vec![])
.next_column_id(1)
.build()
.unwrap();
let table_info = TableInfoBuilder::default()
.name(&table_name.table_name)
.meta(meta)
.build()
.unwrap();
let table_routes = Arc::new(TableRoutes::new(Arc::new(MetaClient::default())));
let table = DistTable {
table_name: table_name.clone(),
table_info: Arc::new(table_info),
table_routes: table_routes.clone(),
datanode_clients: Arc::new(DatanodeClients::new()),
backend: Arc::new(DummyKvBackend),
};
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes.clone()));
let table_route = TableRoute {
table: Table {
@@ -759,7 +511,10 @@ mod test {
.insert_table_route(table_name.clone(), Arc::new(table_route))
.await;
let partition_rule = table.find_partition_rule().await.unwrap();
let partition_rule = partition_manager
.find_table_partition_rule(&table_name)
.await
.unwrap();
let range_rule = partition_rule
.as_any()
.downcast_ref::<RangePartitionRule>()
@@ -838,7 +593,10 @@ mod test {
.insert_table_route(table_name.clone(), Arc::new(table_route))
.await;
let partition_rule = table.find_partition_rule().await.unwrap();
let partition_rule = partition_manager
.find_table_partition_rule(&table_name)
.await
.unwrap();
let range_columns_rule = partition_rule
.as_any()
.downcast_ref::<RangeColumnsPartitionRule>()
@@ -1035,7 +793,7 @@ mod test {
let datanode_instances = instance.datanodes;
let catalog_manager = dist_instance.catalog_manager();
let table_routes = catalog_manager.table_routes();
let partition_manager = catalog_manager.partition_manager();
let datanode_clients = catalog_manager.datanode_clients();
let table_name = TableName::new("greptime", "public", "dist_numbers");
@@ -1074,7 +832,10 @@ mod test {
.await
.unwrap();
let table_route = table_routes.get_route(&table_name).await.unwrap();
let table_route = partition_manager
.find_table_route(&table_name)
.await
.unwrap();
let mut region_to_datanode_mapping = HashMap::new();
for region_route in table_route.region_routes.iter() {
@@ -1090,14 +851,21 @@ mod test {
(2, (30..35).collect::<Vec<i32>>()),
(3, (100..105).collect::<Vec<i32>>()),
];
for (region_id, numbers) in regional_numbers {
let datanode_id = *region_to_datanode_mapping.get(&region_id).unwrap();
for (region_number, numbers) in regional_numbers {
let datanode_id = *region_to_datanode_mapping.get(&region_number).unwrap();
let instance = datanode_instances.get(&datanode_id).unwrap().clone();
let start_ts = global_start_ts;
global_start_ts += numbers.len() as i64;
insert_testing_data(&table_name, instance.clone(), numbers, start_ts).await;
insert_testing_data(
&table_name,
instance.clone(),
numbers,
start_ts,
region_number,
)
.await;
}
let meta = TableMetaBuilder::default()
@@ -1114,7 +882,7 @@ mod test {
DistTable {
table_name,
table_info: Arc::new(table_info),
table_routes,
partition_manager,
datanode_clients,
backend: catalog_manager.backend(),
}
@@ -1125,6 +893,7 @@ mod test {
dn_instance: Arc<Instance>,
data: Vec<i32>,
start_ts: i64,
region_number: RegionNumber,
) {
let row_count = data.len() as u32;
let columns = vec![
@@ -1158,41 +927,22 @@ mod test {
},
];
let request = InsertRequest {
schema_name: table_name.schema_name.clone(),
table_name: table_name.table_name.clone(),
columns,
row_count,
region_number: 0,
region_number,
};
dn_instance.handle_insert(request).await.unwrap();
dn_instance
.handle_insert(request, QueryContext::arc())
.await
.unwrap();
}
#[tokio::test(flavor = "multi_thread")]
async fn test_find_regions() {
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"a",
ConcreteDataType::int32_datatype(),
true,
)]));
let table_name = TableName::new("greptime", "public", "foo");
let meta = TableMetaBuilder::default()
.schema(schema)
.primary_key_indices(vec![])
.next_column_id(1)
.build()
.unwrap();
let table_info = TableInfoBuilder::default()
.name(&table_name.table_name)
.meta(meta)
.build()
.unwrap();
let table = DistTable {
table_name,
table_info: Arc::new(table_info),
table_routes: Arc::new(TableRoutes::new(Arc::new(MetaClient::default()))),
datanode_clients: Arc::new(DatanodeClients::new()),
backend: Arc::new(DummyKvBackend),
};
let partition_manager = Arc::new(PartitionRuleManager::new(Arc::new(TableRoutes::new(
Arc::new(MetaClient::default()),
))));
// PARTITION BY RANGE (a) (
// PARTITION r1 VALUES LESS THAN (10),
@@ -1200,18 +950,18 @@ mod test {
// PARTITION r3 VALUES LESS THAN (50),
// PARTITION r4 VALUES LESS THAN (MAXVALUE),
// )
let partition_rule: PartitionRuleRef<Error> = Arc::new(RangePartitionRule::new(
let partition_rule: PartitionRuleRef = Arc::new(RangePartitionRule::new(
"a",
vec![10_i32.into(), 20_i32.into(), 50_i32.into()],
vec![0_u32, 1, 2, 3],
)) as _;
let partition_rule_clone = partition_rule.clone();
let test = |filters: Vec<Expr>, expect_regions: Vec<RegionNumber>| {
let mut regions = table
.find_regions(partition_rule.clone(), filters.as_slice())
let mut regions = partition_manager
.find_regions_by_filters(partition_rule_clone.clone(), filters.as_slice())
.unwrap();
regions.sort();
assert_eq!(regions, expect_regions);
};
@@ -1298,7 +1048,7 @@ mod test {
);
// Test failing to find regions with contradictory filters.
let regions = table.find_regions(
let regions = partition_manager.find_regions_by_filters(
partition_rule,
vec![and(
binary_expr(col("a"), Operator::Lt, lit(20)),
@@ -1309,7 +1059,7 @@ mod test {
); // a < 20 AND a >= 20
assert!(matches!(
regions.unwrap_err(),
error::Error::FindRegions { .. }
partition::error::Error::FindRegions { .. }
));
}
}

View File

@@ -27,7 +27,7 @@ use table::requests::InsertRequest;
use super::DistTable;
use crate::error;
use crate::error::Result;
use crate::error::{FindTableRouteSnafu, Result};
use crate::table::scan::DatanodeInstance;
impl DistTable {
@@ -35,7 +35,14 @@ impl DistTable {
&self,
inserts: HashMap<RegionNumber, InsertRequest>,
) -> Result<Output> {
let route = self.table_routes.get_route(&self.table_name).await?;
let table_name = &self.table_name;
let route = self
.partition_manager
.find_table_route(&self.table_name)
.await
.with_context(|_| FindTableRouteSnafu {
table_name: table_name.to_string(),
})?;
let mut joins = Vec::with_capacity(inserts.len());
for (region_id, insert) in inserts {
@@ -52,11 +59,10 @@ impl DistTable {
.context(error::FindDatanodeSnafu { region: region_id })?;
let client = self.datanode_clients.get_client(&datanode).await;
let db = Database::new(&self.table_name.schema_name, client);
let db = Database::new(&table_name.catalog_name, &table_name.schema_name, client);
let instance = DatanodeInstance::new(Arc::new(self.clone()) as _, db);
// TODO(fys): a separate runtime should be used here.
let join = tokio::spawn(async move {
let join = common_runtime::spawn_write(async move {
instance
.grpc_insert(to_grpc_insert_request(region_id, insert)?)
.await
@@ -131,7 +137,6 @@ fn to_grpc_insert_request(
let table_name = insert.table_name.clone();
let (columns, row_count) = insert_request_to_insert_batch(&insert)?;
Ok(GrpcInsertRequest {
schema_name: insert.schema_name,
table_name,
region_number,
columns,
@@ -180,6 +185,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "demo".to_string(),
columns_values,
region_number: 0,
}
}

View File

@@ -20,7 +20,7 @@ use catalog::remote::MetaKvBackend;
use client::Client;
use common_grpc::channel_manager::ChannelManager;
use common_runtime::Builder as RuntimeBuilder;
use datanode::datanode::{DatanodeOptions, ObjectStoreConfig, WalConfig};
use datanode::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
use datanode::instance::Instance as DatanodeInstance;
use meta_client::client::MetaClientBuilder;
use meta_client::rpc::Peer;
@@ -28,6 +28,8 @@ use meta_srv::metasrv::MetaSrvOptions;
use meta_srv::mocks::MockInfo;
use meta_srv::service::store::kv::KvStoreRef;
use meta_srv::service::store::memory::MemStore;
use partition::manager::PartitionRuleManager;
use partition::route::TableRoutes;
use servers::grpc::GrpcServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::Mode;
@@ -39,7 +41,6 @@ use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
use crate::instance::distributed::DistInstance;
use crate::instance::Instance;
use crate::table::route::TableRoutes;
/// Guard against the `TempDir`s that used in unit tests.
/// (The `TempDir` will be deleted once it goes out of scope.)
@@ -81,9 +82,9 @@ fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard)
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File {
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
},
}),
mode: Mode::Standalone,
..Default::default()
};
@@ -167,9 +168,9 @@ async fn create_distributed_datanode(
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File {
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
},
}),
mode: Mode::Distributed,
..Default::default()
};
@@ -241,10 +242,12 @@ pub(crate) async fn create_distributed_instance(test_name: &str) -> MockDistribu
let meta_backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),
});
let table_routes = Arc::new(TableRoutes::new(meta_client.clone()));
let partition_manager = Arc::new(PartitionRuleManager::new(Arc::new(TableRoutes::new(
meta_client.clone(),
))));
let catalog_manager = Arc::new(FrontendCatalogManager::new(
meta_backend,
table_routes.clone(),
partition_manager,
datanode_clients.clone(),
));

View File

@@ -29,7 +29,7 @@ raft-engine = "0.3"
snafu = { version = "0.7", features = ["backtraces"] }
store-api = { path = "../store-api" }
tempdir = "0.3"
tokio = { version = "1.18", features = ["full"] }
tokio.workspace = true
tokio-util = "0.7"
[dev-dependencies]

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use api::v1::meta::{
CreateRequest as PbCreateRequest, DeleteRequest as PbDeleteRequest, Partition as PbPartition,
@@ -170,12 +170,12 @@ pub struct TableRoute {
}
impl TableRoute {
pub fn find_leaders(&self) -> Vec<Peer> {
pub fn find_leaders(&self) -> HashSet<Peer> {
self.region_routes
.iter()
.flat_map(|x| &x.leader_peer)
.cloned()
.collect::<Vec<Peer>>()
.collect()
}
pub fn find_leader_regions(&self, datanode: &Peer) -> Vec<u32> {
@@ -189,7 +189,7 @@ impl TableRoute {
}
None
})
.collect::<Vec<u32>>()
.collect()
}
}
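A brief sketch of why `find_leaders` returning a `HashSet<Peer>` matters: a datanode that leads several regions would otherwise appear once per region, and callers such as `alter_by_expr` would send it duplicate requests. `Peer` below is a simplified stand-in for `meta_client::rpc::Peer`, with made-up addresses.

use std::collections::HashSet;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct Peer {
    id: u64,
    addr: String,
}

fn main() {
    // Datanode 1 leads two regions, datanode 2 leads one.
    let leader_per_region = vec![
        Peer { id: 1, addr: "dn-1:4100".to_string() },
        Peer { id: 1, addr: "dn-1:4100".to_string() },
        Peer { id: 2, addr: "dn-2:4100".to_string() },
    ];
    let leaders: HashSet<Peer> = leader_per_region.into_iter().collect();
    // Each datanode shows up once, so it receives a given request only once.
    assert_eq!(leaders.len(), 2);
}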

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::meta::heartbeat_server::HeartbeatServer;
use api::v1::meta::router_server::RouterServer;
use api::v1::meta::store_server::StoreServer;
@@ -24,6 +26,7 @@ use crate::election::etcd::EtcdElection;
use crate::metasrv::{MetaSrv, MetaSrvOptions};
use crate::service::admin;
use crate::service::store::etcd::EtcdStore;
use crate::service::store::memory::MemStore;
use crate::{error, Result};
// Bootstrap the rpc server to serve incoming request
@@ -58,10 +61,16 @@ pub fn router(meta_srv: MetaSrv) -> Router {
}
pub async fn make_meta_srv(opts: MetaSrvOptions) -> Result<MetaSrv> {
let kv_store = EtcdStore::with_endpoints([&opts.store_addr]).await?;
let election = EtcdElection::with_endpoints(&opts.server_addr, [&opts.store_addr]).await?;
let (kv_store, election) = if opts.use_memory_store {
(Arc::new(MemStore::new()) as _, None)
} else {
(
EtcdStore::with_endpoints([&opts.store_addr]).await?,
Some(EtcdElection::with_endpoints(&opts.server_addr, [&opts.store_addr]).await?),
)
};
let selector = opts.selector.clone().into();
let meta_srv = MetaSrv::new(opts, kv_store, Some(selector), Some(election), None).await;
let meta_srv = MetaSrv::new(opts, kv_store, Some(selector), election, None).await;
meta_srv.start().await;
Ok(meta_srv)
}
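A minimal sketch of the selection pattern in `make_meta_srv` above: with `use_memory_store` set, an in-memory kv store is used and leader election is skipped; otherwise the etcd-backed store and election are wired in. All types here are stand-ins, not the real meta-srv API.

trait KvStore {}

struct MemStore;
struct EtcdStore;

impl KvStore for MemStore {}
impl KvStore for EtcdStore {}

struct Election;

fn choose_backend(use_memory_store: bool) -> (Box<dyn KvStore>, Option<Election>) {
    if use_memory_store {
        // In-memory store and no election: handy for tests and local runs.
        (Box::new(MemStore), None)
    } else {
        // Durable store plus leader election for a real deployment.
        (Box::new(EtcdStore), Some(Election))
    }
}

fn main() {
    let (_store, election) = choose_backend(true);
    assert!(election.is_none());
}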

View File

@@ -40,6 +40,7 @@ pub struct MetaSrvOptions {
pub store_addr: String,
pub datanode_lease_secs: i64,
pub selector: SelectorType,
pub use_memory_store: bool,
}
impl Default for MetaSrvOptions {
@@ -50,6 +51,7 @@ impl Default for MetaSrvOptions {
store_addr: "127.0.0.1:2379".to_string(),
datanode_lease_secs: 15,
selector: SelectorType::default(),
use_memory_store: false,
}
}
}

View File

@@ -33,8 +33,7 @@ storage = { path = "../storage" }
store-api = { path = "../store-api" }
table = { path = "../table" }
tempdir = { version = "0.3", optional = true }
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true
[dev-dependencies]
tempdir = { version = "0.3" }
tokio = { version = "1.18", features = ["full"] }

View File

@@ -16,19 +16,22 @@ use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_telemetry::logging;
use common_telemetry::tracing::log::info;
use common_telemetry::{debug, logging};
use datatypes::schema::SchemaRef;
use object_store::ObjectStore;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{
ColumnDescriptorBuilder, ColumnFamilyDescriptor, ColumnFamilyDescriptorBuilder, ColumnId,
CreateOptions, EngineContext as StorageEngineContext, OpenOptions, RegionDescriptorBuilder,
RegionId, RowKeyDescriptor, RowKeyDescriptorBuilder, StorageEngine,
CreateOptions, EngineContext as StorageEngineContext, OpenOptions, Region,
RegionDescriptorBuilder, RegionId, RowKeyDescriptor, RowKeyDescriptorBuilder, StorageEngine,
};
use table::engine::{EngineContext, TableEngine, TableReference};
use table::metadata::{TableId, TableInfoBuilder, TableMetaBuilder, TableType, TableVersion};
use table::error::TableOperationSnafu;
use table::metadata::{
TableId, TableInfo, TableInfoBuilder, TableMetaBuilder, TableType, TableVersion,
};
use table::requests::{
AlterKind, AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
};
@@ -39,9 +42,10 @@ use tokio::sync::Mutex;
use crate::config::EngineConfig;
use crate::error::{
self, BuildColumnDescriptorSnafu, BuildColumnFamilyDescriptorSnafu, BuildRegionDescriptorSnafu,
BuildRowKeyDescriptorSnafu, InvalidPrimaryKeySnafu, MissingTimestampIndexSnafu, Result,
TableExistsSnafu,
BuildRowKeyDescriptorSnafu, InvalidPrimaryKeySnafu, MissingTimestampIndexSnafu,
RegionNotFoundSnafu, Result, TableExistsSnafu,
};
use crate::manifest::TableManifest;
use crate::table::MitoTable;
pub const MITO_ENGINE: &str = "mito";
@@ -332,51 +336,53 @@ impl<S: StorageEngine> MitoEngineInner<S> {
)?;
let table_id = request.id;
// TODO(dennis): supports multi regions;
assert_eq!(1, request.region_numbers.len());
let region_number = request.region_numbers[0];
let region_id = region_id(table_id, region_number);
let region_name = region_name(table_id, region_number);
let region_descriptor = RegionDescriptorBuilder::default()
.id(region_id)
.name(&region_name)
.row_key(row_key)
.default_cf(default_cf)
.build()
.context(BuildRegionDescriptorSnafu {
table_name,
region_name,
})?;
let table_dir = table_dir(schema_name, table_id);
let mut regions = HashMap::with_capacity(request.region_numbers.len());
let _lock = self.table_mutex.lock().await;
// Checks again, read lock should be enough since we are guarded by the mutex.
if let Some(table) = self.get_table(&table_ref) {
if request.create_if_not_exists {
return Ok(table);
return if request.create_if_not_exists {
Ok(table)
} else {
return TableExistsSnafu { table_name }.fail();
}
TableExistsSnafu { table_name }.fail()
};
}
let table_dir = table_dir(schema_name, table_id);
let opts = CreateOptions {
parent_dir: table_dir.clone(),
};
for region_number in &request.region_numbers {
let region_id = region_id(table_id, *region_number);
let region = self
.storage_engine
.create_region(&StorageEngineContext::default(), region_descriptor, &opts)
.await
.map_err(BoxedError::new)
.context(error::CreateRegionSnafu)?;
let region_name = region_name(table_id, *region_number);
let region_descriptor = RegionDescriptorBuilder::default()
.id(region_id)
.name(&region_name)
.row_key(row_key.clone())
.default_cf(default_cf.clone())
.build()
.context(BuildRegionDescriptorSnafu {
table_name,
region_name,
})?;
let opts = CreateOptions {
parent_dir: table_dir.clone(),
};
let region = self
.storage_engine
.create_region(&StorageEngineContext::default(), region_descriptor, &opts)
.await
.map_err(BoxedError::new)
.context(error::CreateRegionSnafu)?;
info!("Mito engine created region: {:?}", region.id());
regions.insert(*region_number, region);
}
let table_meta = TableMetaBuilder::default()
.schema(request.schema)
.engine(MITO_ENGINE)
.next_column_id(next_column_id)
.primary_key_indices(request.primary_key_indices.clone())
.region_numbers(vec![region_number])
.region_numbers(request.region_numbers)
.build()
.context(error::BuildTableMetaSnafu { table_name })?;
@@ -395,7 +401,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
table_name,
&table_dir,
table_info,
region,
regions,
self.object_store.clone(),
)
.await?,
@@ -445,28 +451,37 @@ impl<S: StorageEngine> MitoEngineInner<S> {
parent_dir: table_dir.to_string(),
};
// TODO(dennis): supports multi regions;
assert_eq!(request.region_numbers.len(), 1);
let region_number = request.region_numbers[0];
let region_name = region_name(table_id, region_number);
let Some((manifest, table_info)) = self
.recover_table_manifest_and_info(table_name, &table_dir)
.await? else { return Ok(None) };
let region = match self
.storage_engine
.open_region(&engine_ctx, &region_name, &opts)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?
{
None => return Ok(None),
Some(region) => region,
};
debug!(
"Opening table {}, table info recovered: {:?}",
table_id, table_info
);
let table = Arc::new(
MitoTable::open(table_name, &table_dir, region, self.object_store.clone())
let mut regions = HashMap::with_capacity(table_info.meta.region_numbers.len());
for region_number in &table_info.meta.region_numbers {
let region_name = region_name(table_id, *region_number);
let region = self
.storage_engine
.open_region(&engine_ctx, &region_name, &opts)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?,
);
.context(table_error::TableOperationSnafu)?
.with_context(|| RegionNotFoundSnafu {
table: format!(
"{}.{}.{}",
request.catalog_name, request.schema_name, request.table_name
),
region: *region_number,
})
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
regions.insert(*region_number, region);
}
let table = Arc::new(MitoTable::new(table_info, regions, manifest));
self.tables
.write()
@@ -480,6 +495,24 @@ impl<S: StorageEngine> MitoEngineInner<S> {
Ok(table)
}
async fn recover_table_manifest_and_info(
&self,
table_name: &str,
table_dir: &str,
) -> TableResult<Option<(TableManifest, TableInfo)>> {
let manifest = MitoTable::<<S as StorageEngine>::Region>::build_manifest(
table_dir,
self.object_store.clone(),
);
let Some(table_info) =
MitoTable::<<S as StorageEngine>::Region>::recover_table_info(table_name, &manifest)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)? else { return Ok(None) };
Ok(Some((manifest, table_info)))
}
fn get_table(&self, table_ref: &TableReference) -> Option<TableRef> {
self.tables
.read()
@@ -489,9 +522,9 @@ impl<S: StorageEngine> MitoEngineInner<S> {
}
async fn alter_table(&self, _ctx: &EngineContext, req: AlterTableRequest) -> Result<TableRef> {
let catalog_name = req.catalog_name.as_deref().unwrap_or(DEFAULT_CATALOG_NAME);
let schema_name = req.schema_name.as_deref().unwrap_or(DEFAULT_SCHEMA_NAME);
let table_name = &req.table_name.clone();
let catalog_name = &req.catalog_name;
let schema_name = &req.schema_name;
let table_name = &req.table_name;
if let AlterKind::RenameTable { new_table_name } = &req.alter_kind {
let table_ref = TableReference {
@@ -562,6 +595,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
#[cfg(test)]
mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::physical_plan::SessionContext;
use common_recordbatch::util;
use datatypes::prelude::ConcreteDataType;
@@ -572,6 +606,7 @@ mod tests {
};
use log_store::NoopLogStore;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::region::RegionImpl;
use storage::EngineImpl;
use store_api::manifest::Manifest;
use store_api::storage::ReadContext;
@@ -580,7 +615,9 @@ mod tests {
use super::*;
use crate::table::test_util;
use crate::table::test_util::{new_insert_request, schema_for_test, MockRegion, TABLE_NAME};
use crate::table::test_util::{
new_insert_request, schema_for_test, TestEngineComponents, TABLE_NAME,
};
async fn setup_table_with_column_default_constraint() -> (TempDir, String, TableRef) {
let table_name = "test_default_constraint";
@@ -757,10 +794,14 @@ mod tests {
#[tokio::test]
async fn test_create_table_insert_scan() {
let (_engine, table, schema, _dir) = test_util::setup_test_engine_and_table().await;
let TestEngineComponents {
table_ref: table,
schema_ref,
dir: _dir,
..
} = test_util::setup_test_engine_and_table().await;
assert_eq!(TableType::Base, table.table_type());
assert_eq!(schema, table.schema());
assert_eq!(schema_ref, table.schema());
let insert_req = new_insert_request("demo".to_string(), HashMap::default());
assert_eq!(0, table.insert(insert_req).await.unwrap());
@@ -839,7 +880,11 @@ mod tests {
async fn test_create_table_scan_batches() {
common_telemetry::init_default_ut_logging();
let (_engine, table, _schema, _dir) = test_util::setup_test_engine_and_table().await;
let TestEngineComponents {
table_ref: table,
dir: _dir,
..
} = test_util::setup_test_engine_and_table().await;
// TODO(yingwen): Custom batch size once the table support setting batch_size.
let default_batch_size = ReadContext::default().batch_size;
@@ -933,12 +978,18 @@ mod tests {
table_name: test_util::TABLE_NAME.to_string(),
// the test table id is 1
table_id: 1,
region_numbers: vec![0],
};
let (engine, table, object_store, _dir) = {
let (engine, table_engine, table, object_store, dir) =
test_util::setup_mock_engine_and_table().await;
let (_engine, storage_engine, table, object_store, _dir) = {
let TestEngineComponents {
table_engine,
storage_engine,
table_ref: table,
object_store,
dir,
..
} = test_util::setup_test_engine_and_table().await;
assert_eq!(MITO_ENGINE, table_engine.name());
// Now try to open the table again.
let reopened = table_engine
@@ -948,11 +999,11 @@ mod tests {
.unwrap();
assert_eq!(table.schema(), reopened.schema());
(engine, table, object_store, dir)
(table_engine, storage_engine, table, object_store, dir)
};
// Construct a new table engine, and try to open the table.
let table_engine = MitoEngine::new(EngineConfig::default(), engine, object_store);
let table_engine = MitoEngine::new(EngineConfig::default(), storage_engine, object_store);
let reopened = table_engine
.open_table(&ctx, open_req.clone())
.await
@@ -962,11 +1013,13 @@ mod tests {
let reopened = reopened
.as_any()
.downcast_ref::<MitoTable<MockRegion>>()
.downcast_ref::<MitoTable<RegionImpl<NoopLogStore>>>()
.unwrap();
let left = table.table_info();
// assert recovered table_info is correct
assert_eq!(table.table_info(), reopened.table_info());
let right = reopened.table_info();
assert_eq!(left, right);
assert_eq!(reopened.manifest().last_version(), 1);
}
@@ -982,8 +1035,8 @@ mod tests {
fn new_add_columns_req(new_tag: &ColumnSchema, new_field: &ColumnSchema) -> AlterTableRequest {
AlterTableRequest {
catalog_name: None,
schema_name: None,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: TABLE_NAME.to_string(),
alter_kind: AlterKind::AddColumns {
columns: vec![
@@ -1061,8 +1114,8 @@ mod tests {
// Then remove memory and my_field from the table.
let req = AlterTableRequest {
catalog_name: None,
schema_name: None,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: TABLE_NAME.to_string(),
alter_kind: AlterKind::DropColumns {
names: vec![String::from("memory"), String::from("my_field")],
@@ -1092,8 +1145,13 @@ mod tests {
#[tokio::test]
async fn test_alter_rename_table() {
let (engine, table_engine, _table, object_store, _dir) =
test_util::setup_mock_engine_and_table().await;
let TestEngineComponents {
table_engine,
storage_engine,
object_store,
dir: _dir,
..
} = test_util::setup_test_engine_and_table().await;
let ctx = EngineContext::default();
// register another table
@@ -1116,8 +1174,8 @@ mod tests {
.expect("create table must succeed");
// test renaming a table with an existing name.
let req = AlterTableRequest {
catalog_name: None,
schema_name: None,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: TABLE_NAME.to_string(),
alter_kind: AlterKind::RenameTable {
new_table_name: another_name.to_string(),
@@ -1132,8 +1190,8 @@ mod tests {
let new_table_name = "test_table";
// test rename table
let req = AlterTableRequest {
catalog_name: None,
schema_name: None,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: TABLE_NAME.to_string(),
alter_kind: AlterKind::RenameTable {
new_table_name: new_table_name.to_string(),
@@ -1143,13 +1201,12 @@ mod tests {
assert_eq!(table.table_info().name, new_table_name);
let table_engine = MitoEngine::new(EngineConfig::default(), engine, object_store);
let table_engine = MitoEngine::new(EngineConfig::default(), storage_engine, object_store);
let open_req = OpenTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: new_table_name.to_string(),
table_id: 1,
region_numbers: vec![0],
};
// test reopen table
@@ -1160,7 +1217,7 @@ mod tests {
.unwrap();
let reopened = reopened
.as_any()
.downcast_ref::<MitoTable<MockRegion>>()
.downcast_ref::<MitoTable<RegionImpl<NoopLogStore>>>()
.unwrap();
assert_eq!(reopened.table_info(), table.table_info());
assert_eq!(reopened.table_info().name, new_table_name);
@@ -1234,7 +1291,11 @@ mod tests {
#[tokio::test]
async fn test_table_delete_rows() {
let (_engine, table, _schema, _dir) = test_util::setup_test_engine_and_table().await;
let TestEngineComponents {
table_ref: table,
dir: _dir,
..
} = test_util::setup_test_engine_and_table().await;
let mut columns_values: HashMap<String, VectorRef> = HashMap::with_capacity(4);
let hosts: VectorRef =


@@ -16,6 +16,7 @@ use std::any::Any;
use common_error::ext::BoxedError;
use common_error::prelude::*;
use store_api::storage::RegionNumber;
use table::metadata::{TableInfoBuilderError, TableMetaBuilderError};
#[derive(Debug, Snafu)]
@@ -154,7 +155,7 @@ pub enum Error {
},
#[snafu(display(
"Projected columnd not found in region, column: {}",
"Projected column not found in region, column: {}",
column_qualified_name
))]
ProjectedColumnNotFound {
@@ -170,6 +171,19 @@ pub enum Error {
#[snafu(backtrace)]
source: table::metadata::ConvertError,
},
#[snafu(display("Cannot find region, table: {}, region: {}", table, region))]
RegionNotFound {
table: String,
region: RegionNumber,
backtrace: Backtrace,
},
#[snafu(display("Invalid region name: {}", region_name))]
InvalidRegionName {
region_name: String,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -198,6 +212,8 @@ impl ErrorExt for Error {
TableInfoNotFound { .. } | ConvertRaw { .. } => StatusCode::Unexpected,
ScanTableManifest { .. } | UpdateTableManifest { .. } => StatusCode::StorageUnavailable,
RegionNotFound { .. } => StatusCode::Internal,
InvalidRegionName { .. } => StatusCode::Internal,
}
}
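A pared-down sketch of how the new RegionNotFound variant is exercised through its generated context selector, assuming snafu 0.7 as a dependency; the real variant also carries a backtrace and lives in a much larger enum:

```rust
use std::collections::HashMap;

use snafu::{OptionExt, Snafu};

// Reduced version of the error shape added in this diff (no backtrace here).
#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Cannot find region, table: {}, region: {}", table, region))]
    RegionNotFound { table: String, region: u32 },
}

type Result<T> = std::result::Result<T, Error>;

// Looking up a region and converting the missing case into the new variant
// via the generated `RegionNotFoundSnafu` context selector.
fn get_region<'a>(
    regions: &'a HashMap<u32, String>,
    table: &str,
    region: u32,
) -> Result<&'a String> {
    regions.get(&region).context(RegionNotFoundSnafu {
        table: table.to_string(),
        region,
    })
}

fn main() {
    let regions = HashMap::from([(0u32, "region-0".to_string())]);
    assert!(get_region(&regions, "demo", 1).is_err());
}
```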


@@ -16,6 +16,7 @@
pub mod test_util;
use std::any::Any;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
@@ -27,17 +28,18 @@ use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::{ExternalSnafu, Result as RecordBatchResult};
use common_recordbatch::{RecordBatch, RecordBatchStream};
use common_telemetry::logging;
use datatypes::schema::Schema;
use futures::task::{Context, Poll};
use futures::Stream;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::manifest::{self, Manifest, ManifestVersion, MetaActionIterator};
use store_api::storage::{
AddColumn, AlterOperation, AlterRequest, ChunkReader, ReadContext, Region, RegionMeta,
ScanRequest, SchemaRef, Snapshot, WriteContext, WriteRequest,
RegionNumber, ScanRequest, SchemaRef, Snapshot, WriteContext, WriteRequest,
};
use table::error as table_error;
use table::error::Result as TableResult;
use table::error::{RegionSchemaMismatchSnafu, Result as TableResult, TableOperationSnafu};
use table::metadata::{
FilterPushDownType, RawTableInfo, TableInfo, TableInfoRef, TableMeta, TableType,
};
@@ -48,8 +50,9 @@ use table::table::scan::SimpleTableScan;
use table::table::{AlterContext, Table};
use tokio::sync::Mutex;
use crate::error;
use crate::error::{
self, ProjectedColumnNotFoundSnafu, Result, ScanTableManifestSnafu, TableInfoNotFoundSnafu,
ProjectedColumnNotFoundSnafu, RegionNotFoundSnafu, Result, ScanTableManifestSnafu,
UpdateTableManifestSnafu,
};
use crate::manifest::action::*;
@@ -65,8 +68,7 @@ pub struct MitoTable<R: Region> {
manifest: TableManifest,
// guarded by `self.alter_lock`
table_info: ArcSwap<TableInfo>,
// TODO(dennis): a table contains multi regions
region: R,
regions: HashMap<RegionNumber, R>,
alter_lock: Mutex<()>,
}
@@ -85,15 +87,29 @@ impl<R: Region> Table for MitoTable<R> {
return Ok(0);
}
let mut write_request = self.region.write_request();
let region = self
.regions
.get(&request.region_number)
.with_context(|| RegionNotFoundSnafu {
table: common_catalog::format_full_table_name(
&request.catalog_name,
&request.schema_name,
&request.table_name,
),
region: request.region_number,
})
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let mut write_request = region.write_request();
let columns_values = request.columns_values;
// columns_values is not empty, it's safe to unwrap
let rows_num = columns_values.values().next().unwrap().len();
logging::trace!(
"Insert into table {} with data: {:?}",
"Insert into table {} region {} with data: {:?}",
self.table_info().name,
region.id(),
columns_values
);
@@ -102,8 +118,7 @@ impl<R: Region> Table for MitoTable<R> {
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let _resp = self
.region
let _resp = region
.write(&WriteContext::default(), write_request)
.await
.map_err(BoxedError::new)
@@ -127,35 +142,64 @@ impl<R: Region> Table for MitoTable<R> {
_limit: Option<usize>,
) -> TableResult<PhysicalPlanRef> {
let read_ctx = ReadContext::default();
let snapshot = self
.region
.snapshot(&read_ctx)
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let mut readers = Vec::with_capacity(self.regions.len());
let mut first_schema: Option<Arc<Schema>> = None;
let projection = self
.transform_projection(&self.region, projection.cloned())
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let filters = filters.into();
let scan_request = ScanRequest {
projection,
filters,
..Default::default()
};
let mut reader = snapshot
.scan(&read_ctx, scan_request)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?
.reader;
let table_info = self.table_info.load();
// TODO(hl): Currently the API between frontend and datanode is under refactoring in
// https://github.com/GreptimeTeam/greptimedb/issues/597 . Once it's finished, query plan
// can carry filtered region info to avoid scanning all regions on datanode.
for region in self.regions.values() {
let snapshot = region
.snapshot(&read_ctx)
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let projection = self
.transform_projection(region, projection.cloned())
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let filters = filters.into();
let scan_request = ScanRequest {
projection,
filters,
..Default::default()
};
let reader = snapshot
.scan(&read_ctx, scan_request)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?
.reader;
let schema = reader.schema().clone();
let stream_schema = schema.clone();
let schema = reader.schema().clone();
if let Some(first_schema) = &first_schema {
// TODO(hl): we assume all regions' schemas are the same, but undergoing table altering
// may make these schemas inconsistent.
ensure!(
first_schema.version() == schema.version(),
RegionSchemaMismatchSnafu {
table: common_catalog::format_full_table_name(
&table_info.catalog_name,
&table_info.schema_name,
&table_info.name
)
}
);
} else {
first_schema = Some(schema);
}
readers.push(reader);
}
// TODO(hl): we assume table contains at least one region, but with region migration this
// assumption may become invalid.
let stream_schema = first_schema.unwrap();
let schema = stream_schema.clone();
let stream = Box::pin(async_stream::try_stream! {
while let Some(chunk) = reader.next_chunk().await.map_err(BoxedError::new).context(ExternalSnafu)? {
yield RecordBatch::new(stream_schema.clone(), chunk.columns)?
for mut reader in readers {
while let Some(chunk) = reader.next_chunk().await.map_err(BoxedError::new).context(ExternalSnafu)? {
yield RecordBatch::new(stream_schema.clone(), chunk.columns)?
}
}
});
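A simplified model of the multi-region scan above, using stand-in Reader, Schema, and Batch types instead of the storage engine's: each region contributes one reader, the readers' schema versions must agree, and their chunks are concatenated in region-iteration order.

```rust
// Stand-in types; the real code goes through region snapshots and chunk readers.
#[derive(Clone, Debug)]
struct Schema { version: u32 }

#[derive(Debug)]
struct Batch { rows: usize }

struct Reader {
    schema: Schema,
    chunks: Vec<Batch>,
}

// One reader per region; schema versions must match, and chunks are emitted
// in the order the regions are visited, mirroring the stream loop above.
fn scan_all(readers: Vec<Reader>) -> Result<(Schema, Vec<Batch>), String> {
    let mut first_schema: Option<Schema> = None;
    let mut batches = Vec::new();
    for reader in readers {
        if let Some(first) = &first_schema {
            // Regions caught mid-alter could disagree; surface that as an error.
            if first.version != reader.schema.version {
                return Err("region schema mismatch".to_string());
            }
        } else {
            first_schema = Some(reader.schema.clone());
        }
        batches.extend(reader.chunks);
    }
    // Like the original, this assumes the table owns at least one region.
    Ok((first_schema.expect("at least one region"), batches))
}

fn main() {
    let readers = vec![
        Reader { schema: Schema { version: 1 }, chunks: vec![Batch { rows: 2 }] },
        Reader { schema: Schema { version: 1 }, chunks: vec![Batch { rows: 3 }] },
    ];
    let (schema, batches) = scan_all(readers).unwrap();
    assert_eq!(schema.version, 1);
    assert_eq!(batches.iter().map(|b| b.rows).sum::<usize>(), 5);
}
```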
@@ -218,24 +262,26 @@ impl<R: Region> Table for MitoTable<R> {
{
// TODO(yingwen): Error handling. Maybe the region need to provide a method to
// validate the request first.
let region = self.region();
let region_meta = region.in_memory_metadata();
let alter_req = AlterRequest {
operation: alter_op,
version: region_meta.version(),
};
// Alter the region.
logging::debug!(
"start altering region {} of table {}, with request {:?}",
region.name(),
table_name,
alter_req,
);
region
.alter(alter_req)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let regions = self.regions();
for region in regions.values() {
let region_meta = region.in_memory_metadata();
let alter_req = AlterRequest {
operation: alter_op.clone(),
version: region_meta.version(),
};
// Alter the region.
logging::debug!(
"start altering region {} of table {}, with request {:?}",
region.name(),
table_name,
alter_req,
);
region
.alter(alter_req)
.await
.map_err(BoxedError::new)
.context(TableOperationSnafu)?;
}
}
// Update in memory metadata of the table.
self.set_table_info(new_info);
@@ -247,30 +293,33 @@ impl<R: Region> Table for MitoTable<R> {
if request.key_column_values.is_empty() {
return Ok(0);
}
let mut rows_deleted = 0;
// TODO(hl): Should be tracked by procedure.
// TODO(hl): Parse delete request into region->keys instead of delete in each region
for region in self.regions.values() {
let mut write_request = region.write_request();
let key_column_values = request.key_column_values.clone();
// Safety: key_column_values isn't empty.
let rows_num = key_column_values.values().next().unwrap().len();
let mut write_request = self.region.write_request();
logging::trace!(
"Delete from table {} where key_columns are: {:?}",
self.table_info().name,
key_column_values
);
let key_column_values = request.key_column_values;
// Safety: key_column_values isn't empty.
let rows_num = key_column_values.values().next().unwrap().len();
logging::trace!(
"Delete from table {} where key_columns are: {:?}",
self.table_info().name,
key_column_values
);
write_request
.delete(key_column_values)
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
self.region
.write(&WriteContext::default(), write_request)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
Ok(rows_num)
write_request
.delete(key_column_values)
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
region
.write(&WriteContext::default(), write_request)
.await
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
rows_deleted += rows_num;
}
Ok(rows_deleted)
}
}
@@ -299,10 +348,14 @@ fn column_qualified_name(table_name: &str, region_name: &str, column_name: &str)
}
impl<R: Region> MitoTable<R> {
fn new(table_info: TableInfo, region: R, manifest: TableManifest) -> Self {
pub(crate) fn new(
table_info: TableInfo,
regions: HashMap<RegionNumber, R>,
manifest: TableManifest,
) -> Self {
Self {
table_info: ArcSwap::new(Arc::new(table_info)),
region,
regions,
manifest,
alter_lock: Mutex::new(()),
}
@@ -368,7 +421,7 @@ impl<R: Region> MitoTable<R> {
table_name: &str,
table_dir: &str,
table_info: TableInfo,
region: R,
regions: HashMap<RegionNumber, R>,
object_store: ObjectStore,
) -> Result<MitoTable<R>> {
let manifest = TableManifest::new(&table_manifest_dir(table_dir), object_store);
@@ -383,25 +436,14 @@ impl<R: Region> MitoTable<R> {
.await
.context(UpdateTableManifestSnafu { table_name })?;
Ok(MitoTable::new(table_info, region, manifest))
Ok(MitoTable::new(table_info, regions, manifest))
}
pub async fn open(
table_name: &str,
table_dir: &str,
region: R,
object_store: ObjectStore,
) -> Result<MitoTable<R>> {
let manifest = TableManifest::new(&table_manifest_dir(table_dir), object_store);
let mut table_info = Self::recover_table_info(table_name, &manifest)
.await?
.context(TableInfoNotFoundSnafu { table_name })?;
table_info.meta.region_numbers = vec![(region.id() & 0xFFFFFFFF) as u32];
Ok(MitoTable::new(table_info, region, manifest))
pub(crate) fn build_manifest(table_dir: &str, object_store: ObjectStore) -> TableManifest {
TableManifest::new(&table_manifest_dir(table_dir), object_store)
}
async fn recover_table_info(
pub(crate) async fn recover_table_info(
table_name: &str,
manifest: &TableManifest,
) -> Result<Option<TableInfo>> {
@@ -449,8 +491,8 @@ impl<R: Region> MitoTable<R> {
}
#[inline]
pub fn region(&self) -> &R {
&self.region
pub fn regions(&self) -> &HashMap<RegionNumber, R> {
&self.regions
}
pub fn set_table_info(&self, table_info: TableInfo) {


@@ -47,6 +47,7 @@ pub fn new_insert_request(
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name,
columns_values,
region_number: 0,
}
}
@@ -115,22 +116,27 @@ fn new_create_request(schema: SchemaRef) -> CreateTableRequest {
}
}
pub async fn setup_test_engine_and_table() -> (
MitoEngine<EngineImpl<NoopLogStore>>,
TableRef,
SchemaRef,
TempDir,
) {
pub struct TestEngineComponents {
pub table_engine: MitoEngine<EngineImpl<NoopLogStore>>,
pub storage_engine: EngineImpl<NoopLogStore>,
pub table_ref: TableRef,
pub schema_ref: SchemaRef,
pub object_store: ObjectStore,
pub dir: TempDir,
}
pub async fn setup_test_engine_and_table() -> TestEngineComponents {
let (dir, object_store) = new_test_object_store("setup_test_engine_and_table").await;
let storage_engine = EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(NoopLogStore::default()),
object_store.clone(),
);
let table_engine = MitoEngine::new(
EngineConfig::default(),
EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(NoopLogStore::default()),
object_store.clone(),
),
object_store,
storage_engine.clone(),
object_store.clone(),
);
let schema = Arc::new(schema_for_test());
@@ -142,7 +148,14 @@ pub async fn setup_test_engine_and_table() -> (
.await
.unwrap();
(table_engine, table, schema, dir)
TestEngineComponents {
table_engine,
storage_engine,
table_ref: table,
schema_ref: schema,
object_store,
dir,
}
}
pub async fn setup_mock_engine_and_table(


@@ -6,11 +6,14 @@ license.workspace = true
[dependencies]
futures = { version = "0.3" }
opendal = { version = "0.24", features = ["layers-tracing", "layers-metrics"] }
opendal = { version = "0.25.1", features = [
"layers-tracing",
"layers-metrics",
] }
tokio.workspace = true
[dev-dependencies]
anyhow = "1.0"
common-telemetry = { path = "../common/telemetry" }
tempdir = "0.3"
uuid = { version = "1", features = ["serde", "v4"] }
uuid.workspace = true

Some files were not shown because too many files have changed in this diff.