mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-06 21:32:58 +00:00
Compare commits
78 Commits
v0.1.0-alp
...
v0.1.0-alp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
249ebc6937 | ||
|
|
c1b8981f61 | ||
|
|
949cd3e3af | ||
|
|
b26982c5d7 | ||
|
|
4fdf26810c | ||
|
|
7f59758e69 | ||
|
|
a521ab5041 | ||
|
|
833216d317 | ||
|
|
90c832b33d | ||
|
|
8959dbcef8 | ||
|
|
2034b40f33 | ||
|
|
55e6be7af1 | ||
|
|
f9bfb121db | ||
|
|
6fb413ae50 | ||
|
|
beb07fc895 | ||
|
|
4275e47bdb | ||
|
|
6720bc5f7c | ||
|
|
4052563248 | ||
|
|
952e1bd626 | ||
|
|
8232015998 | ||
|
|
d82a3a7d58 | ||
|
|
0599465685 | ||
|
|
13d51250ba | ||
|
|
6127706b5b | ||
|
|
2e17e9c4b5 | ||
|
|
b0cbfa7ffb | ||
|
|
20172338e8 | ||
|
|
9c53f9b24c | ||
|
|
6d24f7ebb6 | ||
|
|
68c2de8e45 | ||
|
|
a17dcbc511 | ||
|
|
53ab19ea5a | ||
|
|
84c44cf540 | ||
|
|
020b9936cd | ||
|
|
75dcf2467b | ||
|
|
eea5393f96 | ||
|
|
3d312d389d | ||
|
|
fdc73fb52f | ||
|
|
2a36e26d19 | ||
|
|
baef640fe3 | ||
|
|
5fddb799f7 | ||
|
|
f372229b18 | ||
|
|
4085fc7899 | ||
|
|
30940e692a | ||
|
|
b371ce0f48 | ||
|
|
ac7f52d303 | ||
|
|
051768b735 | ||
|
|
c5b0d2431f | ||
|
|
4038dd4067 | ||
|
|
8be0f05570 | ||
|
|
69f06eec8b | ||
|
|
7b37e99a45 | ||
|
|
c09775d17f | ||
|
|
4a9cf49637 | ||
|
|
9f865b50ab | ||
|
|
b407ebf6bb | ||
|
|
c144a1b20e | ||
|
|
0791c65149 | ||
|
|
62fcb54258 | ||
|
|
2b6b979d5a | ||
|
|
b6fa316c65 | ||
|
|
ca5734edb3 | ||
|
|
5428ad364e | ||
|
|
663c725838 | ||
|
|
c94b544e4a | ||
|
|
f465040acc | ||
|
|
22ae983280 | ||
|
|
e1f326295f | ||
|
|
6d762aa9dc | ||
|
|
d4b09f69ab | ||
|
|
1f0b39cc8d | ||
|
|
dee5ccec9e | ||
|
|
f8788273d5 | ||
|
|
df465308cc | ||
|
|
e7b4d2b9cd | ||
|
|
bf408e3b96 | ||
|
|
73e6e2e01b | ||
|
|
8faa6b0f09 |
4
.env.example
Normal file
4
.env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
# Settings for s3 test
|
||||
GT_S3_BUCKET=S3 bucket
|
||||
GT_S3_ACCESS_KEY_ID=S3 access key id
|
||||
GT_S3_ACCESS_KEY=S3 secret access key
|
||||
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
@@ -13,7 +13,7 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
|
||||
|
||||
## Checklist
|
||||
|
||||
- [] I have written the necessary rustdoc comments.
|
||||
- [] I have added the necessary unit tests and integration tests.
|
||||
- [ ] I have written the necessary rustdoc comments.
|
||||
- [ ] I have added the necessary unit tests and integration tests.
|
||||
|
||||
## Refer to a related PR or issue link (optional)
|
||||
|
||||
24
.github/workflows/develop.yml
vendored
24
.github/workflows/develop.yml
vendored
@@ -26,6 +26,13 @@ env:
|
||||
RUST_TOOLCHAIN: nightly-2022-07-14
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
name: Spell Check with Typos
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: crate-ci/typos@v1.0.4
|
||||
|
||||
check:
|
||||
name: Check
|
||||
if: github.event.pull_request.draft == false
|
||||
@@ -42,6 +49,23 @@ jobs:
|
||||
- name: Run cargo check
|
||||
run: cargo check --workspace --all-targets
|
||||
|
||||
toml:
|
||||
name: Toml Check
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
- name: Install taplo
|
||||
run: cargo install taplo-cli --version ^0.8 --locked
|
||||
- name: Run taplo
|
||||
run: taplo format --check --option "indent_string= "
|
||||
|
||||
# Use coverage to run test.
|
||||
# test:
|
||||
# name: Test Suite
|
||||
|
||||
25
.github/workflows/doc-issue.yml
vendored
Normal file
25
.github/workflows/doc-issue.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
name: Create Issue in docs repo on doc related changes
|
||||
|
||||
on:
|
||||
issues:
|
||||
types:
|
||||
- labeled
|
||||
pull_request_target:
|
||||
types:
|
||||
- labeled
|
||||
|
||||
jobs:
|
||||
doc_issue:
|
||||
if: github.event.label.name == 'doc update required'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: create an issue in doc repo
|
||||
uses: dacbd/create-issue-action@main
|
||||
with:
|
||||
owner: GreptimeTeam
|
||||
repo: docs
|
||||
token: ${{ secrets.DOCS_REPO_TOKEN }}
|
||||
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
|
||||
body: |
|
||||
A document change request is generated from
|
||||
${{ github.event.issue.html_url || github.event.pull_request.html_url }}
|
||||
57
.github/workflows/release.yml
vendored
57
.github/workflows/release.yml
vendored
@@ -2,6 +2,9 @@ on:
|
||||
push:
|
||||
tags:
|
||||
- "v*.*.*"
|
||||
schedule:
|
||||
# At 00:00 on Monday.
|
||||
- cron: '0 0 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
name: Release
|
||||
@@ -9,6 +12,12 @@ name: Release
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2022-07-14
|
||||
|
||||
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
|
||||
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
|
||||
|
||||
# In the future, we can change SCHEDULED_PERIOD to nightly.
|
||||
SCHEDULED_PERIOD: weekly
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build binary
|
||||
@@ -106,8 +115,32 @@ jobs:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
|
||||
- name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}, like v0.1.0-alpha-20221119-weekly.
|
||||
shell: bash
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
buildTime=`date "+%Y%m%d"`
|
||||
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
|
||||
echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV
|
||||
|
||||
- name: Create scheduled build git tag
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
git tag ${{ env.SCHEDULED_BUILD_VERSION }}
|
||||
|
||||
- name: Publish scheduled release # configure the different release title and tags.
|
||||
uses: softprops/action-gh-release@v1
|
||||
if: github.event_name == 'schedule'
|
||||
with:
|
||||
name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
|
||||
tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
|
||||
generate_release_notes: true
|
||||
files: |
|
||||
**/greptime-*
|
||||
|
||||
- name: Publish release
|
||||
uses: softprops/action-gh-release@v1
|
||||
if: github.event_name != 'schedule'
|
||||
with:
|
||||
name: "Release ${{ github.ref_name }}"
|
||||
files: |
|
||||
@@ -145,12 +178,12 @@ jobs:
|
||||
tar xvf greptime-linux-arm64.tgz
|
||||
rm greptime-linux-arm64.tgz
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
- name: Login to UCloud Container Registry
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
registry: uhub.service.ucloud.cn
|
||||
username: ${{ secrets.UCLOUD_USERNAME }}
|
||||
password: ${{ secrets.UCLOUD_PASSWORD }}
|
||||
|
||||
- name: Login to Dockerhub
|
||||
uses: docker/login-action@v2
|
||||
@@ -158,11 +191,20 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
|
||||
shell: bash
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
buildTime=`date "+%Y%m%d"`
|
||||
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
|
||||
echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
|
||||
shell: bash
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
VERSION=${{ github.ref_name }}
|
||||
echo "VERSION=${VERSION:1}" >> $GITHUB_ENV
|
||||
echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
@@ -179,5 +221,6 @@ jobs:
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
greptime/greptimedb:latest
|
||||
greptime/greptimedb:${{ env.VERSION }}
|
||||
ghcr.io/greptimeteam/greptimedb:${{ env.VERSION }}
|
||||
greptime/greptimedb:${{ env.IMAGE_TAG }}
|
||||
uhub.service.ucloud.cn/greptime/greptimedb:latest
|
||||
uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -18,6 +18,7 @@ debug/
|
||||
|
||||
# JetBrains IDE config directory
|
||||
.idea/
|
||||
*.iml
|
||||
|
||||
# VSCode IDE config directory
|
||||
.vscode/
|
||||
@@ -31,3 +32,6 @@ logs/
|
||||
|
||||
# Benchmark dataset
|
||||
benchmarks/data
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
@@ -9,7 +9,7 @@ repos:
|
||||
rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
|
||||
hooks:
|
||||
- id: cargo-sort
|
||||
args: ["--workspace", "--print"]
|
||||
args: ["--workspace"]
|
||||
|
||||
- repo: https://github.com/doublify/pre-commit-rust
|
||||
rev: v1.0
|
||||
|
||||
1315
Cargo.lock
generated
1315
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -11,28 +11,32 @@ members = [
|
||||
"src/common/function",
|
||||
"src/common/function-macro",
|
||||
"src/common/grpc",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/query",
|
||||
"src/common/recordbatch",
|
||||
"src/common/runtime",
|
||||
"src/common/substrait",
|
||||
"src/common/insert",
|
||||
"src/common/telemetry",
|
||||
"src/common/time",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
"src/datatypes2",
|
||||
"src/frontend",
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
"src/meta-srv",
|
||||
"src/mito",
|
||||
"src/object-store",
|
||||
"src/query",
|
||||
"src/script",
|
||||
"src/servers",
|
||||
"src/session",
|
||||
"src/sql",
|
||||
"src/storage",
|
||||
"src/store-api",
|
||||
"src/table",
|
||||
"src/mito",
|
||||
"tests-integration",
|
||||
"tests/runner",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
|
||||
15
README.md
15
README.md
@@ -53,8 +53,9 @@ To compile GreptimeDB from source, you'll need:
|
||||
install correct Rust version for you.
|
||||
- Protobuf: `protoc` is required for compiling `.proto` files. `protobuf` is
|
||||
available from major package manager on macos and linux distributions. You can
|
||||
find an installation instructions
|
||||
[here](https://grpc.io/docs/protoc-installation/).
|
||||
find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
|
||||
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
|
||||
keyword. You can check it with `protoc --version`.
|
||||
|
||||
#### Build with Docker
|
||||
|
||||
@@ -114,7 +115,10 @@ about Kubernetes deployment, check our [docs](https://docs.greptime.com/).
|
||||
4. Query the data:
|
||||
|
||||
```SQL
|
||||
mysql> SELECT * FROM monitor;
|
||||
SELECT * FROM monitor;
|
||||
```
|
||||
|
||||
```TEXT
|
||||
+-------+---------------------+------+--------+
|
||||
| host | ts | cpu | memory |
|
||||
+-------+---------------------+------+--------+
|
||||
@@ -156,6 +160,8 @@ break things. Benchmark on development branch may not represent its potential
|
||||
performance. We release pre-built binaries constantly for functional
|
||||
evaluation. Do not use it in production at the moment.
|
||||
|
||||
For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).
|
||||
|
||||
## Community
|
||||
|
||||
Our core team is thrilled too see you participate in any ways you like. When you are stuck, try to
|
||||
@@ -169,7 +175,7 @@ community, please check out:
|
||||
|
||||
In addition, you may:
|
||||
|
||||
- View our official [Blog](https://greptime.com/blog)
|
||||
- View our official [Blog](https://greptime.com/blogs/index)
|
||||
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
|
||||
- Follow us on [Twitter](https://twitter.com/greptime)
|
||||
|
||||
@@ -189,3 +195,4 @@ Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
|
||||
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
|
||||
- [OpenDAL](https://github.com/datafuselabs/opendal) from [Datafuse Labs](https://github.com/datafuselabs) gives GreptimeDB a very general and elegant data access abstraction layer.
|
||||
- GreptimeDB’s meta service is based on [etcd](https://etcd.io/).
|
||||
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.
|
||||
|
||||
@@ -28,9 +28,8 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use clap::Parser;
|
||||
use client::admin::Admin;
|
||||
use client::api::v1::codec::InsertBatch;
|
||||
use client::api::v1::column::Values;
|
||||
use client::api::v1::{insert_expr, Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
|
||||
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
|
||||
use client::{Client, Database, Select};
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
|
||||
@@ -100,16 +99,13 @@ async fn write_data(
|
||||
|
||||
for record_batch in record_batch_reader {
|
||||
let record_batch = record_batch.unwrap();
|
||||
let row_count = record_batch.num_rows();
|
||||
let insert_batch = convert_record_batch(record_batch).into();
|
||||
let (columns, row_count) = convert_record_batch(record_batch);
|
||||
let insert_expr = InsertExpr {
|
||||
schema_name: "public".to_string(),
|
||||
table_name: TABLE_NAME.to_string(),
|
||||
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
|
||||
values: vec![insert_batch],
|
||||
})),
|
||||
options: HashMap::default(),
|
||||
region_number: 0,
|
||||
columns,
|
||||
row_count,
|
||||
};
|
||||
let now = Instant::now();
|
||||
db.insert(insert_expr).await.unwrap();
|
||||
@@ -125,7 +121,7 @@ async fn write_data(
|
||||
total_rpc_elapsed_ms
|
||||
}
|
||||
|
||||
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
|
||||
fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
|
||||
let schema = record_batch.schema();
|
||||
let fields = schema.fields();
|
||||
let row_count = record_batch.num_rows();
|
||||
@@ -143,10 +139,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
|
||||
columns.push(column);
|
||||
}
|
||||
|
||||
InsertBatch {
|
||||
columns,
|
||||
row_count: row_count as _,
|
||||
}
|
||||
(columns, row_count as _)
|
||||
}
|
||||
|
||||
fn build_values(column: &ArrayRef) -> Values {
|
||||
|
||||
@@ -7,3 +7,4 @@ coverage:
|
||||
patch: off
|
||||
ignore:
|
||||
- "**/error*.rs" # ignore all error.rs files
|
||||
- "tests/runner/*.rs" # ignore integration test runner
|
||||
|
||||
@@ -3,15 +3,16 @@ mode = 'distributed'
|
||||
rpc_addr = '127.0.0.1:3001'
|
||||
wal_dir = '/tmp/greptimedb/wal'
|
||||
rpc_runtime_size = 8
|
||||
mysql_addr = '127.0.0.1:3306'
|
||||
mysql_addr = '127.0.0.1:4406'
|
||||
mysql_runtime_size = 4
|
||||
enable_memory_catalog = false
|
||||
|
||||
[storage]
|
||||
type = 'File'
|
||||
data_dir = '/tmp/greptimedb/data/'
|
||||
|
||||
[meta_client_opts]
|
||||
metasrv_addr = '1.1.1.1:3002'
|
||||
metasrv_addrs = ['127.0.0.1:3002']
|
||||
timeout_millis = 3000
|
||||
connect_timeout_millis = 5000
|
||||
tcp_nodelay = false
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
mode = 'distributed'
|
||||
datanode_rpc_addr = '127.0.0.1:3001'
|
||||
http_addr = '127.0.0.1:4000'
|
||||
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
timeout = "30s"
|
||||
|
||||
[meta_client_opts]
|
||||
metasrv_addr = '1.1.1.1:3002'
|
||||
metasrv_addrs = ['127.0.0.1:3002']
|
||||
timeout_millis = 3000
|
||||
connect_timeout_millis = 5000
|
||||
tcp_nodelay = false
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
node_id = 0
|
||||
mode = 'standalone'
|
||||
http_addr = '127.0.0.1:4000'
|
||||
datanode_mysql_addr = '127.0.0.1:3306'
|
||||
datanode_mysql_runtime_size = 4
|
||||
wal_dir = '/tmp/greptimedb/wal/'
|
||||
enable_memory_catalog = false
|
||||
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
timeout = "30s"
|
||||
|
||||
[storage]
|
||||
type = 'File'
|
||||
|
||||
@@ -55,7 +55,7 @@ The DataFusion basically execute aggregate like this:
|
||||
2. Call `update_batch` on each accumulator with partitioned data, to let you update your aggregate calculation.
|
||||
3. Call `state` to get each accumulator's internal state, the medial calculation result.
|
||||
4. Call `merge_batch` to merge all accumulator's internal state to one.
|
||||
5. Execute `evalute` on the chosen one to get the final calculation result.
|
||||
5. Execute `evaluate` on the chosen one to get the final calculation result.
|
||||
|
||||
Once you know the meaning of each method, you can easily write your accumulator. You can refer to `Median` accumulator or `SUM` accumulator defined in file `my_sum_udaf_example.rs` for more details.
|
||||
|
||||
@@ -63,7 +63,7 @@ Once you know the meaning of each method, you can easily write your accumulator.
|
||||
|
||||
You can call `register_aggregate_function` method in query engine to register your aggregate function. To do that, you have to new an instance of struct `AggregateFunctionMeta`. The struct has three fields, first is the name of your aggregate function's name. The function name is case-sensitive due to DataFusion's restriction. We strongly recommend using lowercase for your name. If you have to use uppercase name, wrap your aggregate function with quotation marks. For example, if you define an aggregate function named "my_aggr", you can use "`SELECT MY_AGGR(x)`"; if you define "my_AGGR", you have to use "`SELECT "my_AGGR"(x)`".
|
||||
|
||||
The second field is arg_counts ,the count of the arguments. Like accumulator `percentile`, caculating the p_number of the column. We need to input the value of column and the value of p to cacalate, and so the count of the arguments is two.
|
||||
The second field is arg_counts ,the count of the arguments. Like accumulator `percentile`, calculating the p_number of the column. We need to input the value of column and the value of p to cacalate, and so the count of the arguments is two.
|
||||
|
||||
The third field is a function about how to create your accumulator creator that you defined in step 1 above. Create creator, that's a bit intertwined, but it is how we make DataFusion use a newly created aggregate function each time it executes a SQL, preventing the stored input types from affecting each other. The key detail can be starting looking at our `DfContextProviderAdapter` struct's `get_aggregate_meta` method.
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@ license = "Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
common-base = { path = "../common/base" }
|
||||
common-time = { path = "../common/time" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
prost = "0.11"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
@@ -20,9 +20,7 @@ fn main() {
|
||||
.file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
|
||||
.compile(
|
||||
&[
|
||||
"greptime/v1/insert.proto",
|
||||
"greptime/v1/select.proto",
|
||||
"greptime/v1/physical_plan.proto",
|
||||
"greptime/v1/greptime.proto",
|
||||
"greptime/v1/meta/common.proto",
|
||||
"greptime/v1/meta/heartbeat.proto",
|
||||
|
||||
@@ -20,6 +20,7 @@ message AdminExpr {
|
||||
CreateExpr create = 2;
|
||||
AlterExpr alter = 3;
|
||||
CreateDatabaseExpr create_database = 4;
|
||||
DropTableExpr drop_table = 5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,18 +52,33 @@ message AlterExpr {
|
||||
string table_name = 3;
|
||||
oneof kind {
|
||||
AddColumns add_columns = 4;
|
||||
DropColumns drop_columns = 5;
|
||||
}
|
||||
}
|
||||
|
||||
message DropTableExpr {
|
||||
string catalog_name = 1;
|
||||
string schema_name = 2;
|
||||
string table_name = 3;
|
||||
}
|
||||
|
||||
message AddColumns {
|
||||
repeated AddColumn add_columns = 1;
|
||||
}
|
||||
|
||||
message DropColumns {
|
||||
repeated DropColumn drop_columns = 1;
|
||||
}
|
||||
|
||||
message AddColumn {
|
||||
ColumnDef column_def = 1;
|
||||
bool is_key = 2;
|
||||
}
|
||||
|
||||
message DropColumn {
|
||||
string name = 1;
|
||||
}
|
||||
|
||||
message CreateDatabaseExpr {
|
||||
//TODO(hl): maybe rename to schema_name?
|
||||
string database_name = 1;
|
||||
|
||||
@@ -2,6 +2,7 @@ syntax = "proto3";
|
||||
|
||||
package greptime.v1;
|
||||
|
||||
import "greptime/v1/column.proto";
|
||||
import "greptime/v1/common.proto";
|
||||
|
||||
message DatabaseRequest {
|
||||
@@ -28,39 +29,23 @@ message SelectExpr {
|
||||
oneof expr {
|
||||
string sql = 1;
|
||||
bytes logical_plan = 2;
|
||||
PhysicalPlan physical_plan = 15;
|
||||
}
|
||||
}
|
||||
|
||||
message PhysicalPlan {
|
||||
bytes original_ql = 1;
|
||||
bytes plan = 2;
|
||||
}
|
||||
|
||||
message InsertExpr {
|
||||
string schema_name = 1;
|
||||
string table_name = 2;
|
||||
|
||||
message Values {
|
||||
repeated bytes values = 1;
|
||||
}
|
||||
// Data is represented here.
|
||||
repeated Column columns = 3;
|
||||
|
||||
oneof expr {
|
||||
Values values = 3;
|
||||
// The row_count of all columns, which include null and non-null values.
|
||||
//
|
||||
// Note: the row_count of all columns in a InsertExpr must be same.
|
||||
uint32 row_count = 4;
|
||||
|
||||
// TODO(LFC): Remove field "sql" in InsertExpr.
|
||||
// When Frontend instance received an insertion SQL (`insert into ...`), it's anticipated to parse the SQL and
|
||||
// assemble the values to insert to feed Datanode. In other words, inserting data through Datanode instance's GRPC
|
||||
// interface shouldn't use SQL directly.
|
||||
// Then why the "sql" field exists here? It's because the Frontend needs table schema to create the values to insert,
|
||||
// which is currently not able to find anywhere. (Maybe the table schema is suppose to be fetched from Meta?)
|
||||
// The "sql" field is meant to be removed in the future.
|
||||
string sql = 4;
|
||||
}
|
||||
|
||||
/// The region number of current insert request.
|
||||
// The region number of current insert request.
|
||||
uint32 region_number = 5;
|
||||
map<string, bytes> options = 6;
|
||||
}
|
||||
|
||||
// TODO(jiachun)
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package greptime.v1.codec;
|
||||
|
||||
import "greptime/v1/column.proto";
|
||||
|
||||
message InsertBatch {
|
||||
repeated Column columns = 1;
|
||||
uint32 row_count = 2;
|
||||
}
|
||||
|
||||
message RegionNumber {
|
||||
uint32 id = 1;
|
||||
}
|
||||
@@ -39,7 +39,7 @@ message NodeStat {
|
||||
uint64 wcus = 2;
|
||||
// Table number in this node
|
||||
uint64 table_num = 3;
|
||||
// Regon number in this node
|
||||
// Region number in this node
|
||||
uint64 region_num = 4;
|
||||
|
||||
double cpu_usage = 5;
|
||||
|
||||
@@ -5,6 +5,8 @@ package greptime.v1.meta;
|
||||
import "greptime/v1/meta/common.proto";
|
||||
|
||||
service Router {
|
||||
rpc Create(CreateRequest) returns (RouteResponse) {}
|
||||
|
||||
// Fetch routing information for tables. The smallest unit is the complete
|
||||
// routing information(all regions) of a table.
|
||||
//
|
||||
@@ -26,7 +28,14 @@ service Router {
|
||||
//
|
||||
rpc Route(RouteRequest) returns (RouteResponse) {}
|
||||
|
||||
rpc Create(CreateRequest) returns (RouteResponse) {}
|
||||
rpc Delete(DeleteRequest) returns (RouteResponse) {}
|
||||
}
|
||||
|
||||
message CreateRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
repeated Partition partitions = 3;
|
||||
}
|
||||
|
||||
message RouteRequest {
|
||||
@@ -35,6 +44,12 @@ message RouteRequest {
|
||||
repeated TableName table_names = 2;
|
||||
}
|
||||
|
||||
message DeleteRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
}
|
||||
|
||||
message RouteResponse {
|
||||
ResponseHeader header = 1;
|
||||
|
||||
@@ -42,13 +57,6 @@ message RouteResponse {
|
||||
repeated TableRoute table_routes = 3;
|
||||
}
|
||||
|
||||
message CreateRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
repeated Partition partitions = 3;
|
||||
}
|
||||
|
||||
message TableRoute {
|
||||
Table table = 1;
|
||||
repeated RegionRoute region_routes = 2;
|
||||
|
||||
@@ -20,6 +20,9 @@ service Store {
|
||||
|
||||
// DeleteRange deletes the given range from the key-value store.
|
||||
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
|
||||
|
||||
// MoveValue atomically renames the key to the given updated key.
|
||||
rpc MoveValue(MoveValueRequest) returns (MoveValueResponse);
|
||||
}
|
||||
|
||||
message RangeRequest {
|
||||
@@ -136,3 +139,21 @@ message DeleteRangeResponse {
|
||||
// returned.
|
||||
repeated KeyValue prev_kvs = 3;
|
||||
}
|
||||
|
||||
message MoveValueRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
// If from_key dose not exist, return the value of to_key (if it exists).
|
||||
// If from_key exists, move the value of from_key to to_key (i.e. rename),
|
||||
// and return the value.
|
||||
bytes from_key = 2;
|
||||
bytes to_key = 3;
|
||||
}
|
||||
|
||||
message MoveValueResponse {
|
||||
ResponseHeader header = 1;
|
||||
|
||||
// If from_key dose not exist, return the value of to_key (if it exists).
|
||||
// If from_key exists, return the value of from_key.
|
||||
KeyValue kv = 2;
|
||||
}
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package greptime.v1.codec;
|
||||
|
||||
message PhysicalPlanNode {
|
||||
oneof PhysicalPlanType {
|
||||
ProjectionExecNode projection = 1;
|
||||
MockInputExecNode mock = 99;
|
||||
// TODO(fys): impl other physical plan node
|
||||
}
|
||||
}
|
||||
|
||||
message ProjectionExecNode {
|
||||
PhysicalPlanNode input = 1;
|
||||
repeated PhysicalExprNode expr = 2;
|
||||
repeated string expr_name = 3;
|
||||
}
|
||||
|
||||
message PhysicalExprNode {
|
||||
oneof ExprType {
|
||||
PhysicalColumn column = 1;
|
||||
// TODO(fys): impl other physical expr node
|
||||
}
|
||||
}
|
||||
|
||||
message PhysicalColumn {
|
||||
string name = 1;
|
||||
uint64 index = 2;
|
||||
}
|
||||
|
||||
message MockInputExecNode {
|
||||
string name = 1;
|
||||
}
|
||||
@@ -33,6 +33,28 @@ pub enum Error {
|
||||
from: ConcreteDataType,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to convert column default constraint, column: {}, source: {}",
|
||||
column,
|
||||
source
|
||||
))]
|
||||
ConvertColumnDefaultConstraint {
|
||||
column: String,
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Invalid column default constraint, column: {}, source: {}",
|
||||
column,
|
||||
source
|
||||
))]
|
||||
InvalidColumnDefaultConstraint {
|
||||
column: String,
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -40,6 +62,8 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments,
|
||||
Error::IntoColumnDataType { .. } => StatusCode::Unexpected,
|
||||
Error::ConvertColumnDefaultConstraint { source, .. }
|
||||
| Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
fn backtrace_opt(&self) -> Option<&Backtrace> {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
pub use prost::DecodeError;
|
||||
use prost::Message;
|
||||
|
||||
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionNumber, SelectResult};
|
||||
use crate::v1::codec::SelectResult;
|
||||
use crate::v1::meta::TableRouteValue;
|
||||
|
||||
macro_rules! impl_convert_with_bytes {
|
||||
@@ -36,10 +36,7 @@ macro_rules! impl_convert_with_bytes {
|
||||
};
|
||||
}
|
||||
|
||||
impl_convert_with_bytes!(InsertBatch);
|
||||
impl_convert_with_bytes!(SelectResult);
|
||||
impl_convert_with_bytes!(PhysicalPlanNode);
|
||||
impl_convert_with_bytes!(RegionNumber);
|
||||
impl_convert_with_bytes!(TableRouteValue);
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -51,52 +48,6 @@ mod tests {
|
||||
|
||||
const SEMANTIC_TAG: i32 = 0;
|
||||
|
||||
#[test]
|
||||
fn test_convert_insert_batch() {
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let bytes: Vec<u8> = insert_batch.into();
|
||||
let insert: InsertBatch = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(8, insert.row_count);
|
||||
assert_eq!(1, insert.columns.len());
|
||||
|
||||
let column = &insert.columns[0];
|
||||
assert_eq!("foo", column.column_name);
|
||||
assert_eq!(SEMANTIC_TAG, column.semantic_type);
|
||||
assert_eq!(vec![1], column.null_mask);
|
||||
assert_eq!(
|
||||
vec![2, 3, 4, 5, 6, 7, 8],
|
||||
column.values.as_ref().unwrap().i32_values
|
||||
);
|
||||
}
|
||||
|
||||
#[should_panic]
|
||||
#[test]
|
||||
fn test_convert_insert_batch_wrong() {
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let mut bytes: Vec<u8> = insert_batch.into();
|
||||
|
||||
// modify some bytes
|
||||
bytes[0] = 0b1;
|
||||
bytes[1] = 0b1;
|
||||
|
||||
let insert: InsertBatch = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(8, insert.row_count);
|
||||
assert_eq!(1, insert.columns.len());
|
||||
|
||||
let column = &insert.columns[0];
|
||||
assert_eq!("foo", column.column_name);
|
||||
assert_eq!(SEMANTIC_TAG, column.semantic_type);
|
||||
assert_eq!(vec![1], column.null_mask);
|
||||
assert_eq!(
|
||||
vec![2, 3, 4, 5, 6, 7, 8],
|
||||
column.values.as_ref().unwrap().i32_values
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_select_result() {
|
||||
let select_result = mock_select_result();
|
||||
@@ -143,35 +94,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_region_id() {
|
||||
let region_id = RegionNumber { id: 12 };
|
||||
|
||||
let bytes: Vec<u8> = region_id.into();
|
||||
let region_id: RegionNumber = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(12, region_id.id);
|
||||
}
|
||||
|
||||
fn mock_insert_batch() -> InsertBatch {
|
||||
let values = column::Values {
|
||||
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
|
||||
..Default::default()
|
||||
};
|
||||
let null_mask = vec![1];
|
||||
let column = Column {
|
||||
column_name: "foo".to_string(),
|
||||
semantic_type: SEMANTIC_TAG,
|
||||
values: Some(values),
|
||||
null_mask,
|
||||
..Default::default()
|
||||
};
|
||||
InsertBatch {
|
||||
columns: vec![column],
|
||||
row_count: 8,
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_select_result() -> SelectResult {
|
||||
let values = column::Values {
|
||||
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
|
||||
|
||||
@@ -21,4 +21,5 @@ pub mod codec {
|
||||
tonic::include_proto!("greptime.v1.codec");
|
||||
}
|
||||
|
||||
mod column_def;
|
||||
pub mod meta;
|
||||
|
||||
38
src/api/src/v1/column_def.rs
Normal file
38
src/api/src/v1/column_def.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::helper::ColumnDataTypeWrapper;
|
||||
use crate::v1::ColumnDef;
|
||||
|
||||
impl ColumnDef {
|
||||
pub fn try_as_column_schema(&self) -> Result<ColumnSchema> {
|
||||
let data_type = ColumnDataTypeWrapper::try_new(self.datatype)?;
|
||||
|
||||
let constraint = match &self.default_constraint {
|
||||
None => None,
|
||||
Some(v) => Some(
|
||||
ColumnDefaultConstraint::try_from(&v[..])
|
||||
.context(error::ConvertColumnDefaultConstraintSnafu { column: &self.name })?,
|
||||
),
|
||||
};
|
||||
|
||||
ColumnSchema::new(&self.name, data_type.into(), self.is_nullable)
|
||||
.with_default_constraint(constraint)
|
||||
.context(error::InvalidColumnDefaultConstraintSnafu { column: &self.name })
|
||||
}
|
||||
}
|
||||
@@ -145,10 +145,12 @@ gen_set_header!(HeartbeatRequest);
|
||||
gen_set_header!(RouteRequest);
|
||||
gen_set_header!(CreateRequest);
|
||||
gen_set_header!(RangeRequest);
|
||||
gen_set_header!(DeleteRequest);
|
||||
gen_set_header!(PutRequest);
|
||||
gen_set_header!(BatchPutRequest);
|
||||
gen_set_header!(CompareAndPutRequest);
|
||||
gen_set_header!(DeleteRangeRequest);
|
||||
gen_set_header!(MoveValueRequest);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -27,7 +27,6 @@ futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
lazy_static = "1.4"
|
||||
meta-client = { path = "../meta-client" }
|
||||
opendal = "0.17"
|
||||
regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
@@ -39,9 +38,8 @@ tokio = { version = "1.18", features = ["full"] }
|
||||
[dev-dependencies]
|
||||
chrono = "0.4"
|
||||
log-store = { path = "../log-store" }
|
||||
mito = { path = "../mito", features = ["test"] }
|
||||
object-store = { path = "../object-store" }
|
||||
opendal = "0.17"
|
||||
storage = { path = "../storage" }
|
||||
mito = { path = "../mito" }
|
||||
tempdir = "0.3"
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
|
||||
@@ -94,7 +94,7 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Table {} already exists", table))]
|
||||
#[snafu(display("Table `{}` already exists", table))]
|
||||
TableExists { table: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Schema {} already exists", schema))]
|
||||
@@ -109,6 +109,12 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Operation {} not implemented yet", operation))]
|
||||
Unimplemented {
|
||||
operation: String,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
|
||||
OpenTable {
|
||||
table_info: String,
|
||||
@@ -185,8 +191,8 @@ pub enum Error {
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid table schema in catalog, source: {:?}", source))]
|
||||
InvalidSchemaInCatalog {
|
||||
#[snafu(display("Invalid table info in catalog, source: {}", source))]
|
||||
InvalidTableInfoInCatalog {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
@@ -216,11 +222,12 @@ impl ErrorExt for Error {
|
||||
| Error::ValueDeserialize { .. }
|
||||
| Error::Io { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
Error::RegisterTable { .. } => StatusCode::Internal,
|
||||
|
||||
Error::ReadSystemCatalog { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
|
||||
Error::InvalidCatalogValue { source, .. } => source.status_code(),
|
||||
|
||||
Error::RegisterTable { .. } => StatusCode::Internal,
|
||||
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
|
||||
Error::SchemaExists { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
@@ -233,8 +240,10 @@ impl ErrorExt for Error {
|
||||
Error::SystemCatalogTableScan { source } => source.status_code(),
|
||||
Error::SystemCatalogTableScanExec { source } => source.status_code(),
|
||||
Error::InvalidTableSchema { source, .. } => source.status_code(),
|
||||
Error::InvalidSchemaInCatalog { .. } => StatusCode::Unexpected,
|
||||
Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
|
||||
Error::Internal { source, .. } => source.status_code(),
|
||||
|
||||
Error::Unimplemented { .. } => StatusCode::Unsupported,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,44 +15,56 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use common_catalog::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::{RawTableMeta, TableId, TableVersion};
|
||||
use table::metadata::{RawTableInfo, TableId, TableVersion};
|
||||
|
||||
use crate::consts::{
|
||||
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
|
||||
};
|
||||
use crate::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
|
||||
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
|
||||
|
||||
lazy_static! {
|
||||
static ref CATALOG_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
|
||||
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})$",
|
||||
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
|
||||
SCHEMA_KEY_PREFIX
|
||||
"^{}-({})-({})$",
|
||||
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)$",
|
||||
TABLE_GLOBAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})$",
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
@@ -128,15 +140,18 @@ impl TableGlobalKey {
|
||||
/// table id, table meta(schema...), region id allocation across datanodes.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableGlobalValue {
|
||||
/// Table id is the same across all datanodes.
|
||||
pub id: TableId,
|
||||
/// Id of datanode that created the global table info kv. only for debugging.
|
||||
pub node_id: u64,
|
||||
// TODO(LFC): Maybe remove it?
|
||||
/// Allocation of region ids across all datanodes.
|
||||
pub regions_id_map: HashMap<u64, Vec<u32>>,
|
||||
// TODO(LFC): Too much for assembling the table schema that DistTable needs, find another way.
|
||||
pub meta: RawTableMeta,
|
||||
pub table_info: RawTableInfo,
|
||||
}
|
||||
|
||||
impl TableGlobalValue {
|
||||
pub fn table_id(&self) -> TableId {
|
||||
self.table_info.ident.table_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Table regional info that varies between datanode, so it contains a `node_id` field.
|
||||
@@ -258,6 +273,10 @@ macro_rules! define_catalog_value {
|
||||
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
|
||||
}
|
||||
|
||||
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
|
||||
Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
@@ -279,6 +298,7 @@ define_catalog_value!(
|
||||
mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
|
||||
use table::metadata::{RawTableMeta, TableIdent, TableType};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -339,11 +359,23 @@ mod tests {
|
||||
region_numbers: vec![1],
|
||||
};
|
||||
|
||||
let table_info = RawTableInfo {
|
||||
ident: TableIdent {
|
||||
table_id: 42,
|
||||
version: 1,
|
||||
},
|
||||
name: "table_1".to_string(),
|
||||
desc: Some("blah".to_string()),
|
||||
catalog_name: "catalog_1".to_string(),
|
||||
schema_name: "schema_1".to_string(),
|
||||
meta,
|
||||
table_type: TableType::Base,
|
||||
};
|
||||
|
||||
let value = TableGlobalValue {
|
||||
id: 42,
|
||||
node_id: 0,
|
||||
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
|
||||
meta,
|
||||
table_info,
|
||||
};
|
||||
let serialized = serde_json::to_string(&value).unwrap();
|
||||
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
|
||||
@@ -15,6 +15,7 @@
|
||||
#![feature(assert_matches)]
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::info;
|
||||
@@ -28,6 +29,7 @@ use crate::error::{CreateTableSnafu, Result};
|
||||
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
|
||||
|
||||
pub mod error;
|
||||
pub mod helper;
|
||||
pub mod local;
|
||||
pub mod remote;
|
||||
pub mod schema;
|
||||
@@ -83,12 +85,17 @@ pub trait CatalogManager: CatalogList {
|
||||
/// Starts a catalog manager.
|
||||
async fn start(&self) -> Result<()>;
|
||||
|
||||
/// Registers a table given given catalog/schema to catalog manager,
|
||||
/// returns table registered.
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
|
||||
/// Registers a table within given catalog/schema to catalog manager,
|
||||
/// returns whether the table registered.
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
|
||||
|
||||
/// Register a schema with catalog name and schema name.
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize>;
|
||||
/// Deregisters a table within given catalog/schema to catalog manager,
|
||||
/// returns whether the table deregistered.
|
||||
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;
|
||||
|
||||
/// Register a schema with catalog name and schema name. Retuens whether the
|
||||
/// schema registered.
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
|
||||
|
||||
/// Register a system table, should be called before starting the manager.
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest)
|
||||
@@ -123,6 +130,25 @@ pub struct RegisterTableRequest {
|
||||
pub table: TableRef,
|
||||
}
|
||||
|
||||
impl Debug for RegisterTableRequest {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RegisterTableRequest")
|
||||
.field("catalog", &self.catalog)
|
||||
.field("schema", &self.schema)
|
||||
.field("table_name", &self.table_name)
|
||||
.field("table_id", &self.table_id)
|
||||
.field("table", &self.table.table_info())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DeregisterTableRequest {
|
||||
pub catalog: String,
|
||||
pub schema: String,
|
||||
pub table_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegisterSchemaRequest {
|
||||
pub catalog: String,
|
||||
|
||||
@@ -21,7 +21,7 @@ use common_catalog::consts::{
|
||||
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
|
||||
};
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::{error, info};
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, UInt8Vector};
|
||||
use futures_util::lock::Mutex;
|
||||
@@ -36,7 +36,7 @@ use table::TableRef;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
|
||||
SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
|
||||
TableExistsSnafu, TableNotFoundSnafu,
|
||||
TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
|
||||
};
|
||||
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
|
||||
use crate::system::{
|
||||
@@ -46,8 +46,8 @@ use crate::system::{
|
||||
use crate::tables::SystemCatalog;
|
||||
use crate::{
|
||||
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
|
||||
CatalogProvider, CatalogProviderRef, RegisterSchemaRequest, RegisterSystemTableRequest,
|
||||
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
|
||||
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
|
||||
@@ -57,6 +57,7 @@ pub struct LocalCatalogManager {
|
||||
engine: TableEngineRef,
|
||||
next_table_id: AtomicU32,
|
||||
init_lock: Mutex<bool>,
|
||||
register_lock: Mutex<()>,
|
||||
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
|
||||
}
|
||||
|
||||
@@ -76,6 +77,7 @@ impl LocalCatalogManager {
|
||||
engine,
|
||||
next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
|
||||
init_lock: Mutex::new(false),
|
||||
register_lock: Mutex::new(()),
|
||||
system_table_requests: Mutex::new(Vec::default()),
|
||||
})
|
||||
}
|
||||
@@ -241,6 +243,7 @@ impl LocalCatalogManager {
|
||||
schema_name: t.schema_name.clone(),
|
||||
table_name: t.table_name.clone(),
|
||||
table_id: t.table_id,
|
||||
region_numbers: vec![0],
|
||||
};
|
||||
|
||||
let option = self
|
||||
@@ -308,7 +311,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
self.init().await
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let started = self.init_lock.lock().await;
|
||||
|
||||
ensure!(
|
||||
@@ -331,27 +334,50 @@ impl CatalogManager for LocalCatalogManager {
|
||||
schema_info: format!("{}.{}", catalog_name, schema_name),
|
||||
})?;
|
||||
|
||||
if schema.table_exist(&request.table_name)? {
|
||||
return TableExistsSnafu {
|
||||
table: format_full_table_name(catalog_name, schema_name, &request.table_name),
|
||||
{
|
||||
let _lock = self.register_lock.lock().await;
|
||||
if let Some(existing) = schema.table(&request.table_name)? {
|
||||
if existing.table_info().ident.table_id != request.table_id {
|
||||
error!(
|
||||
"Unexpected table register request: {:?}, existing: {:?}",
|
||||
request,
|
||||
existing.table_info()
|
||||
);
|
||||
return TableExistsSnafu {
|
||||
table: format_full_table_name(
|
||||
catalog_name,
|
||||
schema_name,
|
||||
&request.table_name,
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
// Try to register table with same table id, just ignore.
|
||||
Ok(false)
|
||||
} else {
|
||||
// table does not exist
|
||||
self.system
|
||||
.register_table(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
request.table_name.clone(),
|
||||
request.table_id,
|
||||
)
|
||||
.await?;
|
||||
schema.register_table(request.table_name, request.table)?;
|
||||
Ok(true)
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
self.system
|
||||
.register_table(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
request.table_name.clone(),
|
||||
request.table_id,
|
||||
)
|
||||
.await?;
|
||||
|
||||
schema.register_table(request.table_name, request.table)?;
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
|
||||
UnimplementedSnafu {
|
||||
operation: "deregister table",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let started = self.init_lock.lock().await;
|
||||
ensure!(
|
||||
*started,
|
||||
@@ -366,17 +392,21 @@ impl CatalogManager for LocalCatalogManager {
|
||||
.catalogs
|
||||
.catalog(catalog_name)?
|
||||
.context(CatalogNotFoundSnafu { catalog_name })?;
|
||||
if catalog.schema(schema_name)?.is_some() {
|
||||
return SchemaExistsSnafu {
|
||||
schema: schema_name,
|
||||
}
|
||||
.fail();
|
||||
|
||||
{
|
||||
let _lock = self.register_lock.lock().await;
|
||||
ensure!(
|
||||
catalog.schema(schema_name)?.is_none(),
|
||||
SchemaExistsSnafu {
|
||||
schema: schema_name,
|
||||
}
|
||||
);
|
||||
self.system
|
||||
.register_schema(request.catalog, schema_name.clone())
|
||||
.await?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(true)
|
||||
}
|
||||
self.system
|
||||
.register_schema(request.catalog, schema_name.clone())
|
||||
.await?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use common_catalog::consts::MIN_USER_TABLE_ID;
|
||||
use common_telemetry::error;
|
||||
use snafu::OptionExt;
|
||||
use table::metadata::TableId;
|
||||
use table::table::TableIdProvider;
|
||||
@@ -27,8 +28,8 @@ use table::TableRef;
|
||||
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
|
||||
use crate::schema::SchemaProvider;
|
||||
use crate::{
|
||||
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, RegisterSchemaRequest,
|
||||
RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
|
||||
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
|
||||
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// Simple in-memory list of catalogs
|
||||
@@ -69,7 +70,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
@@ -84,10 +85,28 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
})?;
|
||||
schema
|
||||
.register_table(request.table_name, request.table)
|
||||
.map(|v| if v.is_some() { 0 } else { 1 })
|
||||
.map(|v| v.is_none())
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
.context(CatalogNotFoundSnafu {
|
||||
catalog_name: &request.catalog,
|
||||
})?
|
||||
.clone();
|
||||
let schema = catalog
|
||||
.schema(&request.schema)?
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
schema_info: format!("{}.{}", &request.catalog, &request.schema),
|
||||
})?;
|
||||
schema
|
||||
.deregister_table(&request.table_name)
|
||||
.map(|v| v.is_some())
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
@@ -95,11 +114,12 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
catalog_name: &request.catalog,
|
||||
})?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
|
||||
unimplemented!()
|
||||
// TODO(ruihang): support register system table request
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
|
||||
@@ -251,11 +271,21 @@ impl SchemaProvider for MemorySchemaProvider {
|
||||
}
|
||||
|
||||
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
|
||||
if self.table_exist(name.as_str())? {
|
||||
return TableExistsSnafu { table: name }.fail()?;
|
||||
}
|
||||
let mut tables = self.tables.write().unwrap();
|
||||
Ok(tables.insert(name, table))
|
||||
if let Some(existing) = tables.get(name.as_str()) {
|
||||
// if table with the same name but different table id exists, then it's a fatal bug
|
||||
if existing.table_info().ident.table_id != table.table_info().ident.table_id {
|
||||
error!(
|
||||
"Unexpected table register: {:?}, existing: {:?}",
|
||||
table.table_info(),
|
||||
existing.table_info()
|
||||
);
|
||||
return TableExistsSnafu { table: name }.fail()?;
|
||||
}
|
||||
Ok(Some(existing.clone()))
|
||||
} else {
|
||||
Ok(tables.insert(name, table))
|
||||
}
|
||||
}
|
||||
|
||||
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
@@ -315,7 +345,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.is_none());
|
||||
assert!(provider.table_exist(table_name).unwrap());
|
||||
let other_table = NumbersTable::default();
|
||||
let other_table = NumbersTable::new(12);
|
||||
let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
|
||||
let err = result.err().unwrap();
|
||||
assert!(err.backtrace_opt().is_some());
|
||||
@@ -340,4 +370,34 @@ mod tests {
|
||||
.downcast_ref::<MemoryCatalogManager>()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
pub async fn test_catalog_deregister_table() {
|
||||
let catalog = MemoryCatalogManager::default();
|
||||
let schema = catalog
|
||||
.schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let register_table_req = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "numbers".to_string(),
|
||||
table_id: 2333,
|
||||
table: Arc::new(NumbersTable::default()),
|
||||
};
|
||||
catalog.register_table(register_table_req).await.unwrap();
|
||||
assert!(schema.table_exist("numbers").unwrap());
|
||||
|
||||
let deregister_table_req = DeregisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "numbers".to_string(),
|
||||
};
|
||||
catalog
|
||||
.deregister_table(deregister_table_req)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!schema.table_exist("numbers").unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,10 +20,6 @@ use std::sync::Arc;
|
||||
use arc_swap::ArcSwap;
|
||||
use async_stream::stream;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_catalog::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::Stream;
|
||||
use futures_util::StreamExt;
|
||||
@@ -37,13 +33,17 @@ use tokio::sync::Mutex;
|
||||
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
|
||||
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu,
|
||||
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
|
||||
};
|
||||
use crate::helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
|
||||
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider,
|
||||
SchemaProviderRef,
|
||||
DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
|
||||
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// Catalog manager based on metasrv.
|
||||
@@ -154,8 +154,8 @@ impl RemoteCatalogManager {
|
||||
}
|
||||
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
let table_value =
|
||||
TableGlobalValue::from_bytes(&v).context(InvalidCatalogValueSnafu)?;
|
||||
|
||||
info!(
|
||||
"Found catalog table entry, key: {}, value: {:?}",
|
||||
@@ -250,10 +250,7 @@ impl RemoteCatalogManager {
|
||||
let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
|
||||
schema.register_table(table_key.table_name.to_string(), table_ref)?;
|
||||
info!("Registered table {}", &table_key.table_name);
|
||||
if table_value.id > max_table_id {
|
||||
info!("Max table id: {} -> {}", max_table_id, table_value.id);
|
||||
max_table_id = table_value.id;
|
||||
}
|
||||
max_table_id = max_table_id.max(table_value.table_id());
|
||||
table_num += 1;
|
||||
}
|
||||
info!(
|
||||
@@ -311,25 +308,33 @@ impl RemoteCatalogManager {
|
||||
..
|
||||
} = table_key;
|
||||
|
||||
let table_id = table_value.table_id();
|
||||
|
||||
let TableGlobalValue {
|
||||
id,
|
||||
meta,
|
||||
table_info,
|
||||
regions_id_map,
|
||||
..
|
||||
} = table_value;
|
||||
|
||||
// unwrap safety: checked in yielding this table when `iter_remote_tables`
|
||||
let region_numbers = regions_id_map.get(&self.node_id).unwrap();
|
||||
|
||||
let request = OpenTableRequest {
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
table_id: *id,
|
||||
table_id,
|
||||
region_numbers: region_numbers.clone(),
|
||||
};
|
||||
match self
|
||||
.engine
|
||||
.open_table(&context, request)
|
||||
.await
|
||||
.with_context(|_| OpenTableSnafu {
|
||||
table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
|
||||
table_info: format!(
|
||||
"{}.{}.{}, id:{}",
|
||||
catalog_name, schema_name, table_name, table_id
|
||||
),
|
||||
})? {
|
||||
Some(table) => {
|
||||
info!(
|
||||
@@ -344,6 +349,7 @@ impl RemoteCatalogManager {
|
||||
catalog_name, schema_name, table_name
|
||||
);
|
||||
|
||||
let meta = &table_info.meta;
|
||||
let schema = meta
|
||||
.schema
|
||||
.clone()
|
||||
@@ -353,13 +359,13 @@ impl RemoteCatalogManager {
|
||||
schema: meta.schema.clone(),
|
||||
})?;
|
||||
let req = CreateTableRequest {
|
||||
id: *id,
|
||||
id: table_id,
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
desc: None,
|
||||
schema: Arc::new(schema),
|
||||
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
|
||||
region_numbers: region_numbers.clone(),
|
||||
primary_key_indices: meta.primary_key_indices.clone(),
|
||||
create_if_not_exists: true,
|
||||
table_options: meta.options.clone(),
|
||||
@@ -371,7 +377,7 @@ impl RemoteCatalogManager {
|
||||
.context(CreateTableSnafu {
|
||||
table_info: format!(
|
||||
"{}.{}.{}, id:{}",
|
||||
&catalog_name, &schema_name, &table_name, id
|
||||
&catalog_name, &schema_name, &table_name, table_id
|
||||
),
|
||||
})
|
||||
}
|
||||
@@ -405,7 +411,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let catalog_name = request.catalog;
|
||||
let schema_name = request.schema;
|
||||
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
|
||||
@@ -424,10 +430,17 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
.fail();
|
||||
}
|
||||
schema_provider.register_table(request.table_name, request.table)?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
|
||||
UnimplementedSnafu {
|
||||
operation: "deregister table",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let catalog_name = request.catalog;
|
||||
let schema_name = request.schema;
|
||||
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
|
||||
@@ -435,7 +448,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
})?;
|
||||
let schema_provider = self.new_schema_provider(&catalog_name, &schema_name);
|
||||
catalog_provider.register_schema(schema_name, schema_provider)?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
|
||||
|
||||
@@ -43,7 +43,6 @@ use crate::error::{
|
||||
|
||||
pub const ENTRY_TYPE_INDEX: usize = 0;
|
||||
pub const KEY_INDEX: usize = 1;
|
||||
pub const TIMESTAMP_INDEX: usize = 2;
|
||||
pub const VALUE_INDEX: usize = 3;
|
||||
|
||||
pub struct SystemCatalogTable {
|
||||
@@ -87,6 +86,7 @@ impl SystemCatalogTable {
|
||||
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
|
||||
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
|
||||
table_id: SYSTEM_CATALOG_TABLE_ID,
|
||||
region_numbers: vec![0],
|
||||
};
|
||||
let schema = Arc::new(build_system_catalog_schema());
|
||||
let ctx = EngineContext::default();
|
||||
@@ -110,7 +110,7 @@ impl SystemCatalogTable {
|
||||
desc: Some("System catalog table".to_string()),
|
||||
schema: schema.clone(),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
|
||||
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
|
||||
create_if_not_exists: true,
|
||||
table_options: HashMap::new(),
|
||||
};
|
||||
@@ -134,7 +134,6 @@ impl SystemCatalogTable {
|
||||
.context(error::SystemCatalogTableScanSnafu)?;
|
||||
let stream = scan
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.context(error::SystemCatalogTableScanExecSnafu)?;
|
||||
Ok(stream)
|
||||
}
|
||||
@@ -384,7 +383,7 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
pub fn test_decode_catalog_enrty() {
|
||||
pub fn test_decode_catalog_entry() {
|
||||
let entry = decode_system_catalog(
|
||||
Some(EntryType::Catalog as u8),
|
||||
Some("some_catalog".as_bytes()),
|
||||
@@ -456,7 +455,7 @@ mod tests {
|
||||
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
|
||||
let dir = TempDir::new("system-table-test").unwrap();
|
||||
let store_dir = dir.path().to_string_lossy();
|
||||
let accessor = opendal::services::fs::Builder::default()
|
||||
let accessor = object_store::backend::fs::Builder::default()
|
||||
.root(&store_dir)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
@@ -368,7 +368,6 @@ mod tests {
|
||||
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
|
||||
let mut tables_stream = tables_stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
if let Some(t) = tables_stream.next().await {
|
||||
|
||||
132
src/catalog/tests/local_catalog_tests.rs
Normal file
132
src/catalog/tests/local_catalog_tests.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::local::LocalCatalogManager;
|
||||
use catalog::{CatalogManager, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_telemetry::{error, info};
|
||||
use mito::config::EngineConfig;
|
||||
use table::table::numbers::NumbersTable;
|
||||
use table::TableRef;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
|
||||
let (_dir, object_store) =
|
||||
mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
|
||||
let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
|
||||
EngineConfig::default(),
|
||||
mito::table::test_util::MockEngine::default(),
|
||||
object_store,
|
||||
));
|
||||
let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
|
||||
catalog_manager.start().await?;
|
||||
Ok(catalog_manager)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_duplicate_register() {
|
||||
let catalog_manager = create_local_catalog_manager().await.unwrap();
|
||||
let request = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id: 42,
|
||||
table: Arc::new(NumbersTable::new(42)),
|
||||
};
|
||||
assert!(catalog_manager
|
||||
.register_table(request.clone())
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
// register table with same table id will succeed with 0 as return val.
|
||||
assert!(!catalog_manager.register_table(request).await.unwrap());
|
||||
|
||||
let err = catalog_manager
|
||||
.register_table(RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id: 43,
|
||||
table: Arc::new(NumbersTable::new(43)),
|
||||
})
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("Table `greptime.public.test_table` already exists"),
|
||||
"Actual error message: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_register() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
|
||||
let catalog_manager =
|
||||
Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));
|
||||
|
||||
let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));
|
||||
|
||||
let mut handles = Vec::with_capacity(8);
|
||||
for i in 0..8 {
|
||||
let catalog = catalog_manager.clone();
|
||||
let succeed = succeed.clone();
|
||||
let handle = rt.spawn(async move {
|
||||
let table_id = 42 + i;
|
||||
let table = Arc::new(NumbersTable::new(table_id));
|
||||
let req = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id,
|
||||
table: table.clone(),
|
||||
};
|
||||
match catalog.register_table(req).await {
|
||||
Ok(res) => {
|
||||
if res {
|
||||
let mut succeed = succeed.lock().await;
|
||||
info!("Successfully registered table: {}", table_id);
|
||||
*succeed = Some(table);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
error!("Failed to register table {}", table_id);
|
||||
}
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
rt.block_on(async move {
|
||||
for handle in handles {
|
||||
handle.await.unwrap();
|
||||
}
|
||||
let guard = succeed.lock().await;
|
||||
let table = guard.as_ref().unwrap();
|
||||
let table_registered = catalog_manager
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
table_registered.table_info().ident.table_id,
|
||||
table.table_info().ident.table_id
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -217,7 +217,7 @@ impl TableEngine for MockTableEngine {
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
_request: DropTableRequest,
|
||||
) -> table::Result<()> {
|
||||
) -> table::Result<bool> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,12 +22,12 @@ mod tests {
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use catalog::remote::{
|
||||
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
|
||||
};
|
||||
use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use datatypes::schema::Schema;
|
||||
use futures_util::StreamExt;
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
@@ -202,7 +202,7 @@ mod tests {
|
||||
table_id,
|
||||
table,
|
||||
};
|
||||
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert!(catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert_eq!(
|
||||
HashSet::from([table_name, "numbers".to_string()]),
|
||||
default_schema
|
||||
@@ -287,7 +287,7 @@ mod tests {
|
||||
.register_schema(schema_name.clone(), schema.clone())
|
||||
.expect("Register schema should not fail");
|
||||
assert!(prev.is_none());
|
||||
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert!(catalog_manager.register_table(reg_req).await.unwrap());
|
||||
|
||||
assert_eq!(
|
||||
HashSet::from([schema_name.clone()]),
|
||||
|
||||
@@ -11,9 +11,9 @@ async-stream = "0.3"
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
common-grpc-expr = { path = "../common/grpc-expr" }
|
||||
common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-insert = { path = "../common/insert" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
|
||||
@@ -12,11 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::*;
|
||||
use client::{Client, Database};
|
||||
|
||||
fn main() {
|
||||
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
|
||||
.unwrap();
|
||||
@@ -29,21 +27,21 @@ async fn run() {
|
||||
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
|
||||
let db = Database::new("greptime", client);
|
||||
|
||||
let (columns, row_count) = insert_data();
|
||||
|
||||
let expr = InsertExpr {
|
||||
schema_name: "public".to_string(),
|
||||
table_name: "demo".to_string(),
|
||||
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
|
||||
values: insert_batches(),
|
||||
})),
|
||||
options: HashMap::default(),
|
||||
region_number: 0,
|
||||
columns,
|
||||
row_count,
|
||||
};
|
||||
db.insert(expr).await.unwrap();
|
||||
}
|
||||
|
||||
fn insert_batches() -> Vec<Vec<u8>> {
|
||||
fn insert_data() -> (Vec<Column>, u32) {
|
||||
const SEMANTIC_TAG: i32 = 0;
|
||||
const SEMANTIC_FEILD: i32 = 1;
|
||||
const SEMANTIC_FIELD: i32 = 1;
|
||||
const SEMANTIC_TS: i32 = 2;
|
||||
|
||||
let row_count = 4;
|
||||
@@ -71,7 +69,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
|
||||
};
|
||||
let cpu_column = Column {
|
||||
column_name: "cpu".to_string(),
|
||||
semantic_type: SEMANTIC_FEILD,
|
||||
semantic_type: SEMANTIC_FIELD,
|
||||
values: Some(cpu_vals),
|
||||
null_mask: vec![2],
|
||||
..Default::default()
|
||||
@@ -83,7 +81,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
|
||||
};
|
||||
let mem_column = Column {
|
||||
column_name: "memory".to_string(),
|
||||
semantic_type: SEMANTIC_FEILD,
|
||||
semantic_type: SEMANTIC_FIELD,
|
||||
values: Some(mem_vals),
|
||||
null_mask: vec![4],
|
||||
..Default::default()
|
||||
@@ -101,9 +99,8 @@ fn insert_batches() -> Vec<Vec<u8>> {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let insert_batch = InsertBatch {
|
||||
columns: vec![host_column, cpu_column, mem_column, ts_column],
|
||||
(
|
||||
vec![host_column, cpu_column, mem_column, ts_column],
|
||||
row_count,
|
||||
};
|
||||
vec![insert_batch.into()]
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use client::{Client, Database};
|
||||
use common_grpc::MockExecution;
|
||||
use datafusion::physical_plan::expressions::Column;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
|
||||
use tracing::{event, Level};
|
||||
|
||||
fn main() {
|
||||
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
|
||||
.unwrap();
|
||||
|
||||
run();
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn run() {
|
||||
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
|
||||
let db = Database::new("greptime", client);
|
||||
|
||||
let physical = mock_physical_plan();
|
||||
let result = db.physical_plan(physical, None).await;
|
||||
|
||||
event!(Level::INFO, "result: {:#?}", result);
|
||||
}
|
||||
|
||||
fn mock_physical_plan() -> Arc<dyn ExecutionPlan> {
|
||||
let id_expr = Arc::new(Column::new("id", 0)) as Arc<dyn PhysicalExpr>;
|
||||
let age_expr = Arc::new(Column::new("age", 2)) as Arc<dyn PhysicalExpr>;
|
||||
let expr = vec![(id_expr, "id".to_string()), (age_expr, "age".to_string())];
|
||||
|
||||
let input =
|
||||
Arc::new(MockExecution::new("mock_input_exec".to_string())) as Arc<dyn ExecutionPlan>;
|
||||
let projection = ProjectionExec::try_new(expr, input).unwrap();
|
||||
Arc::new(projection)
|
||||
}
|
||||
@@ -58,7 +58,19 @@ impl Admin {
|
||||
header: Some(header),
|
||||
expr: Some(admin_expr::Expr::Alter(expr)),
|
||||
};
|
||||
Ok(self.do_requests(vec![expr]).await?.remove(0))
|
||||
self.do_request(expr).await
|
||||
}
|
||||
|
||||
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
|
||||
let header = ExprHeader {
|
||||
version: PROTOCOL_VERSION,
|
||||
};
|
||||
let expr = AdminExpr {
|
||||
header: Some(header),
|
||||
expr: Some(admin_expr::Expr::DropTable(expr)),
|
||||
};
|
||||
|
||||
self.do_request(expr).await
|
||||
}
|
||||
|
||||
/// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.
|
||||
|
||||
@@ -18,22 +18,17 @@ use api::v1::codec::SelectResult as GrpcSelectResult;
|
||||
use api::v1::column::SemanticType;
|
||||
use api::v1::{
|
||||
object_expr, object_result, select_expr, DatabaseRequest, ExprHeader, InsertExpr,
|
||||
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, PhysicalPlan,
|
||||
SelectExpr,
|
||||
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, SelectExpr,
|
||||
};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc::{AsExcutionPlan, DefaultAsPlanImpl};
|
||||
use common_insert::column_to_vector;
|
||||
use common_grpc_expr::column_to_vector;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::physical_plan::ExecutionPlan;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
|
||||
};
|
||||
use crate::error::{ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu};
|
||||
use crate::{error, Client, Result};
|
||||
|
||||
pub const PROTOCOL_VERSION: u32 = 1;
|
||||
@@ -94,24 +89,6 @@ impl Database {
|
||||
self.do_select(select_expr).await
|
||||
}
|
||||
|
||||
pub async fn physical_plan(
|
||||
&self,
|
||||
physical: Arc<dyn ExecutionPlan>,
|
||||
original_ql: Option<String>,
|
||||
) -> Result<ObjectResult> {
|
||||
let plan = DefaultAsPlanImpl::try_from_physical_plan(physical.clone())
|
||||
.context(EncodePhysicalSnafu { physical })?
|
||||
.bytes;
|
||||
let original_ql = original_ql.unwrap_or_default();
|
||||
let select_expr = SelectExpr {
|
||||
expr: Some(select_expr::Expr::PhysicalPlan(PhysicalPlan {
|
||||
original_ql: original_ql.into_bytes(),
|
||||
plan,
|
||||
})),
|
||||
};
|
||||
self.do_select(select_expr).await
|
||||
}
|
||||
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
|
||||
let select_expr = SelectExpr {
|
||||
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
|
||||
|
||||
@@ -103,7 +103,7 @@ pub enum Error {
|
||||
#[snafu(display("Failed to convert column to vector, source: {}", source))]
|
||||
ColumnToVector {
|
||||
#[snafu(backtrace)]
|
||||
source: common_insert::error::Error,
|
||||
source: common_grpc_expr::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -18,8 +18,10 @@ common-telemetry = { path = "../common/telemetry", features = [
|
||||
datanode = { path = "../datanode" }
|
||||
frontend = { path = "../frontend" }
|
||||
futures = "0.3"
|
||||
meta-client = { path = "../meta-client" }
|
||||
meta-srv = { path = "../meta-srv" }
|
||||
serde = "1.0"
|
||||
servers = { path = "../servers" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.18", features = ["full"] }
|
||||
toml = "0.5"
|
||||
@@ -27,3 +29,6 @@ toml = "0.5"
|
||||
[dev-dependencies]
|
||||
serde = "1.0"
|
||||
tempdir = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.1.3"
|
||||
|
||||
29
src/cmd/build.rs
Normal file
29
src/cmd/build.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
const DEFAULT_VALUE: &str = "unknown";
|
||||
fn main() {
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_COMMIT={}",
|
||||
build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_BRANCH={}",
|
||||
build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_DIRTY={}",
|
||||
build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
|
||||
);
|
||||
}
|
||||
@@ -20,7 +20,7 @@ use cmd::{datanode, frontend, metasrv, standalone};
|
||||
use common_telemetry::logging::{error, info};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[clap(name = "greptimedb")]
|
||||
#[clap(name = "greptimedb", version = print_version())]
|
||||
struct Command {
|
||||
#[clap(long, default_value = "/tmp/greptimedb/logs")]
|
||||
log_dir: String,
|
||||
@@ -70,6 +70,19 @@ impl fmt::Display for SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
fn print_version() -> &'static str {
|
||||
concat!(
|
||||
"\nbranch: ",
|
||||
env!("GIT_BRANCH"),
|
||||
"\ncommit: ",
|
||||
env!("GIT_COMMIT"),
|
||||
"\ndirty: ",
|
||||
env!("GIT_DIRTY"),
|
||||
"\nversion: ",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let cmd = Command::parse();
|
||||
|
||||
@@ -14,8 +14,9 @@
|
||||
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use datanode::datanode::{Datanode, DatanodeOptions};
|
||||
use frontend::frontend::Mode;
|
||||
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
|
||||
use meta_client::MetaClientOpts;
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
|
||||
@@ -46,7 +47,7 @@ impl SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
#[derive(Debug, Parser, Default)]
|
||||
struct StartCommand {
|
||||
#[clap(long)]
|
||||
node_id: Option<u64>,
|
||||
@@ -58,6 +59,10 @@ struct StartCommand {
|
||||
metasrv_addr: Option<String>,
|
||||
#[clap(short, long)]
|
||||
config_file: Option<String>,
|
||||
#[clap(long)]
|
||||
data_dir: Option<String>,
|
||||
#[clap(long)]
|
||||
wal_dir: Option<String>,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
@@ -98,7 +103,13 @@ impl TryFrom<StartCommand> for DatanodeOptions {
|
||||
}
|
||||
|
||||
if let Some(meta_addr) = cmd.metasrv_addr {
|
||||
opts.meta_client_opts.metasrv_addr = meta_addr;
|
||||
opts.meta_client_opts
|
||||
.get_or_insert_with(MetaClientOpts::default)
|
||||
.metasrv_addrs = meta_addr
|
||||
.split(',')
|
||||
.map(&str::trim)
|
||||
.map(&str::to_string)
|
||||
.collect::<_>();
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
|
||||
@@ -108,6 +119,14 @@ impl TryFrom<StartCommand> for DatanodeOptions {
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
if let Some(data_dir) = cmd.data_dir {
|
||||
opts.storage = ObjectStoreConfig::File { data_dir };
|
||||
}
|
||||
|
||||
if let Some(wal_dir) = cmd.wal_dir {
|
||||
opts.wal_dir = wal_dir;
|
||||
}
|
||||
Ok(opts)
|
||||
}
|
||||
}
|
||||
@@ -117,39 +136,41 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use datanode::datanode::ObjectStoreConfig;
|
||||
use frontend::frontend::Mode;
|
||||
use servers::Mode;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let cmd = StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
..Default::default()
|
||||
};
|
||||
let options: DatanodeOptions = cmd.try_into().unwrap();
|
||||
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
|
||||
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
|
||||
assert_eq!("127.0.0.1:3306".to_string(), options.mysql_addr);
|
||||
assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
|
||||
assert_eq!(4, options.mysql_runtime_size);
|
||||
assert_eq!(
|
||||
"1.1.1.1:3002".to_string(),
|
||||
options.meta_client_opts.metasrv_addr
|
||||
);
|
||||
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
|
||||
assert_eq!(3000, options.meta_client_opts.timeout_millis);
|
||||
assert!(!options.meta_client_opts.tcp_nodelay);
|
||||
let MetaClientOpts {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
connect_timeout_millis,
|
||||
tcp_nodelay,
|
||||
} = options.meta_client_opts.unwrap();
|
||||
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
|
||||
match options.storage {
|
||||
ObjectStoreConfig::File { data_dir } => {
|
||||
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
|
||||
}
|
||||
ObjectStoreConfig::S3 { .. } => unreachable!(),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -157,44 +178,30 @@ mod tests {
|
||||
fn test_try_from_cmd() {
|
||||
assert_eq!(
|
||||
Mode::Standalone,
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None
|
||||
})
|
||||
.unwrap()
|
||||
.mode
|
||||
DatanodeOptions::try_from(StartCommand::default())
|
||||
.unwrap()
|
||||
.mode
|
||||
);
|
||||
|
||||
let mode = DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap()
|
||||
.mode;
|
||||
assert_matches!(mode, Mode::Distributed);
|
||||
|
||||
assert!(DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.is_err());
|
||||
|
||||
// Providing node_id but leave metasrv_addr absent is ok since metasrv_addr has default value
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
@@ -202,17 +209,23 @@ mod tests {
|
||||
#[test]
|
||||
fn test_merge_config() {
|
||||
let dn_opts = DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(Some(42), dn_opts.node_id);
|
||||
assert_eq!("1.1.1.1:3002", dn_opts.meta_client_opts.metasrv_addr);
|
||||
let MetaClientOpts {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
connect_timeout_millis,
|
||||
tcp_nodelay,
|
||||
} = dn_opts.meta_client_opts.unwrap();
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,12 +25,6 @@ pub enum Error {
|
||||
source: datanode::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build frontend, source: {}", source))]
|
||||
BuildFrontend {
|
||||
#[snafu(backtrace)]
|
||||
source: frontend::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start frontend, source: {}", source))]
|
||||
StartFrontend {
|
||||
#[snafu(backtrace)]
|
||||
@@ -75,7 +69,6 @@ impl ErrorExt for Error {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
Error::IllegalConfig { .. } => StatusCode::InvalidArguments,
|
||||
Error::BuildFrontend { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,10 +90,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_start_node_error() {
|
||||
fn throw_datanode_error() -> StdResult<datanode::error::Error> {
|
||||
datanode::error::MissingFieldSnafu {
|
||||
field: "test_field",
|
||||
}
|
||||
.fail()
|
||||
datanode::error::MissingNodeIdSnafu {}.fail()
|
||||
}
|
||||
|
||||
let e = throw_datanode_error()
|
||||
|
||||
@@ -13,13 +13,16 @@
|
||||
// limitations under the License.
|
||||
|
||||
use clap::Parser;
|
||||
use frontend::frontend::{Frontend, FrontendOptions, Mode};
|
||||
use frontend::frontend::{Frontend, FrontendOptions};
|
||||
use frontend::grpc::GrpcOptions;
|
||||
use frontend::influxdb::InfluxdbOptions;
|
||||
use frontend::instance::Instance;
|
||||
use frontend::mysql::MysqlOptions;
|
||||
use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use meta_client::MetaClientOpts;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -75,7 +78,7 @@ impl StartCommand {
|
||||
let opts: FrontendOptions = self.try_into()?;
|
||||
let mut frontend = Frontend::new(
|
||||
opts.clone(),
|
||||
Instance::try_new(&opts)
|
||||
Instance::try_new_distributed(&opts)
|
||||
.await
|
||||
.context(error::StartFrontendSnafu)?,
|
||||
);
|
||||
@@ -94,7 +97,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
};
|
||||
|
||||
if let Some(addr) = cmd.http_addr {
|
||||
opts.http_addr = Some(addr);
|
||||
opts.http_options = Some(HttpOptions {
|
||||
addr,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.grpc_addr {
|
||||
opts.grpc_options = Some(GrpcOptions {
|
||||
@@ -124,13 +130,13 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
opts.influxdb_options = Some(InfluxdbOptions { enable });
|
||||
}
|
||||
if let Some(metasrv_addr) = cmd.metasrv_addr {
|
||||
opts.metasrv_addr = Some(
|
||||
metasrv_addr
|
||||
.split(',')
|
||||
.into_iter()
|
||||
.map(|x| x.trim().to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
);
|
||||
opts.meta_client_opts
|
||||
.get_or_insert_with(MetaClientOpts::default)
|
||||
.metasrv_addrs = metasrv_addr
|
||||
.split(',')
|
||||
.map(&str::trim)
|
||||
.map(&str::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
Ok(opts)
|
||||
@@ -139,6 +145,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -155,7 +163,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let opts: FrontendOptions = command.try_into().unwrap();
|
||||
assert_eq!(opts.http_addr, Some("127.0.0.1:1234".to_string()));
|
||||
assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
|
||||
assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
|
||||
assert_eq!(
|
||||
opts.postgres_options.as_ref().unwrap().addr,
|
||||
@@ -186,4 +194,32 @@ mod tests {
|
||||
|
||||
assert!(!opts.influxdb_options.unwrap().enable);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
grpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
influxdb_enable: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/frontend.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
metasrv_addr: None,
|
||||
};
|
||||
|
||||
let fe_opts = FrontendOptions::try_from(command).unwrap();
|
||||
assert_eq!(Mode::Distributed, fe_opts.mode);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4000".to_string(),
|
||||
fe_opts.http_options.as_ref().unwrap().addr
|
||||
);
|
||||
assert_eq!(
|
||||
Duration::from_secs(30),
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use clap::Parser;
|
||||
use common_telemetry::info;
|
||||
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
|
||||
use datanode::instance::InstanceRef;
|
||||
use frontend::frontend::{Frontend, FrontendOptions, Mode};
|
||||
use frontend::frontend::{Frontend, FrontendOptions};
|
||||
use frontend::grpc::GrpcOptions;
|
||||
use frontend::influxdb::InfluxdbOptions;
|
||||
use frontend::instance::Instance as FeInstance;
|
||||
@@ -25,12 +25,11 @@ use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use frontend::prometheus::PrometheusOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::http::HttpOptions;
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use tokio::try_join;
|
||||
|
||||
use crate::error::{
|
||||
BuildFrontendSnafu, Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu,
|
||||
};
|
||||
use crate::error::{Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu};
|
||||
use crate::toml_loader;
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -60,7 +59,7 @@ impl SubCommand {
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct StandaloneOptions {
|
||||
pub http_addr: Option<String>,
|
||||
pub http_options: Option<HttpOptions>,
|
||||
pub grpc_options: Option<GrpcOptions>,
|
||||
pub mysql_options: Option<MysqlOptions>,
|
||||
pub postgres_options: Option<PostgresOptions>,
|
||||
@@ -70,14 +69,13 @@ pub struct StandaloneOptions {
|
||||
pub mode: Mode,
|
||||
pub wal_dir: String,
|
||||
pub storage: ObjectStoreConfig,
|
||||
pub datanode_mysql_addr: String,
|
||||
pub datanode_mysql_runtime_size: usize,
|
||||
pub enable_memory_catalog: bool,
|
||||
}
|
||||
|
||||
impl Default for StandaloneOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
http_addr: Some("127.0.0.1:4000".to_string()),
|
||||
http_options: Some(HttpOptions::default()),
|
||||
grpc_options: Some(GrpcOptions::default()),
|
||||
mysql_options: Some(MysqlOptions::default()),
|
||||
postgres_options: Some(PostgresOptions::default()),
|
||||
@@ -87,8 +85,7 @@ impl Default for StandaloneOptions {
|
||||
mode: Mode::Standalone,
|
||||
wal_dir: "/tmp/greptimedb/wal".to_string(),
|
||||
storage: ObjectStoreConfig::default(),
|
||||
datanode_mysql_addr: "127.0.0.1:3306".to_string(),
|
||||
datanode_mysql_runtime_size: 4,
|
||||
enable_memory_catalog: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -96,7 +93,7 @@ impl Default for StandaloneOptions {
|
||||
impl StandaloneOptions {
|
||||
fn frontend_options(self) -> FrontendOptions {
|
||||
FrontendOptions {
|
||||
http_addr: self.http_addr,
|
||||
http_options: self.http_options,
|
||||
grpc_options: self.grpc_options,
|
||||
mysql_options: self.mysql_options,
|
||||
postgres_options: self.postgres_options,
|
||||
@@ -104,8 +101,7 @@ impl StandaloneOptions {
|
||||
influxdb_options: self.influxdb_options,
|
||||
prometheus_options: self.prometheus_options,
|
||||
mode: self.mode,
|
||||
datanode_rpc_addr: "127.0.0.1:3001".to_string(),
|
||||
metasrv_addr: None,
|
||||
meta_client_opts: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,8 +109,7 @@ impl StandaloneOptions {
|
||||
DatanodeOptions {
|
||||
wal_dir: self.wal_dir,
|
||||
storage: self.storage,
|
||||
mysql_addr: self.datanode_mysql_addr,
|
||||
mysql_runtime_size: self.datanode_mysql_runtime_size,
|
||||
enable_memory_catalog: self.enable_memory_catalog,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@@ -136,18 +131,22 @@ struct StartCommand {
|
||||
influxdb_enable: bool,
|
||||
#[clap(short, long)]
|
||||
config_file: Option<String>,
|
||||
#[clap(short = 'm', long = "memory-catalog")]
|
||||
enable_memory_catalog: bool,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
async fn run(self) -> Result<()> {
|
||||
let enable_memory_catalog = self.enable_memory_catalog;
|
||||
let config_file = self.config_file.clone();
|
||||
let fe_opts = FrontendOptions::try_from(self)?;
|
||||
let dn_opts: DatanodeOptions = {
|
||||
let opts: StandaloneOptions = if let Some(path) = config_file {
|
||||
let mut opts: StandaloneOptions = if let Some(path) = config_file {
|
||||
toml_loader::from_file!(&path)?
|
||||
} else {
|
||||
StandaloneOptions::default()
|
||||
};
|
||||
opts.enable_memory_catalog = enable_memory_catalog;
|
||||
opts.datanode_options()
|
||||
};
|
||||
|
||||
@@ -159,13 +158,16 @@ impl StartCommand {
|
||||
let mut datanode = Datanode::new(dn_opts.clone())
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;
|
||||
let mut frontend = build_frontend(fe_opts, datanode.get_instance()).await?;
|
||||
|
||||
try_join!(
|
||||
async { datanode.start().await.context(StartDatanodeSnafu) },
|
||||
async { frontend.start().await.context(StartFrontendSnafu) }
|
||||
)?;
|
||||
// Start datanode instance before starting services, to avoid requests come in before internal components are started.
|
||||
datanode
|
||||
.start_instance()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
info!("Datanode instance started");
|
||||
|
||||
frontend.start().await.context(StartFrontendSnafu)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -173,17 +175,9 @@ impl StartCommand {
|
||||
/// Build frontend instance in standalone mode
|
||||
async fn build_frontend(
|
||||
fe_opts: FrontendOptions,
|
||||
dn_opts: &DatanodeOptions,
|
||||
datanode_instance: InstanceRef,
|
||||
) -> Result<Frontend<FeInstance>> {
|
||||
let grpc_server_addr = &dn_opts.rpc_addr;
|
||||
info!(
|
||||
"Build frontend with datanode gRPC addr: {}",
|
||||
grpc_server_addr
|
||||
);
|
||||
let mut frontend_instance = FeInstance::try_new(&fe_opts)
|
||||
.await
|
||||
.context(BuildFrontendSnafu)?;
|
||||
let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
|
||||
frontend_instance.set_catalog_manager(datanode_instance.catalog_manager().clone());
|
||||
frontend_instance.set_script_handler(datanode_instance);
|
||||
Ok(Frontend::new(fe_opts, frontend_instance))
|
||||
@@ -204,7 +198,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
opts.mode = Mode::Standalone;
|
||||
|
||||
if let Some(addr) = cmd.http_addr {
|
||||
opts.http_addr = Some(addr);
|
||||
opts.http_options = Some(HttpOptions {
|
||||
addr,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.rpc_addr {
|
||||
// frontend grpc addr conflict with datanode default grpc addr
|
||||
@@ -254,6 +251,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -269,12 +268,19 @@ mod tests {
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
influxdb_enable: false,
|
||||
enable_memory_catalog: false,
|
||||
};
|
||||
|
||||
let fe_opts = FrontendOptions::try_from(cmd).unwrap();
|
||||
assert_eq!(Mode::Standalone, fe_opts.mode);
|
||||
assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
|
||||
assert_eq!(Some("127.0.0.1:4000".to_string()), fe_opts.http_addr);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4000".to_string(),
|
||||
fe_opts.http_options.as_ref().unwrap().addr
|
||||
);
|
||||
assert_eq!(
|
||||
Duration::from_secs(30),
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4001".to_string(),
|
||||
fe_opts.grpc_options.unwrap().addr
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use bitvec::prelude as bv;
|
||||
pub use bitvec::prelude;
|
||||
|
||||
// `Lsb0` provides the best codegen for bit manipulation,
|
||||
// see https://github.com/bitvecto-rs/bitvec/blob/main/doc/order/Lsb0.md
|
||||
pub type BitVec = bv::BitVec<u8, bv::Lsb0>;
|
||||
pub type BitVec = prelude::BitVec<u8>;
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod bitset;
|
||||
pub mod bit_vec;
|
||||
pub mod buffer;
|
||||
pub mod bytes;
|
||||
|
||||
pub use bitset::BitVec;
|
||||
pub use bit_vec::BitVec;
|
||||
|
||||
@@ -14,7 +14,6 @@ regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
table = { path = "../../table" }
|
||||
|
||||
[dev-dependencies]
|
||||
chrono = "0.4"
|
||||
|
||||
@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
|
||||
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
|
||||
/// scripts table id
|
||||
pub const SCRIPTS_TABLE_ID: u32 = 1;
|
||||
|
||||
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";
|
||||
|
||||
@@ -14,10 +14,3 @@
|
||||
|
||||
pub mod consts;
|
||||
pub mod error;
|
||||
mod helper;
|
||||
|
||||
pub use helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
|
||||
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
|
||||
@@ -62,6 +62,15 @@ pub enum StatusCode {
|
||||
/// Runtime resources exhausted, like creating threads failed.
|
||||
RuntimeResourcesExhausted = 6000,
|
||||
// ====== End of server related status code =======
|
||||
|
||||
// ====== Begin of auth related status code =====
|
||||
/// User not exist
|
||||
UserNotFound = 7000,
|
||||
/// Unsupported password type
|
||||
UnsupportedPwdType = 7001,
|
||||
/// Username and password does not match
|
||||
UserPwdMismatch = 7002,
|
||||
// ====== End of auth related status code =====
|
||||
}
|
||||
|
||||
impl StatusCode {
|
||||
|
||||
@@ -9,8 +9,8 @@ arc-swap = "1.0"
|
||||
chrono-tz = "0.6"
|
||||
common-error = { path = "../error" }
|
||||
common-function-macro = { path = "../function-macro" }
|
||||
common-time = { path = "../time" }
|
||||
common-query = { path = "../query" }
|
||||
common-time = { path = "../time" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
libc = "0.2"
|
||||
@@ -21,20 +21,6 @@ paste = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
statrs = "0.15"
|
||||
|
||||
[dependencies.arrow]
|
||||
features = [
|
||||
"io_csv",
|
||||
"io_json",
|
||||
"io_parquet",
|
||||
"io_parquet_compression",
|
||||
"io_ipc",
|
||||
"ahash",
|
||||
"compute",
|
||||
"serde_types",
|
||||
]
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
|
||||
[dev-dependencies]
|
||||
ron = "0.7"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
||||
@@ -13,10 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod pow;
|
||||
mod rate;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use pow::PowFunction;
|
||||
pub use rate::RateFunction;
|
||||
|
||||
use crate::scalars::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -25,5 +27,6 @@ pub(crate) struct MathFunction;
|
||||
impl MathFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(PowFunction::default()));
|
||||
registry.register(Arc::new(RateFunction::default()))
|
||||
}
|
||||
}
|
||||
|
||||
116
src/common/function/src/scalars/math/rate.rs
Normal file
116
src/common/function/src/scalars/math/rate.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use arrow::array::Array;
|
||||
use common_query::error::{FromArrowArraySnafu, Result, TypeCastSnafu};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::arrow;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
/// generates rates from a sequence of adjacent data points.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct RateFunction;
|
||||
|
||||
impl fmt::Display for RateFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "RATE")
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for RateFunction {
|
||||
fn name(&self) -> &str {
|
||||
"prom_rate"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
let val = &columns[0].to_arrow_array();
|
||||
let val_0 = val.slice(0, val.len() - 1);
|
||||
let val_1 = val.slice(1, val.len() - 1);
|
||||
let dv = arrow::compute::arithmetics::sub(&*val_1, &*val_0);
|
||||
let ts = &columns[1].to_arrow_array();
|
||||
let ts_0 = ts.slice(0, ts.len() - 1);
|
||||
let ts_1 = ts.slice(1, ts.len() - 1);
|
||||
let dt = arrow::compute::arithmetics::sub(&*ts_1, &*ts_0);
|
||||
fn all_to_f64(array: &dyn Array) -> Result<Box<dyn Array>> {
|
||||
Ok(arrow::compute::cast::cast(
|
||||
array,
|
||||
&arrow::datatypes::DataType::Float64,
|
||||
arrow::compute::cast::CastOptions {
|
||||
wrapped: true,
|
||||
partial: true,
|
||||
},
|
||||
)
|
||||
.context(TypeCastSnafu {
|
||||
typ: arrow::datatypes::DataType::Float64,
|
||||
})?)
|
||||
}
|
||||
let dv = all_to_f64(&*dv)?;
|
||||
let dt = all_to_f64(&*dt)?;
|
||||
let rate = arrow::compute::arithmetics::div(&*dv, &*dt);
|
||||
let v = Helper::try_into_vector(&rate).context(FromArrowArraySnafu)?;
|
||||
Ok(v)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::Float64Array;
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::{Float32Vector, Int64Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_rate_function() {
|
||||
let rate = RateFunction::default();
|
||||
assert_eq!("prom_rate", rate.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::float64_datatype(),
|
||||
rate.return_type(&[]).unwrap()
|
||||
);
|
||||
assert!(matches!(rate.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(2, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == ConcreteDataType::numerics()
|
||||
));
|
||||
let values = vec![1.0, 3.0, 6.0];
|
||||
let ts = vec![0, 1, 2];
|
||||
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(Float32Vector::from_vec(values)),
|
||||
Arc::new(Int64Vector::from_vec(ts)),
|
||||
];
|
||||
let vector = rate.eval(FunctionContext::default(), &args).unwrap();
|
||||
let arr = vector.to_arrow_array();
|
||||
let expect = Arc::new(Float64Array::from_vec(vec![2.0, 3.0]));
|
||||
let res = arrow::compute::comparison::eq(&*arr, &*expect);
|
||||
res.iter().for_each(|x| assert!(matches!(x, Some(true))));
|
||||
}
|
||||
}
|
||||
@@ -14,9 +14,9 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::PrimitiveArray;
|
||||
use arrow::compute::cast::primitive_to_primitive;
|
||||
use arrow::datatypes::DataType::Float64;
|
||||
use datatypes::arrow::array::PrimitiveArray;
|
||||
use datatypes::arrow::compute::cast::primitive_to_primitive;
|
||||
use datatypes::arrow::datatypes::DataType::Float64;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::type_id::LogicalTypeId;
|
||||
|
||||
@@ -17,11 +17,11 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::compute::arithmetics;
|
||||
use arrow::datatypes::DataType as ArrowDatatype;
|
||||
use arrow::scalar::PrimitiveScalar;
|
||||
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::arrow::compute::arithmetics;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
|
||||
use datatypes::arrow::scalar::PrimitiveScalar;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::{TimestampVector, VectorRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "common-insert"
|
||||
name = "common-grpc-expr"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
@@ -8,10 +8,12 @@ license = "Apache-2.0"
|
||||
api = { path = "../../api" }
|
||||
async-trait = "0.1"
|
||||
common-base = { path = "../base" }
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-error = { path = "../error" }
|
||||
common-grpc = { path = "../grpc" }
|
||||
common-query = { path = "../query" }
|
||||
common-telemetry = { path = "../telemetry" }
|
||||
common-time = { path = "../time" }
|
||||
common-query = { path = "../query" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
table = { path = "../../table" }
|
||||
234
src/common/grpc-expr/src/alter.rs
Normal file
234
src/common/grpc-expr/src/alter.rs
Normal file
@@ -0,0 +1,234 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::{AlterExpr, CreateExpr, DropColumns};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
|
||||
|
||||
use crate::error::{
|
||||
ColumnNotFoundSnafu, CreateSchemaSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
|
||||
MissingTimestampColumnSnafu, Result,
|
||||
};
|
||||
|
||||
/// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
|
||||
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
|
||||
match expr.kind {
|
||||
Some(Kind::AddColumns(add_columns)) => {
|
||||
let add_column_requests = add_columns
|
||||
.add_columns
|
||||
.into_iter()
|
||||
.map(|ac| {
|
||||
let column_def = ac.column_def.context(MissingFieldSnafu {
|
||||
field: "column_def",
|
||||
})?;
|
||||
|
||||
let schema =
|
||||
column_def
|
||||
.try_as_column_schema()
|
||||
.context(InvalidColumnDefSnafu {
|
||||
column: &column_def.name,
|
||||
})?;
|
||||
Ok(AddColumnRequest {
|
||||
column_schema: schema,
|
||||
is_key: ac.is_key,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let alter_kind = AlterKind::AddColumns {
|
||||
columns: add_column_requests,
|
||||
};
|
||||
|
||||
let request = AlterTableRequest {
|
||||
catalog_name: expr.catalog_name,
|
||||
schema_name: expr.schema_name,
|
||||
table_name: expr.table_name,
|
||||
alter_kind,
|
||||
};
|
||||
Ok(Some(request))
|
||||
}
|
||||
Some(Kind::DropColumns(DropColumns { drop_columns })) => {
|
||||
let alter_kind = AlterKind::DropColumns {
|
||||
names: drop_columns.into_iter().map(|c| c.name).collect(),
|
||||
};
|
||||
|
||||
let request = AlterTableRequest {
|
||||
catalog_name: expr.catalog_name,
|
||||
schema_name: expr.schema_name,
|
||||
table_name: expr.table_name,
|
||||
alter_kind,
|
||||
};
|
||||
Ok(Some(request))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
|
||||
let column_schemas = expr
|
||||
.column_defs
|
||||
.iter()
|
||||
.map(|x| {
|
||||
x.try_as_column_schema()
|
||||
.context(InvalidColumnDefSnafu { column: &x.name })
|
||||
})
|
||||
.collect::<Result<Vec<ColumnSchema>>>()?;
|
||||
|
||||
ensure!(
|
||||
column_schemas
|
||||
.iter()
|
||||
.any(|column| column.name == expr.time_index),
|
||||
MissingTimestampColumnSnafu {
|
||||
msg: format!("CreateExpr: {:?}", expr)
|
||||
}
|
||||
);
|
||||
|
||||
let column_schemas = column_schemas
|
||||
.into_iter()
|
||||
.map(|column_schema| {
|
||||
if column_schema.name == expr.time_index {
|
||||
column_schema.with_time_index(true)
|
||||
} else {
|
||||
column_schema
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Arc::new(
|
||||
SchemaBuilder::try_from(column_schemas)
|
||||
.context(CreateSchemaSnafu)?
|
||||
.build()
|
||||
.context(CreateSchemaSnafu)?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
|
||||
let schema = create_table_schema(&expr)?;
|
||||
let primary_key_indices = expr
|
||||
.primary_keys
|
||||
.iter()
|
||||
.map(|key| {
|
||||
schema
|
||||
.column_index_by_name(key)
|
||||
.context(ColumnNotFoundSnafu {
|
||||
column_name: key,
|
||||
table_name: &expr.table_name,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<usize>>>()?;
|
||||
|
||||
let catalog_name = expr
|
||||
.catalog_name
|
||||
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
|
||||
let schema_name = expr
|
||||
.schema_name
|
||||
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
|
||||
|
||||
let region_ids = if expr.region_ids.is_empty() {
|
||||
vec![0]
|
||||
} else {
|
||||
expr.region_ids
|
||||
};
|
||||
|
||||
Ok(CreateTableRequest {
|
||||
id: table_id,
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name: expr.table_name,
|
||||
desc: expr.desc,
|
||||
schema,
|
||||
region_numbers: region_ids,
|
||||
primary_key_indices,
|
||||
create_if_not_exists: expr.create_if_not_exists,
|
||||
table_options: expr.table_options,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_alter_expr_to_request() {
|
||||
let expr = AlterExpr {
|
||||
catalog_name: None,
|
||||
schema_name: None,
|
||||
table_name: "monitor".to_string(),
|
||||
|
||||
kind: Some(Kind::AddColumns(AddColumns {
|
||||
add_columns: vec![AddColumn {
|
||||
column_def: Some(ColumnDef {
|
||||
name: "mem_usage".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: false,
|
||||
default_constraint: None,
|
||||
}),
|
||||
is_key: false,
|
||||
}],
|
||||
})),
|
||||
};
|
||||
|
||||
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
|
||||
assert_eq!(None, alter_request.catalog_name);
|
||||
assert_eq!(None, alter_request.schema_name);
|
||||
assert_eq!("monitor".to_string(), alter_request.table_name);
|
||||
let add_column = match alter_request.alter_kind {
|
||||
AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
assert!(!add_column.is_key);
|
||||
assert_eq!("mem_usage", add_column.column_schema.name);
|
||||
assert_eq!(
|
||||
ConcreteDataType::float64_datatype(),
|
||||
add_column.column_schema.data_type
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_drop_column_expr() {
|
||||
let expr = AlterExpr {
|
||||
catalog_name: Some("test_catalog".to_string()),
|
||||
schema_name: Some("test_schema".to_string()),
|
||||
table_name: "monitor".to_string(),
|
||||
|
||||
kind: Some(Kind::DropColumns(DropColumns {
|
||||
drop_columns: vec![DropColumn {
|
||||
name: "mem_usage".to_string(),
|
||||
}],
|
||||
})),
|
||||
};
|
||||
|
||||
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
|
||||
assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
|
||||
assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
|
||||
assert_eq!("monitor".to_string(), alter_request.table_name);
|
||||
|
||||
let mut drop_names = match alter_request.alter_kind {
|
||||
AlterKind::DropColumns { names } => names,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
assert_eq!(1, drop_names.len());
|
||||
assert_eq!("mem_usage".to_string(), drop_names.pop().unwrap());
|
||||
}
|
||||
}
|
||||
@@ -22,7 +22,7 @@ use snafu::{Backtrace, ErrorCompat};
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Column {} not found in table {}", column_name, table_name))]
|
||||
#[snafu(display("Column `{}` not found in table `{}`", column_name, table_name))]
|
||||
ColumnNotFound {
|
||||
column_name: String,
|
||||
table_name: String,
|
||||
@@ -57,8 +57,8 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing timestamp column in request"))]
|
||||
MissingTimestampColumn { backtrace: Backtrace },
|
||||
#[snafu(display("Missing timestamp column, msg: {}", msg))]
|
||||
MissingTimestampColumn { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Invalid column proto: {}", err_msg))]
|
||||
InvalidColumnProto {
|
||||
@@ -70,6 +70,26 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing required field in protobuf, field: {}", field))]
|
||||
MissingField { field: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Invalid column default constraint, source: {}", source))]
|
||||
ColumnDefaultConstraint {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Invalid column proto definition, column: {}, source: {}",
|
||||
column,
|
||||
source
|
||||
))]
|
||||
InvalidColumnDef {
|
||||
column: String,
|
||||
#[snafu(backtrace)]
|
||||
source: api::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -87,6 +107,9 @@ impl ErrorExt for Error {
|
||||
| Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
|
||||
Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
|
||||
Error::CreateVector { .. } => StatusCode::InvalidArguments,
|
||||
Error::MissingField { .. } => StatusCode::InvalidArguments,
|
||||
Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
|
||||
Error::InvalidColumnDef { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
fn backtrace_opt(&self) -> Option<&Backtrace> {
|
||||
@@ -14,11 +14,9 @@
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{SemanticType, Values};
|
||||
use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
|
||||
use common_base::BitVec;
|
||||
@@ -35,9 +33,8 @@ use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequ
|
||||
use table::Table;
|
||||
|
||||
use crate::error::{
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
|
||||
DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
|
||||
MissingTimestampColumnSnafu, Result,
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu,
|
||||
IllegalInsertDataSnafu, InvalidColumnProtoSnafu, MissingTimestampColumnSnafu, Result,
|
||||
};
|
||||
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
|
||||
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
|
||||
@@ -52,35 +49,25 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnD
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_new_columns(
|
||||
schema: &SchemaRef,
|
||||
insert_batches: &[InsertBatch],
|
||||
) -> Result<Option<AddColumns>> {
|
||||
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
|
||||
let mut columns_to_add = Vec::default();
|
||||
let mut new_columns: HashSet<String> = HashSet::default();
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
if *row_count == 0 || columns.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if schema.column_schema_by_name(column_name).is_none() && !new_columns.contains(column_name)
|
||||
{
|
||||
if schema.column_schema_by_name(column_name).is_none()
|
||||
&& !new_columns.contains(column_name)
|
||||
{
|
||||
let column_def = Some(build_column_def(column_name, *datatype, true));
|
||||
columns_to_add.push(AddColumn {
|
||||
column_def,
|
||||
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
|
||||
});
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
let column_def = Some(build_column_def(column_name, *datatype, true));
|
||||
columns_to_add.push(AddColumn {
|
||||
column_def,
|
||||
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
|
||||
});
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -201,89 +188,84 @@ pub fn build_create_expr_from_insertion(
|
||||
schema_name: &str,
|
||||
table_id: Option<TableId>,
|
||||
table_name: &str,
|
||||
insert_batches: &[InsertBatch],
|
||||
columns: &[Column],
|
||||
) -> Result<CreateExpr> {
|
||||
let mut new_columns: HashSet<String> = HashSet::default();
|
||||
let mut column_defs = Vec::default();
|
||||
let mut primary_key_indices = Vec::default();
|
||||
let mut timestamp_index = usize::MAX;
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
if *row_count == 0 || columns.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if !new_columns.contains(column_name) {
|
||||
let mut is_nullable = true;
|
||||
match *semantic_type {
|
||||
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
|
||||
TIMESTAMP_SEMANTIC_TYPE => {
|
||||
ensure!(
|
||||
timestamp_index == usize::MAX,
|
||||
DuplicatedTimestampColumnSnafu {
|
||||
exists: &columns[timestamp_index].column_name,
|
||||
duplicated: column_name,
|
||||
}
|
||||
);
|
||||
timestamp_index = column_defs.len();
|
||||
// Timestamp column must not be null.
|
||||
is_nullable = false;
|
||||
}
|
||||
_ => {}
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if !new_columns.contains(column_name) {
|
||||
let mut is_nullable = true;
|
||||
match *semantic_type {
|
||||
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
|
||||
TIMESTAMP_SEMANTIC_TYPE => {
|
||||
ensure!(
|
||||
timestamp_index == usize::MAX,
|
||||
DuplicatedTimestampColumnSnafu {
|
||||
exists: &columns[timestamp_index].column_name,
|
||||
duplicated: column_name,
|
||||
}
|
||||
);
|
||||
timestamp_index = column_defs.len();
|
||||
// Timestamp column must not be null.
|
||||
is_nullable = false;
|
||||
}
|
||||
|
||||
let column_def = build_column_def(column_name, *datatype, is_nullable);
|
||||
column_defs.push(column_def);
|
||||
new_columns.insert(column_name.to_string());
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let column_def = build_column_def(column_name, *datatype, is_nullable);
|
||||
column_defs.push(column_def);
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
|
||||
ensure!(timestamp_index != usize::MAX, MissingTimestampColumnSnafu);
|
||||
let timestamp_field_name = columns[timestamp_index].column_name.clone();
|
||||
|
||||
let primary_keys = primary_key_indices
|
||||
.iter()
|
||||
.map(|idx| columns[*idx].column_name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expr = CreateExpr {
|
||||
catalog_name: Some(catalog_name.to_string()),
|
||||
schema_name: Some(schema_name.to_string()),
|
||||
table_name: table_name.to_string(),
|
||||
desc: Some("Created on insertion".to_string()),
|
||||
column_defs,
|
||||
time_index: timestamp_field_name,
|
||||
primary_keys,
|
||||
create_if_not_exists: true,
|
||||
table_options: Default::default(),
|
||||
table_id,
|
||||
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
|
||||
};
|
||||
|
||||
return Ok(expr);
|
||||
}
|
||||
|
||||
IllegalInsertDataSnafu.fail()
|
||||
ensure!(
|
||||
timestamp_index != usize::MAX,
|
||||
MissingTimestampColumnSnafu { msg: table_name }
|
||||
);
|
||||
let timestamp_field_name = columns[timestamp_index].column_name.clone();
|
||||
|
||||
let primary_keys = primary_key_indices
|
||||
.iter()
|
||||
.map(|idx| columns[*idx].column_name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expr = CreateExpr {
|
||||
catalog_name: Some(catalog_name.to_string()),
|
||||
schema_name: Some(schema_name.to_string()),
|
||||
table_name: table_name.to_string(),
|
||||
desc: Some("Created on insertion".to_string()),
|
||||
column_defs,
|
||||
time_index: timestamp_field_name,
|
||||
primary_keys,
|
||||
create_if_not_exists: true,
|
||||
table_options: Default::default(),
|
||||
table_id,
|
||||
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
pub fn insertion_expr_to_request(
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
insert_batches: Vec<InsertBatch>,
|
||||
insert_batches: Vec<(Vec<Column>, u32)>,
|
||||
table: Arc<dyn Table>,
|
||||
) -> Result<InsertRequest> {
|
||||
let schema = table.schema();
|
||||
let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
for (columns, row_count) in insert_batches {
|
||||
for Column {
|
||||
column_name,
|
||||
values,
|
||||
@@ -329,14 +311,6 @@ pub fn insertion_expr_to_request(
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
|
||||
bytes_vec
|
||||
.iter()
|
||||
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn add_values_to_builder(
|
||||
builder: &mut VectorBuilder,
|
||||
values: Values,
|
||||
@@ -463,9 +437,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{self, SemanticType, Values};
|
||||
use api::v1::{insert_expr, Column, ColumnDataType};
|
||||
use api::v1::{Column, ColumnDataType};
|
||||
use common_base::BitVec;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_query::prelude::Expr;
|
||||
@@ -479,11 +452,12 @@ mod tests {
|
||||
use table::Table;
|
||||
|
||||
use super::{
|
||||
build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
|
||||
insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
|
||||
build_create_expr_from_insertion, convert_values, insertion_expr_to_request, is_null,
|
||||
TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
|
||||
};
|
||||
use crate::error;
|
||||
use crate::error::ColumnDataTypeSnafu;
|
||||
use crate::insert::find_new_columns;
|
||||
|
||||
#[inline]
|
||||
fn build_column_schema(
|
||||
@@ -508,11 +482,10 @@ mod tests {
|
||||
|
||||
assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
|
||||
|
||||
let mock_batch_bytes = mock_insert_batches();
|
||||
let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let create_expr =
|
||||
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
|
||||
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(table_id, create_expr.table_id);
|
||||
@@ -598,9 +571,9 @@ mod tests {
|
||||
|
||||
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
|
||||
|
||||
let mock_insert_bytes = mock_insert_batches();
|
||||
let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
|
||||
let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();
|
||||
|
||||
assert_eq!(2, add_columns.add_columns.len());
|
||||
let host_column = &add_columns.add_columns[0];
|
||||
@@ -630,10 +603,7 @@ mod tests {
|
||||
fn test_insertion_expr_to_request() {
|
||||
let table: Arc<dyn Table> = Arc::new(DemoTable {});
|
||||
|
||||
let values = insert_expr::Values {
|
||||
values: mock_insert_batches(),
|
||||
};
|
||||
let insert_batches = insert_batches(&values.values).unwrap();
|
||||
let insert_batches = vec![mock_insert_batch()];
|
||||
let insert_req =
|
||||
insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();
|
||||
|
||||
@@ -731,7 +701,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_insert_batches() -> Vec<Vec<u8>> {
|
||||
fn mock_insert_batch() -> (Vec<Column>, u32) {
|
||||
let row_count = 2;
|
||||
|
||||
let host_vals = column::Values {
|
||||
@@ -782,10 +752,9 @@ mod tests {
|
||||
datatype: ColumnDataType::Timestamp as i32,
|
||||
};
|
||||
|
||||
let insert_batch = InsertBatch {
|
||||
columns: vec![host_column, cpu_column, mem_column, ts_column],
|
||||
(
|
||||
vec![host_column, cpu_column, mem_column, ts_column],
|
||||
row_count,
|
||||
};
|
||||
vec![insert_batch.into()]
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
#![feature(assert_matches)]
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -12,9 +13,12 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod alter;
|
||||
pub mod error;
|
||||
mod insert;
|
||||
|
||||
pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
|
||||
pub use insert::{
|
||||
build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
|
||||
find_new_columns, insert_batches, insertion_expr_to_request,
|
||||
find_new_columns, insertion_expr_to_request,
|
||||
};
|
||||
@@ -12,30 +12,16 @@ common-error = { path = "../error" }
|
||||
common-query = { path = "../query" }
|
||||
common-recordbatch = { path = "../recordbatch" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
tonic = "0.8"
|
||||
tower = "0.4"
|
||||
|
||||
[dependencies.arrow]
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
features = [
|
||||
"io_csv",
|
||||
"io_json",
|
||||
"io_parquet",
|
||||
"io_parquet_compression",
|
||||
"io_ipc",
|
||||
"ahash",
|
||||
"compute",
|
||||
"serde_types",
|
||||
]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
rand = "0.8"
|
||||
|
||||
@@ -14,9 +14,7 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use api::DecodeError;
|
||||
use common_error::prelude::{ErrorExt, StatusCode};
|
||||
use datafusion::error::DataFusionError;
|
||||
use snafu::{Backtrace, ErrorCompat, Snafu};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -24,33 +22,9 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Unexpected empty physical plan type: {}", name))]
|
||||
EmptyPhysicalPlan { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unexpected empty physical expr: {}", name))]
|
||||
EmptyPhysicalExpr { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported datafusion execution plan: {}", name))]
|
||||
UnsupportedDfPlan { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported datafusion physical expr: {}", name))]
|
||||
UnsupportedDfExpr { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Missing required field in protobuf, field: {}", field))]
|
||||
MissingField { field: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to new datafusion projection exec, source: {}", source))]
|
||||
NewProjection {
|
||||
source: DataFusionError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode physical plan node, source: {}", source))]
|
||||
DecodePhysicalPlanNode {
|
||||
source: DecodeError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Write type mismatch, column name: {}, expected: {}, actual: {}",
|
||||
column_name,
|
||||
@@ -89,17 +63,8 @@ pub enum Error {
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::EmptyPhysicalPlan { .. }
|
||||
| Error::EmptyPhysicalExpr { .. }
|
||||
| Error::MissingField { .. }
|
||||
| Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
|
||||
StatusCode::Unsupported
|
||||
}
|
||||
Error::NewProjection { .. }
|
||||
| Error::DecodePhysicalPlanNode { .. }
|
||||
| Error::CreateChannel { .. }
|
||||
| Error::Conversion { .. } => StatusCode::Internal,
|
||||
Error::MissingField { .. } | Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::CreateChannel { .. } | Error::Conversion { .. } => StatusCode::Internal,
|
||||
Error::CollectRecordBatches { source } => source.status_code(),
|
||||
Error::ColumnDataType { source } => source.status_code(),
|
||||
}
|
||||
@@ -126,50 +91,6 @@ mod tests {
|
||||
None
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_physical_plan_error() {
|
||||
let e = throw_none_option()
|
||||
.context(EmptyPhysicalPlanSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_physical_expr_error() {
|
||||
let e = throw_none_option()
|
||||
.context(EmptyPhysicalExprSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unsupported_df_plan_error() {
|
||||
let e = throw_none_option()
|
||||
.context(UnsupportedDfPlanSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Unsupported);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unsupported_df_expr_error() {
|
||||
let e = throw_none_option()
|
||||
.context(UnsupportedDfExprSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Unsupported);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_field_error() {
|
||||
let e = throw_none_option()
|
||||
@@ -181,33 +102,6 @@ mod tests {
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_projection_error() {
|
||||
fn throw_df_error() -> StdResult<DataFusionError> {
|
||||
Err(DataFusionError::NotImplemented("".to_string()))
|
||||
}
|
||||
|
||||
let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Internal);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_physical_plan_node_error() {
|
||||
fn throw_decode_error() -> StdResult<DecodeError> {
|
||||
Err(DecodeError::new("test"))
|
||||
}
|
||||
|
||||
let e = throw_decode_error()
|
||||
.context(DecodePhysicalPlanNodeSnafu)
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Internal);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_mismatch_error() {
|
||||
let e = throw_none_option()
|
||||
|
||||
@@ -14,10 +14,7 @@
|
||||
|
||||
pub mod channel_manager;
|
||||
pub mod error;
|
||||
pub mod physical;
|
||||
pub mod select;
|
||||
pub mod writer;
|
||||
|
||||
pub use error::Error;
|
||||
pub use physical::plan::{DefaultAsPlanImpl, MockExecution};
|
||||
pub use physical::AsExcutionPlan;
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::result::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec;
|
||||
use datafusion::physical_plan::expressions::Column as DfColumn;
|
||||
use datafusion::physical_plan::PhysicalExpr as DfPhysicalExpr;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{EmptyPhysicalExprSnafu, Error, UnsupportedDfExprSnafu};
|
||||
|
||||
// grpc -> datafusion (physical expr)
|
||||
pub(crate) fn parse_grpc_physical_expr(
|
||||
proto: &codec::PhysicalExprNode,
|
||||
) -> Result<Arc<dyn DfPhysicalExpr>, Error> {
|
||||
let expr_type = proto.expr_type.as_ref().context(EmptyPhysicalExprSnafu {
|
||||
name: format!("{:?}", proto),
|
||||
})?;
|
||||
|
||||
// TODO(fys): impl other physical expr
|
||||
let pexpr: Arc<dyn DfPhysicalExpr> = match expr_type {
|
||||
codec::physical_expr_node::ExprType::Column(c) => {
|
||||
let pcol = DfColumn::new(&c.name, c.index as usize);
|
||||
Arc::new(pcol)
|
||||
}
|
||||
};
|
||||
Ok(pexpr)
|
||||
}
|
||||
|
||||
// datafusion -> grpc (physical expr)
|
||||
pub(crate) fn parse_df_physical_expr(
|
||||
df_expr: Arc<dyn DfPhysicalExpr>,
|
||||
) -> Result<codec::PhysicalExprNode, Error> {
|
||||
let expr = df_expr.as_any();
|
||||
|
||||
// TODO(fys): impl other physical expr
|
||||
if let Some(expr) = expr.downcast_ref::<DfColumn>() {
|
||||
Ok(codec::PhysicalExprNode {
|
||||
expr_type: Some(codec::physical_expr_node::ExprType::Column(
|
||||
codec::PhysicalColumn {
|
||||
name: expr.name().to_string(),
|
||||
index: expr.index() as u64,
|
||||
},
|
||||
)),
|
||||
})
|
||||
} else {
|
||||
UnsupportedDfExprSnafu {
|
||||
name: df_expr.to_string(),
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::physical_expr_node::ExprType::Column;
|
||||
use api::v1::codec::{PhysicalColumn, PhysicalExprNode};
|
||||
use datafusion::physical_plan::expressions::Column as DfColumn;
|
||||
use datafusion::physical_plan::PhysicalExpr;
|
||||
|
||||
use crate::physical::expr::{parse_df_physical_expr, parse_grpc_physical_expr};
|
||||
|
||||
#[test]
|
||||
fn test_column_convert() {
|
||||
// mock df_column_expr
|
||||
let df_column = DfColumn::new("name", 11);
|
||||
let df_column_clone = df_column.clone();
|
||||
let df_expr = Arc::new(df_column) as Arc<dyn PhysicalExpr>;
|
||||
|
||||
// mock grpc_column_expr
|
||||
let grpc_expr = PhysicalExprNode {
|
||||
expr_type: Some(Column(PhysicalColumn {
|
||||
name: "name".to_owned(),
|
||||
index: 11,
|
||||
})),
|
||||
};
|
||||
|
||||
let result = parse_df_physical_expr(df_expr).unwrap();
|
||||
assert_eq!(grpc_expr, result);
|
||||
|
||||
let result = parse_grpc_physical_expr(&grpc_expr).unwrap();
|
||||
let df_column = result.as_any().downcast_ref::<DfColumn>().unwrap();
|
||||
assert_eq!(df_column_clone, df_column.to_owned());
|
||||
}
|
||||
}
|
||||
@@ -1,280 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Deref;
|
||||
use std::result::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::physical_plan_node::PhysicalPlanType;
|
||||
use api::v1::codec::{MockInputExecNode, PhysicalPlanNode, ProjectionExecNode};
|
||||
use arrow::array::{PrimitiveArray, Utf8Array};
|
||||
use arrow::datatypes::{DataType, Field, Schema};
|
||||
use async_trait::async_trait;
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datafusion::physical_plan::memory::MemoryStream;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
use datafusion::physical_plan::{
|
||||
ExecutionPlan, PhysicalExpr, SendableRecordBatchStream, Statistics,
|
||||
};
|
||||
use datafusion::record_batch::RecordBatch;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
DecodePhysicalPlanNodeSnafu, EmptyPhysicalPlanSnafu, Error, MissingFieldSnafu,
|
||||
NewProjectionSnafu, UnsupportedDfPlanSnafu,
|
||||
};
|
||||
use crate::physical::{expr, AsExcutionPlan, ExecutionPlanRef};
|
||||
|
||||
pub struct DefaultAsPlanImpl {
|
||||
pub bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
impl AsExcutionPlan for DefaultAsPlanImpl {
|
||||
type Error = Error;
|
||||
|
||||
// Vec<u8> -> PhysicalPlanNode -> ExecutionPlanRef
|
||||
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
|
||||
let physicalplan_node: PhysicalPlanNode = self
|
||||
.bytes
|
||||
.deref()
|
||||
.try_into()
|
||||
.context(DecodePhysicalPlanNodeSnafu)?;
|
||||
physicalplan_node.try_into_physical_plan()
|
||||
}
|
||||
|
||||
// ExecutionPlanRef -> PhysicalPlanNode -> Vec<u8>
|
||||
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let bytes: Vec<u8> = PhysicalPlanNode::try_from_physical_plan(plan)?.into();
|
||||
Ok(DefaultAsPlanImpl { bytes })
|
||||
}
|
||||
}
|
||||
|
||||
impl AsExcutionPlan for PhysicalPlanNode {
|
||||
type Error = Error;
|
||||
|
||||
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
|
||||
let plan = self
|
||||
.physical_plan_type
|
||||
.as_ref()
|
||||
.context(EmptyPhysicalPlanSnafu {
|
||||
name: format!("{:?}", self),
|
||||
})?;
|
||||
|
||||
// TODO(fys): impl other physical plan type
|
||||
match plan {
|
||||
PhysicalPlanType::Projection(projection) => {
|
||||
let input = if let Some(input) = &projection.input {
|
||||
input.as_ref().try_into_physical_plan()?
|
||||
} else {
|
||||
MissingFieldSnafu { field: "input" }.fail()?
|
||||
};
|
||||
let exprs = projection
|
||||
.expr
|
||||
.iter()
|
||||
.zip(projection.expr_name.iter())
|
||||
.map(|(expr, name)| {
|
||||
Ok((expr::parse_grpc_physical_expr(expr)?, name.to_string()))
|
||||
})
|
||||
.collect::<Result<Vec<(Arc<dyn PhysicalExpr>, String)>, Error>>()?;
|
||||
|
||||
let projection =
|
||||
ProjectionExec::try_new(exprs, input).context(NewProjectionSnafu)?;
|
||||
|
||||
Ok(Arc::new(projection))
|
||||
}
|
||||
PhysicalPlanType::Mock(mock) => Ok(Arc::new(MockExecution {
|
||||
name: mock.name.to_string(),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let plan = plan.as_any();
|
||||
|
||||
if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
|
||||
let input = PhysicalPlanNode::try_from_physical_plan(exec.input().to_owned())?;
|
||||
|
||||
let expr = exec
|
||||
.expr()
|
||||
.iter()
|
||||
.map(|expr| expr::parse_df_physical_expr(expr.0.clone()))
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
|
||||
let expr_name = exec.expr().iter().map(|expr| expr.1.clone()).collect();
|
||||
|
||||
Ok(PhysicalPlanNode {
|
||||
physical_plan_type: Some(PhysicalPlanType::Projection(Box::new(
|
||||
ProjectionExecNode {
|
||||
input: Some(Box::new(input)),
|
||||
expr,
|
||||
expr_name,
|
||||
},
|
||||
))),
|
||||
})
|
||||
} else if let Some(exec) = plan.downcast_ref::<MockExecution>() {
|
||||
Ok(PhysicalPlanNode {
|
||||
physical_plan_type: Some(PhysicalPlanType::Mock(MockInputExecNode {
|
||||
name: exec.name.clone(),
|
||||
})),
|
||||
})
|
||||
} else {
|
||||
UnsupportedDfPlanSnafu {
|
||||
name: format!("{:?}", plan),
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fys): use "test" feature to enable it
|
||||
#[derive(Debug)]
|
||||
pub struct MockExecution {
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl MockExecution {
|
||||
pub fn new(name: String) -> Self {
|
||||
Self { name }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionPlan for MockExecution {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> arrow::datatypes::SchemaRef {
|
||||
let field1 = Field::new("id", DataType::UInt32, false);
|
||||
let field2 = Field::new("name", DataType::Utf8, false);
|
||||
let field3 = Field::new("age", DataType::UInt32, false);
|
||||
Arc::new(arrow::datatypes::Schema::new(vec![field1, field2, field3]))
|
||||
}
|
||||
|
||||
fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn output_ordering(
|
||||
&self,
|
||||
) -> Option<&[datafusion::physical_plan::expressions::PhysicalSortExpr]> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<ExecutionPlanRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
&self,
|
||||
_children: Vec<ExecutionPlanRef>,
|
||||
) -> datafusion::error::Result<ExecutionPlanRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
_runtime: Arc<RuntimeEnv>,
|
||||
) -> datafusion::error::Result<SendableRecordBatchStream> {
|
||||
let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
|
||||
let name_array = Arc::new(Utf8Array::<i32>::from_slice([
|
||||
"zhangsan", "lisi", "wangwu", "Tony", "Mike",
|
||||
]));
|
||||
let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::UInt32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
Field::new("age", DataType::UInt32, false),
|
||||
]));
|
||||
let record_batch =
|
||||
RecordBatch::try_new(schema, vec![id_array, name_array, age_array]).unwrap();
|
||||
let data: Vec<RecordBatch> = vec![record_batch];
|
||||
let projection = Some(vec![0, 1, 2]);
|
||||
let stream = MemoryStream::try_new(data, self.schema(), projection).unwrap();
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::PhysicalPlanNode;
|
||||
use datafusion::physical_plan::expressions::Column;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
|
||||
use crate::physical::plan::{DefaultAsPlanImpl, MockExecution};
|
||||
use crate::physical::{AsExcutionPlan, ExecutionPlanRef};
|
||||
|
||||
#[test]
|
||||
fn test_convert_df_projection_with_bytes() {
|
||||
let projection_exec = mock_df_projection();
|
||||
|
||||
let bytes = DefaultAsPlanImpl::try_from_physical_plan(projection_exec).unwrap();
|
||||
let exec = bytes.try_into_physical_plan().unwrap();
|
||||
|
||||
verify_df_porjection(exec);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_df_with_grpc_projection() {
|
||||
let projection_exec = mock_df_projection();
|
||||
|
||||
let projection_node = PhysicalPlanNode::try_from_physical_plan(projection_exec).unwrap();
|
||||
let exec = projection_node.try_into_physical_plan().unwrap();
|
||||
|
||||
verify_df_porjection(exec);
|
||||
}
|
||||
|
||||
fn mock_df_projection() -> Arc<ProjectionExec> {
|
||||
let mock_input = Arc::new(MockExecution {
|
||||
name: "mock_input".to_string(),
|
||||
});
|
||||
let column1 = Arc::new(Column::new("id", 0));
|
||||
let column2 = Arc::new(Column::new("name", 1));
|
||||
Arc::new(
|
||||
ProjectionExec::try_new(
|
||||
vec![(column1, "id".to_string()), (column2, "name".to_string())],
|
||||
mock_input,
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
fn verify_df_porjection(exec: ExecutionPlanRef) {
|
||||
let projection_exec = exec.as_any().downcast_ref::<ProjectionExec>().unwrap();
|
||||
let mock_input = projection_exec
|
||||
.input()
|
||||
.as_any()
|
||||
.downcast_ref::<MockExecution>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!("mock_input", mock_input.name);
|
||||
assert_eq!(2, projection_exec.expr().len());
|
||||
assert_eq!("id", projection_exec.expr()[0].1);
|
||||
assert_eq!("name", projection_exec.expr()[1].1);
|
||||
}
|
||||
}
|
||||
@@ -19,12 +19,12 @@ use api::result::{build_err_result, ObjectResultBuilder};
|
||||
use api::v1::codec::SelectResult;
|
||||
use api::v1::column::{SemanticType, Values};
|
||||
use api::v1::{Column, ObjectResult};
|
||||
use arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use common_base::BitVec;
|
||||
use common_error::prelude::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::{util, RecordBatches, SendableRecordBatchStream};
|
||||
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
|
||||
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use datatypes::arrow_array::{BinaryArray, StringArray};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -47,13 +47,9 @@ pub async fn to_object_result(output: std::result::Result<Output, impl ErrorExt>
|
||||
}
|
||||
}
|
||||
async fn collect(stream: SendableRecordBatchStream) -> Result<ObjectResult> {
|
||||
let schema = stream.schema();
|
||||
|
||||
let recordbatches = util::collect(stream)
|
||||
let recordbatches = RecordBatches::try_collect(stream)
|
||||
.await
|
||||
.and_then(|batches| RecordBatches::try_new(schema, batches))
|
||||
.context(error::CollectRecordBatchesSnafu)?;
|
||||
|
||||
let object_result = build_result(recordbatches)?;
|
||||
Ok(object_result)
|
||||
}
|
||||
@@ -136,7 +132,8 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
|
||||
}
|
||||
|
||||
macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {
|
||||
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{
|
||||
use datatypes::arrow::datatypes::{DataType, TimeUnit};
|
||||
match $data_type {
|
||||
$(
|
||||
$Type => {
|
||||
@@ -155,7 +152,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
)+
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
};
|
||||
}};
|
||||
}
|
||||
|
||||
pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
|
||||
@@ -164,7 +161,6 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
|
||||
}
|
||||
let data_type = arrays[0].data_type();
|
||||
|
||||
use arrow::datatypes::DataType;
|
||||
convert_arrow_array_to_grpc_vals!(
|
||||
data_type, arrays,
|
||||
|
||||
@@ -192,7 +188,7 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
|
||||
(DataType::Date32, PrimitiveArray<i32>, date_values, |x| {*x as i32}),
|
||||
(DataType::Date64, PrimitiveArray<i64>, datetime_values,|x| {*x as i64}),
|
||||
|
||||
(DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
|
||||
(DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
|
||||
)
|
||||
}
|
||||
|
||||
@@ -200,11 +196,10 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::arrow::datatypes::Schema as ArrowSchema;
|
||||
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::schema::Schema;
|
||||
use datatypes::vectors::{UInt32Vector, VectorRef};
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{SemanticType, Values};
|
||||
use api::v1::{Column, ColumnDataType};
|
||||
use common_base::BitVec;
|
||||
@@ -24,12 +23,14 @@ use crate::error::{Result, TypeMismatchSnafu};
|
||||
|
||||
type ColumnName = String;
|
||||
|
||||
type RowCount = u32;
|
||||
|
||||
// TODO(fys): will remove in the future.
|
||||
#[derive(Default)]
|
||||
pub struct LinesWriter {
|
||||
column_name_index: HashMap<ColumnName, usize>,
|
||||
null_masks: Vec<BitVec>,
|
||||
batch: InsertBatch,
|
||||
batch: (Vec<Column>, RowCount),
|
||||
lines: usize,
|
||||
}
|
||||
|
||||
@@ -171,20 +172,20 @@ impl LinesWriter {
|
||||
|
||||
pub fn commit(&mut self) {
|
||||
let batch = &mut self.batch;
|
||||
batch.row_count += 1;
|
||||
batch.1 += 1;
|
||||
|
||||
for i in 0..batch.columns.len() {
|
||||
for i in 0..batch.0.len() {
|
||||
let null_mask = &mut self.null_masks[i];
|
||||
if batch.row_count as usize > null_mask.len() {
|
||||
if batch.1 as usize > null_mask.len() {
|
||||
null_mask.push(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(mut self) -> InsertBatch {
|
||||
pub fn finish(mut self) -> (Vec<Column>, RowCount) {
|
||||
let null_masks = self.null_masks;
|
||||
for (i, null_mask) in null_masks.into_iter().enumerate() {
|
||||
let columns = &mut self.batch.columns;
|
||||
let columns = &mut self.batch.0;
|
||||
columns[i].null_mask = null_mask.into_vec();
|
||||
}
|
||||
self.batch
|
||||
@@ -204,9 +205,9 @@ impl LinesWriter {
|
||||
let batch = &mut self.batch;
|
||||
let to_insert = self.lines;
|
||||
let mut null_mask = BitVec::with_capacity(to_insert);
|
||||
null_mask.extend(BitVec::repeat(true, batch.row_count as usize));
|
||||
null_mask.extend(BitVec::repeat(true, batch.1 as usize));
|
||||
self.null_masks.push(null_mask);
|
||||
batch.columns.push(Column {
|
||||
batch.0.push(Column {
|
||||
column_name: column_name.to_string(),
|
||||
semantic_type: semantic_type.into(),
|
||||
values: Some(Values::with_capacity(datatype, to_insert)),
|
||||
@@ -217,7 +218,7 @@ impl LinesWriter {
|
||||
new_idx
|
||||
}
|
||||
};
|
||||
(column_idx, &mut self.batch.columns[column_idx])
|
||||
(column_idx, &mut self.batch.0[column_idx])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -282,9 +283,9 @@ mod tests {
|
||||
writer.commit();
|
||||
|
||||
let insert_batch = writer.finish();
|
||||
assert_eq!(3, insert_batch.row_count);
|
||||
assert_eq!(3, insert_batch.1);
|
||||
|
||||
let columns = insert_batch.columns;
|
||||
let columns = insert_batch.0;
|
||||
assert_eq!(9, columns.len());
|
||||
|
||||
let column = &columns[0];
|
||||
|
||||
@@ -18,10 +18,6 @@ datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
statrs = "0.15"
|
||||
|
||||
[dependencies.arrow]
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
|
||||
[dev-dependencies]
|
||||
common-base = { path = "../base" }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
|
||||
@@ -80,7 +80,7 @@ impl From<ColumnarValue> for DfColumnarValue {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::vectors::BooleanVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -14,9 +14,11 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDatatype;
|
||||
use arrow::error::ArrowError;
|
||||
use common_error::prelude::*;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use statrs::StatsError;
|
||||
@@ -26,6 +28,11 @@ common_error::define_opaque_error!(Error);
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum InnerError {
|
||||
#[snafu(display("Fail to cast array to {:?}, source: {}", typ, source))]
|
||||
TypeCast {
|
||||
source: ArrowError,
|
||||
typ: arrow::datatypes::DataType,
|
||||
},
|
||||
#[snafu(display("Fail to execute function, source: {}", source))]
|
||||
ExecuteFunction {
|
||||
source: DataFusionError,
|
||||
@@ -51,6 +58,12 @@ pub enum InnerError {
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Fail to cast arrow array into vector: {}", source))]
|
||||
FromArrowArray {
|
||||
#[snafu(backtrace)]
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Fail to cast arrow array into vector: {:?}, {}", data_type, source))]
|
||||
IntoVector {
|
||||
#[snafu(backtrace)]
|
||||
@@ -138,13 +151,16 @@ impl ErrorExt for InnerError {
|
||||
InnerError::InvalidInputs { source, .. }
|
||||
| InnerError::IntoVector { source, .. }
|
||||
| InnerError::FromScalarValue { source }
|
||||
| InnerError::ConvertArrowSchema { source } => source.status_code(),
|
||||
| InnerError::ConvertArrowSchema { source }
|
||||
| InnerError::FromArrowArray { source } => source.status_code(),
|
||||
|
||||
InnerError::ExecuteRepeatedly { .. }
|
||||
| InnerError::GeneralDataFusion { .. }
|
||||
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
|
||||
|
||||
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
|
||||
InnerError::UnsupportedInputDataType { .. } | InnerError::TypeCast { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
|
||||
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
|
||||
InnerError::ExecutePhysicalPlan { source } => source.status_code(),
|
||||
@@ -180,7 +196,7 @@ impl From<BoxedError> for Error {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::error::ArrowError;
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
use snafu::GenerateImplicitData;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use datafusion_expr::ReturnTypeFunction as DfReturnTypeFunction;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_expr::Accumulator as DfAccumulator;
|
||||
use datatypes::arrow::array::ArrayRef;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{Helper as VectorHelper, VectorRef};
|
||||
@@ -266,9 +266,9 @@ fn try_convert_list_value(list: ListValue) -> Result<ScalarValue> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType;
|
||||
use common_base::bytes::{Bytes, StringBytes};
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -72,12 +72,12 @@ pub fn create_aggregate_function(
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::BooleanArray;
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion_expr::{
|
||||
ColumnarValue as DfColumnarValue, ScalarUDF as DfScalarUDF,
|
||||
TypeSignature as DfTypeSignature,
|
||||
};
|
||||
use datatypes::arrow::array::BooleanArray;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::{BooleanVector, VectorRef};
|
||||
|
||||
|
||||
@@ -19,11 +19,11 @@
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use datafusion_expr::{
|
||||
AccumulatorFunctionImplementation as DfAccumulatorFunctionImplementation,
|
||||
AggregateUDF as DfAggregateUdf, StateTypeFunction as DfStateTypeFunction,
|
||||
};
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
use crate::function::{
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
//! Udf module contains foundational types that are used to represent UDFs.
|
||||
//! It's modifed from datafusion.
|
||||
//! It's modified from datafusion.
|
||||
use std::fmt;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
|
||||
use common_recordbatch::adapter::{AsyncRecordBatchStreamAdapter, DfRecordBatchStreamAdapter};
|
||||
use common_recordbatch::{DfSendableRecordBatchStream, SendableRecordBatchStream};
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::error::Result as DfResult;
|
||||
@@ -39,7 +39,6 @@ pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;
|
||||
/// creating the actual `async` [`SendableRecordBatchStream`]s
|
||||
/// of [`RecordBatch`] that incrementally compute the operator's
|
||||
/// output from its input partition.
|
||||
#[async_trait]
|
||||
pub trait PhysicalPlan: Debug + Send + Sync {
|
||||
/// Returns the physical plan as [`Any`](std::any::Any) so that it can be
|
||||
/// downcast to a specific implementation.
|
||||
@@ -61,7 +60,7 @@ pub trait PhysicalPlan: Debug + Send + Sync {
|
||||
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef>;
|
||||
|
||||
/// Creates an RecordBatch stream.
|
||||
async fn execute(
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
@@ -84,7 +83,6 @@ impl PhysicalPlanAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalPlan for PhysicalPlanAdapter {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -118,18 +116,15 @@ impl PhysicalPlan for PhysicalPlanAdapter {
|
||||
Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let stream = self
|
||||
.df_plan
|
||||
.execute(partition, runtime)
|
||||
.await
|
||||
.context(error::DataFusionExecutionPlanSnafu)?;
|
||||
let stream = RecordBatchStreamAdapter::try_new(stream)
|
||||
.context(error::ConvertDfRecordBatchStreamSnafu)?;
|
||||
let df_plan = self.df_plan.clone();
|
||||
let stream = Box::pin(async move { df_plan.execute(partition, runtime).await });
|
||||
let stream = AsyncRecordBatchStreamAdapter::new(self.schema(), stream);
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
@@ -187,7 +182,7 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
) -> DfResult<DfSendableRecordBatchStream> {
|
||||
let stream = self.0.execute(partition, runtime).await?;
|
||||
let stream = self.0.execute(partition, runtime)?;
|
||||
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
|
||||
}
|
||||
|
||||
@@ -199,7 +194,6 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::arrow_print;
|
||||
use datafusion::datasource::TableProvider as DfTableProvider;
|
||||
@@ -209,6 +203,7 @@ mod test {
|
||||
use datafusion::prelude::ExecutionContext;
|
||||
use datafusion_common::field_util::SchemaExt;
|
||||
use datafusion_expr::Expr;
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
|
||||
use datatypes::schema::Schema;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
@@ -250,7 +245,6 @@ mod test {
|
||||
schema: SchemaRef,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalPlan for MyExecutionPlan {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -272,7 +266,7 @@ mod test {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
_runtime: Arc<RuntimeEnv>,
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
//! Signature module contains foundational types that are used to represent signatures, types,
|
||||
//! and return types of functions.
|
||||
//! Copied and modified from datafusion.
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
pub use datafusion::physical_plan::functions::Volatility;
|
||||
use datafusion_expr::{Signature as DfSignature, TypeSignature as DfTypeSignature};
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
|
||||
@@ -53,7 +53,7 @@ pub struct Signature {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn concret_types_to_arrow_types(ts: Vec<ConcreteDataType>) -> Vec<ArrowDataType> {
|
||||
fn concrete_types_to_arrow_types(ts: Vec<ConcreteDataType>) -> Vec<ArrowDataType> {
|
||||
ts.iter().map(ConcreteDataType::as_arrow_type).collect()
|
||||
}
|
||||
|
||||
@@ -118,14 +118,14 @@ impl From<TypeSignature> for DfTypeSignature {
|
||||
fn from(type_signature: TypeSignature) -> DfTypeSignature {
|
||||
match type_signature {
|
||||
TypeSignature::Variadic(types) => {
|
||||
DfTypeSignature::Variadic(concret_types_to_arrow_types(types))
|
||||
DfTypeSignature::Variadic(concrete_types_to_arrow_types(types))
|
||||
}
|
||||
TypeSignature::VariadicEqual => DfTypeSignature::VariadicEqual,
|
||||
TypeSignature::Uniform(n, types) => {
|
||||
DfTypeSignature::Uniform(n, concret_types_to_arrow_types(types))
|
||||
DfTypeSignature::Uniform(n, concrete_types_to_arrow_types(types))
|
||||
}
|
||||
TypeSignature::Exact(types) => {
|
||||
DfTypeSignature::Exact(concret_types_to_arrow_types(types))
|
||||
DfTypeSignature::Exact(concrete_types_to_arrow_types(types))
|
||||
}
|
||||
TypeSignature::Any(n) => DfTypeSignature::Any(n),
|
||||
TypeSignature::OneOf(ts) => {
|
||||
@@ -143,7 +143,7 @@ impl From<Signature> for DfSignature {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
@@ -19,8 +20,10 @@ use std::task::{Context, Poll};
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use futures::ready;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -28,6 +31,14 @@ use crate::{
|
||||
DfSendableRecordBatchStream, RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream,
|
||||
};
|
||||
|
||||
type FutureStream = Pin<
|
||||
Box<
|
||||
dyn std::future::Future<
|
||||
Output = std::result::Result<DfSendableRecordBatchStream, DataFusionError>,
|
||||
> + Send,
|
||||
>,
|
||||
>;
|
||||
|
||||
/// Greptime SendableRecordBatchStream -> DataFusion RecordBatchStream
|
||||
pub struct DfRecordBatchStreamAdapter {
|
||||
stream: SendableRecordBatchStream,
|
||||
@@ -104,3 +115,71 @@ impl Stream for RecordBatchStreamAdapter {
|
||||
self.stream.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
enum AsyncRecordBatchStreamAdapterState {
|
||||
Uninit(FutureStream),
|
||||
Inited(std::result::Result<DfSendableRecordBatchStream, DataFusionError>),
|
||||
}
|
||||
|
||||
pub struct AsyncRecordBatchStreamAdapter {
|
||||
schema: SchemaRef,
|
||||
state: AsyncRecordBatchStreamAdapterState,
|
||||
}
|
||||
|
||||
impl AsyncRecordBatchStreamAdapter {
|
||||
pub fn new(schema: SchemaRef, stream: FutureStream) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
state: AsyncRecordBatchStreamAdapterState::Uninit(stream),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchStream for AsyncRecordBatchStreamAdapter {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for AsyncRecordBatchStreamAdapter {
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
loop {
|
||||
match &mut self.state {
|
||||
AsyncRecordBatchStreamAdapterState::Uninit(stream_future) => {
|
||||
self.state = AsyncRecordBatchStreamAdapterState::Inited(ready!(Pin::new(
|
||||
stream_future
|
||||
)
|
||||
.poll(cx)));
|
||||
continue;
|
||||
}
|
||||
AsyncRecordBatchStreamAdapterState::Inited(stream) => match stream {
|
||||
Ok(stream) => {
|
||||
return Poll::Ready(ready!(Pin::new(stream).poll_next(cx)).map(|df| {
|
||||
Ok(RecordBatch {
|
||||
schema: self.schema(),
|
||||
df_recordbatch: df.context(error::PollStreamSnafu)?,
|
||||
})
|
||||
}));
|
||||
}
|
||||
Err(e) => {
|
||||
return Poll::Ready(Some(
|
||||
error::CreateRecordBatchesSnafu {
|
||||
reason: format!("Read error {:?} from stream", e),
|
||||
}
|
||||
.fail()
|
||||
.map_err(|e| e.into()),
|
||||
))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is not supported for lazy stream.
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(0, None)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,12 +21,13 @@ use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::arrow_print;
|
||||
use datafusion::physical_plan::memory::MemoryStream;
|
||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use error::Result;
|
||||
use futures::task::{Context, Poll};
|
||||
use futures::Stream;
|
||||
use futures::{Stream, TryStreamExt};
|
||||
pub use recordbatch::RecordBatch;
|
||||
use snafu::ensure;
|
||||
|
||||
@@ -79,6 +80,12 @@ impl RecordBatches {
|
||||
Ok(Self { schema, batches })
|
||||
}
|
||||
|
||||
pub async fn try_collect(stream: SendableRecordBatchStream) -> Result<Self> {
|
||||
let schema = stream.schema();
|
||||
let batches = stream.try_collect::<Vec<_>>().await?;
|
||||
Ok(Self { schema, batches })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
@@ -132,6 +139,19 @@ impl RecordBatches {
|
||||
index: 0,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn into_df_stream(self) -> DfSendableRecordBatchStream {
|
||||
let df_record_batches = self
|
||||
.batches
|
||||
.into_iter()
|
||||
.map(|batch| batch.df_recordbatch)
|
||||
.collect();
|
||||
// unwrap safety: `MemoryStream::try_new` won't fail
|
||||
Box::pin(
|
||||
MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None)
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SimpleRecordBatchStream {
|
||||
|
||||
@@ -23,6 +23,7 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
|
||||
// TODO(yingwen): We should hold vectors in the RecordBatch.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct RecordBatch {
|
||||
pub schema: SchemaRef,
|
||||
@@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
|
||||
} else {
|
||||
let mut row = Vec::with_capacity(self.columns);
|
||||
|
||||
// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
|
||||
for col in 0..self.columns {
|
||||
let column_array = self.record_batch.df_recordbatch.column(col);
|
||||
match arrow_array_get(column_array.as_ref(), self.row_cursor)
|
||||
|
||||
@@ -9,9 +9,11 @@ bytes = "1.1"
|
||||
catalog = { path = "../../catalog" }
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-error = { path = "../error" }
|
||||
common-telemetry = { path = "../telemetry" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
futures = "0.3"
|
||||
prost = "0.9"
|
||||
|
||||
77
src/common/substrait/src/context.rs
Normal file
77
src/common/substrait/src/context.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use datafusion::logical_plan::DFSchemaRef;
|
||||
use substrait_proto::protobuf::extensions::simple_extension_declaration::{
|
||||
ExtensionFunction, MappingType,
|
||||
};
|
||||
use substrait_proto::protobuf::extensions::SimpleExtensionDeclaration;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ConvertorContext {
|
||||
scalar_fn_names: HashMap<String, u32>,
|
||||
scalar_fn_map: HashMap<u32, String>,
|
||||
df_schema: Option<DFSchemaRef>,
|
||||
}
|
||||
|
||||
impl ConvertorContext {
|
||||
pub fn register_scalar_fn<S: AsRef<str>>(&mut self, name: S) -> u32 {
|
||||
if let Some(anchor) = self.scalar_fn_names.get(name.as_ref()) {
|
||||
return *anchor;
|
||||
}
|
||||
|
||||
let next_anchor = self.scalar_fn_map.len() as _;
|
||||
self.scalar_fn_map
|
||||
.insert(next_anchor, name.as_ref().to_string());
|
||||
self.scalar_fn_names
|
||||
.insert(name.as_ref().to_string(), next_anchor);
|
||||
next_anchor
|
||||
}
|
||||
|
||||
pub fn register_scalar_with_anchor<S: AsRef<str>>(&mut self, name: S, anchor: u32) {
|
||||
self.scalar_fn_map.insert(anchor, name.as_ref().to_string());
|
||||
self.scalar_fn_names
|
||||
.insert(name.as_ref().to_string(), anchor);
|
||||
}
|
||||
|
||||
pub fn find_scalar_fn(&self, anchor: u32) -> Option<&str> {
|
||||
self.scalar_fn_map.get(&anchor).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
pub fn generate_function_extension(&self) -> Vec<SimpleExtensionDeclaration> {
|
||||
let mut result = Vec::with_capacity(self.scalar_fn_map.len());
|
||||
for (anchor, name) in &self.scalar_fn_map {
|
||||
let declaration = SimpleExtensionDeclaration {
|
||||
mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
|
||||
extension_uri_reference: 0,
|
||||
function_anchor: *anchor,
|
||||
name: name.clone(),
|
||||
})),
|
||||
};
|
||||
result.push(declaration);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn set_df_schema(&mut self, schema: DFSchemaRef) {
|
||||
debug_assert!(self.df_schema.is_none());
|
||||
self.df_schema.get_or_insert(schema);
|
||||
}
|
||||
|
||||
pub(crate) fn df_schema(&self) -> Option<&DFSchemaRef> {
|
||||
self.df_schema.as_ref()
|
||||
}
|
||||
}
|
||||
762
src/common/substrait/src/df_expr.rs
Normal file
762
src/common/substrait/src/df_expr.rs
Normal file
@@ -0,0 +1,762 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::str::FromStr;
|
||||
|
||||
use datafusion::logical_plan::{Column, Expr};
|
||||
use datafusion_expr::{expr_fn, lit, BuiltinScalarFunction, Operator};
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use substrait_proto::protobuf::expression::field_reference::ReferenceType as FieldReferenceType;
|
||||
use substrait_proto::protobuf::expression::reference_segment::{
|
||||
ReferenceType as SegReferenceType, StructField,
|
||||
};
|
||||
use substrait_proto::protobuf::expression::{
|
||||
FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction,
|
||||
};
|
||||
use substrait_proto::protobuf::function_argument::ArgType;
|
||||
use substrait_proto::protobuf::Expression;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::error::{
|
||||
EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
|
||||
};
|
||||
use crate::types::{literal_type_to_scalar_value, scalar_value_as_literal_type};
|
||||
|
||||
/// Convert substrait's `Expression` to DataFusion's `Expr`.
|
||||
pub(crate) fn to_df_expr(
|
||||
ctx: &ConvertorContext,
|
||||
expression: Expression,
|
||||
schema: &Schema,
|
||||
) -> Result<Expr> {
|
||||
let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
|
||||
match expr_rex_type {
|
||||
RexType::Literal(l) => {
|
||||
let t = l.literal_type.context(MissingFieldSnafu {
|
||||
field: "LiteralType",
|
||||
plan: "Literal",
|
||||
})?;
|
||||
let v = literal_type_to_scalar_value(t)?;
|
||||
Ok(lit(v))
|
||||
}
|
||||
RexType::Selection(selection) => convert_selection_rex(*selection, schema),
|
||||
RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
|
||||
RexType::WindowFunction(_)
|
||||
| RexType::IfThen(_)
|
||||
| RexType::SwitchExpression(_)
|
||||
| RexType::SingularOrList(_)
|
||||
| RexType::MultiOrList(_)
|
||||
| RexType::Cast(_)
|
||||
| RexType::Subquery(_)
|
||||
| RexType::Enum(_) => UnsupportedExprSnafu {
|
||||
name: format!("substrait expression {:?}", expr_rex_type),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert Substrait's `FieldReference` - `DirectReference` - `StructField` to Datafusion's
|
||||
/// `Column` expr.
|
||||
pub fn convert_selection_rex(selection: FieldReference, schema: &Schema) -> Result<Expr> {
|
||||
if let Some(FieldReferenceType::DirectReference(direct_ref)) = selection.reference_type
|
||||
&& let Some(SegReferenceType::StructField(field)) = direct_ref.reference_type {
|
||||
let column_name = schema.column_name_by_index(field.field as _).to_string();
|
||||
Ok(Expr::Column(Column {
|
||||
relation: None,
|
||||
name: column_name,
|
||||
}))
|
||||
} else {
|
||||
InvalidParametersSnafu {
|
||||
reason: "Only support direct struct reference in Selection Rex",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn convert_scalar_function(
|
||||
ctx: &ConvertorContext,
|
||||
scalar_fn: ScalarFunction,
|
||||
schema: &Schema,
|
||||
) -> Result<Expr> {
|
||||
// convert argument
|
||||
let mut inputs = VecDeque::with_capacity(scalar_fn.arguments.len());
|
||||
for arg in scalar_fn.arguments {
|
||||
if let Some(ArgType::Value(sub_expr)) = arg.arg_type {
|
||||
inputs.push_back(to_df_expr(ctx, sub_expr, schema)?);
|
||||
} else {
|
||||
InvalidParametersSnafu {
|
||||
reason: "Only value expression arg is supported to be function argument",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
}
|
||||
|
||||
// convert this scalar function
|
||||
// map function name
|
||||
let anchor = scalar_fn.function_reference;
|
||||
let fn_name = ctx
|
||||
.find_scalar_fn(anchor)
|
||||
.with_context(|| InvalidParametersSnafu {
|
||||
reason: format!("Unregistered scalar function reference: {}", anchor),
|
||||
})?;
|
||||
|
||||
// convenient util
|
||||
let ensure_arg_len = |expected: usize| -> Result<()> {
|
||||
ensure!(
|
||||
inputs.len() == expected,
|
||||
InvalidParametersSnafu {
|
||||
reason: format!(
|
||||
"Invalid number of scalar function {}, expected {} but found {}",
|
||||
fn_name,
|
||||
expected,
|
||||
inputs.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// construct DataFusion expr
|
||||
let expr = match fn_name {
|
||||
// begin binary exprs, with the same order of DF `Operator`'s definition.
|
||||
"eq" | "equal" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"not_eq" | "not_equal" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.not_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"lt" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().lt(inputs.pop_front().unwrap())
|
||||
}
|
||||
"lt_eq" | "lte" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.lt_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"gt" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().gt(inputs.pop_front().unwrap())
|
||||
}
|
||||
"gt_eq" | "gte" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.gt_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"plus" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Plus,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"minus" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Minus,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"multiply" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Multiply,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"divide" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Divide,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"modulo" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Modulo,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"and" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::and(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
|
||||
}
|
||||
"or" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::or(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
|
||||
}
|
||||
"like" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.like(inputs.pop_front().unwrap())
|
||||
}
|
||||
"not_like" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.not_like(inputs.pop_front().unwrap())
|
||||
}
|
||||
"is_distinct_from" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::IsDistinctFrom,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"is_not_distinct_from" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::IsNotDistinctFrom,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_i_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexIMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_not_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexNotMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_not_i_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexNotIMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"bitwise_and" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::BitwiseAnd,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"bitwise_or" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::BitwiseOr,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
// end binary exprs
|
||||
// start other direct expr, with the same order of DF `Expr`'s definition.
|
||||
"not" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().not()
|
||||
}
|
||||
"is_not_null" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().is_not_null()
|
||||
}
|
||||
"is_null" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().is_null()
|
||||
}
|
||||
"negative" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Negative(Box::new(inputs.pop_front().unwrap()))
|
||||
}
|
||||
// skip GetIndexedField, unimplemented.
|
||||
"between" => {
|
||||
ensure_arg_len(3)?;
|
||||
Expr::Between {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
negated: false,
|
||||
low: Box::new(inputs.pop_front().unwrap()),
|
||||
high: Box::new(inputs.pop_front().unwrap()),
|
||||
}
|
||||
}
|
||||
"not_between" => {
|
||||
ensure_arg_len(3)?;
|
||||
Expr::Between {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
negated: true,
|
||||
low: Box::new(inputs.pop_front().unwrap()),
|
||||
high: Box::new(inputs.pop_front().unwrap()),
|
||||
}
|
||||
}
|
||||
// skip Case, is covered in substrait::SwitchExpression.
|
||||
// skip Cast and TryCast, is covered in substrait::Cast.
|
||||
"sort" | "sort_des" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Sort {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
asc: false,
|
||||
nulls_first: false,
|
||||
}
|
||||
}
|
||||
"sort_asc" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Sort {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
asc: true,
|
||||
nulls_first: false,
|
||||
}
|
||||
}
|
||||
// those are datafusion built-in "scalar functions".
|
||||
"abs"
|
||||
| "acos"
|
||||
| "asin"
|
||||
| "atan"
|
||||
| "atan2"
|
||||
| "ceil"
|
||||
| "cos"
|
||||
| "exp"
|
||||
| "floor"
|
||||
| "ln"
|
||||
| "log"
|
||||
| "log10"
|
||||
| "log2"
|
||||
| "power"
|
||||
| "pow"
|
||||
| "round"
|
||||
| "signum"
|
||||
| "sin"
|
||||
| "sqrt"
|
||||
| "tan"
|
||||
| "trunc"
|
||||
| "coalesce"
|
||||
| "make_array"
|
||||
| "ascii"
|
||||
| "bit_length"
|
||||
| "btrim"
|
||||
| "char_length"
|
||||
| "character_length"
|
||||
| "concat"
|
||||
| "concat_ws"
|
||||
| "chr"
|
||||
| "current_date"
|
||||
| "current_time"
|
||||
| "date_part"
|
||||
| "datepart"
|
||||
| "date_trunc"
|
||||
| "datetrunc"
|
||||
| "date_bin"
|
||||
| "initcap"
|
||||
| "left"
|
||||
| "length"
|
||||
| "lower"
|
||||
| "lpad"
|
||||
| "ltrim"
|
||||
| "md5"
|
||||
| "nullif"
|
||||
| "octet_length"
|
||||
| "random"
|
||||
| "regexp_replace"
|
||||
| "repeat"
|
||||
| "replace"
|
||||
| "reverse"
|
||||
| "right"
|
||||
| "rpad"
|
||||
| "rtrim"
|
||||
| "sha224"
|
||||
| "sha256"
|
||||
| "sha384"
|
||||
| "sha512"
|
||||
| "digest"
|
||||
| "split_part"
|
||||
| "starts_with"
|
||||
| "strpos"
|
||||
| "substr"
|
||||
| "to_hex"
|
||||
| "to_timestamp"
|
||||
| "to_timestamp_millis"
|
||||
| "to_timestamp_micros"
|
||||
| "to_timestamp_seconds"
|
||||
| "now"
|
||||
| "translate"
|
||||
| "trim"
|
||||
| "upper"
|
||||
| "uuid"
|
||||
| "regexp_match"
|
||||
| "struct"
|
||||
| "from_unixtime"
|
||||
| "arrow_typeof" => Expr::ScalarFunction {
|
||||
fun: BuiltinScalarFunction::from_str(fn_name).unwrap(),
|
||||
args: inputs.into(),
|
||||
},
|
||||
// skip ScalarUDF, unimplemented.
|
||||
// skip AggregateFunction, is covered in substrait::AggregateRel
|
||||
// skip WindowFunction, is covered in substrait WindowFunction
|
||||
// skip AggregateUDF, unimplemented.
|
||||
// skip InList, unimplemented
|
||||
// skip Wildcard, unimplemented.
|
||||
// end other direct expr
|
||||
_ => UnsupportedExprSnafu {
|
||||
name: format!("scalar function {}", fn_name),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Convert DataFusion's `Expr` to substrait's `Expression`
|
||||
pub fn expression_from_df_expr(
|
||||
ctx: &mut ConvertorContext,
|
||||
expr: &Expr,
|
||||
schema: &Schema,
|
||||
) -> Result<Expression> {
|
||||
let expression = match expr {
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::Alias(..) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Column(column) => {
|
||||
let field_reference = convert_column(column, schema)?;
|
||||
Expression {
|
||||
rex_type: Some(RexType::Selection(Box::new(field_reference))),
|
||||
}
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::ScalarVariable(..) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Literal(v) => {
|
||||
let t = scalar_value_as_literal_type(v)?;
|
||||
let l = Literal {
|
||||
nullable: true,
|
||||
type_variation_reference: 0,
|
||||
literal_type: Some(t),
|
||||
};
|
||||
Expression {
|
||||
rex_type: Some(RexType::Literal(l)),
|
||||
}
|
||||
}
|
||||
Expr::BinaryExpr { left, op, right } => {
|
||||
let left = expression_from_df_expr(ctx, left, schema)?;
|
||||
let right = expression_from_df_expr(ctx, right, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![left, right]);
|
||||
let op_name = utils::name_df_operator(op);
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::Not(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "not";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::IsNotNull(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "is_not_null";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::IsNull(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "is_null";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::Negative(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "negative";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::GetIndexedField { .. } => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Between {
|
||||
expr,
|
||||
negated,
|
||||
low,
|
||||
high,
|
||||
} => {
|
||||
let expr = expression_from_df_expr(ctx, expr, schema)?;
|
||||
let low = expression_from_df_expr(ctx, low, schema)?;
|
||||
let high = expression_from_df_expr(ctx, high, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![expr, low, high]);
|
||||
let op_name = if *negated { "not_between" } else { "between" };
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Sort {
|
||||
expr,
|
||||
asc,
|
||||
nulls_first: _,
|
||||
} => {
|
||||
let expr = expression_from_df_expr(ctx, expr, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![expr]);
|
||||
let op_name = if *asc { "sort_asc" } else { "sort_des" };
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::ScalarFunction { fun, args } => {
|
||||
let arguments = utils::expression_to_argument(
|
||||
args.iter()
|
||||
.map(|e| expression_from_df_expr(ctx, e, schema))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
);
|
||||
let op_name = utils::name_builtin_scalar_function(fun);
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::ScalarUDF { .. }
|
||||
| Expr::AggregateFunction { .. }
|
||||
| Expr::WindowFunction { .. }
|
||||
| Expr::AggregateUDF { .. }
|
||||
| Expr::InList { .. }
|
||||
| Expr::Wildcard => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(expression)
|
||||
}
|
||||
|
||||
/// Convert DataFusion's `Column` expr into substrait's `FieldReference` -
|
||||
/// `DirectReference` - `StructField`.
|
||||
pub fn convert_column(column: &Column, schema: &Schema) -> Result<FieldReference> {
|
||||
let column_name = &column.name;
|
||||
let field_index =
|
||||
schema
|
||||
.column_index_by_name(column_name)
|
||||
.with_context(|| MissingFieldSnafu {
|
||||
field: format!("{:?}", column),
|
||||
plan: format!("schema: {:?}", schema),
|
||||
})?;
|
||||
|
||||
Ok(FieldReference {
|
||||
reference_type: Some(FieldReferenceType::DirectReference(ReferenceSegment {
|
||||
reference_type: Some(SegReferenceType::StructField(Box::new(StructField {
|
||||
field: field_index as _,
|
||||
child: None,
|
||||
}))),
|
||||
})),
|
||||
root_type: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Some utils special for this `DataFusion::Expr` and `Substrait::Expression` conversion.
|
||||
mod utils {
|
||||
use datafusion_expr::{BuiltinScalarFunction, Operator};
|
||||
use substrait_proto::protobuf::expression::{RexType, ScalarFunction};
|
||||
use substrait_proto::protobuf::function_argument::ArgType;
|
||||
use substrait_proto::protobuf::{Expression, FunctionArgument};
|
||||
|
||||
pub(crate) fn name_df_operator(op: &Operator) -> &str {
|
||||
match op {
|
||||
Operator::Eq => "equal",
|
||||
Operator::NotEq => "not_equal",
|
||||
Operator::Lt => "lt",
|
||||
Operator::LtEq => "lte",
|
||||
Operator::Gt => "gt",
|
||||
Operator::GtEq => "gte",
|
||||
Operator::Plus => "plus",
|
||||
Operator::Minus => "minus",
|
||||
Operator::Multiply => "multiply",
|
||||
Operator::Divide => "divide",
|
||||
Operator::Modulo => "modulo",
|
||||
Operator::And => "and",
|
||||
Operator::Or => "or",
|
||||
Operator::Like => "like",
|
||||
Operator::NotLike => "not_like",
|
||||
Operator::IsDistinctFrom => "is_distinct_from",
|
||||
Operator::IsNotDistinctFrom => "is_not_distinct_from",
|
||||
Operator::RegexMatch => "regex_match",
|
||||
Operator::RegexIMatch => "regex_i_match",
|
||||
Operator::RegexNotMatch => "regex_not_match",
|
||||
Operator::RegexNotIMatch => "regex_not_i_match",
|
||||
Operator::BitwiseAnd => "bitwise_and",
|
||||
Operator::BitwiseOr => "bitwise_or",
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert list of [Expression] to [FunctionArgument] vector.
|
||||
pub(crate) fn expression_to_argument<I: IntoIterator<Item = Expression>>(
|
||||
expressions: I,
|
||||
) -> Vec<FunctionArgument> {
|
||||
expressions
|
||||
.into_iter()
|
||||
.map(|expr| FunctionArgument {
|
||||
arg_type: Some(ArgType::Value(expr)),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Convenient builder for [Expression]
|
||||
pub(crate) fn build_scalar_function_expression(
|
||||
function_reference: u32,
|
||||
arguments: Vec<FunctionArgument>,
|
||||
) -> Expression {
|
||||
Expression {
|
||||
rex_type: Some(RexType::ScalarFunction(ScalarFunction {
|
||||
function_reference,
|
||||
arguments,
|
||||
output_type: None,
|
||||
..Default::default()
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn name_builtin_scalar_function(fun: &BuiltinScalarFunction) -> &str {
|
||||
match fun {
|
||||
BuiltinScalarFunction::Abs => "abs",
|
||||
BuiltinScalarFunction::Acos => "acos",
|
||||
BuiltinScalarFunction::Asin => "asin",
|
||||
BuiltinScalarFunction::Atan => "atan",
|
||||
BuiltinScalarFunction::Ceil => "ceil",
|
||||
BuiltinScalarFunction::Cos => "cos",
|
||||
BuiltinScalarFunction::Digest => "digest",
|
||||
BuiltinScalarFunction::Exp => "exp",
|
||||
BuiltinScalarFunction::Floor => "floor",
|
||||
BuiltinScalarFunction::Ln => "ln",
|
||||
BuiltinScalarFunction::Log => "log",
|
||||
BuiltinScalarFunction::Log10 => "log10",
|
||||
BuiltinScalarFunction::Log2 => "log2",
|
||||
BuiltinScalarFunction::Round => "round",
|
||||
BuiltinScalarFunction::Signum => "signum",
|
||||
BuiltinScalarFunction::Sin => "sin",
|
||||
BuiltinScalarFunction::Sqrt => "sqrt",
|
||||
BuiltinScalarFunction::Tan => "tan",
|
||||
BuiltinScalarFunction::Trunc => "trunc",
|
||||
BuiltinScalarFunction::Array => "make_array",
|
||||
BuiltinScalarFunction::Ascii => "ascii",
|
||||
BuiltinScalarFunction::BitLength => "bit_length",
|
||||
BuiltinScalarFunction::Btrim => "btrim",
|
||||
BuiltinScalarFunction::CharacterLength => "character_length",
|
||||
BuiltinScalarFunction::Chr => "chr",
|
||||
BuiltinScalarFunction::Concat => "concat",
|
||||
BuiltinScalarFunction::ConcatWithSeparator => "concat_ws",
|
||||
BuiltinScalarFunction::DatePart => "date_part",
|
||||
BuiltinScalarFunction::DateTrunc => "date_trunc",
|
||||
BuiltinScalarFunction::InitCap => "initcap",
|
||||
BuiltinScalarFunction::Left => "left",
|
||||
BuiltinScalarFunction::Lpad => "lpad",
|
||||
BuiltinScalarFunction::Lower => "lower",
|
||||
BuiltinScalarFunction::Ltrim => "ltrim",
|
||||
BuiltinScalarFunction::MD5 => "md5",
|
||||
BuiltinScalarFunction::NullIf => "nullif",
|
||||
BuiltinScalarFunction::OctetLength => "octet_length",
|
||||
BuiltinScalarFunction::Random => "random",
|
||||
BuiltinScalarFunction::RegexpReplace => "regexp_replace",
|
||||
BuiltinScalarFunction::Repeat => "repeat",
|
||||
BuiltinScalarFunction::Replace => "replace",
|
||||
BuiltinScalarFunction::Reverse => "reverse",
|
||||
BuiltinScalarFunction::Right => "right",
|
||||
BuiltinScalarFunction::Rpad => "rpad",
|
||||
BuiltinScalarFunction::Rtrim => "rtrim",
|
||||
BuiltinScalarFunction::SHA224 => "sha224",
|
||||
BuiltinScalarFunction::SHA256 => "sha256",
|
||||
BuiltinScalarFunction::SHA384 => "sha384",
|
||||
BuiltinScalarFunction::SHA512 => "sha512",
|
||||
BuiltinScalarFunction::SplitPart => "split_part",
|
||||
BuiltinScalarFunction::StartsWith => "starts_with",
|
||||
BuiltinScalarFunction::Strpos => "strpos",
|
||||
BuiltinScalarFunction::Substr => "substr",
|
||||
BuiltinScalarFunction::ToHex => "to_hex",
|
||||
BuiltinScalarFunction::ToTimestamp => "to_timestamp",
|
||||
BuiltinScalarFunction::ToTimestampMillis => "to_timestamp_millis",
|
||||
BuiltinScalarFunction::ToTimestampMicros => "to_timestamp_macros",
|
||||
BuiltinScalarFunction::ToTimestampSeconds => "to_timestamp_seconds",
|
||||
BuiltinScalarFunction::Now => "now",
|
||||
BuiltinScalarFunction::Translate => "translate",
|
||||
BuiltinScalarFunction::Trim => "trim",
|
||||
BuiltinScalarFunction::Upper => "upper",
|
||||
BuiltinScalarFunction::RegexpMatch => "regexp_match",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::schema::ColumnSchema;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn expr_round_trip() {
|
||||
let expr = expr_fn::and(
|
||||
expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
|
||||
expr_fn::col("column_a").gt(expr_fn::col("column_b")),
|
||||
);
|
||||
|
||||
let schema = Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
"column_a",
|
||||
datatypes::data_type::ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"column_b",
|
||||
datatypes::data_type::ConcreteDataType::float64_datatype(),
|
||||
true,
|
||||
),
|
||||
]);
|
||||
|
||||
let mut ctx = ConvertorContext::default();
|
||||
let substrait_expr = expression_from_df_expr(&mut ctx, &expr, &schema).unwrap();
|
||||
let converted_expr = to_df_expr(&ctx, substrait_expr, &schema).unwrap();
|
||||
|
||||
assert_eq!(expr, converted_expr);
|
||||
}
|
||||
}
|
||||
@@ -17,76 +17,135 @@ use std::sync::Arc;
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datafusion::datasource::TableProvider;
|
||||
use datafusion::logical_plan::plan::Filter;
|
||||
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
|
||||
use datafusion::physical_plan::project_schema;
|
||||
use prost::Message;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
|
||||
use substrait_proto::protobuf::expression::MaskExpression;
|
||||
use substrait_proto::protobuf::extensions::simple_extension_declaration::MappingType;
|
||||
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
|
||||
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
|
||||
use substrait_proto::protobuf::rel::RelType;
|
||||
use substrait_proto::protobuf::{PlanRel, ReadRel, Rel};
|
||||
use substrait_proto::protobuf::{FilterRel, Plan, PlanRel, ReadRel, Rel};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::df_expr::{expression_from_df_expr, to_df_expr};
|
||||
use crate::error::{
|
||||
DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
|
||||
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
|
||||
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
|
||||
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
|
||||
};
|
||||
use crate::schema::{from_schema, to_schema};
|
||||
use crate::SubstraitPlan;
|
||||
|
||||
pub struct DFLogicalSubstraitConvertor {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
}
|
||||
pub struct DFLogicalSubstraitConvertor;
|
||||
|
||||
impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
type Error = Error;
|
||||
|
||||
type Plan = LogicalPlan;
|
||||
|
||||
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
|
||||
let plan_rel = PlanRel::decode(message).context(DecodeRelSnafu)?;
|
||||
let rel = match plan_rel.rel_type.context(EmptyPlanSnafu)? {
|
||||
PlanRelType::Rel(rel) => rel,
|
||||
PlanRelType::Root(_) => UnsupportedPlanSnafu {
|
||||
name: "Root Relation",
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
self.convert_rel(rel)
|
||||
fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<Self::Plan, Self::Error> {
|
||||
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
|
||||
self.convert_plan(plan, catalog_manager)
|
||||
}
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
let rel = self.convert_plan(plan)?;
|
||||
let plan_rel = PlanRel {
|
||||
rel_type: Some(PlanRelType::Rel(rel)),
|
||||
};
|
||||
let plan = self.convert_df_plan(plan)?;
|
||||
|
||||
let mut buf = BytesMut::new();
|
||||
plan_rel.encode(&mut buf).context(EncodeRelSnafu)?;
|
||||
plan.encode(&mut buf).context(EncodeRelSnafu)?;
|
||||
|
||||
Ok(buf.freeze())
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn new(catalog_manager: CatalogManagerRef) -> Self {
|
||||
Self { catalog_manager }
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn convert_rel(&self, rel: Rel) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(read_rel),
|
||||
RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
|
||||
name: "Filter Relation",
|
||||
fn convert_plan(
|
||||
&self,
|
||||
mut plan: Plan,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// prepare convertor context
|
||||
let mut ctx = ConvertorContext::default();
|
||||
for simple_ext in plan.extensions {
|
||||
if let Some(MappingType::ExtensionFunction(function_extension)) =
|
||||
simple_ext.mapping_type
|
||||
{
|
||||
ctx.register_scalar_with_anchor(
|
||||
function_extension.name,
|
||||
function_extension.function_anchor,
|
||||
);
|
||||
} else {
|
||||
debug!("Encounter unsupported substrait extension {:?}", simple_ext);
|
||||
}
|
||||
}
|
||||
|
||||
// extract rel
|
||||
let rel = if let Some(PlanRel { rel_type }) = plan.relations.pop()
|
||||
&& let Some(PlanRelType::Rel(rel)) = rel_type {
|
||||
rel
|
||||
} else {
|
||||
UnsupportedPlanSnafu {
|
||||
name: "Emply or non-Rel relation",
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
|
||||
}
|
||||
|
||||
fn rel_to_logical_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
rel: Box<Rel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
|
||||
// build logical plan
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
|
||||
RelType::Filter(filter) => {
|
||||
let FilterRel {
|
||||
common: _,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: _,
|
||||
} = *filter;
|
||||
|
||||
let input = input.context(MissingFieldSnafu {
|
||||
field: "input",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
|
||||
|
||||
let condition = condition.context(MissingFieldSnafu {
|
||||
field: "condition",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
|
||||
let schema = ctx.df_schema().context(InvalidParametersSnafu {
|
||||
reason: "the underlying TableScan plan should have included a table schema",
|
||||
})?;
|
||||
let schema = schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let predicate = to_df_expr(ctx, *condition, &schema)?;
|
||||
|
||||
LogicalPlan::Filter(Filter { predicate, input })
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
@@ -127,14 +186,18 @@ impl DFLogicalSubstraitConvertor {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
}?;
|
||||
};
|
||||
|
||||
Ok(logical_plan)
|
||||
}
|
||||
|
||||
fn convert_read_rel(&self, read_rel: Box<ReadRel>) -> Result<LogicalPlan, Error> {
|
||||
fn convert_read_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
read_rel: Box<ReadRel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
|
||||
|
||||
let read_type = read_rel.read_type.context(MissingFieldSnafu {
|
||||
field: "read_type",
|
||||
plan: "Read",
|
||||
@@ -168,8 +231,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
.map(|mask_expr| self.convert_mask_expression(mask_expr));
|
||||
|
||||
// Get table handle from catalog manager
|
||||
let table_ref = self
|
||||
.catalog_manager
|
||||
let table_ref = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?
|
||||
@@ -178,31 +240,40 @@ impl DFLogicalSubstraitConvertor {
|
||||
})?;
|
||||
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
|
||||
|
||||
// Get schema directly from the table, and compare it with the schema retrived from substrait proto.
|
||||
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
|
||||
let stored_schema = adapter.schema();
|
||||
let retrived_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
|
||||
let retrived_arrow_schema = retrived_schema.arrow_schema();
|
||||
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
|
||||
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
|
||||
ensure!(
|
||||
stored_schema.fields == retrived_arrow_schema.fields,
|
||||
same_schema_without_metadata(&stored_schema, retrieved_arrow_schema),
|
||||
SchemaNotMatchSnafu {
|
||||
substrait_schema: retrived_arrow_schema.clone(),
|
||||
substrait_schema: retrieved_arrow_schema.clone(),
|
||||
storage_schema: stored_schema
|
||||
}
|
||||
);
|
||||
|
||||
// Convert filter
|
||||
let filters = if let Some(filter) = read_rel.filter {
|
||||
vec![to_df_expr(ctx, *filter, &retrieved_schema)?]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Calculate the projected schema
|
||||
let projected_schema = project_schema(&stored_schema, projection.as_ref())
|
||||
.context(DFInternalSnafu)?
|
||||
.to_dfschema_ref()
|
||||
.context(DFInternalSnafu)?;
|
||||
|
||||
// TODO(ruihang): Support filters and limit
|
||||
ctx.set_df_schema(projected_schema.clone());
|
||||
|
||||
// TODO(ruihang): Support limit
|
||||
Ok(LogicalPlan::TableScan(TableScan {
|
||||
table_name,
|
||||
table_name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
|
||||
source: adapter,
|
||||
projection,
|
||||
projected_schema,
|
||||
filters: vec![],
|
||||
filters,
|
||||
limit: None,
|
||||
}))
|
||||
}
|
||||
@@ -219,16 +290,42 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn convert_plan(&self, plan: LogicalPlan) -> Result<Rel, Error> {
|
||||
match plan {
|
||||
fn logical_plan_to_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
plan: Arc<LogicalPlan>,
|
||||
) -> Result<Rel, Error> {
|
||||
Ok(match &*plan {
|
||||
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Projection",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Filter(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Filter",
|
||||
LogicalPlan::Filter(filter) => {
|
||||
let input = Some(Box::new(
|
||||
self.logical_plan_to_rel(ctx, filter.input.clone())?,
|
||||
));
|
||||
|
||||
let schema = plan
|
||||
.schema()
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let condition = Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
&filter.predicate,
|
||||
&schema,
|
||||
)?));
|
||||
|
||||
let rel = FilterRel {
|
||||
common: None,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: None,
|
||||
};
|
||||
Rel {
|
||||
rel_type: Some(RelType::Filter(Box::new(rel))),
|
||||
}
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Window",
|
||||
}
|
||||
@@ -258,10 +355,10 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::TableScan(table_scan) => {
|
||||
let read_rel = self.convert_table_scan_plan(table_scan)?;
|
||||
Ok(Rel {
|
||||
let read_rel = self.convert_table_scan_plan(ctx, table_scan)?;
|
||||
Rel {
|
||||
rel_type: Some(RelType::Read(Box::new(read_rel))),
|
||||
})
|
||||
}
|
||||
}
|
||||
LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical EmptyRelation",
|
||||
@@ -284,10 +381,36 @@ impl DFLogicalSubstraitConvertor {
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn convert_table_scan_plan(&self, table_scan: TableScan) -> Result<ReadRel, Error> {
|
||||
fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
|
||||
let mut ctx = ConvertorContext::default();
|
||||
|
||||
let rel = self.logical_plan_to_rel(&mut ctx, Arc::new(plan))?;
|
||||
|
||||
// convert extension
|
||||
let extensions = ctx.generate_function_extension();
|
||||
|
||||
// assemble PlanRel
|
||||
let plan_rel = PlanRel {
|
||||
rel_type: Some(PlanRelType::Rel(rel)),
|
||||
};
|
||||
|
||||
Ok(Plan {
|
||||
extension_uris: vec![],
|
||||
extensions,
|
||||
relations: vec![plan_rel],
|
||||
advanced_extensions: None,
|
||||
expected_type_urls: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn convert_table_scan_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
table_scan: &TableScan,
|
||||
) -> Result<ReadRel, Error> {
|
||||
let provider = table_scan
|
||||
.source
|
||||
.as_any()
|
||||
@@ -308,15 +431,32 @@ impl DFLogicalSubstraitConvertor {
|
||||
// assemble projection
|
||||
let projection = table_scan
|
||||
.projection
|
||||
.map(|proj| self.convert_schema_projection(&proj));
|
||||
.as_ref()
|
||||
.map(|x| self.convert_schema_projection(x));
|
||||
|
||||
// assemble base (unprojected) schema using Table's schema.
|
||||
let base_schema = from_schema(&provider.table().schema())?;
|
||||
|
||||
// make conjunction over a list of filters and convert the result to substrait
|
||||
let filter = if let Some(conjunction) = table_scan
|
||||
.filters
|
||||
.iter()
|
||||
.cloned()
|
||||
.reduce(|accum, expr| accum.and(expr))
|
||||
{
|
||||
Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
&conjunction,
|
||||
&provider.table().schema(),
|
||||
)?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let read_rel = ReadRel {
|
||||
common: None,
|
||||
base_schema: Some(base_schema),
|
||||
filter: None,
|
||||
filter,
|
||||
projection,
|
||||
advanced_extension: None,
|
||||
read_type: Some(read_type),
|
||||
@@ -342,6 +482,13 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
}
|
||||
|
||||
fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
|
||||
lhs.fields.len() == rhs.fields.len()
|
||||
&& lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
|
||||
x.name == y.name && x.data_type == y.data_type && x.is_nullable == y.is_nullable
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use catalog::local::{LocalCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
|
||||
@@ -393,10 +540,10 @@ mod test {
|
||||
}
|
||||
|
||||
async fn logical_plan_round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) {
|
||||
let convertor = DFLogicalSubstraitConvertor::new(catalog);
|
||||
let convertor = DFLogicalSubstraitConvertor;
|
||||
|
||||
let proto = convertor.encode(plan.clone()).unwrap();
|
||||
let tripped_plan = convertor.decode(proto).unwrap();
|
||||
let tripped_plan = convertor.decode(proto, catalog).unwrap();
|
||||
|
||||
assert_eq!(format!("{:?}", plan), format!("{:?}", tripped_plan));
|
||||
}
|
||||
@@ -418,6 +565,7 @@ mod test {
|
||||
.await
|
||||
.unwrap();
|
||||
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
|
||||
|
||||
let projection = vec![1, 3, 5];
|
||||
let df_schema = adapter.schema().to_dfschema().unwrap();
|
||||
let projected_fields = projection
|
||||
@@ -428,7 +576,10 @@ mod test {
|
||||
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
|
||||
|
||||
let table_scan_plan = LogicalPlan::TableScan(TableScan {
|
||||
table_name: DEFAULT_TABLE_NAME.to_string(),
|
||||
table_name: format!(
|
||||
"{}.{}.{}",
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, DEFAULT_TABLE_NAME
|
||||
),
|
||||
source: adapter,
|
||||
projection: Some(projection),
|
||||
projected_schema,
|
||||
|
||||
@@ -23,10 +23,10 @@ use snafu::{Backtrace, ErrorCompat, Snafu};
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Unsupported physical expr: {}", name))]
|
||||
#[snafu(display("Unsupported physical plan: {}", name))]
|
||||
UnsupportedPlan { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported physical plan: {}", name))]
|
||||
#[snafu(display("Unsupported expr: {}", name))]
|
||||
UnsupportedExpr { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported concrete type: {:?}", ty))]
|
||||
@@ -81,7 +81,7 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Table quering not found: {}", name))]
|
||||
#[snafu(display("Table querying not found: {}", name))]
|
||||
TableNotFound { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Cannot convert plan doesn't belong to GreptimeDB"))]
|
||||
@@ -99,6 +99,12 @@ pub enum Error {
|
||||
storage_schema: datafusion::arrow::datatypes::SchemaRef,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert DataFusion schema, source: {}", source))]
|
||||
ConvertDfSchema {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -120,6 +126,7 @@ impl ErrorExt for Error {
|
||||
| Error::TableNotFound { .. }
|
||||
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
|
||||
Error::ConvertDfSchema { source } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,12 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(let_chains)]
|
||||
|
||||
mod context;
|
||||
mod df_expr;
|
||||
mod df_logical;
|
||||
pub mod error;
|
||||
mod schema;
|
||||
mod types;
|
||||
|
||||
use bytes::{Buf, Bytes};
|
||||
use catalog::CatalogManagerRef;
|
||||
|
||||
pub use crate::df_logical::DFLogicalSubstraitConvertor;
|
||||
|
||||
@@ -26,7 +31,11 @@ pub trait SubstraitPlan {
|
||||
|
||||
type Plan;
|
||||
|
||||
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error>;
|
||||
fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<Self::Plan, Self::Error>;
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error>;
|
||||
}
|
||||
|
||||
@@ -12,17 +12,19 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Methods that perform convertion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
|
||||
//! Methods that perform conversion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
|
||||
//!
|
||||
//! Substrait use [type variation](https://substrait.io/types/type_variations/) to express different "logical types".
|
||||
//! Current we only have variations on integer types. Variation 0 (system prefered) are the same with base types, which
|
||||
//! Current we only have variations on integer types. Variation 0 (system preferred) are the same with base types, which
|
||||
//! are signed integer (i.e. I8 -> [i8]), and Variation 1 stands for unsigned integer (i.e. I8 -> [u8]).
|
||||
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use substrait_proto::protobuf::expression::literal::LiteralType;
|
||||
use substrait_proto::protobuf::r#type::{self as s_type, Kind, Nullability};
|
||||
use substrait_proto::protobuf::Type as SType;
|
||||
use substrait_proto::protobuf::{Type as SType, Type};
|
||||
|
||||
use crate::error::{Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
|
||||
use crate::error::{self, Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
|
||||
|
||||
macro_rules! substrait_kind {
|
||||
($desc:ident, $concrete_ty:ident) => {{
|
||||
@@ -134,3 +136,67 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Re
|
||||
|
||||
Ok(SType { kind })
|
||||
}
|
||||
|
||||
pub(crate) fn scalar_value_as_literal_type(v: &ScalarValue) -> Result<LiteralType> {
|
||||
Ok(if v.is_null() {
|
||||
LiteralType::Null(Type { kind: None })
|
||||
} else {
|
||||
match v {
|
||||
ScalarValue::Boolean(Some(v)) => LiteralType::Boolean(*v),
|
||||
ScalarValue::Float32(Some(v)) => LiteralType::Fp32(*v),
|
||||
ScalarValue::Float64(Some(v)) => LiteralType::Fp64(*v),
|
||||
ScalarValue::Int8(Some(v)) => LiteralType::I8(*v as i32),
|
||||
ScalarValue::Int16(Some(v)) => LiteralType::I16(*v as i32),
|
||||
ScalarValue::Int32(Some(v)) => LiteralType::I32(*v),
|
||||
ScalarValue::Int64(Some(v)) => LiteralType::I64(*v),
|
||||
ScalarValue::LargeUtf8(Some(v)) => LiteralType::String(v.clone()),
|
||||
ScalarValue::LargeBinary(Some(v)) => LiteralType::Binary(v.clone()),
|
||||
// TODO(LFC): Implement other conversions: ScalarValue => LiteralType
|
||||
_ => {
|
||||
return error::UnsupportedExprSnafu {
|
||||
name: format!("{:?}", v),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn literal_type_to_scalar_value(t: LiteralType) -> Result<ScalarValue> {
|
||||
Ok(match t {
|
||||
LiteralType::Null(Type { kind: Some(kind) }) => match kind {
|
||||
Kind::Bool(_) => ScalarValue::Boolean(None),
|
||||
Kind::I8(_) => ScalarValue::Int8(None),
|
||||
Kind::I16(_) => ScalarValue::Int16(None),
|
||||
Kind::I32(_) => ScalarValue::Int32(None),
|
||||
Kind::I64(_) => ScalarValue::Int64(None),
|
||||
Kind::Fp32(_) => ScalarValue::Float32(None),
|
||||
Kind::Fp64(_) => ScalarValue::Float64(None),
|
||||
Kind::String(_) => ScalarValue::LargeUtf8(None),
|
||||
Kind::Binary(_) => ScalarValue::LargeBinary(None),
|
||||
// TODO(LFC): Implement other conversions: Kind => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{:?}", kind),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
LiteralType::Boolean(v) => ScalarValue::Boolean(Some(v)),
|
||||
LiteralType::I8(v) => ScalarValue::Int8(Some(v as i8)),
|
||||
LiteralType::I16(v) => ScalarValue::Int16(Some(v as i16)),
|
||||
LiteralType::I32(v) => ScalarValue::Int32(Some(v)),
|
||||
LiteralType::I64(v) => ScalarValue::Int64(Some(v)),
|
||||
LiteralType::Fp32(v) => ScalarValue::Float32(Some(v)),
|
||||
LiteralType::Fp64(v) => ScalarValue::Float64(Some(v)),
|
||||
LiteralType::String(v) => ScalarValue::LargeUtf8(Some(v)),
|
||||
LiteralType::Binary(v) => ScalarValue::LargeBinary(Some(v)),
|
||||
// TODO(LFC): Implement other conversions: LiteralType => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{:?}", t),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -147,6 +147,18 @@ impl From<i64> for Timestamp {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Timestamp> for i64 {
|
||||
fn from(t: Timestamp) -> Self {
|
||||
t.value
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Timestamp> for serde_json::Value {
|
||||
fn from(d: Timestamp) -> Self {
|
||||
serde_json::Value::String(d.to_iso8601_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum TimeUnit {
|
||||
Second,
|
||||
@@ -197,6 +209,7 @@ impl Hash for Timestamp {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use chrono::Offset;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -318,4 +331,39 @@ mod tests {
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_to_json_value() {
|
||||
assert_eq!(
|
||||
"1970-01-01 00:00:01+0000",
|
||||
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Second)) {
|
||||
Value::String(s) => s,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"1970-01-01 00:00:00.001+0000",
|
||||
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Millisecond)) {
|
||||
Value::String(s) => s,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"1970-01-01 00:00:00.000001+0000",
|
||||
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Microsecond)) {
|
||||
Value::String(s) => s,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"1970-01-01 00:00:00.000000001+0000",
|
||||
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Nanosecond)) {
|
||||
Value::String(s) => s,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,21 +11,22 @@ python = ["dep:script"]
|
||||
[dependencies]
|
||||
api = { path = "../api" }
|
||||
async-trait = "0.1"
|
||||
axum = "0.6.0-rc.2"
|
||||
axum-macros = "0.3.0-rc.1"
|
||||
axum = "0.6"
|
||||
axum-macros = "0.3"
|
||||
backon = "0.2"
|
||||
catalog = { path = "../catalog" }
|
||||
common-base = { path = "../common/base" }
|
||||
common-catalog = { path = "../common/catalog" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
common-grpc-expr = { path = "../common/grpc-expr" }
|
||||
common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
common-insert = { path = "../common/insert" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
"simd",
|
||||
] }
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
@@ -34,60 +35,32 @@ log-store = { path = "../log-store" }
|
||||
meta-client = { path = "../meta-client" }
|
||||
meta-srv = { path = "../meta-srv", features = ["mock"] }
|
||||
metrics = "0.20"
|
||||
mito = { path = "../mito", features = ["test"] }
|
||||
object-store = { path = "../object-store" }
|
||||
query = { path = "../query" }
|
||||
script = { path = "../script", features = ["python"], optional = true }
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
servers = { path = "../servers" }
|
||||
session = { path = "../session" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
sql = { path = "../sql" }
|
||||
storage = { path = "../storage" }
|
||||
store-api = { path = "../store-api" }
|
||||
substrait = { path = "../common/substrait" }
|
||||
table = { path = "../table" }
|
||||
mito = { path = "../mito", features = ["test"] }
|
||||
tokio = { version = "1.18", features = ["full"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tonic = "0.8"
|
||||
tower = { version = "0.4", features = ["full"] }
|
||||
tower-http = { version = "0.3", features = ["full"] }
|
||||
frontend = { path = "../frontend" }
|
||||
|
||||
[dependencies.arrow]
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
features = [
|
||||
"io_csv",
|
||||
"io_json",
|
||||
"io_parquet",
|
||||
"io_parquet_compression",
|
||||
"io_ipc",
|
||||
"ahash",
|
||||
"compute",
|
||||
"serde_types",
|
||||
]
|
||||
|
||||
[dev-dependencies]
|
||||
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
|
||||
client = { path = "../client" }
|
||||
common-query = { path = "../common/query" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
"simd",
|
||||
] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
tempdir = "0.3"
|
||||
|
||||
[dev-dependencies.arrow]
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
features = [
|
||||
"io_csv",
|
||||
"io_json",
|
||||
"io_parquet",
|
||||
"io_parquet_compression",
|
||||
"io_ipc",
|
||||
"ahash",
|
||||
"compute",
|
||||
"serde_types",
|
||||
]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user