Merge branch 'dev' into replace-arrow2
4 .env.example Normal file
@@ -0,0 +1,4 @@
# Settings for s3 test
GT_S3_BUCKET=S3 bucket
GT_S3_ACCESS_KEY_ID=S3 access key id
GT_S3_ACCESS_KEY=S3 secret access key
4 .github/pull_request_template.md vendored
@@ -13,7 +13,7 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed

## Checklist

- [] I have written the necessary rustdoc comments.
- [] I have added the necessary unit tests and integration tests.
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.

## Refer to a related PR or issue link (optional)
24 .github/workflows/develop.yml vendored
@@ -26,6 +26,13 @@ env:
  RUST_TOOLCHAIN: nightly-2022-07-14

jobs:
  typos:
    name: Spell Check with Typos
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: crate-ci/typos@v1.0.4

  check:
    name: Check
    if: github.event.pull_request.draft == false
@@ -42,6 +49,23 @@ jobs:
      - name: Run cargo check
        run: cargo check --workspace --all-targets

  toml:
    name: Toml Check
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v3
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Install taplo
        run: cargo install taplo-cli --version ^0.8 --locked
      - name: Run taplo
        run: taplo format --check --option "indent_string= "

  # Use coverage to run test.
  # test:
  #   name: Test Suite
25 .github/workflows/doc-issue.yml vendored Normal file
@@ -0,0 +1,25 @@
name: Create Issue in docs repo on doc related changes

on:
  issues:
    types:
      - labeled
  pull_request_target:
    types:
      - labeled

jobs:
  doc_issue:
    if: github.event.label.name == 'doc update required'
    runs-on: ubuntu-latest
    steps:
      - name: create an issue in doc repo
        uses: dacbd/create-issue-action@main
        with:
          owner: GreptimeTeam
          repo: docs
          token: ${{ secrets.DOCS_REPO_TOKEN }}
          title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
          body: |
            A document change request is generated from
            ${{ github.event.issue.html_url || github.event.pull_request.html_url }}
32 .github/workflows/release.yml vendored
@@ -3,9 +3,8 @@ on:
    tags:
      - "v*.*.*"
  schedule:
    # At 00:00 Everyday
    # https://crontab.guru/every-day-at-midnight
    - cron: '0 0 * * *'
    # At 00:00 on Monday.
    - cron: '0 0 * * 1'
  workflow_dispatch:

name: Release
@@ -14,7 +13,10 @@ env:
  RUST_TOOLCHAIN: nightly-2022-07-14

  # FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
  NIGHTLY_BUILD_VERSION_PREFIX: v0.1.0-alpha
  SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha

  # In the future, we can change SCHEDULED_PERIOD to nightly.
  SCHEDULED_PERIOD: weekly

jobs:
  build:
@@ -113,25 +115,25 @@ jobs:
      - name: Download artifacts
        uses: actions/download-artifact@v3

      - name: Configure nightly build version # the version would be ${NIGHTLY_BUILD_VERSION_PREFIX}-YYYYMMDD-nightly, like v0.1.0-alpha-20221119-nightly.
      - name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}, like v0.1.0-alpha-20221119-weekly.
        shell: bash
        if: github.event_name == 'schedule'
        run: |
          buildTime=`date "+%Y%m%d"`
          NIGHTLY_VERSION=${{ env.NIGHTLY_BUILD_VERSION_PREFIX }}-$buildTime-nightly
          echo "NIGHTLY_VERSION=${NIGHTLY_VERSION}" >> $GITHUB_ENV
          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
          echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV

      - name: Create nightly git tag
      - name: Create scheduled build git tag
        if: github.event_name == 'schedule'
        run: |
          git tag ${{ env.NIGHTLY_VERSION }}
          git tag ${{ env.SCHEDULED_BUILD_VERSION }}

      - name: Publish nightly release # configure the different release title and tags.
      - name: Publish scheduled release # configure the different release title and tags.
        uses: softprops/action-gh-release@v1
        if: github.event_name == 'schedule'
        with:
          name: "Release ${{ env.NIGHTLY_VERSION }}"
          tag_name: ${{ env.NIGHTLY_VERSION }}
          name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
          tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
          generate_release_notes: true
          files: |
            **/greptime-*
@@ -189,13 +191,13 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Configure nightly build image tag # the tag would be ${NIGHTLY_BUILD_VERSION_PREFIX}-YYYYMMDD-nightly
      - name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
        shell: bash
        if: github.event_name == 'schedule'
        run: |
          buildTime=`date "+%Y%m%d"`
          NIGHTLY_VERSION=${{ env.NIGHTLY_BUILD_VERSION_PREFIX }}-$buildTime-nightly
          echo "IMAGE_TAG=${NIGHTLY_VERSION:1}" >> $GITHUB_ENV
          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
          echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV

      - name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
        shell: bash
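Note: the scheduled job derives a tag of the form {SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-{SCHEDULED_PERIOD}, and the Docker image tag is the same string with the leading "v" stripped (the ${VAR:1} expansion above). A minimal Rust sketch of that computation, assuming the chrono crate; the function name is illustrative only:

use chrono::Utc; // assumed dependency, mirrors `date "+%Y%m%d"` in the workflow

/// Builds e.g. "v0.1.0-alpha-20221119-weekly" from a prefix and a period.
fn scheduled_build_version(prefix: &str, period: &str) -> String {
    let build_time = Utc::now().format("%Y%m%d");
    format!("{prefix}-{build_time}-{period}")
}

fn main() {
    let tag = scheduled_build_version("v0.1.0-alpha", "weekly");
    // The image tag strips the leading 'v', as `${SCHEDULED_BUILD_VERSION:1}` does.
    let image_tag = &tag[1..];
    println!("release tag {tag}, image tag {image_tag}");
}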
4 .gitignore vendored
@@ -18,6 +18,7 @@ debug/

# JetBrains IDE config directory
.idea/
*.iml

# VSCode IDE config directory
.vscode/
@@ -31,3 +32,6 @@ logs/

# Benchmark dataset
benchmarks/data

# dotenv
.env
@@ -9,7 +9,7 @@ repos:
    rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
    hooks:
      - id: cargo-sort
        args: ["--workspace", "--print"]
        args: ["--workspace"]

  - repo: https://github.com/doublify/pre-commit-rust
    rev: v1.0
545 Cargo.lock generated
File diff suppressed because it is too large
@@ -11,28 +11,32 @@ members = [
    "src/common/function",
    "src/common/function-macro",
    "src/common/grpc",
    "src/common/grpc-expr",
    "src/common/query",
    "src/common/recordbatch",
    "src/common/runtime",
    "src/common/substrait",
    "src/common/insert",
    "src/common/telemetry",
    "src/common/time",
    "src/datanode",
    "src/datatypes",
    "src/datatypes2",
    "src/frontend",
    "src/log-store",
    "src/meta-client",
    "src/meta-srv",
    "src/mito",
    "src/object-store",
    "src/query",
    "src/script",
    "src/servers",
    "src/session",
    "src/sql",
    "src/storage",
    "src/store-api",
    "src/table",
    "src/mito",
    "tests-integration",
    "tests/runner",
]

[profile.release]
@@ -56,7 +56,6 @@ To compile GreptimeDB from source, you'll need:
  find installation instructions [here](https://grpc.io/docs/protoc-installation/).
  **Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
  keyword. You can check it with `protoc --version`.

#### Build with Docker

@@ -161,6 +160,8 @@ break things. Benchmark on development branch may not represent its potential
performance. We release pre-built binaries constantly for functional
evaluation. Do not use it in production at the moment.

For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).

## Community

Our core team is thrilled to see you participate in any way you like. When you are stuck, try to
@@ -28,9 +28,8 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
use arrow::record_batch::RecordBatch;
use clap::Parser;
use client::admin::Admin;
use client::api::v1::codec::InsertBatch;
use client::api::v1::column::Values;
use client::api::v1::{insert_expr, Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::{Client, Database, Select};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
@@ -100,16 +99,13 @@ async fn write_data(

    for record_batch in record_batch_reader {
        let record_batch = record_batch.unwrap();
        let row_count = record_batch.num_rows();
        let insert_batch = convert_record_batch(record_batch).into();
        let (columns, row_count) = convert_record_batch(record_batch);
        let insert_expr = InsertExpr {
            schema_name: "public".to_string(),
            table_name: TABLE_NAME.to_string(),
            expr: Some(insert_expr::Expr::Values(insert_expr::Values {
                values: vec![insert_batch],
            })),
            options: HashMap::default(),
            region_number: 0,
            columns,
            row_count,
        };
        let now = Instant::now();
        db.insert(insert_expr).await.unwrap();
@@ -125,7 +121,7 @@ async fn write_data(
    total_rpc_elapsed_ms
}

fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
    let schema = record_batch.schema();
    let fields = schema.fields();
    let row_count = record_batch.num_rows();
@@ -143,10 +139,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
        columns.push(column);
    }

    InsertBatch {
        columns,
        row_count: row_count as _,
    }
    (columns, row_count as _)
}

fn build_values(column: &ArrayRef) -> Values {
@@ -7,3 +7,4 @@ coverage:
  patch: off
ignore:
  - "**/error*.rs" # ignore all error.rs files
  - "tests/runner/*.rs" # ignore integration test runner
@@ -5,6 +5,7 @@ wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mysql_addr = '127.0.0.1:4406'
mysql_runtime_size = 4
enable_memory_catalog = false

[storage]
type = 'File'
@@ -1,6 +1,9 @@
mode = 'distributed'
datanode_rpc_addr = '127.0.0.1:3001'
http_addr = '127.0.0.1:4000'

[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"

[meta_client_opts]
metasrv_addrs = ['127.0.0.1:3002']
@@ -1,7 +1,11 @@
node_id = 0
mode = 'standalone'
http_addr = '127.0.0.1:4000'
wal_dir = '/tmp/greptimedb/wal/'
enable_memory_catalog = false

[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"

[storage]
type = 'File'
@@ -7,8 +7,8 @@ license = "Apache-2.0"

[dependencies]
common-base = { path = "../common/base" }
common-time = { path = "../common/time" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
prost = "0.11"
snafu = { version = "0.7", features = ["backtraces"] }
@@ -20,7 +20,6 @@ fn main() {
        .file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
        .compile(
            &[
                "greptime/v1/insert.proto",
                "greptime/v1/select.proto",
                "greptime/v1/physical_plan.proto",
                "greptime/v1/greptime.proto",
@@ -20,6 +20,7 @@ message AdminExpr {
    CreateExpr create = 2;
    AlterExpr alter = 3;
    CreateDatabaseExpr create_database = 4;
    DropTableExpr drop_table = 5;
  }
}

@@ -55,6 +56,12 @@ message AlterExpr {
  }
}

message DropTableExpr {
  string catalog_name = 1;
  string schema_name = 2;
  string table_name = 3;
}

message AddColumns {
  repeated AddColumn add_columns = 1;
}
@@ -2,6 +2,7 @@ syntax = "proto3";

package greptime.v1;

import "greptime/v1/column.proto";
import "greptime/v1/common.proto";

message DatabaseRequest {
@@ -41,26 +42,16 @@ message InsertExpr {
  string schema_name = 1;
  string table_name = 2;

  message Values {
    repeated bytes values = 1;
  }
  // Data is represented here.
  repeated Column columns = 3;

  oneof expr {
    Values values = 3;
  // The row_count of all columns, which includes null and non-null values.
  //
  // Note: the row_count of all columns in an InsertExpr must be the same.
  uint32 row_count = 4;

    // TODO(LFC): Remove field "sql" in InsertExpr.
    // When Frontend instance received an insertion SQL (`insert into ...`), it's anticipated to parse the SQL and
    // assemble the values to insert to feed Datanode. In other words, inserting data through Datanode instance's GRPC
    // interface shouldn't use SQL directly.
    // Then why the "sql" field exists here? It's because the Frontend needs table schema to create the values to insert,
    // which is currently not able to find anywhere. (Maybe the table schema is suppose to be fetched from Meta?)
    // The "sql" field is meant to be removed in the future.
    string sql = 4;
  }

  /// The region number of current insert request.
  // The region number of current insert request.
  uint32 region_number = 5;
  map<string, bytes> options = 6;
}

// TODO(jiachun)
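Note: to make the reworked InsertExpr concrete, here is a minimal Rust sketch of populating it, assuming the prost-generated types as re-exported by this repository's api crate. The null_mask semantics follow the codec tests further down (row_count 8, seven stored values, null_mask vec![1] marks row 0 as null); the f64_values field name is assumed by analogy with i32_values in those tests.

use std::collections::HashMap;

use api::v1::{column::Values, Column, InsertExpr};

fn demo_insert_expr() -> InsertExpr {
    // One column, three rows; row 1 (0-based) is null, so only two values
    // are stored and the little-endian null bitmap is 0b010 = vec![2].
    let cpu = Column {
        column_name: "cpu".to_string(),
        semantic_type: 1, // SEMANTIC_FIELD, per the constants in the client example below
        values: Some(Values {
            f64_values: vec![0.3, 0.9],
            ..Default::default()
        }),
        null_mask: vec![2],
        ..Default::default()
    };
    InsertExpr {
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
        columns: vec![cpu],
        row_count: 3,
        region_number: 0,
        options: HashMap::default(),
    }
}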
@@ -1,14 +0,0 @@
syntax = "proto3";

package greptime.v1.codec;

import "greptime/v1/column.proto";

message InsertBatch {
  repeated Column columns = 1;
  uint32 row_count = 2;
}

message RegionNumber {
  uint32 id = 1;
}
@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

mod column_def;
pub mod error;
pub mod helper;
pub mod prometheus;
@@ -15,7 +15,7 @@
pub use prost::DecodeError;
use prost::Message;

use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionNumber, SelectResult};
use crate::v1::codec::{PhysicalPlanNode, SelectResult};
use crate::v1::meta::TableRouteValue;

macro_rules! impl_convert_with_bytes {
@@ -36,10 +36,8 @@ macro_rules! impl_convert_with_bytes {
    };
}

impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionNumber);
impl_convert_with_bytes!(TableRouteValue);

#[cfg(test)]
@@ -51,52 +49,6 @@ mod tests {

    const SEMANTIC_TAG: i32 = 0;

    #[test]
    fn test_convert_insert_batch() {
        let insert_batch = mock_insert_batch();

        let bytes: Vec<u8> = insert_batch.into();
        let insert: InsertBatch = bytes.deref().try_into().unwrap();

        assert_eq!(8, insert.row_count);
        assert_eq!(1, insert.columns.len());

        let column = &insert.columns[0];
        assert_eq!("foo", column.column_name);
        assert_eq!(SEMANTIC_TAG, column.semantic_type);
        assert_eq!(vec![1], column.null_mask);
        assert_eq!(
            vec![2, 3, 4, 5, 6, 7, 8],
            column.values.as_ref().unwrap().i32_values
        );
    }

    #[should_panic]
    #[test]
    fn test_convert_insert_batch_wrong() {
        let insert_batch = mock_insert_batch();

        let mut bytes: Vec<u8> = insert_batch.into();

        // modify some bytes
        bytes[0] = 0b1;
        bytes[1] = 0b1;

        let insert: InsertBatch = bytes.deref().try_into().unwrap();

        assert_eq!(8, insert.row_count);
        assert_eq!(1, insert.columns.len());

        let column = &insert.columns[0];
        assert_eq!("foo", column.column_name);
        assert_eq!(SEMANTIC_TAG, column.semantic_type);
        assert_eq!(vec![1], column.null_mask);
        assert_eq!(
            vec![2, 3, 4, 5, 6, 7, 8],
            column.values.as_ref().unwrap().i32_values
        );
    }

    #[test]
    fn test_convert_select_result() {
        let select_result = mock_select_result();
@@ -143,35 +95,6 @@ mod tests {
        );
    }

    #[test]
    fn test_convert_region_id() {
        let region_id = RegionNumber { id: 12 };

        let bytes: Vec<u8> = region_id.into();
        let region_id: RegionNumber = bytes.deref().try_into().unwrap();

        assert_eq!(12, region_id.id);
    }

    fn mock_insert_batch() -> InsertBatch {
        let values = column::Values {
            i32_values: vec![2, 3, 4, 5, 6, 7, 8],
            ..Default::default()
        };
        let null_mask = vec![1];
        let column = Column {
            column_name: "foo".to_string(),
            semantic_type: SEMANTIC_TAG,
            values: Some(values),
            null_mask,
            ..Default::default()
        };
        InsertBatch {
            columns: vec![column],
            row_count: 8,
        }
    }

    fn mock_select_result() -> SelectResult {
        let values = column::Values {
            i32_values: vec![2, 3, 4, 5, 6, 7, 8],
@@ -21,4 +21,5 @@ pub mod codec {
    tonic::include_proto!("greptime.v1.codec");
}

mod column_def;
pub mod meta;
@@ -25,7 +25,6 @@ futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.20"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
@@ -37,9 +36,8 @@ tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
opendal = "0.20"
storage = { path = "../storage" }
mito = { path = "../mito" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
@@ -94,7 +94,7 @@ pub enum Error {
        backtrace: Backtrace,
    },

    #[snafu(display("Table {} already exists", table))]
    #[snafu(display("Table `{}` already exists", table))]
    TableExists { table: String, backtrace: Backtrace },

    #[snafu(display("Schema {} already exists", schema))]
@@ -109,6 +109,12 @@ pub enum Error {
        source: BoxedError,
    },

    #[snafu(display("Operation {} not implemented yet", operation))]
    Unimplemented {
        operation: String,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
    OpenTable {
        table_info: String,
@@ -216,11 +222,12 @@ impl ErrorExt for Error {
            | Error::ValueDeserialize { .. }
            | Error::Io { .. } => StatusCode::StorageUnavailable,

            Error::RegisterTable { .. } => StatusCode::Internal,

            Error::ReadSystemCatalog { source, .. } => source.status_code(),
            Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
            Error::InvalidCatalogValue { source, .. } => source.status_code(),

            Error::RegisterTable { .. } => StatusCode::Internal,
            Error::TableExists { .. } => StatusCode::TableAlreadyExists,
            Error::SchemaExists { .. } => StatusCode::InvalidArguments,

@@ -235,6 +242,8 @@ impl ErrorExt for Error {
            Error::InvalidTableSchema { source, .. } => source.status_code(),
            Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
            Error::Internal { source, .. } => source.status_code(),

            Error::Unimplemented { .. } => StatusCode::Unsupported,
        }
    }
@@ -15,6 +15,7 @@
#![feature(assert_matches)]

use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use common_telemetry::info;
@@ -83,12 +84,17 @@ pub trait CatalogManager: CatalogList {
    /// Starts a catalog manager.
    async fn start(&self) -> Result<()>;

    /// Registers a table given given catalog/schema to catalog manager,
    /// returns table registered.
    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
    /// Registers a table within given catalog/schema to catalog manager,
    /// returns whether the table registered.
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;

    /// Register a schema with catalog name and schema name.
    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize>;
    /// Deregisters a table within given catalog/schema to catalog manager,
    /// returns whether the table deregistered.
    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;

    /// Register a schema with catalog name and schema name. Returns whether the
    /// schema registered.
    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;

    /// Register a system table, should be called before starting the manager.
    async fn register_system_table(&self, request: RegisterSystemTableRequest)
@@ -123,6 +129,25 @@ pub struct RegisterTableRequest {
    pub table: TableRef,
}

impl Debug for RegisterTableRequest {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("RegisterTableRequest")
            .field("catalog", &self.catalog)
            .field("schema", &self.schema)
            .field("table_name", &self.table_name)
            .field("table_id", &self.table_id)
            .field("table", &self.table.table_info())
            .finish()
    }
}

#[derive(Clone)]
pub struct DeregisterTableRequest {
    pub catalog: String,
    pub schema: String,
    pub table_name: String,
}

#[derive(Debug, Clone)]
pub struct RegisterSchemaRequest {
    pub catalog: String,
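Note: the switch from Result<usize> to Result<bool> makes duplicate registration an explicit, idempotent outcome rather than a row count. A usage sketch under the new contract; `manager` stands in for any CatalogManager implementation, and the request is built as in the tests later in this diff:

async fn register_twice(manager: &dyn CatalogManager, req: RegisterTableRequest) -> Result<()> {
    // First registration: the table is new, so this returns true.
    assert!(manager.register_table(req.clone()).await?);
    // Same name and same table id again: treated as a no-op, returns false.
    assert!(!manager.register_table(req).await?);
    // Same name but a different table id would instead fail with TableExists.
    Ok(())
}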
@@ -21,7 +21,7 @@ use common_catalog::consts::{
    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::info;
use common_telemetry::{error, info};
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -36,7 +36,7 @@ use table::TableRef;
use crate::error::{
    CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
    SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
    TableExistsSnafu, TableNotFoundSnafu,
    TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
};
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
@@ -46,8 +46,8 @@ use crate::system::{
use crate::tables::SystemCatalog;
use crate::{
    format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
    CatalogProvider, CatalogProviderRef, RegisterSchemaRequest, RegisterSystemTableRequest,
    RegisterTableRequest, SchemaProvider, SchemaProviderRef,
    CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};

/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -57,6 +57,7 @@ pub struct LocalCatalogManager {
    engine: TableEngineRef,
    next_table_id: AtomicU32,
    init_lock: Mutex<bool>,
    register_lock: Mutex<()>,
    system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
}

@@ -76,6 +77,7 @@ impl LocalCatalogManager {
            engine,
            next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
            init_lock: Mutex::new(false),
            register_lock: Mutex::new(()),
            system_table_requests: Mutex::new(Vec::default()),
        })
    }
@@ -309,7 +311,7 @@ impl CatalogManager for LocalCatalogManager {
        self.init().await
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let started = self.init_lock.lock().await;

        ensure!(
@@ -332,27 +334,50 @@ impl CatalogManager for LocalCatalogManager {
                schema_info: format!("{}.{}", catalog_name, schema_name),
            })?;

        if schema.table_exist(&request.table_name)? {
            return TableExistsSnafu {
                table: format_full_table_name(catalog_name, schema_name, &request.table_name),
        {
            let _lock = self.register_lock.lock().await;
            if let Some(existing) = schema.table(&request.table_name)? {
                if existing.table_info().ident.table_id != request.table_id {
                    error!(
                        "Unexpected table register request: {:?}, existing: {:?}",
                        request,
                        existing.table_info()
                    );
                    return TableExistsSnafu {
                        table: format_full_table_name(
                            catalog_name,
                            schema_name,
                            &request.table_name,
                        ),
                    }
                    .fail();
                }
                // Try to register table with same table id, just ignore.
                Ok(false)
            } else {
                // table does not exist
                self.system
                    .register_table(
                        catalog_name.clone(),
                        schema_name.clone(),
                        request.table_name.clone(),
                        request.table_id,
                    )
                    .await?;
                schema.register_table(request.table_name, request.table)?;
                Ok(true)
            }
            .fail();
        }

        self.system
            .register_table(
                catalog_name.clone(),
                schema_name.clone(),
                request.table_name.clone(),
                request.table_id,
            )
            .await?;

        schema.register_table(request.table_name, request.table)?;
        Ok(1)
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
        UnimplementedSnafu {
            operation: "deregister table",
        }
        .fail()
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let started = self.init_lock.lock().await;
        ensure!(
            *started,
@@ -367,17 +392,21 @@ impl CatalogManager for LocalCatalogManager {
            .catalogs
            .catalog(catalog_name)?
            .context(CatalogNotFoundSnafu { catalog_name })?;
        if catalog.schema(schema_name)?.is_some() {
            return SchemaExistsSnafu {
                schema: schema_name,
            }
            .fail();

        {
            let _lock = self.register_lock.lock().await;
            ensure!(
                catalog.schema(schema_name)?.is_none(),
                SchemaExistsSnafu {
                    schema: schema_name,
                }
            );
            self.system
                .register_schema(request.catalog, schema_name.clone())
                .await?;
            catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
            Ok(true)
        }
        self.system
            .register_schema(request.catalog, schema_name.clone())
            .await?;
        catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
        Ok(1)
    }

    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
@@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};

use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::OptionExt;
use table::metadata::TableId;
use table::table::TableIdProvider;
@@ -27,8 +28,8 @@ use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
use crate::schema::SchemaProvider;
use crate::{
    CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, RegisterSchemaRequest,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
    CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
    RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
};

/// Simple in-memory list of catalogs
@@ -69,7 +70,7 @@ impl CatalogManager for MemoryCatalogManager {
        Ok(())
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
@@ -84,10 +85,28 @@ impl CatalogManager for MemoryCatalogManager {
            })?;
        schema
            .register_table(request.table_name, request.table)
            .map(|v| if v.is_some() { 0 } else { 1 })
            .map(|v| v.is_none())
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
            .context(CatalogNotFoundSnafu {
                catalog_name: &request.catalog,
            })?
            .clone();
        let schema = catalog
            .schema(&request.schema)?
            .with_context(|| SchemaNotFoundSnafu {
                schema_info: format!("{}.{}", &request.catalog, &request.schema),
            })?;
        schema
            .deregister_table(&request.table_name)
            .map(|v| v.is_some())
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
@@ -95,11 +114,12 @@ impl CatalogManager for MemoryCatalogManager {
                catalog_name: &request.catalog,
            })?;
        catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
        Ok(1)
        Ok(true)
    }

    async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
        unimplemented!()
        // TODO(ruihang): support register system table request
        Ok(())
    }

    fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
@@ -251,11 +271,21 @@ impl SchemaProvider for MemorySchemaProvider {
    }

    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
        if self.table_exist(name.as_str())? {
            return TableExistsSnafu { table: name }.fail()?;
        }
        let mut tables = self.tables.write().unwrap();
        Ok(tables.insert(name, table))
        if let Some(existing) = tables.get(name.as_str()) {
            // if table with the same name but different table id exists, then it's a fatal bug
            if existing.table_info().ident.table_id != table.table_info().ident.table_id {
                error!(
                    "Unexpected table register: {:?}, existing: {:?}",
                    table.table_info(),
                    existing.table_info()
                );
                return TableExistsSnafu { table: name }.fail()?;
            }
            Ok(Some(existing.clone()))
        } else {
            Ok(tables.insert(name, table))
        }
    }

    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
@@ -315,7 +345,7 @@ mod tests {
            .unwrap()
            .is_none());
        assert!(provider.table_exist(table_name).unwrap());
        let other_table = NumbersTable::default();
        let other_table = NumbersTable::new(12);
        let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
        let err = result.err().unwrap();
        assert!(err.backtrace_opt().is_some());
@@ -340,4 +370,34 @@ mod tests {
            .downcast_ref::<MemoryCatalogManager>()
            .unwrap();
    }

    #[tokio::test]
    pub async fn test_catalog_deregister_table() {
        let catalog = MemoryCatalogManager::default();
        let schema = catalog
            .schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
            .unwrap()
            .unwrap();

        let register_table_req = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "numbers".to_string(),
            table_id: 2333,
            table: Arc::new(NumbersTable::default()),
        };
        catalog.register_table(register_table_req).await.unwrap();
        assert!(schema.table_exist("numbers").unwrap());

        let deregister_table_req = DeregisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "numbers".to_string(),
        };
        catalog
            .deregister_table(deregister_table_req)
            .await
            .unwrap();
        assert!(!schema.table_exist("numbers").unwrap());
    }
}
@@ -37,13 +37,13 @@ use tokio::sync::Mutex;

use crate::error::{
    CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
    OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu,
    OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
    handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
    RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider,
    SchemaProviderRef,
    DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
    RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};

/// Catalog manager based on metasrv.
@@ -154,8 +154,8 @@ impl RemoteCatalogManager {
        }
        let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
            .context(InvalidCatalogValueSnafu)?;
        let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
            .context(InvalidCatalogValueSnafu)?;
        let table_value =
            TableGlobalValue::from_bytes(&v).context(InvalidCatalogValueSnafu)?;

        info!(
            "Found catalog table entry, key: {}, value: {:?}",
@@ -411,7 +411,7 @@ impl CatalogManager for RemoteCatalogManager {
        Ok(())
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let catalog_name = request.catalog;
        let schema_name = request.schema;
        let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -430,10 +430,17 @@ impl CatalogManager for RemoteCatalogManager {
            .fail();
        }
        schema_provider.register_table(request.table_name, request.table)?;
        Ok(1)
        Ok(true)
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
        UnimplementedSnafu {
            operation: "deregister table",
        }
        .fail()
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let catalog_name = request.catalog;
        let schema_name = request.schema;
        let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -441,7 +448,7 @@ impl CatalogManager for RemoteCatalogManager {
        })?;
        let schema_provider = self.new_schema_provider(&catalog_name, &schema_name);
        catalog_provider.register_schema(schema_name, schema_provider)?;
        Ok(1)
        Ok(true)
    }

    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {

@@ -43,7 +43,6 @@ use crate::error::{

pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;

pub struct SystemCatalogTable {
@@ -111,7 +110,7 @@ impl SystemCatalogTable {
            desc: Some("System catalog table".to_string()),
            schema: schema.clone(),
            region_numbers: vec![0],
            primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
            primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
            create_if_not_exists: true,
            table_options: HashMap::new(),
        };
@@ -456,7 +455,7 @@ mod tests {
    pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
        let dir = TempDir::new("system-table-test").unwrap();
        let store_dir = dir.path().to_string_lossy();
        let accessor = opendal::services::fs::Builder::default()
        let accessor = object_store::backend::fs::Builder::default()
            .root(&store_dir)
            .build()
            .unwrap();
132 src/catalog/tests/local_catalog_tests.rs Normal file
@@ -0,0 +1,132 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use catalog::local::LocalCatalogManager;
    use catalog::{CatalogManager, RegisterTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_telemetry::{error, info};
    use mito::config::EngineConfig;
    use table::table::numbers::NumbersTable;
    use table::TableRef;
    use tokio::sync::Mutex;

    async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
        let (_dir, object_store) =
            mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
        let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
            EngineConfig::default(),
            mito::table::test_util::MockEngine::default(),
            object_store,
        ));
        let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
        catalog_manager.start().await?;
        Ok(catalog_manager)
    }

    #[tokio::test]
    async fn test_duplicate_register() {
        let catalog_manager = create_local_catalog_manager().await.unwrap();
        let request = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "test_table".to_string(),
            table_id: 42,
            table: Arc::new(NumbersTable::new(42)),
        };
        assert!(catalog_manager
            .register_table(request.clone())
            .await
            .unwrap());

        // Registering a table with the same table id succeeds and returns false.
        assert!(!catalog_manager.register_table(request).await.unwrap());

        let err = catalog_manager
            .register_table(RegisterTableRequest {
                catalog: DEFAULT_CATALOG_NAME.to_string(),
                schema: DEFAULT_SCHEMA_NAME.to_string(),
                table_name: "test_table".to_string(),
                table_id: 43,
                table: Arc::new(NumbersTable::new(43)),
            })
            .await
            .unwrap_err();
        assert!(
            err.to_string()
                .contains("Table `greptime.public.test_table` already exists"),
            "Actual error message: {}",
            err
        );
    }

    #[test]
    fn test_concurrent_register() {
        common_telemetry::init_default_ut_logging();
        let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
        let catalog_manager =
            Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));

        let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));

        let mut handles = Vec::with_capacity(8);
        for i in 0..8 {
            let catalog = catalog_manager.clone();
            let succeed = succeed.clone();
            let handle = rt.spawn(async move {
                let table_id = 42 + i;
                let table = Arc::new(NumbersTable::new(table_id));
                let req = RegisterTableRequest {
                    catalog: DEFAULT_CATALOG_NAME.to_string(),
                    schema: DEFAULT_SCHEMA_NAME.to_string(),
                    table_name: "test_table".to_string(),
                    table_id,
                    table: table.clone(),
                };
                match catalog.register_table(req).await {
                    Ok(res) => {
                        if res {
                            let mut succeed = succeed.lock().await;
                            info!("Successfully registered table: {}", table_id);
                            *succeed = Some(table);
                        }
                    }
                    Err(_) => {
                        error!("Failed to register table {}", table_id);
                    }
                }
            });
            handles.push(handle);
        }

        rt.block_on(async move {
            for handle in handles {
                handle.await.unwrap();
            }
            let guard = succeed.lock().await;
            let table = guard.as_ref().unwrap();
            let table_registered = catalog_manager
                .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
                .unwrap()
                .unwrap();
            assert_eq!(
                table_registered.table_info().ident.table_id,
                table.table_info().ident.table_id
            );
        });
    }
}
@@ -217,7 +217,7 @@ impl TableEngine for MockTableEngine {
        &self,
        _ctx: &EngineContext,
        _request: DropTableRequest,
    ) -> table::Result<()> {
    ) -> table::Result<bool> {
        unimplemented!()
    }
}
@@ -202,7 +202,7 @@ mod tests {
            table_id,
            table,
        };
        assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
        assert!(catalog_manager.register_table(reg_req).await.unwrap());
        assert_eq!(
            HashSet::from([table_name, "numbers".to_string()]),
            default_schema
@@ -287,7 +287,7 @@ mod tests {
            .register_schema(schema_name.clone(), schema.clone())
            .expect("Register schema should not fail");
        assert!(prev.is_none());
        assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
        assert!(catalog_manager.register_table(reg_req).await.unwrap());

        assert_eq!(
            HashSet::from([schema_name.clone()]),
@@ -11,9 +11,9 @@ async-stream = "0.3"
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-insert = { path = "../common/insert" }
common-time = { path = "../common/time" }
datafusion = "14.0.0"
datatypes = { path = "../datatypes" }
@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use api::v1::codec::InsertBatch;
use api::v1::*;
use client::{Client, Database};

fn main() {
    tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
        .unwrap();
@@ -29,19 +27,19 @@ async fn run() {
    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
    let db = Database::new("greptime", client);

    let (columns, row_count) = insert_data();

    let expr = InsertExpr {
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
        expr: Some(insert_expr::Expr::Values(insert_expr::Values {
            values: insert_batches(),
        })),
        options: HashMap::default(),
        region_number: 0,
        columns,
        row_count,
    };
    db.insert(expr).await.unwrap();
}

fn insert_batches() -> Vec<Vec<u8>> {
fn insert_data() -> (Vec<Column>, u32) {
    const SEMANTIC_TAG: i32 = 0;
    const SEMANTIC_FIELD: i32 = 1;
    const SEMANTIC_TS: i32 = 2;
@@ -101,9 +99,8 @@ fn insert_batches() -> Vec<Vec<u8>> {
        ..Default::default()
    };

    let insert_batch = InsertBatch {
        columns: vec![host_column, cpu_column, mem_column, ts_column],
    (
        vec![host_column, cpu_column, mem_column, ts_column],
        row_count,
    };
    vec![insert_batch.into()]
    )
}
@@ -58,7 +58,19 @@ impl Admin {
            header: Some(header),
            expr: Some(admin_expr::Expr::Alter(expr)),
        };
        Ok(self.do_requests(vec![expr]).await?.remove(0))
        self.do_request(expr).await
    }

    pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
        let header = ExprHeader {
            version: PROTOCOL_VERSION,
        };
        let expr = AdminExpr {
            header: Some(header),
            expr: Some(admin_expr::Expr::DropTable(expr)),
        };

        self.do_request(expr).await
    }

    /// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.
@@ -23,7 +23,7 @@ use api::v1::{
};
use common_error::status_code::StatusCode;
use common_grpc::{AsExecutionPlan, DefaultAsPlanImpl};
use common_insert::column_to_vector;
use common_grpc_expr::column_to_vector;
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::physical_plan::ExecutionPlan;

@@ -103,7 +103,7 @@ pub enum Error {
    #[snafu(display("Failed to convert column to vector, source: {}", source))]
    ColumnToVector {
        #[snafu(backtrace)]
        source: common_insert::error::Error,
        source: common_grpc_expr::error::Error,
    },
}
@@ -15,13 +15,13 @@ common-error = { path = "../common/error" }
common-telemetry = { path = "../common/telemetry", features = [
    "deadlock_detection",
] }
meta-client = { path = "../meta-client" }
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
serde = "1.0"
servers = {path = "../servers"}
servers = { path = "../servers" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"
@@ -29,3 +29,6 @@ toml = "0.5"
[dev-dependencies]
serde = "1.0"
tempdir = "0.3"

[build-dependencies]
build-data = "0.1.3"
19 src/cmd/build.rs Normal file
@@ -0,0 +1,19 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

fn main() {
    build_data::set_GIT_BRANCH();
    build_data::set_GIT_COMMIT();
    build_data::set_GIT_DIRTY();
}
@@ -20,7 +20,7 @@ use cmd::{datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info};

#[derive(Parser)]
#[clap(name = "greptimedb")]
#[clap(name = "greptimedb", version = print_version())]
struct Command {
    #[clap(long, default_value = "/tmp/greptimedb/logs")]
    log_dir: String,
@@ -70,6 +70,17 @@ impl fmt::Display for SubCommand {
    }
}

fn print_version() -> &'static str {
    concat!(
        "\nbranch: ",
        env!("GIT_BRANCH"),
        "\ncommit: ",
        env!("GIT_COMMIT"),
        "\ndirty: ",
        env!("GIT_DIRTY")
    )
}

#[tokio::main]
async fn main() -> Result<()> {
    let cmd = Command::parse();
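Note: the build.rs above works because a Cargo build script can export compile-time environment variables that env! then reads; the build-data helpers emit those for the current git state. A hand-rolled sketch of the same mechanism, in case the crate's helpers are unfamiliar (the git invocations are illustrative, not the crate's exact implementation):

use std::process::Command;

// Runs a git command and returns trimmed stdout; panics if git is missing.
fn git(args: &[&str]) -> String {
    let out = Command::new("git").args(args).output().expect("git");
    String::from_utf8_lossy(&out.stdout).trim().to_string()
}

fn main() {
    // `cargo:rustc-env=NAME=VALUE` makes NAME visible to env!("NAME") at compile time.
    println!("cargo:rustc-env=GIT_BRANCH={}", git(&["rev-parse", "--abbrev-ref", "HEAD"]));
    println!("cargo:rustc-env=GIT_COMMIT={}", git(&["rev-parse", "HEAD"]));
    let dirty = !git(&["status", "--porcelain"]).is_empty();
    println!("cargo:rustc-env=GIT_DIRTY={dirty}");
}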
@@ -170,6 +170,7 @@ mod tests {
            ObjectStoreConfig::File { data_dir } => {
                assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
            }
            ObjectStoreConfig::S3 { .. } => unreachable!(),
        };
    }
@@ -97,10 +97,7 @@ mod tests {
    #[test]
    fn test_start_node_error() {
        fn throw_datanode_error() -> StdResult<datanode::error::Error> {
            datanode::error::MissingFieldSnafu {
                field: "test_field",
            }
            .fail()
            datanode::error::MissingNodeIdSnafu {}.fail()
        }

        let e = throw_datanode_error()
@@ -21,6 +21,7 @@ use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use meta_client::MetaClientOpts;
use servers::http::HttpOptions;
use servers::Mode;
use snafu::ResultExt;

@@ -96,7 +97,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
        };

        if let Some(addr) = cmd.http_addr {
            opts.http_addr = Some(addr);
            opts.http_options = Some(HttpOptions {
                addr,
                ..Default::default()
            });
        }
        if let Some(addr) = cmd.grpc_addr {
            opts.grpc_options = Some(GrpcOptions {
@@ -141,6 +145,8 @@ impl TryFrom<StartCommand> for FrontendOptions {

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;

    #[test]
@@ -157,7 +163,7 @@ mod tests {
        };

        let opts: FrontendOptions = command.try_into().unwrap();
        assert_eq!(opts.http_addr, Some("127.0.0.1:1234".to_string()));
        assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
        assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
        assert_eq!(
            opts.postgres_options.as_ref().unwrap().addr,
@@ -188,4 +194,33 @@ mod tests {

        assert!(!opts.influxdb_options.unwrap().enable);
    }

    #[test]
    fn test_read_from_config_file() {
        let command = StartCommand {
            http_addr: None,
            grpc_addr: None,
            mysql_addr: None,
            postgres_addr: None,
            opentsdb_addr: None,
            influxdb_enable: None,
            config_file: Some(format!(
                "{}/../../config/frontend.example.toml",
                std::env::current_dir().unwrap().as_path().to_str().unwrap()
            )),
            metasrv_addr: None,
        };

        let fe_opts = FrontendOptions::try_from(command).unwrap();
        assert_eq!(Mode::Distributed, fe_opts.mode);
        assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
        assert_eq!(
            "127.0.0.1:4000".to_string(),
            fe_opts.http_options.as_ref().unwrap().addr
        );
        assert_eq!(
            Duration::from_secs(30),
            fe_opts.http_options.as_ref().unwrap().timeout
        );
    }
}
@@ -25,6 +25,7 @@ use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use frontend::prometheus::PrometheusOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::http::HttpOptions;
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use tokio::try_join;
|
||||
@@ -61,7 +62,7 @@ impl SubCommand {
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct StandaloneOptions {
|
||||
pub http_addr: Option<String>,
|
||||
pub http_options: Option<HttpOptions>,
|
||||
pub grpc_options: Option<GrpcOptions>,
|
||||
pub mysql_options: Option<MysqlOptions>,
|
||||
pub postgres_options: Option<PostgresOptions>,
|
||||
@@ -71,12 +72,13 @@ pub struct StandaloneOptions {
|
||||
pub mode: Mode,
|
||||
pub wal_dir: String,
|
||||
pub storage: ObjectStoreConfig,
|
||||
pub enable_memory_catalog: bool,
|
||||
}
|
||||
|
||||
impl Default for StandaloneOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
http_addr: Some("127.0.0.1:4000".to_string()),
|
||||
http_options: Some(HttpOptions::default()),
|
||||
grpc_options: Some(GrpcOptions::default()),
|
||||
mysql_options: Some(MysqlOptions::default()),
|
||||
postgres_options: Some(PostgresOptions::default()),
|
||||
@@ -86,6 +88,7 @@ impl Default for StandaloneOptions {
|
||||
mode: Mode::Standalone,
|
||||
wal_dir: "/tmp/greptimedb/wal".to_string(),
|
||||
storage: ObjectStoreConfig::default(),
|
||||
enable_memory_catalog: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -93,7 +96,7 @@ impl Default for StandaloneOptions {
|
||||
impl StandaloneOptions {
|
||||
fn frontend_options(self) -> FrontendOptions {
|
||||
FrontendOptions {
|
||||
http_addr: self.http_addr,
|
||||
http_options: self.http_options,
|
||||
grpc_options: self.grpc_options,
|
||||
mysql_options: self.mysql_options,
|
||||
postgres_options: self.postgres_options,
|
||||
@@ -110,6 +113,7 @@ impl StandaloneOptions {
|
||||
DatanodeOptions {
|
||||
wal_dir: self.wal_dir,
|
||||
storage: self.storage,
|
||||
enable_memory_catalog: self.enable_memory_catalog,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@@ -131,18 +135,22 @@ struct StartCommand {
    influxdb_enable: bool,
    #[clap(short, long)]
    config_file: Option<String>,
    #[clap(short = 'm', long = "memory-catalog")]
    enable_memory_catalog: bool,
}

impl StartCommand {
    async fn run(self) -> Result<()> {
        let enable_memory_catalog = self.enable_memory_catalog;
        let config_file = self.config_file.clone();
        let fe_opts = FrontendOptions::try_from(self)?;
        let dn_opts: DatanodeOptions = {
            let opts: StandaloneOptions = if let Some(path) = config_file {
            let mut opts: StandaloneOptions = if let Some(path) = config_file {
                toml_loader::from_file!(&path)?
            } else {
                StandaloneOptions::default()
            };
            opts.enable_memory_catalog = enable_memory_catalog;
            opts.datanode_options()
        };

@@ -156,8 +164,15 @@ impl StartCommand {
            .context(StartDatanodeSnafu)?;
        let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;

        // Start datanode instance before starting services, to avoid requests coming in before internal components are started.
        datanode
            .start_instance()
            .await
            .context(StartDatanodeSnafu)?;
        info!("Datanode instance started");

        try_join!(
            async { datanode.start().await.context(StartDatanodeSnafu) },
            async { datanode.start_services().await.context(StartDatanodeSnafu) },
            async { frontend.start().await.context(StartFrontendSnafu) }
        )?;

@@ -199,7 +214,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
        opts.mode = Mode::Standalone;

        if let Some(addr) = cmd.http_addr {
            opts.http_addr = Some(addr);
            opts.http_options = Some(HttpOptions {
                addr,
                ..Default::default()
            });
        }
        if let Some(addr) = cmd.rpc_addr {
            // frontend grpc addr conflict with datanode default grpc addr
@@ -249,6 +267,8 @@ impl TryFrom<StartCommand> for FrontendOptions {

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;

    #[test]
@@ -264,12 +284,20 @@ mod tests {
                std::env::current_dir().unwrap().as_path().to_str().unwrap()
            )),
            influxdb_enable: false,
            enable_memory_catalog: false,
        };

        let fe_opts = FrontendOptions::try_from(cmd).unwrap();
        assert_eq!(Mode::Standalone, fe_opts.mode);
        assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
        assert_eq!(Some("127.0.0.1:4000".to_string()), fe_opts.http_addr);
        assert_eq!(
            "127.0.0.1:4000".to_string(),
            fe_opts.http_options.as_ref().unwrap().addr
        );
        assert_eq!(
            Duration::from_secs(30),
            fe_opts.http_options.as_ref().unwrap().timeout
        );
        assert_eq!(
            "127.0.0.1:4001".to_string(),
            fe_opts.grpc_options.unwrap().addr

@@ -28,31 +28,42 @@ use crate::error::{
    DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};

const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";

lazy_static! {
    static ref CATALOG_KEY_PATTERN: Regex =
        Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
    static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-({})$",
        CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
    ))
    .unwrap();
}

lazy_static! {
    static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
        SCHEMA_KEY_PREFIX
        "^{}-({})-({})$",
        SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
    ))
    .unwrap();
}

lazy_static! {
    static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)$",
        TABLE_GLOBAL_KEY_PREFIX
        "^{}-({})-({})-({})$",
        TABLE_GLOBAL_KEY_PREFIX,
        ALPHANUMERICS_NAME_PATTERN,
        ALPHANUMERICS_NAME_PATTERN,
        ALPHANUMERICS_NAME_PATTERN
    ))
    .unwrap();
}

lazy_static! {
    static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)-([0-9]+)$",
        TABLE_REGIONAL_KEY_PREFIX
        "^{}-({})-({})-({})-([0-9]+)$",
        TABLE_REGIONAL_KEY_PREFIX,
        ALPHANUMERICS_NAME_PATTERN,
        ALPHANUMERICS_NAME_PATTERN,
        ALPHANUMERICS_NAME_PATTERN
    ))
    .unwrap();
}
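The tightened patterns above anchor each name segment with the shared `[a-zA-Z_][a-zA-Z0-9_]*` pattern, so a segment may contain digits after its first character but can no longer start with a digit or be empty. A hedged check of that behavior (it depends on the `regex` crate, and the `__c` prefix here is a made-up placeholder, not the real `CATALOG_KEY_PREFIX` value):

use regex::Regex;

const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";

fn main() {
    // "__c" is a hypothetical stand-in for CATALOG_KEY_PREFIX.
    let prefix = "__c";
    let pattern =
        Regex::new(&format!("^{}-({})$", prefix, ALPHANUMERICS_NAME_PATTERN)).unwrap();

    // A well-formed catalog key matches and captures the name segment.
    let caps = pattern.captures("__c-greptime").unwrap();
    assert_eq!("greptime", &caps[1]);

    // Unlike the old `[a-zA-Z_]+`, digits are now allowed after the
    // first character...
    assert!(pattern.is_match("__c-catalog1"));
    // ...but a leading digit or an empty segment is still rejected.
    assert!(!pattern.is_match("__c-1catalog"));
    assert!(!pattern.is_match("__c-"));
}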
@@ -261,6 +272,10 @@ macro_rules! define_catalog_value {
                    .context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
            }

            pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
                Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
            }

            pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
                Ok(serde_json::to_string(self)
                    .context(SerializeCatalogEntryValueSnafu)?

@@ -9,8 +9,8 @@ arc-swap = "1.0"
chrono-tz = "0.6"
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-time = { path = "../time" }
common-query = { path = "../query" }
common-time = { path = "../time" }
datafusion-common = "14.0.0"
datatypes = { path = "../../datatypes" }
libc = "0.2"

@@ -1,5 +1,5 @@
[package]
name = "common-insert"
name = "common-grpc-expr"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
@@ -8,10 +8,12 @@ license = "Apache-2.0"
api = { path = "../../api" }
async-trait = "0.1"
common-base = { path = "../base" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-grpc = { path = "../grpc" }
common-query = { path = "../query" }
common-telemetry = { path = "../telemetry" }
common-time = { path = "../time" }
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
234
src/common/grpc-expr/src/alter.rs
Normal file
@@ -0,0 +1,234 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::alter_expr::Kind;
use api::v1::{AlterExpr, CreateExpr, DropColumns};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};

use crate::error::{
    ColumnNotFoundSnafu, CreateSchemaSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
    MissingTimestampColumnSnafu, Result,
};

/// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
    match expr.kind {
        Some(Kind::AddColumns(add_columns)) => {
            let add_column_requests = add_columns
                .add_columns
                .into_iter()
                .map(|ac| {
                    let column_def = ac.column_def.context(MissingFieldSnafu {
                        field: "column_def",
                    })?;

                    let schema =
                        column_def
                            .try_as_column_schema()
                            .context(InvalidColumnDefSnafu {
                                column: &column_def.name,
                            })?;
                    Ok(AddColumnRequest {
                        column_schema: schema,
                        is_key: ac.is_key,
                    })
                })
                .collect::<Result<Vec<_>>>()?;

            let alter_kind = AlterKind::AddColumns {
                columns: add_column_requests,
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
        }
        Some(Kind::DropColumns(DropColumns { drop_columns })) => {
            let alter_kind = AlterKind::DropColumns {
                names: drop_columns.into_iter().map(|c| c.name).collect(),
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
        }
        None => Ok(None),
    }
}

pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
    let column_schemas = expr
        .column_defs
        .iter()
        .map(|x| {
            x.try_as_column_schema()
                .context(InvalidColumnDefSnafu { column: &x.name })
        })
        .collect::<Result<Vec<ColumnSchema>>>()?;

    ensure!(
        column_schemas
            .iter()
            .any(|column| column.name == expr.time_index),
        MissingTimestampColumnSnafu {
            msg: format!("CreateExpr: {:?}", expr)
        }
    );

    let column_schemas = column_schemas
        .into_iter()
        .map(|column_schema| {
            if column_schema.name == expr.time_index {
                column_schema.with_time_index(true)
            } else {
                column_schema
            }
        })
        .collect::<Vec<_>>();

    Ok(Arc::new(
        SchemaBuilder::try_from(column_schemas)
            .context(CreateSchemaSnafu)?
            .build()
            .context(CreateSchemaSnafu)?,
    ))
}

pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
    let schema = create_table_schema(&expr)?;
    let primary_key_indices = expr
        .primary_keys
        .iter()
        .map(|key| {
            schema
                .column_index_by_name(key)
                .context(ColumnNotFoundSnafu {
                    column_name: key,
                    table_name: &expr.table_name,
                })
        })
        .collect::<Result<Vec<usize>>>()?;

    let catalog_name = expr
        .catalog_name
        .unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
    let schema_name = expr
        .schema_name
        .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());

    let region_ids = if expr.region_ids.is_empty() {
        vec![0]
    } else {
        expr.region_ids
    };

    Ok(CreateTableRequest {
        id: table_id,
        catalog_name,
        schema_name,
        table_name: expr.table_name,
        desc: expr.desc,
        schema,
        region_numbers: region_ids,
        primary_key_indices,
        create_if_not_exists: expr.create_if_not_exists,
        table_options: expr.table_options,
    })
}

#[cfg(test)]
mod tests {
    use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
    use datatypes::prelude::ConcreteDataType;

    use super::*;

    #[test]
    fn test_alter_expr_to_request() {
        let expr = AlterExpr {
            catalog_name: None,
            schema_name: None,
            table_name: "monitor".to_string(),

            kind: Some(Kind::AddColumns(AddColumns {
                add_columns: vec![AddColumn {
                    column_def: Some(ColumnDef {
                        name: "mem_usage".to_string(),
                        datatype: ColumnDataType::Float64 as i32,
                        is_nullable: false,
                        default_constraint: None,
                    }),
                    is_key: false,
                }],
            })),
        };

        let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
        assert_eq!(None, alter_request.catalog_name);
        assert_eq!(None, alter_request.schema_name);
        assert_eq!("monitor".to_string(), alter_request.table_name);
        let add_column = match alter_request.alter_kind {
            AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
            _ => unreachable!(),
        };

        assert!(!add_column.is_key);
        assert_eq!("mem_usage", add_column.column_schema.name);
        assert_eq!(
            ConcreteDataType::float64_datatype(),
            add_column.column_schema.data_type
        );
    }

    #[test]
    fn test_drop_column_expr() {
        let expr = AlterExpr {
            catalog_name: Some("test_catalog".to_string()),
            schema_name: Some("test_schema".to_string()),
            table_name: "monitor".to_string(),

            kind: Some(Kind::DropColumns(DropColumns {
                drop_columns: vec![DropColumn {
                    name: "mem_usage".to_string(),
                }],
            })),
        };

        let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
        assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
        assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
        assert_eq!("monitor".to_string(), alter_request.table_name);

        let mut drop_names = match alter_request.alter_kind {
            AlterKind::DropColumns { names } => names,
            _ => unreachable!(),
        };
        assert_eq!(1, drop_names.len());
        assert_eq!("mem_usage".to_string(), drop_names.pop().unwrap());
    }
}
@@ -22,7 +22,7 @@ use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Column {} not found in table {}", column_name, table_name))]
    #[snafu(display("Column `{}` not found in table `{}`", column_name, table_name))]
    ColumnNotFound {
        column_name: String,
        table_name: String,
@@ -57,8 +57,8 @@ pub enum Error {
        backtrace: Backtrace,
    },

    #[snafu(display("Missing timestamp column in request"))]
    MissingTimestampColumn { backtrace: Backtrace },
    #[snafu(display("Missing timestamp column, msg: {}", msg))]
    MissingTimestampColumn { msg: String, backtrace: Backtrace },

    #[snafu(display("Invalid column proto: {}", err_msg))]
    InvalidColumnProto {
@@ -70,6 +70,26 @@ pub enum Error {
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },

    #[snafu(display("Missing required field in protobuf, field: {}", field))]
    MissingField { field: String, backtrace: Backtrace },

    #[snafu(display("Invalid column default constraint, source: {}", source))]
    ColumnDefaultConstraint {
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },

    #[snafu(display(
        "Invalid column proto definition, column: {}, source: {}",
        column,
        source
    ))]
    InvalidColumnDef {
        column: String,
        #[snafu(backtrace)]
        source: api::error::Error,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -87,6 +107,9 @@ impl ErrorExt for Error {
            | Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
            Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
            Error::CreateVector { .. } => StatusCode::InvalidArguments,
            Error::MissingField { .. } => StatusCode::InvalidArguments,
            Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
            Error::InvalidColumnDef { source, .. } => source.status_code(),
        }
    }
    fn backtrace_opt(&self) -> Option<&Backtrace> {
@@ -14,11 +14,9 @@

use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use std::sync::Arc;

use api::helper::ColumnDataTypeWrapper;
use api::v1::codec::InsertBatch;
use api::v1::column::{SemanticType, Values};
use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
use common_base::BitVec;
@@ -35,9 +33,8 @@ use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequ
use table::Table;

use crate::error::{
    ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
    DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
    MissingTimestampColumnSnafu, Result,
    ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu,
    IllegalInsertDataSnafu, InvalidColumnProtoSnafu, MissingTimestampColumnSnafu, Result,
};
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
@@ -52,35 +49,25 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnD
    }
}

pub fn find_new_columns(
    schema: &SchemaRef,
    insert_batches: &[InsertBatch],
) -> Result<Option<AddColumns>> {
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
    let mut columns_to_add = Vec::default();
    let mut new_columns: HashSet<String> = HashSet::default();

    for InsertBatch { columns, row_count } in insert_batches {
        if *row_count == 0 || columns.is_empty() {
            continue;
        }

        for Column {
            column_name,
            semantic_type,
            datatype,
            ..
        } in columns
    for Column {
        column_name,
        semantic_type,
        datatype,
        ..
    } in columns
    {
            if schema.column_schema_by_name(column_name).is_none() && !new_columns.contains(column_name)
            {
        if schema.column_schema_by_name(column_name).is_none()
            && !new_columns.contains(column_name)
        {
                let column_def = Some(build_column_def(column_name, *datatype, true));
                columns_to_add.push(AddColumn {
                    column_def,
                    is_key: *semantic_type == TAG_SEMANTIC_TYPE,
                });
                new_columns.insert(column_name.to_string());
            }
            let column_def = Some(build_column_def(column_name, *datatype, true));
            columns_to_add.push(AddColumn {
                column_def,
                is_key: *semantic_type == TAG_SEMANTIC_TYPE,
            });
            new_columns.insert(column_name.to_string());
        }
    }

@@ -201,89 +188,84 @@ pub fn build_create_expr_from_insertion(
    schema_name: &str,
    table_id: Option<TableId>,
    table_name: &str,
    insert_batches: &[InsertBatch],
    columns: &[Column],
) -> Result<CreateExpr> {
    let mut new_columns: HashSet<String> = HashSet::default();
    let mut column_defs = Vec::default();
    let mut primary_key_indices = Vec::default();
    let mut timestamp_index = usize::MAX;

    for InsertBatch { columns, row_count } in insert_batches {
        if *row_count == 0 || columns.is_empty() {
            continue;
        }

        for Column {
            column_name,
            semantic_type,
            datatype,
            ..
        } in columns
        {
            if !new_columns.contains(column_name) {
                let mut is_nullable = true;
                match *semantic_type {
                    TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
                    TIMESTAMP_SEMANTIC_TYPE => {
                        ensure!(
                            timestamp_index == usize::MAX,
                            DuplicatedTimestampColumnSnafu {
                                exists: &columns[timestamp_index].column_name,
                                duplicated: column_name,
                            }
                        );
                        timestamp_index = column_defs.len();
                        // Timestamp column must not be null.
                        is_nullable = false;
                    }
                    _ => {}
    for Column {
        column_name,
        semantic_type,
        datatype,
        ..
    } in columns
    {
        if !new_columns.contains(column_name) {
            let mut is_nullable = true;
            match *semantic_type {
                TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
                TIMESTAMP_SEMANTIC_TYPE => {
                    ensure!(
                        timestamp_index == usize::MAX,
                        DuplicatedTimestampColumnSnafu {
                            exists: &columns[timestamp_index].column_name,
                            duplicated: column_name,
                        }
                    );
                    timestamp_index = column_defs.len();
                    // Timestamp column must not be null.
                    is_nullable = false;
                }

                let column_def = build_column_def(column_name, *datatype, is_nullable);
                column_defs.push(column_def);
                new_columns.insert(column_name.to_string());
                _ => {}
            }

            let column_def = build_column_def(column_name, *datatype, is_nullable);
            column_defs.push(column_def);
            new_columns.insert(column_name.to_string());
        }

        ensure!(timestamp_index != usize::MAX, MissingTimestampColumnSnafu);
        let timestamp_field_name = columns[timestamp_index].column_name.clone();

        let primary_keys = primary_key_indices
            .iter()
            .map(|idx| columns[*idx].column_name.clone())
            .collect::<Vec<_>>();

        let expr = CreateExpr {
            catalog_name: Some(catalog_name.to_string()),
            schema_name: Some(schema_name.to_string()),
            table_name: table_name.to_string(),
            desc: Some("Created on insertion".to_string()),
            column_defs,
            time_index: timestamp_field_name,
            primary_keys,
            create_if_not_exists: true,
            table_options: Default::default(),
            table_id,
            region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
        };

        return Ok(expr);
    }

    IllegalInsertDataSnafu.fail()
    ensure!(
        timestamp_index != usize::MAX,
        MissingTimestampColumnSnafu { msg: table_name }
    );
    let timestamp_field_name = columns[timestamp_index].column_name.clone();

    let primary_keys = primary_key_indices
        .iter()
        .map(|idx| columns[*idx].column_name.clone())
        .collect::<Vec<_>>();

    let expr = CreateExpr {
        catalog_name: Some(catalog_name.to_string()),
        schema_name: Some(schema_name.to_string()),
        table_name: table_name.to_string(),
        desc: Some("Created on insertion".to_string()),
        column_defs,
        time_index: timestamp_field_name,
        primary_keys,
        create_if_not_exists: true,
        table_options: Default::default(),
        table_id,
        region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
    };

    Ok(expr)
}

pub fn insertion_expr_to_request(
    catalog_name: &str,
    schema_name: &str,
    table_name: &str,
    insert_batches: Vec<InsertBatch>,
    insert_batches: Vec<(Vec<Column>, u32)>,
    table: Arc<dyn Table>,
) -> Result<InsertRequest> {
    let schema = table.schema();
    let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());

    for InsertBatch { columns, row_count } in insert_batches {
    for (columns, row_count) in insert_batches {
        for Column {
            column_name,
            values,
@@ -329,14 +311,6 @@ pub fn insertion_expr_to_request(
    })
}

#[inline]
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
    bytes_vec
        .iter()
        .map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
        .collect()
}

fn add_values_to_builder(
    builder: &mut VectorBuilder,
    values: Values,
@@ -463,9 +437,8 @@ mod tests {
    use std::sync::Arc;

    use api::helper::ColumnDataTypeWrapper;
    use api::v1::codec::InsertBatch;
    use api::v1::column::{self, SemanticType, Values};
    use api::v1::{insert_expr, Column, ColumnDataType};
    use api::v1::{Column, ColumnDataType};
    use common_base::BitVec;
    use common_query::physical_plan::PhysicalPlanRef;
    use common_query::prelude::Expr;
@@ -479,11 +452,12 @@ mod tests {
    use table::Table;

    use super::{
        build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
        insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
        build_create_expr_from_insertion, convert_values, insertion_expr_to_request, is_null,
        TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
    };
    use crate::error;
    use crate::error::ColumnDataTypeSnafu;
    use crate::insert::find_new_columns;

    #[inline]
    fn build_column_schema(
@@ -508,11 +482,10 @@

        assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());

        let mock_batch_bytes = mock_insert_batches();
        let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
        let insert_batch = mock_insert_batch();

        let create_expr =
            build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
            build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
                .unwrap();

        assert_eq!(table_id, create_expr.table_id);
@@ -598,9 +571,9 @@

        assert!(find_new_columns(&schema, &[]).unwrap().is_none());

        let mock_insert_bytes = mock_insert_batches();
        let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
        let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
        let insert_batch = mock_insert_batch();

        let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();

        assert_eq!(2, add_columns.add_columns.len());
        let host_column = &add_columns.add_columns[0];
@@ -630,10 +603,7 @@
    fn test_insertion_expr_to_request() {
        let table: Arc<dyn Table> = Arc::new(DemoTable {});

        let values = insert_expr::Values {
            values: mock_insert_batches(),
        };
        let insert_batches = insert_batches(&values.values).unwrap();
        let insert_batches = vec![mock_insert_batch()];
        let insert_req =
            insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();

@@ -731,7 +701,7 @@
    }
}

fn mock_insert_batches() -> Vec<Vec<u8>> {
fn mock_insert_batch() -> (Vec<Column>, u32) {
    let row_count = 2;

    let host_vals = column::Values {
@@ -782,10 +752,9 @@
        datatype: ColumnDataType::Timestamp as i32,
    };

    let insert_batch = InsertBatch {
        columns: vec![host_column, cpu_column, mem_column, ts_column],
    (
        vec![host_column, cpu_column, mem_column, ts_column],
        row_count,
    };
    vec![insert_batch.into()]
    )
}
}
@@ -1,3 +1,4 @@
#![feature(assert_matches)]
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,9 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

mod alter;
pub mod error;
mod insert;

pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
pub use insert::{
    build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
    find_new_columns, insert_batches, insertion_expr_to_request,
    find_new_columns, insertion_expr_to_request,
};
@@ -12,7 +12,6 @@ common-error = { path = "../error" }
common-query = { path = "../query" }
common-recordbatch = { path = "../recordbatch" }
common-runtime = { path = "../runtime" }
datatypes = { path = "../../datatypes" }
dashmap = "5.4"
datafusion = "14.0.0"
snafu = { version = "0.7", features = ["backtraces"] }

@@ -14,7 +14,6 @@

use std::collections::HashMap;

use api::v1::codec::InsertBatch;
use api::v1::column::{SemanticType, Values};
use api::v1::{Column, ColumnDataType};
use common_base::BitVec;
@@ -24,12 +23,14 @@ use crate::error::{Result, TypeMismatchSnafu};

type ColumnName = String;

type RowCount = u32;

// TODO(fys): will remove in the future.
#[derive(Default)]
pub struct LinesWriter {
    column_name_index: HashMap<ColumnName, usize>,
    null_masks: Vec<BitVec>,
    batch: InsertBatch,
    batch: (Vec<Column>, RowCount),
    lines: usize,
}

@@ -171,20 +172,20 @@ impl LinesWriter {

    pub fn commit(&mut self) {
        let batch = &mut self.batch;
        batch.row_count += 1;
        batch.1 += 1;

        for i in 0..batch.columns.len() {
        for i in 0..batch.0.len() {
            let null_mask = &mut self.null_masks[i];
            if batch.row_count as usize > null_mask.len() {
            if batch.1 as usize > null_mask.len() {
                null_mask.push(true);
            }
        }
    }

    pub fn finish(mut self) -> InsertBatch {
    pub fn finish(mut self) -> (Vec<Column>, RowCount) {
        let null_masks = self.null_masks;
        for (i, null_mask) in null_masks.into_iter().enumerate() {
            let columns = &mut self.batch.columns;
            let columns = &mut self.batch.0;
            columns[i].null_mask = null_mask.into_vec();
        }
        self.batch
@@ -204,9 +205,9 @@ impl LinesWriter {
        let batch = &mut self.batch;
        let to_insert = self.lines;
        let mut null_mask = BitVec::with_capacity(to_insert);
        null_mask.extend(BitVec::repeat(true, batch.row_count as usize));
        null_mask.extend(BitVec::repeat(true, batch.1 as usize));
        self.null_masks.push(null_mask);
        batch.columns.push(Column {
        batch.0.push(Column {
            column_name: column_name.to_string(),
            semantic_type: semantic_type.into(),
            values: Some(Values::with_capacity(datatype, to_insert)),
@@ -217,7 +218,7 @@ impl LinesWriter {
                new_idx
            }
        };
        (column_idx, &mut self.batch.columns[column_idx])
        (column_idx, &mut self.batch.0[column_idx])
    }
}

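The `commit` above relies on an invariant worth spelling out: after every committed row, each column's null mask must be exactly as long as the row count, so columns that received no value for that row get a `true` (null) bit appended. A minimal stand-alone sketch of that bookkeeping, using `Vec<bool>` in place of the real `common_base::BitVec` (all names here are illustrative):

// Illustrative model of the LinesWriter null-mask bookkeeping.
struct MiniWriter {
    row_count: u32,
    // One mask per column; `true` means "null in this row".
    null_masks: Vec<Vec<bool>>,
}

impl MiniWriter {
    fn write(&mut self, col: usize) {
        // Appending a value marks the current row as not-null for that column.
        self.null_masks[col].push(false);
    }

    fn commit(&mut self) {
        self.row_count += 1;
        // Pad columns that saw no value this row with a null bit, keeping
        // every mask exactly `row_count` bits long -- the invariant that
        // `finish` relies on when it moves masks into the columns.
        for mask in &mut self.null_masks {
            if self.row_count as usize > mask.len() {
                mask.push(true);
            }
        }
    }
}

fn main() {
    let mut w = MiniWriter {
        row_count: 0,
        null_masks: vec![Vec::new(), Vec::new()],
    };
    w.write(0); // row 1 sets only column 0
    w.commit();
    assert_eq!(vec![false], w.null_masks[0]);
    assert_eq!(vec![true], w.null_masks[1]); // column 1 padded as null
}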
@@ -282,9 +283,9 @@ mod tests {
        writer.commit();

        let insert_batch = writer.finish();
        assert_eq!(3, insert_batch.row_count);
        assert_eq!(3, insert_batch.1);

        let columns = insert_batch.columns;
        let columns = insert_batch.0;
        assert_eq!(9, columns.len());

        let column = &columns[0];

@@ -23,6 +23,7 @@ use snafu::ResultExt;

use crate::error::{self, Result};

// TODO(yingwen): We should hold vectors in the RecordBatch.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
    pub schema: SchemaRef,
@@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
        } else {
            let mut row = Vec::with_capacity(self.columns);

            // TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
            for col in 0..self.columns {
                let column_array = self.record_batch.df_recordbatch.column(col);
                match arrow_array_get(column_array.as_ref(), self.row_cursor)

@@ -9,7 +9,9 @@ bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datafusion = "14.0.0"
datafusion-expr = "14.0.0"
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"

66
src/common/substrait/src/context.rs
Normal file
@@ -0,0 +1,66 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use substrait_proto::protobuf::extensions::simple_extension_declaration::{
    ExtensionFunction, MappingType,
};
use substrait_proto::protobuf::extensions::SimpleExtensionDeclaration;

#[derive(Default)]
pub struct ConvertorContext {
    scalar_fn_names: HashMap<String, u32>,
    scalar_fn_map: HashMap<u32, String>,
}

impl ConvertorContext {
    pub fn register_scalar_fn<S: AsRef<str>>(&mut self, name: S) -> u32 {
        if let Some(anchor) = self.scalar_fn_names.get(name.as_ref()) {
            return *anchor;
        }

        let next_anchor = self.scalar_fn_map.len() as _;
        self.scalar_fn_map
            .insert(next_anchor, name.as_ref().to_string());
        self.scalar_fn_names
            .insert(name.as_ref().to_string(), next_anchor);
        next_anchor
    }

    pub fn register_scalar_with_anchor<S: AsRef<str>>(&mut self, name: S, anchor: u32) {
        self.scalar_fn_map.insert(anchor, name.as_ref().to_string());
        self.scalar_fn_names
            .insert(name.as_ref().to_string(), anchor);
    }

    pub fn find_scalar_fn(&self, anchor: u32) -> Option<&str> {
        self.scalar_fn_map.get(&anchor).map(|s| s.as_str())
    }

    pub fn generate_function_extension(&self) -> Vec<SimpleExtensionDeclaration> {
        let mut result = Vec::with_capacity(self.scalar_fn_map.len());
        for (anchor, name) in &self.scalar_fn_map {
            let declaration = SimpleExtensionDeclaration {
                mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
                    extension_uri_reference: 0,
                    function_anchor: *anchor,
                    name: name.clone(),
                })),
            };
            result.push(declaration);
        }
        result
    }
}
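`ConvertorContext` keeps a two-way mapping between function names and the numeric anchors that Substrait uses to reference them, and `register_scalar_fn` is idempotent per name. A hedged sketch of the same idea with plain `HashMap`s, independent of the substrait_proto types (names here are stand-ins, not the real API):

use std::collections::HashMap;

// Stand-alone model of the name<->anchor registry; the real
// ConvertorContext additionally emits SimpleExtensionDeclaration values.
#[derive(Default)]
struct AnchorRegistry {
    names: HashMap<String, u32>,
    anchors: HashMap<u32, String>,
}

impl AnchorRegistry {
    fn register(&mut self, name: &str) -> u32 {
        if let Some(anchor) = self.names.get(name) {
            return *anchor; // registering twice returns the same anchor
        }
        let next = self.anchors.len() as u32;
        self.anchors.insert(next, name.to_string());
        self.names.insert(name.to_string(), next);
        next
    }

    fn find(&self, anchor: u32) -> Option<&str> {
        self.anchors.get(&anchor).map(|s| s.as_str())
    }
}

fn main() {
    let mut reg = AnchorRegistry::default();
    let a = reg.register("and");
    let b = reg.register("lte");
    assert_ne!(a, b);
    // Idempotent: a second registration of "and" reuses its anchor,
    // which is what lets the encoder call register_scalar_fn freely.
    assert_eq!(a, reg.register("and"));
    assert_eq!(Some("lte"), reg.find(b));
}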
742
src/common/substrait/src/df_expr.rs
Normal file
@@ -0,0 +1,742 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::VecDeque;
use std::str::FromStr;

use datafusion::logical_plan::{Column, Expr};
use datafusion_expr::{expr_fn, BuiltinScalarFunction, Operator};
use datatypes::schema::Schema;
use snafu::{ensure, OptionExt};
use substrait_proto::protobuf::expression::field_reference::ReferenceType as FieldReferenceType;
use substrait_proto::protobuf::expression::reference_segment::{
    ReferenceType as SegReferenceType, StructField,
};
use substrait_proto::protobuf::expression::{
    FieldReference, ReferenceSegment, RexType, ScalarFunction,
};
use substrait_proto::protobuf::function_argument::ArgType;
use substrait_proto::protobuf::Expression;

use crate::context::ConvertorContext;
use crate::error::{
    EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
};

/// Convert substrait's `Expression` to DataFusion's `Expr`.
pub fn to_df_expr(ctx: &ConvertorContext, expression: Expression, schema: &Schema) -> Result<Expr> {
    let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
    match expr_rex_type {
        RexType::Literal(_) => UnsupportedExprSnafu {
            name: "substrait Literal expression",
        }
        .fail()?,
        RexType::Selection(selection) => convert_selection_rex(*selection, schema),
        RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
        RexType::WindowFunction(_)
        | RexType::IfThen(_)
        | RexType::SwitchExpression(_)
        | RexType::SingularOrList(_)
        | RexType::MultiOrList(_)
        | RexType::Cast(_)
        | RexType::Subquery(_)
        | RexType::Enum(_) => UnsupportedExprSnafu {
            name: format!("substrait expression {:?}", expr_rex_type),
        }
        .fail()?,
    }
}

/// Convert Substrait's `FieldReference` - `DirectReference` - `StructField` to Datafusion's
/// `Column` expr.
pub fn convert_selection_rex(selection: FieldReference, schema: &Schema) -> Result<Expr> {
    if let Some(FieldReferenceType::DirectReference(direct_ref)) = selection.reference_type
        && let Some(SegReferenceType::StructField(field)) = direct_ref.reference_type {
        let column_name = schema.column_name_by_index(field.field as _).to_string();
        Ok(Expr::Column(Column {
            relation: None,
            name: column_name,
        }))
    } else {
        InvalidParametersSnafu {
            reason: "Only support direct struct reference in Selection Rex",
        }
        .fail()
    }
}

pub fn convert_scalar_function(
    ctx: &ConvertorContext,
    scalar_fn: ScalarFunction,
    schema: &Schema,
) -> Result<Expr> {
    // convert argument
    let mut inputs = VecDeque::with_capacity(scalar_fn.arguments.len());
    for arg in scalar_fn.arguments {
        if let Some(ArgType::Value(sub_expr)) = arg.arg_type {
            inputs.push_back(to_df_expr(ctx, sub_expr, schema)?);
        } else {
            InvalidParametersSnafu {
                reason: "Only value expression arg is supported to be function argument",
            }
            .fail()?;
        }
    }

    // convert this scalar function
    // map function name
    let anchor = scalar_fn.function_reference;
    let fn_name = ctx
        .find_scalar_fn(anchor)
        .with_context(|| InvalidParametersSnafu {
            reason: format!("Unregistered scalar function reference: {}", anchor),
        })?;

    // convenient util
    let ensure_arg_len = |expected: usize| -> Result<()> {
        ensure!(
            inputs.len() == expected,
            InvalidParametersSnafu {
                reason: format!(
"Invalid number of scalar function {}, expected {} but found {}",
|
||||
                    fn_name,
                    expected,
                    inputs.len()
                )
            }
        );
        Ok(())
    };

    // construct DataFusion expr
    let expr = match fn_name {
        // begin binary exprs, with the same order of DF `Operator`'s definition.
        "eq" | "equal" => {
            ensure_arg_len(2)?;
            inputs.pop_front().unwrap().eq(inputs.pop_front().unwrap())
        }
        "not_eq" | "not_equal" => {
            ensure_arg_len(2)?;
            inputs
                .pop_front()
                .unwrap()
                .not_eq(inputs.pop_front().unwrap())
        }
        "lt" => {
            ensure_arg_len(2)?;
            inputs.pop_front().unwrap().lt(inputs.pop_front().unwrap())
        }
        "lt_eq" | "lte" => {
            ensure_arg_len(2)?;
            inputs
                .pop_front()
                .unwrap()
                .lt_eq(inputs.pop_front().unwrap())
        }
        "gt" => {
            ensure_arg_len(2)?;
            inputs.pop_front().unwrap().gt(inputs.pop_front().unwrap())
        }
        "gt_eq" | "gte" => {
            ensure_arg_len(2)?;
            inputs
                .pop_front()
                .unwrap()
                .gt_eq(inputs.pop_front().unwrap())
        }
        "plus" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::Plus,
                inputs.pop_front().unwrap(),
            )
        }
        "minus" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::Minus,
                inputs.pop_front().unwrap(),
            )
        }
        "multiply" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::Multiply,
                inputs.pop_front().unwrap(),
            )
        }
        "divide" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::Divide,
                inputs.pop_front().unwrap(),
            )
        }
        "modulo" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::Modulo,
                inputs.pop_front().unwrap(),
            )
        }
        "and" => {
            ensure_arg_len(2)?;
            expr_fn::and(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
        }
        "or" => {
            ensure_arg_len(2)?;
            expr_fn::or(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
        }
        "like" => {
            ensure_arg_len(2)?;
            inputs
                .pop_front()
                .unwrap()
                .like(inputs.pop_front().unwrap())
        }
        "not_like" => {
            ensure_arg_len(2)?;
            inputs
                .pop_front()
                .unwrap()
                .not_like(inputs.pop_front().unwrap())
        }
        "is_distinct_from" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::IsDistinctFrom,
                inputs.pop_front().unwrap(),
            )
        }
        "is_not_distinct_from" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::IsNotDistinctFrom,
                inputs.pop_front().unwrap(),
            )
        }
        "regex_match" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::RegexMatch,
                inputs.pop_front().unwrap(),
            )
        }
        "regex_i_match" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::RegexIMatch,
                inputs.pop_front().unwrap(),
            )
        }
        "regex_not_match" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::RegexNotMatch,
                inputs.pop_front().unwrap(),
            )
        }
        "regex_not_i_match" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::RegexNotIMatch,
                inputs.pop_front().unwrap(),
            )
        }
        "bitwise_and" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::BitwiseAnd,
                inputs.pop_front().unwrap(),
            )
        }
        "bitwise_or" => {
            ensure_arg_len(2)?;
            expr_fn::binary_expr(
                inputs.pop_front().unwrap(),
                Operator::BitwiseOr,
                inputs.pop_front().unwrap(),
            )
        }
        // end binary exprs
        // start other direct expr, with the same order of DF `Expr`'s definition.
        "not" => {
            ensure_arg_len(1)?;
            inputs.pop_front().unwrap().not()
        }
        "is_not_null" => {
            ensure_arg_len(1)?;
            inputs.pop_front().unwrap().is_not_null()
        }
        "is_null" => {
            ensure_arg_len(1)?;
            inputs.pop_front().unwrap().is_null()
        }
        "negative" => {
            ensure_arg_len(1)?;
            Expr::Negative(Box::new(inputs.pop_front().unwrap()))
        }
        // skip GetIndexedField, unimplemented.
        "between" => {
            ensure_arg_len(3)?;
            Expr::Between {
                expr: Box::new(inputs.pop_front().unwrap()),
                negated: false,
                low: Box::new(inputs.pop_front().unwrap()),
                high: Box::new(inputs.pop_front().unwrap()),
            }
        }
        "not_between" => {
            ensure_arg_len(3)?;
            Expr::Between {
                expr: Box::new(inputs.pop_front().unwrap()),
                negated: true,
                low: Box::new(inputs.pop_front().unwrap()),
                high: Box::new(inputs.pop_front().unwrap()),
            }
        }
        // skip Case, is covered in substrait::SwitchExpression.
        // skip Cast and TryCast, is covered in substrait::Cast.
        "sort" | "sort_des" => {
            ensure_arg_len(1)?;
            Expr::Sort {
                expr: Box::new(inputs.pop_front().unwrap()),
                asc: false,
                nulls_first: false,
            }
        }
        "sort_asc" => {
            ensure_arg_len(1)?;
            Expr::Sort {
                expr: Box::new(inputs.pop_front().unwrap()),
                asc: true,
                nulls_first: false,
            }
        }
        // those are datafusion built-in "scalar functions".
        "abs"
        | "acos"
        | "asin"
        | "atan"
        | "atan2"
        | "ceil"
        | "cos"
        | "exp"
        | "floor"
        | "ln"
        | "log"
        | "log10"
        | "log2"
        | "power"
        | "pow"
        | "round"
        | "signum"
        | "sin"
        | "sqrt"
        | "tan"
        | "trunc"
        | "coalesce"
        | "make_array"
        | "ascii"
        | "bit_length"
        | "btrim"
        | "char_length"
        | "character_length"
        | "concat"
        | "concat_ws"
        | "chr"
        | "current_date"
        | "current_time"
        | "date_part"
        | "datepart"
        | "date_trunc"
        | "datetrunc"
        | "date_bin"
        | "initcap"
        | "left"
        | "length"
        | "lower"
        | "lpad"
        | "ltrim"
        | "md5"
        | "nullif"
        | "octet_length"
        | "random"
        | "regexp_replace"
        | "repeat"
        | "replace"
        | "reverse"
        | "right"
        | "rpad"
        | "rtrim"
        | "sha224"
        | "sha256"
        | "sha384"
        | "sha512"
        | "digest"
        | "split_part"
        | "starts_with"
        | "strpos"
        | "substr"
        | "to_hex"
        | "to_timestamp"
        | "to_timestamp_millis"
        | "to_timestamp_micros"
        | "to_timestamp_seconds"
        | "now"
        | "translate"
        | "trim"
        | "upper"
        | "uuid"
        | "regexp_match"
        | "struct"
        | "from_unixtime"
        | "arrow_typeof" => Expr::ScalarFunction {
            fun: BuiltinScalarFunction::from_str(fn_name).unwrap(),
            args: inputs.into(),
        },
        // skip ScalarUDF, unimplemented.
        // skip AggregateFunction, is covered in substrait::AggregateRel
        // skip WindowFunction, is covered in substrait WindowFunction
        // skip AggregateUDF, unimplemented.
        // skip InList, unimplemented
        // skip Wildcard, unimplemented.
        // end other direct expr
        _ => UnsupportedExprSnafu {
            name: format!("scalar function {}", fn_name),
        }
        .fail()?,
    };

    Ok(expr)
}

/// Convert DataFusion's `Expr` to substrait's `Expression`
pub fn expression_from_df_expr(
    ctx: &mut ConvertorContext,
    expr: &Expr,
    schema: &Schema,
) -> Result<Expression> {
    let expression = match expr {
        // Don't merge them with other unsupported expr arms to preserve the ordering.
        Expr::Alias(..) => UnsupportedExprSnafu {
            name: expr.to_string(),
        }
        .fail()?,
        Expr::Column(column) => {
            let field_reference = convert_column(column, schema)?;
            Expression {
                rex_type: Some(RexType::Selection(Box::new(field_reference))),
            }
        }
        // Don't merge them with other unsupported expr arms to preserve the ordering.
        Expr::ScalarVariable(..) | Expr::Literal(..) => UnsupportedExprSnafu {
            name: expr.to_string(),
        }
        .fail()?,
        Expr::BinaryExpr { left, op, right } => {
            let left = expression_from_df_expr(ctx, left, schema)?;
            let right = expression_from_df_expr(ctx, right, schema)?;
            let arguments = utils::expression_to_argument(vec![left, right]);
            let op_name = utils::name_df_operator(op);
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        Expr::Not(e) => {
            let arg = expression_from_df_expr(ctx, e, schema)?;
            let arguments = utils::expression_to_argument(vec![arg]);
            let op_name = "not";
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        Expr::IsNotNull(e) => {
            let arg = expression_from_df_expr(ctx, e, schema)?;
            let arguments = utils::expression_to_argument(vec![arg]);
            let op_name = "is_not_null";
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        Expr::IsNull(e) => {
            let arg = expression_from_df_expr(ctx, e, schema)?;
            let arguments = utils::expression_to_argument(vec![arg]);
            let op_name = "is_null";
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        Expr::Negative(e) => {
            let arg = expression_from_df_expr(ctx, e, schema)?;
            let arguments = utils::expression_to_argument(vec![arg]);
            let op_name = "negative";
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        // Don't merge them with other unsupported expr arms to preserve the ordering.
        Expr::GetIndexedField { .. } => UnsupportedExprSnafu {
            name: expr.to_string(),
        }
        .fail()?,
        Expr::Between {
            expr,
            negated,
            low,
            high,
        } => {
            let expr = expression_from_df_expr(ctx, expr, schema)?;
            let low = expression_from_df_expr(ctx, low, schema)?;
            let high = expression_from_df_expr(ctx, high, schema)?;
            let arguments = utils::expression_to_argument(vec![expr, low, high]);
            let op_name = if *negated { "not_between" } else { "between" };
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        // Don't merge them with other unsupported expr arms to preserve the ordering.
        Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } => UnsupportedExprSnafu {
            name: expr.to_string(),
        }
        .fail()?,
        Expr::Sort {
            expr,
            asc,
            nulls_first: _,
        } => {
            let expr = expression_from_df_expr(ctx, expr, schema)?;
            let arguments = utils::expression_to_argument(vec![expr]);
            let op_name = if *asc { "sort_asc" } else { "sort_des" };
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        Expr::ScalarFunction { fun, args } => {
            let arguments = utils::expression_to_argument(
                args.iter()
                    .map(|e| expression_from_df_expr(ctx, e, schema))
                    .collect::<Result<Vec<_>>>()?,
            );
            let op_name = utils::name_builtin_scalar_function(fun);
            let function_reference = ctx.register_scalar_fn(op_name);
            utils::build_scalar_function_expression(function_reference, arguments)
        }
        // Don't merge them with other unsupported expr arms to preserve the ordering.
        Expr::ScalarUDF { .. }
        | Expr::AggregateFunction { .. }
        | Expr::WindowFunction { .. }
        | Expr::AggregateUDF { .. }
        | Expr::InList { .. }
        | Expr::Wildcard => UnsupportedExprSnafu {
            name: expr.to_string(),
        }
        .fail()?,
    };

    Ok(expression)
}

/// Convert DataFusion's `Column` expr into substrait's `FieldReference` -
/// `DirectReference` - `StructField`.
pub fn convert_column(column: &Column, schema: &Schema) -> Result<FieldReference> {
    let column_name = &column.name;
    let field_index =
        schema
            .column_index_by_name(column_name)
            .with_context(|| MissingFieldSnafu {
                field: format!("{:?}", column),
                plan: format!("schema: {:?}", schema),
            })?;

    Ok(FieldReference {
        reference_type: Some(FieldReferenceType::DirectReference(ReferenceSegment {
            reference_type: Some(SegReferenceType::StructField(Box::new(StructField {
                field: field_index as _,
                child: None,
            }))),
        })),
        root_type: None,
    })
}

/// Some utils special for this `DataFusion::Expr` and `Substrait::Expression` conversion.
mod utils {
    use datafusion_expr::{BuiltinScalarFunction, Operator};
    use substrait_proto::protobuf::expression::{RexType, ScalarFunction};
    use substrait_proto::protobuf::function_argument::ArgType;
    use substrait_proto::protobuf::{Expression, FunctionArgument};

    pub(crate) fn name_df_operator(op: &Operator) -> &str {
        match op {
            Operator::Eq => "equal",
            Operator::NotEq => "not_equal",
            Operator::Lt => "lt",
            Operator::LtEq => "lte",
            Operator::Gt => "gt",
            Operator::GtEq => "gte",
            Operator::Plus => "plus",
            Operator::Minus => "minus",
            Operator::Multiply => "multiply",
            Operator::Divide => "divide",
            Operator::Modulo => "modulo",
            Operator::And => "and",
            Operator::Or => "or",
            Operator::Like => "like",
            Operator::NotLike => "not_like",
            Operator::IsDistinctFrom => "is_distinct_from",
            Operator::IsNotDistinctFrom => "is_not_distinct_from",
            Operator::RegexMatch => "regex_match",
            Operator::RegexIMatch => "regex_i_match",
            Operator::RegexNotMatch => "regex_not_match",
            Operator::RegexNotIMatch => "regex_not_i_match",
            Operator::BitwiseAnd => "bitwise_and",
            Operator::BitwiseOr => "bitwise_or",
        }
    }

    /// Convert list of [Expression] to [FunctionArgument] vector.
    pub(crate) fn expression_to_argument<I: IntoIterator<Item = Expression>>(
        expressions: I,
    ) -> Vec<FunctionArgument> {
        expressions
            .into_iter()
            .map(|expr| FunctionArgument {
                arg_type: Some(ArgType::Value(expr)),
            })
            .collect()
    }

    /// Convenient builder for [Expression]
    pub(crate) fn build_scalar_function_expression(
        function_reference: u32,
        arguments: Vec<FunctionArgument>,
    ) -> Expression {
        Expression {
            rex_type: Some(RexType::ScalarFunction(ScalarFunction {
                function_reference,
                arguments,
                output_type: None,
                ..Default::default()
            })),
        }
    }

    pub(crate) fn name_builtin_scalar_function(fun: &BuiltinScalarFunction) -> &str {
        match fun {
            BuiltinScalarFunction::Abs => "abs",
            BuiltinScalarFunction::Acos => "acos",
            BuiltinScalarFunction::Asin => "asin",
            BuiltinScalarFunction::Atan => "atan",
            BuiltinScalarFunction::Ceil => "ceil",
            BuiltinScalarFunction::Cos => "cos",
            BuiltinScalarFunction::Digest => "digest",
            BuiltinScalarFunction::Exp => "exp",
            BuiltinScalarFunction::Floor => "floor",
            BuiltinScalarFunction::Ln => "ln",
            BuiltinScalarFunction::Log => "log",
            BuiltinScalarFunction::Log10 => "log10",
            BuiltinScalarFunction::Log2 => "log2",
            BuiltinScalarFunction::Round => "round",
            BuiltinScalarFunction::Signum => "signum",
            BuiltinScalarFunction::Sin => "sin",
            BuiltinScalarFunction::Sqrt => "sqrt",
            BuiltinScalarFunction::Tan => "tan",
            BuiltinScalarFunction::Trunc => "trunc",
            BuiltinScalarFunction::Array => "make_array",
            BuiltinScalarFunction::Ascii => "ascii",
            BuiltinScalarFunction::BitLength => "bit_length",
            BuiltinScalarFunction::Btrim => "btrim",
            BuiltinScalarFunction::CharacterLength => "character_length",
            BuiltinScalarFunction::Chr => "chr",
            BuiltinScalarFunction::Concat => "concat",
            BuiltinScalarFunction::ConcatWithSeparator => "concat_ws",
            BuiltinScalarFunction::DatePart => "date_part",
            BuiltinScalarFunction::DateTrunc => "date_trunc",
            BuiltinScalarFunction::InitCap => "initcap",
            BuiltinScalarFunction::Left => "left",
            BuiltinScalarFunction::Lpad => "lpad",
            BuiltinScalarFunction::Lower => "lower",
            BuiltinScalarFunction::Ltrim => "ltrim",
            BuiltinScalarFunction::MD5 => "md5",
            BuiltinScalarFunction::NullIf => "nullif",
            BuiltinScalarFunction::OctetLength => "octet_length",
            BuiltinScalarFunction::Random => "random",
            BuiltinScalarFunction::RegexpReplace => "regexp_replace",
            BuiltinScalarFunction::Repeat => "repeat",
            BuiltinScalarFunction::Replace => "replace",
            BuiltinScalarFunction::Reverse => "reverse",
            BuiltinScalarFunction::Right => "right",
            BuiltinScalarFunction::Rpad => "rpad",
            BuiltinScalarFunction::Rtrim => "rtrim",
            BuiltinScalarFunction::SHA224 => "sha224",
            BuiltinScalarFunction::SHA256 => "sha256",
            BuiltinScalarFunction::SHA384 => "sha384",
            BuiltinScalarFunction::SHA512 => "sha512",
            BuiltinScalarFunction::SplitPart => "split_part",
            BuiltinScalarFunction::StartsWith => "starts_with",
            BuiltinScalarFunction::Strpos => "strpos",
            BuiltinScalarFunction::Substr => "substr",
            BuiltinScalarFunction::ToHex => "to_hex",
            BuiltinScalarFunction::ToTimestamp => "to_timestamp",
            BuiltinScalarFunction::ToTimestampMillis => "to_timestamp_millis",
BuiltinScalarFunction::ToTimestampMicros => "to_timestamp_macros",
|
||||
BuiltinScalarFunction::ToTimestampSeconds => "to_timestamp_seconds",
|
||||
BuiltinScalarFunction::Now => "now",
|
||||
BuiltinScalarFunction::Translate => "translate",
|
||||
BuiltinScalarFunction::Trim => "trim",
|
||||
BuiltinScalarFunction::Upper => "upper",
|
||||
BuiltinScalarFunction::RegexpMatch => "regexp_match",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use datatypes::schema::ColumnSchema;

    use super::*;

    #[test]
    fn expr_round_trip() {
        let expr = expr_fn::and(
            expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
            expr_fn::col("column_a").gt(expr_fn::col("column_b")),
        );

        let schema = Schema::new(vec![
            ColumnSchema::new(
                "column_a",
                datatypes::data_type::ConcreteDataType::int64_datatype(),
                true,
            ),
            ColumnSchema::new(
                "column_b",
                datatypes::data_type::ConcreteDataType::float64_datatype(),
                true,
            ),
        ]);

        let mut ctx = ConvertorContext::default();
        let substrait_expr = expression_from_df_expr(&mut ctx, &expr, &schema).unwrap();
        let converted_expr = to_df_expr(&ctx, substrait_expr, &schema).unwrap();

        assert_eq!(expr, converted_expr);
    }
}

@@ -17,6 +17,7 @@ use std::sync::Arc;
use bytes::{Buf, Bytes, BytesMut};
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use common_telemetry::debug;
use datafusion::datasource::TableProvider;
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
use datafusion::physical_plan::project_schema;
@@ -24,12 +25,15 @@ use prost::Message;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::protobuf::expression::MaskExpression;
use substrait_proto::protobuf::extensions::simple_extension_declaration::MappingType;
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
use substrait_proto::protobuf::rel::RelType;
use substrait_proto::protobuf::{PlanRel, ReadRel, Rel};
use substrait_proto::protobuf::{Plan, PlanRel, ReadRel, Rel};
use table::table::adapter::DfTableProviderAdapter;

use crate::context::ConvertorContext;
use crate::df_expr::{expression_from_df_expr, to_df_expr};
use crate::error::{
    DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
    InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
@@ -48,25 +52,15 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
    type Plan = LogicalPlan;

    fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
        let plan_rel = PlanRel::decode(message).context(DecodeRelSnafu)?;
        let rel = match plan_rel.rel_type.context(EmptyPlanSnafu)? {
            PlanRelType::Rel(rel) => rel,
            PlanRelType::Root(_) => UnsupportedPlanSnafu {
                name: "Root Relation",
            }
            .fail()?,
        };
        self.convert_rel(rel)
        let plan = Plan::decode(message).context(DecodeRelSnafu)?;
        self.convert_plan(plan)
    }

    fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
        let rel = self.convert_plan(plan)?;
        let plan_rel = PlanRel {
            rel_type: Some(PlanRelType::Rel(rel)),
        };
        let plan = self.convert_df_plan(plan)?;

        let mut buf = BytesMut::new();
        plan_rel.encode(&mut buf).context(EncodeRelSnafu)?;
        plan.encode(&mut buf).context(EncodeRelSnafu)?;

        Ok(buf.freeze())
    }
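
Taken together, `encode` and `decode` now round-trip a DataFusion plan through a full Substrait `Plan` message instead of a bare relation. A hedged usage sketch (editorial; assumes a `DFLogicalSubstraitConvertor` value `convertor` and some supported `LogicalPlan` value `plan`, with error handling elided):

    // LogicalPlan -> encoded substrait Plan bytes -> LogicalPlan.
    let bytes = convertor.encode(plan)?;
    let decoded = convertor.decode(bytes)?;
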
@@ -79,10 +73,37 @@ impl DFLogicalSubstraitConvertor {
}

impl DFLogicalSubstraitConvertor {
    pub fn convert_rel(&self, rel: Rel) -> Result<LogicalPlan, Error> {
    pub fn convert_plan(&self, mut plan: Plan) -> Result<LogicalPlan, Error> {
        // prepare convertor context
        let mut ctx = ConvertorContext::default();
        for simple_ext in plan.extensions {
            if let Some(MappingType::ExtensionFunction(function_extension)) =
                simple_ext.mapping_type
            {
                ctx.register_scalar_with_anchor(
                    function_extension.name,
                    function_extension.function_anchor,
                );
            } else {
                debug!("Encounter unsupported substrait extension {:?}", simple_ext);
            }
        }

        // extract rel
        let rel = if let Some(PlanRel { rel_type }) = plan.relations.pop()
            && let Some(PlanRelType::Rel(rel)) = rel_type {
            rel
        } else {
            UnsupportedPlanSnafu {
                name: "Empty or non-Rel relation",
            }
            .fail()?
        };
        let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;

        // build logical plan
        let logical_plan = match rel_type {
            RelType::Read(read_rel) => self.convert_read_rel(read_rel),
            RelType::Read(read_rel) => self.convert_read_rel(&mut ctx, read_rel),
            RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
                name: "Filter Relation",
            }
@@ -132,9 +153,12 @@ impl DFLogicalSubstraitConvertor {
        Ok(logical_plan)
    }

    fn convert_read_rel(&self, read_rel: Box<ReadRel>) -> Result<LogicalPlan, Error> {
    fn convert_read_rel(
        &self,
        ctx: &mut ConvertorContext,
        read_rel: Box<ReadRel>,
    ) -> Result<LogicalPlan, Error> {
        // Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.

        let read_type = read_rel.read_type.context(MissingFieldSnafu {
            field: "read_type",
            plan: "Read",
@@ -190,6 +214,13 @@ impl DFLogicalSubstraitConvertor {
            }
        );

        // Convert filter
        let filters = if let Some(filter) = read_rel.filter {
            vec![to_df_expr(ctx, *filter, &retrieved_schema)?]
        } else {
            vec![]
        };

        // Calculate the projected schema
        let projected_schema = project_schema(&stored_schema, projection.as_ref())
            .context(DFInternalSnafu)?
@@ -202,7 +233,7 @@ impl DFLogicalSubstraitConvertor {
            source: adapter,
            projection,
            projected_schema,
            filters: vec![],
            filters,
            limit: None,
        }))
    }
@@ -219,8 +250,12 @@ impl DFLogicalSubstraitConvertor {
}

impl DFLogicalSubstraitConvertor {
    pub fn convert_plan(&self, plan: LogicalPlan) -> Result<Rel, Error> {
        match plan {
    pub fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
        let mut ctx = ConvertorContext::default();

        // TODO(ruihang): extract this translation logic into a separated function
        // convert PlanRel
        let rel = match plan {
            LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Projection",
            }
@@ -258,10 +293,10 @@ impl DFLogicalSubstraitConvertor {
            }
            .fail()?,
            LogicalPlan::TableScan(table_scan) => {
                let read_rel = self.convert_table_scan_plan(table_scan)?;
                Ok(Rel {
                let read_rel = self.convert_table_scan_plan(&mut ctx, table_scan)?;
                Rel {
                    rel_type: Some(RelType::Read(Box::new(read_rel))),
                })
                }
            }
            LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical EmptyRelation",
@@ -284,10 +319,30 @@ impl DFLogicalSubstraitConvertor {
                ),
            }
            .fail()?,
        }
        };

        // convert extension
        let extensions = ctx.generate_function_extension();

        // assemble PlanRel
        let plan_rel = PlanRel {
            rel_type: Some(PlanRelType::Rel(rel)),
        };

        Ok(Plan {
            extension_uris: vec![],
            extensions,
            relations: vec![plan_rel],
            advanced_extensions: None,
            expected_type_urls: vec![],
        })
    }

    pub fn convert_table_scan_plan(&self, table_scan: TableScan) -> Result<ReadRel, Error> {
    pub fn convert_table_scan_plan(
        &self,
        ctx: &mut ConvertorContext,
        table_scan: TableScan,
    ) -> Result<ReadRel, Error> {
        let provider = table_scan
            .source
            .as_any()
@@ -313,10 +368,25 @@ impl DFLogicalSubstraitConvertor {
        // assemble base (unprojected) schema using Table's schema.
        let base_schema = from_schema(&provider.table().schema())?;

        // make conjunction over a list of filters and convert the result to substrait
        let filter = if let Some(conjunction) = table_scan
            .filters
            .into_iter()
            .reduce(|accum, expr| accum.and(expr))
        {
            Some(Box::new(expression_from_df_expr(
                ctx,
                &conjunction,
                &provider.table().schema(),
            )?))
        } else {
            None
        };

        let read_rel = ReadRel {
            common: None,
            base_schema: Some(base_schema),
            filter: None,
            filter,
            projection,
            advanced_extension: None,
            read_type: Some(read_type),

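The scan conversion above folds all pushed-down filters into a single conjunction before translating it to Substrait. The folding step in isolation (editorial sketch; reuses the `expr_fn` helpers already used by the round-trip test earlier in this commit):

    let filters = vec![
        expr_fn::col("column_a").gt(expr_fn::col("column_b")),
        expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
    ];
    // `None` when there are no filters; otherwise `Some(f1 AND f2 AND ...)`.
    let conjunction = filters.into_iter().reduce(|accum, expr| accum.and(expr));
    assert!(conjunction.is_some());
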
@@ -23,10 +23,10 @@ use snafu::{Backtrace, ErrorCompat, Snafu};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Unsupported physical expr: {}", name))]
    #[snafu(display("Unsupported physical plan: {}", name))]
    UnsupportedPlan { name: String, backtrace: Backtrace },

    #[snafu(display("Unsupported physical plan: {}", name))]
    #[snafu(display("Unsupported expr: {}", name))]
    UnsupportedExpr { name: String, backtrace: Backtrace },

    #[snafu(display("Unsupported concrete type: {:?}", ty))]

@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(let_chains)]

mod context;
mod df_expr;
mod df_logical;
pub mod error;
mod schema;

@@ -147,6 +147,18 @@ impl From<i64> for Timestamp {
    }
}

impl From<Timestamp> for i64 {
    fn from(t: Timestamp) -> Self {
        t.value
    }
}

impl From<Timestamp> for serde_json::Value {
    fn from(d: Timestamp) -> Self {
        serde_json::Value::String(d.to_iso8601_string())
    }
}

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeUnit {
    Second,
@@ -197,6 +209,7 @@ impl Hash for Timestamp {
#[cfg(test)]
mod tests {
    use chrono::Offset;
    use serde_json::Value;

    use super::*;

@@ -318,4 +331,39 @@ mod tests {
        let ts = Timestamp::from_millis(ts_millis);
        assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
    }

    #[test]
    fn test_serialize_to_json_value() {
        assert_eq!(
            "1970-01-01 00:00:01+0000",
            match serde_json::Value::from(Timestamp::new(1, TimeUnit::Second)) {
                Value::String(s) => s,
                _ => unreachable!(),
            }
        );

        assert_eq!(
            "1970-01-01 00:00:00.001+0000",
            match serde_json::Value::from(Timestamp::new(1, TimeUnit::Millisecond)) {
                Value::String(s) => s,
                _ => unreachable!(),
            }
        );

        assert_eq!(
            "1970-01-01 00:00:00.000001+0000",
            match serde_json::Value::from(Timestamp::new(1, TimeUnit::Microsecond)) {
                Value::String(s) => s,
                _ => unreachable!(),
            }
        );

        assert_eq!(
            "1970-01-01 00:00:00.000000001+0000",
            match serde_json::Value::from(Timestamp::new(1, TimeUnit::Nanosecond)) {
                Value::String(s) => s,
                _ => unreachable!(),
            }
        );
    }
}

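A small sketch exercising the two new `From<Timestamp>` impls added above (editorial; the expected JSON string mirrors the test just shown):

    let raw: i64 = Timestamp::new(1, TimeUnit::Second).into();
    assert_eq!(raw, 1);
    let json = serde_json::Value::from(Timestamp::new(1, TimeUnit::Second));
    assert_eq!(
        json,
        serde_json::Value::String("1970-01-01 00:00:01+0000".to_string())
    );
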
@@ -11,13 +11,15 @@ python = ["dep:script"]
[dependencies]
api = { path = "../api" }
async-trait = "0.1"
axum = "0.6.0-rc.2"
axum-macros = "0.3.0-rc.1"
axum = "0.6"
axum-macros = "0.3"
backon = "0.2"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
@@ -26,36 +28,39 @@ common-time = { path = "../common/time" }
common-insert = { path = "../common/insert" }
datafusion = "14.0.0"
datatypes = { path = "../datatypes" }
frontend = { path = "../frontend" }
futures = "0.3"
hyper = { version = "0.14", features = ["full"] }
log-store = { path = "../log-store" }
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv", features = ["mock"] }
metrics = "0.20"
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
query = { path = "../query" }
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
servers = { path = "../servers" }
session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] }
sql = { path = "../sql" }
storage = { path = "../storage" }
store-api = { path = "../store-api" }
substrait = { path = "../common/substrait" }
table = { path = "../table" }
mito = { path = "../mito", features = ["test"] }
tokio = { version = "1.18", features = ["full"] }
tokio-stream = { version = "0.1", features = ["net"] }
tonic = "0.8"
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
frontend = { path = "../frontend" }

[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
tempdir = "0.3"
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }
common-query = { path = "../common/query" }
datafusion = "14.0.0"
datafusion-common = "14.0.0"
tempdir = "0.3"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
    "simd",
] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }

@@ -26,7 +26,15 @@ use crate::server::Services;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ObjectStoreConfig {
    File { data_dir: String },
    File {
        data_dir: String,
    },
    S3 {
        bucket: String,
        root: String,
        access_key_id: String,
        secret_access_key: String,
    },
}

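Because of `#[serde(tag = "type")]`, the storage backend is picked by a `type` field in the configuration. A hedged sketch of deserializing the S3 variant (editorial; the field names come from the enum above, while using the `toml` crate here is an assumption of the sketch, not something this commit introduces):

    let config: ObjectStoreConfig = toml::from_str(
        r#"
            type = "S3"
            bucket = "my-bucket"
            root = "/greptimedb"
            access_key_id = "key-id"
            secret_access_key = "secret"
        "#,
    )
    .unwrap();
    assert!(matches!(config, ObjectStoreConfig::S3 { .. }));
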
impl Default for ObjectStoreConfig {
@@ -47,6 +55,7 @@ pub struct DatanodeOptions {
    pub meta_client_opts: Option<MetaClientOpts>,
    pub wal_dir: String,
    pub storage: ObjectStoreConfig,
    pub enable_memory_catalog: bool,
    pub mode: Mode,
}

@@ -61,6 +70,7 @@ impl Default for DatanodeOptions {
            meta_client_opts: None,
            wal_dir: "/tmp/greptimedb/wal".to_string(),
            storage: ObjectStoreConfig::default(),
            enable_memory_catalog: false,
            mode: Mode::Standalone,
        }
    }
@@ -86,9 +96,18 @@ impl Datanode {

    pub async fn start(&mut self) -> Result<()> {
        info!("Starting datanode instance...");
        self.instance.start().await?;
        self.services.start(&self.opts).await?;
        Ok(())
        self.start_instance().await?;
        self.start_services().await
    }

    /// Start only the internal component of datanode.
    pub async fn start_instance(&mut self) -> Result<()> {
        self.instance.start().await
    }

    /// Start services of datanode. This method call will block until services are shutdown.
    pub async fn start_services(&mut self) -> Result<()> {
        self.services.start(&self.opts).await
    }

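With the split above, `start()` keeps the old behavior while callers may drive the two phases separately (editorial sketch; `datanode` is assumed to be a `Datanode` value, error handling elided):

    datanode.start_instance().await?;
    // ... e.g. run extra setup between the two phases ...
    datanode.start_services().await?; // blocks until services shut down
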
    pub fn get_instance(&self) -> InstanceRef {

@@ -18,6 +18,8 @@ use common_error::prelude::*;
use storage::error::Error as StorageError;
use table::error::Error as TableError;

use crate::datanode::ObjectStoreConfig;

/// Business error of datanode.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
@@ -73,6 +75,13 @@ pub enum Error {
        source: TableError,
    },

    #[snafu(display("Failed to drop table {}, source: {}", table_name, source))]
    DropTable {
        table_name: String,
        #[snafu(backtrace)]
        source: BoxedError,
    },

    #[snafu(display("Table not found: {}", table_name))]
    TableNotFound { table_name: String },

@@ -82,9 +91,6 @@ pub enum Error {
        table_name: String,
    },

    #[snafu(display("Missing required field in protobuf, field: {}", field))]
    MissingField { field: String, backtrace: Backtrace },

    #[snafu(display("Missing timestamp column in request"))]
    MissingTimestampColumn { backtrace: Backtrace },

@@ -138,10 +144,10 @@ pub enum Error {
    #[snafu(display("Failed to open storage engine, source: {}", source))]
    OpenStorageEngine { source: StorageError },

    #[snafu(display("Failed to init backend, dir: {}, source: {}", dir, source))]
    #[snafu(display("Failed to init backend, config: {:#?}, source: {}", config, source))]
    InitBackend {
        dir: String,
        source: std::io::Error,
        config: ObjectStoreConfig,
        source: object_store::Error,
        backtrace: Backtrace,
    },

@@ -202,21 +208,16 @@ pub enum Error {
        source: common_grpc::Error,
    },

    #[snafu(display("Column datatype error, source: {}", source))]
    ColumnDataType {
    #[snafu(display("Failed to convert alter expr to request: {}", source))]
    AlterExprToRequest {
        #[snafu(backtrace)]
        source: api::error::Error,
        source: common_grpc_expr::error::Error,
    },

    #[snafu(display(
        "Invalid column proto definition, column: {}, source: {}",
        column,
        source
    ))]
    InvalidColumnDef {
        column: String,
    #[snafu(display("Failed to convert create expr to request: {}", source))]
    CreateExprToRequest {
        #[snafu(backtrace)]
        source: api::error::Error,
        source: common_grpc_expr::error::Error,
    },

    #[snafu(display("Failed to parse SQL, source: {}", source))]
@@ -263,7 +264,7 @@ pub enum Error {
    #[snafu(display("Failed to insert data, source: {}", source))]
    InsertData {
        #[snafu(backtrace)]
        source: common_insert::error::Error,
        source: common_grpc_expr::error::Error,
    },

    #[snafu(display("Insert batch is empty"))]
@@ -306,6 +307,7 @@ impl ErrorExt for Error {
            Error::CreateTable { source, .. }
            | Error::GetTable { source, .. }
            | Error::AlterTable { source, .. } => source.status_code(),
            Error::DropTable { source, .. } => source.status_code(),

            Error::Insert { source, .. } => source.status_code(),

@@ -316,6 +318,8 @@ impl ErrorExt for Error {
                source.status_code()
            }

            Error::AlterExprToRequest { source, .. }
            | Error::CreateExprToRequest { source, .. } => source.status_code(),
            Error::CreateSchema { source, .. }
            | Error::ConvertSchema { source, .. }
            | Error::VectorComputation { source } => source.status_code(),
@@ -324,7 +328,6 @@ impl ErrorExt for Error {
            | Error::InvalidSql { .. }
            | Error::KeyColumnNotFound { .. }
            | Error::InvalidPrimaryKey { .. }
            | Error::MissingField { .. }
            | Error::MissingTimestampColumn { .. }
            | Error::CatalogNotFound { .. }
            | Error::SchemaNotFound { .. }
@@ -343,10 +346,6 @@ impl ErrorExt for Error {
            | Error::UnsupportedExpr { .. }
            | Error::Catalog { .. } => StatusCode::Internal,

            Error::ColumnDataType { source } | Error::InvalidColumnDef { source, .. } => {
                source.status_code()
            }

            Error::InitBackend { .. } => StatusCode::StorageUnavailable,
            Error::OpenLogStore { source } => source.status_code(),
            Error::StartScriptManager { source } => source.status_code(),

@@ -16,6 +16,7 @@ use std::sync::Arc;
use std::time::Duration;
use std::{fs, path};

use backon::ExponentialBackoff;
use catalog::remote::MetaKvBackend;
use catalog::CatalogManagerRef;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
@@ -26,8 +27,9 @@ use meta_client::client::{MetaClient, MetaClientBuilder};
use meta_client::MetaClientOpts;
use mito::config::EngineConfig as TableEngineConfig;
use mito::engine::MitoEngine;
use object_store::layers::LoggingLayer;
use object_store::services::fs::Builder;
use object_store::layers::{LoggingLayer, MetricsLayer, RetryLayer, TracingLayer};
use object_store::services::fs::Builder as FsBuilder;
use object_store::services::s3::Builder as S3Builder;
use object_store::{util, ObjectStore};
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
use servers::Mode;
@@ -99,17 +101,29 @@ impl Instance {
        // create remote catalog manager
        let (catalog_manager, factory, table_id_provider) = match opts.mode {
            Mode::Standalone => {
                let catalog = Arc::new(
                    catalog::local::LocalCatalogManager::try_new(table_engine.clone())
                        .await
                        .context(CatalogSnafu)?,
                );
                let factory = QueryEngineFactory::new(catalog.clone());
                (
                    catalog.clone() as CatalogManagerRef,
                    factory,
                    Some(catalog as TableIdProviderRef),
                )
                if opts.enable_memory_catalog {
                    let catalog = Arc::new(catalog::local::MemoryCatalogManager::default());
                    let factory = QueryEngineFactory::new(catalog.clone());

                    (
                        catalog.clone() as CatalogManagerRef,
                        factory,
                        Some(catalog as TableIdProviderRef),
                    )
                } else {
                    let catalog = Arc::new(
                        catalog::local::LocalCatalogManager::try_new(table_engine.clone())
                            .await
                            .context(CatalogSnafu)?,
                    );
                    let factory = QueryEngineFactory::new(catalog.clone());

                    (
                        catalog.clone() as CatalogManagerRef,
                        factory,
                        Some(catalog as TableIdProviderRef),
                    )
                }
            }

            Mode::Distributed => {
@@ -139,7 +153,11 @@ impl Instance {
        };
        Ok(Self {
            query_engine: query_engine.clone(),
            sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
            sql_handler: SqlHandler::new(
                table_engine,
                catalog_manager.clone(),
                query_engine.clone(),
            ),
            catalog_manager,
            physical_planner: PhysicalPlanner::new(query_engine),
            script_executor,
@@ -170,24 +188,64 @@ impl Instance {
    }

pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
    // TODO(dennis): supports other backend
    let data_dir = util::normalize_dir(match store_config {
        ObjectStoreConfig::File { data_dir } => data_dir,
    });
    let object_store = match store_config {
        ObjectStoreConfig::File { data_dir } => new_fs_object_store(data_dir).await,
        ObjectStoreConfig::S3 { .. } => new_s3_object_store(store_config).await,
    };

    object_store.map(|object_store| {
        object_store
            .layer(RetryLayer::new(ExponentialBackoff::default().with_jitter()))
            .layer(MetricsLayer)
            .layer(LoggingLayer)
            .layer(TracingLayer)
    })
}

pub(crate) async fn new_s3_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
    let (root, secret_key, key_id, bucket) = match store_config {
        ObjectStoreConfig::S3 {
            bucket,
            root,
            access_key_id,
            secret_access_key,
        } => (root, secret_access_key, access_key_id, bucket),
        _ => unreachable!(),
    };

    let root = util::normalize_dir(root);
    info!("The s3 storage bucket is: {}, root is: {}", bucket, &root);

    let accessor = S3Builder::default()
        .root(&root)
        .bucket(bucket)
        .access_key_id(key_id)
        .secret_access_key(secret_key)
        .build()
        .with_context(|_| error::InitBackendSnafu {
            config: store_config.clone(),
        })?;

    Ok(ObjectStore::new(accessor))
}

pub(crate) async fn new_fs_object_store(data_dir: &str) -> Result<ObjectStore> {
    let data_dir = util::normalize_dir(data_dir);
    fs::create_dir_all(path::Path::new(&data_dir))
        .context(error::CreateDirSnafu { dir: &data_dir })?;
    info!("The file storage directory is: {}", &data_dir);

    info!("The storage directory is: {}", &data_dir);
    let atomic_write_dir = format!("{}/.tmp/", data_dir);

    let accessor = Builder::default()
    let accessor = FsBuilder::default()
        .root(&data_dir)
        .atomic_write_dir(&atomic_write_dir)
        .build()
        .context(error::InitBackendSnafu { dir: &data_dir })?;
        .context(error::InitBackendSnafu {
            config: ObjectStoreConfig::File { data_dir },
        })?;

    let object_store = ObjectStore::new(accessor).layer(LoggingLayer); // Add logging

    Ok(object_store)
    Ok(ObjectStore::new(accessor))
}

/// Create metasrv client instance and spawn heartbeat loop.

@@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::result::{build_err_result, AdminResultBuilder, ObjectResultBuilder};
use api::v1::{
    admin_expr, insert_expr, object_expr, select_expr, AdminExpr, AdminResult, CreateDatabaseExpr,
    admin_expr, object_expr, select_expr, AdminExpr, AdminResult, Column, CreateDatabaseExpr,
    ObjectExpr, ObjectResult, SelectExpr,
};
use async_trait::async_trait;
@@ -22,10 +24,11 @@ use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_grpc::select::to_object_result;
use common_insert::insertion_expr_to_request;
use common_grpc_expr::insertion_expr_to_request;
use common_query::Output;
use query::plan::LogicalPlan;
use servers::query_handler::{GrpcAdminHandler, GrpcQueryHandler};
use session::context::QueryContext;
use snafu::prelude::*;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::requests::CreateDatabaseRequest;
@@ -44,7 +47,7 @@ impl Instance {
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
        values: insert_expr::Values,
        insert_batches: Vec<(Vec<Column>, u32)>,
    ) -> Result<Output> {
        let schema_provider = self
            .catalog_manager
@@ -55,11 +58,7 @@ impl Instance {
            .context(CatalogSnafu)?
            .context(SchemaNotFoundSnafu { name: schema_name })?;

        let insert_batches =
            common_insert::insert_batches(&values.values).context(InsertDataSnafu)?;

        ensure!(!insert_batches.is_empty(), EmptyInsertBatchSnafu);

        let table = schema_provider
            .table(table_name)
            .context(CatalogSnafu)?
@@ -87,10 +86,10 @@ impl Instance {
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
        values: insert_expr::Values,
        insert_batches: Vec<(Vec<Column>, u32)>,
    ) -> ObjectResult {
        match self
            .execute_grpc_insert(catalog_name, schema_name, table_name, values)
            .execute_grpc_insert(catalog_name, schema_name, table_name, insert_batches)
            .await
        {
            Ok(Output::AffectedRows(rows)) => ObjectResultBuilder::new()
@@ -114,7 +113,9 @@ impl Instance {
    async fn do_handle_select(&self, select_expr: SelectExpr) -> Result<Output> {
        let expr = select_expr.expr;
        match expr {
            Some(select_expr::Expr::Sql(sql)) => self.execute_sql(&sql).await,
            Some(select_expr::Expr::Sql(sql)) => {
                self.execute_sql(&sql, Arc::new(QueryContext::new())).await
            }
            Some(select_expr::Expr::LogicalPlan(plan)) => self.execute_logical(plan).await,
            Some(select_expr::Expr::PhysicalPlan(api::v1::PhysicalPlan { original_ql, plan })) => {
                self.physical_planner
@@ -170,25 +171,13 @@ impl GrpcQueryHandler for Instance {
                let catalog_name = DEFAULT_CATALOG_NAME;
                let schema_name = &insert_expr.schema_name;
                let table_name = &insert_expr.table_name;
                let expr = insert_expr
                    .expr
                    .context(servers::error::InvalidQuerySnafu {
                        reason: "missing `expr` in `InsertExpr`",
                    })?;

                // TODO(fys): _region_number is for later use.
                let _region_number: u32 = insert_expr.region_number;

                match expr {
                    insert_expr::Expr::Values(values) => {
                        self.handle_insert(catalog_name, schema_name, table_name, values)
                            .await
                    }
                    insert_expr::Expr::Sql(sql) => {
                        let output = self.execute_sql(&sql).await;
                        to_object_result(output).await
                    }
                }
                let insert_batches = vec![(insert_expr.columns, insert_expr.row_count)];
                self.handle_insert(catalog_name, schema_name, table_name, insert_batches)
                    .await
            }
            Some(object_expr::Expr::Select(select_expr)) => self.handle_select(select_expr).await,
            other => {
@@ -211,6 +200,9 @@ impl GrpcAdminHandler for Instance {
            Some(admin_expr::Expr::CreateDatabase(create_database_expr)) => {
                self.execute_create_database(create_database_expr).await
            }
            Some(admin_expr::Expr::DropTable(drop_table_expr)) => {
                self.handle_drop_table(drop_table_expr).await
            }
            other => {
                return servers::error::NotSupportedSnafu {
                    feat: format!("{:?}", other),

@@ -13,25 +13,27 @@
// limitations under the License.

use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::BoxedError;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging::{error, info};
use common_telemetry::timer;
use servers::query_handler::SqlQueryHandler;
use session::context::QueryContextRef;
use snafu::prelude::*;
use sql::ast::ObjectName;
use sql::statements::statement::Statement;
use table::engine::TableReference;
use table::requests::CreateDatabaseRequest;

use crate::error::{
    BumpTableIdSnafu, CatalogNotFoundSnafu, CatalogSnafu, ExecuteSqlSnafu, ParseSqlSnafu, Result,
    SchemaNotFoundSnafu, TableIdProviderNotFoundSnafu,
};
use crate::error::{self, BumpTableIdSnafu, ExecuteSqlSnafu, Result, TableIdProviderNotFoundSnafu};
use crate::instance::Instance;
use crate::metric;
use crate::sql::SqlRequest;

impl Instance {
    pub async fn execute_sql(&self, sql: &str) -> Result<Output> {
    pub async fn execute_sql(&self, sql: &str, query_ctx: QueryContextRef) -> Result<Output> {
        let stmt = self
            .query_engine
            .sql_to_statement(sql)
@@ -41,7 +43,7 @@ impl Instance {
            Statement::Query(_) => {
                let logical_plan = self
                    .query_engine
                    .statement_to_plan(stmt)
                    .statement_to_plan(stmt, query_ctx)
                    .context(ExecuteSqlSnafu)?;

                self.query_engine
@@ -50,20 +52,15 @@ impl Instance {
                    .context(ExecuteSqlSnafu)
            }
            Statement::Insert(i) => {
                let (catalog_name, schema_name, _table_name) =
                    i.full_table_name().context(ParseSqlSnafu)?;

                let schema_provider = self
                    .catalog_manager
                    .catalog(&catalog_name)
                    .context(CatalogSnafu)?
                    .context(CatalogNotFoundSnafu { name: catalog_name })?
                    .schema(&schema_name)
                    .context(CatalogSnafu)?
                    .context(SchemaNotFoundSnafu { name: schema_name })?;

                let request = self.sql_handler.insert_to_request(schema_provider, *i)?;
                self.sql_handler.execute(request).await
                let (catalog, schema, table) =
                    table_idents_to_full_name(i.table_name(), query_ctx.clone())?;
                let table_ref = TableReference::full(&catalog, &schema, &table);
                let request = self.sql_handler.insert_to_request(
                    self.catalog_manager.clone(),
                    *i,
                    table_ref,
                )?;
                self.sql_handler.execute(request, query_ctx).await
            }

            Statement::CreateDatabase(c) => {
@@ -74,7 +71,7 @@ impl Instance {
                info!("Creating a new database: {}", request.db_name);

                self.sql_handler
                    .execute(SqlRequest::CreateDatabase(request))
                    .execute(SqlRequest::CreateDatabase(request), query_ctx)
                    .await
            }

@@ -89,49 +86,116 @@ impl Instance {
                let _engine_name = c.engine.clone();
                // TODO(hl): Select table engine by engine_name

                let request = self.sql_handler.create_to_request(table_id, c)?;
                let catalog_name = &request.catalog_name;
                let schema_name = &request.schema_name;
                let table_name = &request.table_name;
                let name = c.name.clone();
                let (catalog, schema, table) = table_idents_to_full_name(&name, query_ctx.clone())?;
                let table_ref = TableReference::full(&catalog, &schema, &table);
                let request = self.sql_handler.create_to_request(table_id, c, table_ref)?;
                let table_id = request.id;
                info!(
                    "Creating table, catalog: {:?}, schema: {:?}, table name: {:?}, table id: {}",
                    catalog_name, schema_name, table_name, table_id
                    catalog, schema, table, table_id
                );

                self.sql_handler
                    .execute(SqlRequest::CreateTable(request))
                    .execute(SqlRequest::CreateTable(request), query_ctx)
                    .await
            }
            Statement::Alter(alter_table) => {
                let req = self.sql_handler.alter_to_request(alter_table)?;
                self.sql_handler.execute(SqlRequest::Alter(req)).await
                let name = alter_table.table_name().clone();
                let (catalog, schema, table) = table_idents_to_full_name(&name, query_ctx.clone())?;
                let table_ref = TableReference::full(&catalog, &schema, &table);
                let req = self.sql_handler.alter_to_request(alter_table, table_ref)?;
                self.sql_handler
                    .execute(SqlRequest::Alter(req), query_ctx)
                    .await
            }
            Statement::DropTable(drop_table) => {
                let req = self.sql_handler.drop_table_to_request(drop_table);
                self.sql_handler
                    .execute(SqlRequest::DropTable(req), query_ctx)
                    .await
            }
            Statement::ShowDatabases(stmt) => {
                self.sql_handler
                    .execute(SqlRequest::ShowDatabases(stmt))
                    .execute(SqlRequest::ShowDatabases(stmt), query_ctx)
                    .await
            }
            Statement::ShowTables(stmt) => {
                self.sql_handler.execute(SqlRequest::ShowTables(stmt)).await
                self.sql_handler
                    .execute(SqlRequest::ShowTables(stmt), query_ctx)
                    .await
            }
            Statement::Explain(stmt) => {
                self.sql_handler
                    .execute(SqlRequest::Explain(Box::new(stmt)), query_ctx)
                    .await
            }
            Statement::DescribeTable(stmt) => {
                self.sql_handler
                    .execute(SqlRequest::DescribeTable(stmt))
                    .execute(SqlRequest::DescribeTable(stmt), query_ctx)
                    .await
            }
            Statement::ShowCreateTable(_stmt) => {
                unimplemented!("SHOW CREATE TABLE is not implemented yet");
            }
            Statement::Use(db) => {
                ensure!(
                    self.catalog_manager
                        .schema(DEFAULT_CATALOG_NAME, &db)
                        .context(error::CatalogSnafu)?
                        .is_some(),
                    error::SchemaNotFoundSnafu { name: &db }
                );

                query_ctx.set_current_schema(&db);

                Ok(Output::RecordBatches(RecordBatches::empty()))
            }
        }
    }
}

// TODO(LFC): Refactor consideration: move this function to some helper mod,
// could be done together or after `TableReference`'s refactoring, when issue #559 is resolved.
/// Converts a maybe fully-qualified table name (`<catalog>.<schema>.<table>`) to a tuple.
fn table_idents_to_full_name(
    obj_name: &ObjectName,
    query_ctx: QueryContextRef,
) -> Result<(String, String, String)> {
    match &obj_name.0[..] {
        [table] => Ok((
            DEFAULT_CATALOG_NAME.to_string(),
            query_ctx
                .current_schema()
                .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string()),
            table.value.clone(),
        )),
        [schema, table] => Ok((
            DEFAULT_CATALOG_NAME.to_string(),
            schema.value.clone(),
            table.value.clone(),
        )),
        [catalog, schema, table] => Ok((
            catalog.value.clone(),
            schema.value.clone(),
            table.value.clone(),
        )),
        _ => error::InvalidSqlSnafu {
            msg: format!(
                "expect table name to be <catalog>.<schema>.<table>, <schema>.<table> or <table>, actual: {}",
                obj_name
            ),
        }
        .fail(),
    }
}

#[async_trait]
impl SqlQueryHandler for Instance {
    async fn do_query(&self, query: &str) -> servers::error::Result<Output> {
    async fn do_query(
        &self,
        query: &str,
        query_ctx: QueryContextRef,
    ) -> servers::error::Result<Output> {
        let _timer = timer!(metric::METRIC_HANDLE_SQL_ELAPSED);
        self.execute_sql(query)
        self.execute_sql(query, query_ctx)
            .await
            .map_err(|e| {
                error!(e; "Instance failed to execute sql");
@@ -140,3 +204,78 @@ impl SqlQueryHandler for Instance {
            .context(servers::error::ExecuteQuerySnafu { query })
    }
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use session::context::QueryContext;

    use super::*;

    #[test]
    fn test_table_idents_to_full_name() {
        let my_catalog = "my_catalog";
        let my_schema = "my_schema";
        let my_table = "my_table";

        let full = ObjectName(vec![my_catalog.into(), my_schema.into(), my_table.into()]);
        let partial = ObjectName(vec![my_schema.into(), my_table.into()]);
        let bare = ObjectName(vec![my_table.into()]);

        let using_schema = "foo";
        let query_ctx = Arc::new(QueryContext::with_current_schema(using_schema.to_string()));
        let empty_ctx = Arc::new(QueryContext::new());

        assert_eq!(
            table_idents_to_full_name(&full, query_ctx.clone()).unwrap(),
            (
                my_catalog.to_string(),
                my_schema.to_string(),
                my_table.to_string()
            )
        );
        assert_eq!(
            table_idents_to_full_name(&full, empty_ctx.clone()).unwrap(),
            (
                my_catalog.to_string(),
                my_schema.to_string(),
                my_table.to_string()
            )
        );

        assert_eq!(
            table_idents_to_full_name(&partial, query_ctx.clone()).unwrap(),
            (
                DEFAULT_CATALOG_NAME.to_string(),
                my_schema.to_string(),
                my_table.to_string()
            )
        );
        assert_eq!(
            table_idents_to_full_name(&partial, empty_ctx.clone()).unwrap(),
            (
                DEFAULT_CATALOG_NAME.to_string(),
                my_schema.to_string(),
                my_table.to_string()
            )
        );

        assert_eq!(
            table_idents_to_full_name(&bare, query_ctx).unwrap(),
            (
                DEFAULT_CATALOG_NAME.to_string(),
                using_schema.to_string(),
                my_table.to_string()
            )
        );
        assert_eq!(
            table_idents_to_full_name(&bare, empty_ctx).unwrap(),
            (
                DEFAULT_CATALOG_NAME.to_string(),
                DEFAULT_SCHEMA_NAME.to_string(),
                my_table.to_string()
            )
        );
    }
}

@@ -22,6 +22,6 @@ mod metric;
mod mock;
mod script;
pub mod server;
mod sql;
pub mod sql;
#[cfg(test)]
mod tests;

@@ -58,7 +58,11 @@ impl Instance {
        let factory = QueryEngineFactory::new(catalog_manager.clone());
        let query_engine = factory.query_engine();

        let sql_handler = SqlHandler::new(mock_engine.clone(), catalog_manager.clone());
        let sql_handler = SqlHandler::new(
            mock_engine.clone(),
            catalog_manager.clone(),
            query_engine.clone(),
        );
        let physical_planner = PhysicalPlanner::new(query_engine.clone());
        let script_executor = ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
            .await
@@ -123,7 +127,11 @@ impl Instance {
        );
        Ok(Self {
            query_engine: query_engine.clone(),
            sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
            sql_handler: SqlHandler::new(
                table_engine,
                catalog_manager.clone(),
                query_engine.clone(),
            ),
            catalog_manager,
            physical_planner: PhysicalPlanner::new(query_engine),
            script_executor,

@@ -62,6 +62,7 @@ impl Services {
            Some(MysqlServer::create_server(
                instance.clone(),
                mysql_io_runtime,
                Default::default(),
            ))
        }
    };

@@ -15,19 +15,17 @@
use std::sync::Arc;

use api::result::AdminResultBuilder;
use api::v1::alter_expr::Kind;
use api::v1::{AdminResult, AlterExpr, CreateExpr, DropColumns};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use api::v1::{AdminResult, AlterExpr, CreateExpr, DropTableExpr};
use common_error::prelude::{ErrorExt, StatusCode};
use common_grpc_expr::{alter_expr_to_request, create_expr_to_request};
use common_query::Output;
use common_telemetry::{error, info};
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use futures::TryFutureExt;
use session::context::QueryContext;
use snafu::prelude::*;
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
use table::requests::DropTableRequest;

use crate::error::{self, BumpTableIdSnafu, MissingFieldSnafu, Result};
use crate::error::{AlterExprToRequestSnafu, BumpTableIdSnafu, CreateExprToRequestSnafu};
use crate::instance::Instance;
use crate::sql::SqlRequest;

@@ -75,9 +73,14 @@ impl Instance {
            }
        };

        let request = create_expr_to_request(table_id, expr).await;
        let request = create_expr_to_request(table_id, expr).context(CreateExprToRequestSnafu);
        let result = futures::future::ready(request)
            .and_then(|request| self.sql_handler().execute(SqlRequest::CreateTable(request)))
            .and_then(|request| {
                self.sql_handler().execute(
                    SqlRequest::CreateTable(request),
                    Arc::new(QueryContext::new()),
                )
            })
            .await;
        match result {
            Ok(Output::AffectedRows(rows)) => AdminResultBuilder::default()
@@ -94,18 +97,24 @@ impl Instance {
    }

    pub(crate) async fn handle_alter(&self, expr: AlterExpr) -> AdminResult {
        let request = match alter_expr_to_request(expr).transpose() {
            Some(req) => req,
        let request = match alter_expr_to_request(expr)
            .context(AlterExprToRequestSnafu)
            .transpose()
        {
            None => {
                return AdminResultBuilder::default()
                    .status_code(StatusCode::Success as u32)
                    .mutate_result(0, 0)
                    .build()
            }
            Some(req) => req,
        };

        let result = futures::future::ready(request)
            .and_then(|request| self.sql_handler().execute(SqlRequest::Alter(request)))
            .and_then(|request| {
                self.sql_handler()
                    .execute(SqlRequest::Alter(request), Arc::new(QueryContext::new()))
            })
            .await;
        match result {
            Ok(Output::AffectedRows(rows)) => AdminResultBuilder::default()
@@ -119,156 +128,50 @@ impl Instance {
                .build(),
        }
    }
}

async fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
    let schema = create_table_schema(&expr)?;
    let primary_key_indices = expr
        .primary_keys
        .iter()
        .map(|key| {
            schema
                .column_index_by_name(key)
                .context(error::KeyColumnNotFoundSnafu { name: key })
        })
        .collect::<Result<Vec<usize>>>()?;

    let catalog_name = expr
        .catalog_name
        .unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
    let schema_name = expr
        .schema_name
        .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());

    let region_ids = if expr.region_ids.is_empty() {
        vec![0]
    } else {
        expr.region_ids
    };

    Ok(CreateTableRequest {
        id: table_id,
        catalog_name,
        schema_name,
        table_name: expr.table_name,
        desc: expr.desc,
        schema,
        region_numbers: region_ids,
        primary_key_indices,
        create_if_not_exists: expr.create_if_not_exists,
        table_options: expr.table_options,
    })
}

fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
    match expr.kind {
        Some(Kind::AddColumns(add_columns)) => {
            let mut add_column_requests = vec![];
            for add_column_expr in add_columns.add_columns {
                let column_def = add_column_expr.column_def.context(MissingFieldSnafu {
                    field: "column_def",
                })?;

                let schema =
                    column_def
                        .try_as_column_schema()
                        .context(error::InvalidColumnDefSnafu {
                            column: &column_def.name,
                        })?;
                add_column_requests.push(AddColumnRequest {
                    column_schema: schema,
                    is_key: add_column_expr.is_key,
                })
            }

            let alter_kind = AlterKind::AddColumns {
                columns: add_column_requests,
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
    pub(crate) async fn handle_drop_table(&self, expr: DropTableExpr) -> AdminResult {
        let req = DropTableRequest {
            catalog_name: expr.catalog_name,
            schema_name: expr.schema_name,
            table_name: expr.table_name,
        };
        let result = self
            .sql_handler()
            .execute(SqlRequest::DropTable(req), Arc::new(QueryContext::new()))
            .await;
        match result {
            Ok(Output::AffectedRows(rows)) => AdminResultBuilder::default()
                .status_code(StatusCode::Success as u32)
                .mutate_result(rows as _, 0)
                .build(),
            Ok(Output::Stream(_)) | Ok(Output::RecordBatches(_)) => unreachable!(),
            Err(err) => AdminResultBuilder::default()
                .status_code(err.status_code() as u32)
                .err_msg(err.to_string())
                .build(),
        }
        Some(Kind::DropColumns(DropColumns { drop_columns })) => {
            let alter_kind = AlterKind::DropColumns {
                names: drop_columns.into_iter().map(|c| c.name).collect(),
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
        }
        None => Ok(None),
    }
}

fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
    let column_schemas = expr
        .column_defs
        .iter()
        .map(|x| {
            x.try_as_column_schema()
                .context(error::InvalidColumnDefSnafu { column: &x.name })
        })
        .collect::<Result<Vec<ColumnSchema>>>()?;

    ensure!(
        column_schemas
            .iter()
            .any(|column| column.name == expr.time_index),
        error::KeyColumnNotFoundSnafu {
            name: &expr.time_index,
        }
    );

    let column_schemas = column_schemas
        .into_iter()
        .map(|column_schema| {
            if column_schema.name == expr.time_index {
                column_schema.with_time_index(true)
            } else {
                column_schema
            }
        })
        .collect::<Vec<_>>();

    Ok(Arc::new(
        SchemaBuilder::try_from(column_schemas)
            .context(error::CreateSchemaSnafu)?
            .build()
            .context(error::CreateSchemaSnafu)?,
    ))
}

#[cfg(test)]
mod tests {
    use api::v1::ColumnDef;
    use std::sync::Arc;

    use api::v1::{ColumnDataType, ColumnDef};
    use common_catalog::consts::MIN_USER_TABLE_ID;
    use common_grpc_expr::create_table_schema;
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnDefaultConstraint;
    use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaBuilder, SchemaRef};
    use datatypes::value::Value;

    use super::*;
    use crate::tests::test_util;

    #[tokio::test(flavor = "multi_thread")]
    async fn test_create_expr_to_request() {
        common_telemetry::init_default_ut_logging();
        let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("create_expr_to_request");
        let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
        instance.start().await.unwrap();

        let expr = testing_create_expr();
        let request = create_expr_to_request(1024, expr).await.unwrap();
        assert_eq!(request.id, common_catalog::consts::MIN_USER_TABLE_ID);
        let request = create_expr_to_request(1024, expr).unwrap();
        assert_eq!(request.id, MIN_USER_TABLE_ID);
        assert_eq!(request.catalog_name, "greptime".to_string());
        assert_eq!(request.schema_name, "public".to_string());
        assert_eq!(request.table_name, "my-metrics");
@@ -279,12 +182,13 @@ mod tests {

        let mut expr = testing_create_expr();
        expr.primary_keys = vec!["host".to_string(), "not-exist-column".to_string()];
        let result = create_expr_to_request(1025, expr).await;
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Specified timestamp key or primary key column not found: not-exist-column"));
        let result = create_expr_to_request(1025, expr);
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Column `not-exist-column` not found in table `my-metrics`"),
            "{}",
            err_msg
        );
    }

    #[test]
@@ -295,14 +199,16 @@ mod tests {

        expr.time_index = "not-exist-column".to_string();
        let result = create_table_schema(&expr);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Specified timestamp key or primary key column not found: not-exist-column"));
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Missing timestamp column"),
            "actual: {}",
            err_msg
        );
    }

    #[test]

    fn test_create_column_schema() {
        let column_def = ColumnDef {
            name: "a".to_string(),
@@ -318,7 +224,7 @@ mod tests {

        let column_def = ColumnDef {
            name: "a".to_string(),
            datatype: 12, // string
            datatype: ColumnDataType::String as i32,
            is_nullable: true,
            default_constraint: None,
        };
@@ -330,7 +236,7 @@ mod tests {
        let default_constraint = ColumnDefaultConstraint::Value(Value::from("default value"));
        let column_def = ColumnDef {
            name: "a".to_string(),
            datatype: 12, // string
            datatype: ColumnDataType::String as i32,
            is_nullable: true,
            default_constraint: Some(default_constraint.clone().try_into().unwrap()),
        };
@@ -348,25 +254,25 @@ mod tests {
        let column_defs = vec![
            ColumnDef {
                name: "host".to_string(),
                datatype: 12, // string
                datatype: ColumnDataType::String as i32,
                is_nullable: false,
                default_constraint: None,
            },
            ColumnDef {
                name: "ts".to_string(),
                datatype: 15, // timestamp
                datatype: ColumnDataType::Timestamp as i32,
                is_nullable: false,
                default_constraint: None,
            },
            ColumnDef {
                name: "cpu".to_string(),
                datatype: 9, // float32
                datatype: ColumnDataType::Float32 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "memory".to_string(),
                datatype: 10, // float64
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },

@@ -12,22 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! sql handler

use catalog::CatalogManagerRef;
use common_query::Output;
use query::sql::{describe_table, show_databases, show_tables};
use common_telemetry::error;
use query::query_engine::QueryEngineRef;
use query::sql::{describe_table, explain, show_databases, show_tables};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use sql::statements::describe::DescribeTable;
use sql::statements::explain::Explain;
use sql::statements::show::{ShowDatabases, ShowTables};
use table::engine::{EngineContext, TableEngineRef, TableReference};
use table::requests::*;
use table::TableRef;

use crate::error::{self, GetTableSnafu, Result, TableNotFoundSnafu};
use crate::error::{ExecuteSqlSnafu, GetTableSnafu, Result, TableNotFoundSnafu};

mod alter;
mod create;
mod drop_table;
mod insert;

#[derive(Debug)]
@@ -36,41 +39,61 @@ pub enum SqlRequest {
CreateTable(CreateTableRequest),
CreateDatabase(CreateDatabaseRequest),
Alter(AlterTableRequest),
DropTable(DropTableRequest),
ShowDatabases(ShowDatabases),
ShowTables(ShowTables),
DescribeTable(DescribeTable),
Explain(Box<Explain>),
}

// Handler to execute SQL except query
pub struct SqlHandler {
table_engine: TableEngineRef,
catalog_manager: CatalogManagerRef,
query_engine: QueryEngineRef,
}

impl SqlHandler {
pub fn new(table_engine: TableEngineRef, catalog_manager: CatalogManagerRef) -> Self {
pub fn new(
table_engine: TableEngineRef,
catalog_manager: CatalogManagerRef,
query_engine: QueryEngineRef,
) -> Self {
Self {
table_engine,
catalog_manager,
query_engine,
}
}

pub async fn execute(&self, request: SqlRequest) -> Result<Output> {
match request {
// TODO(LFC): Refactor consideration: a context-aware "Planner".
// Now we have some query-related state (like the database currently in use in the session
// context); maybe we could create a new struct called `Planner` that stores that context and
// handles these queries there, instead of executing them here in a "static" fashion.
pub async fn execute(&self, request: SqlRequest, query_ctx: QueryContextRef) -> Result<Output> {
let result = match request {
SqlRequest::Insert(req) => self.insert(req).await,
SqlRequest::CreateTable(req) => self.create_table(req).await,
SqlRequest::CreateDatabase(req) => self.create_database(req).await,
SqlRequest::Alter(req) => self.alter(req).await,
SqlRequest::DropTable(req) => self.drop_table(req).await,
SqlRequest::ShowDatabases(stmt) => {
show_databases(stmt, self.catalog_manager.clone()).context(error::ExecuteSqlSnafu)
show_databases(stmt, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
}
SqlRequest::ShowTables(stmt) => {
show_tables(stmt, self.catalog_manager.clone()).context(error::ExecuteSqlSnafu)
show_tables(stmt, self.catalog_manager.clone(), query_ctx).context(ExecuteSqlSnafu)
}
SqlRequest::DescribeTable(stmt) => {
describe_table(stmt, self.catalog_manager.clone()).context(error::ExecuteSqlSnafu)
describe_table(stmt, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
}
SqlRequest::Explain(stmt) => explain(stmt, self.query_engine.clone(), query_ctx)
.await
.context(ExecuteSqlSnafu),
};
if let Err(e) = &result {
error!("Datanode execution error: {:?}", e);
}
result
}

pub(crate) fn get_table<'a>(&self, table_ref: &'a TableReference) -> Result<TableRef> {
@@ -94,7 +117,8 @@ mod tests {
use std::any::Any;
use std::sync::Arc;

use catalog::SchemaProvider;
use catalog::{CatalogList, SchemaProvider};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_time::timestamp::Timestamp;
@@ -214,9 +238,17 @@ mod tests {
.await
.unwrap(),
);
let catalog_provider = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
catalog_provider
.register_schema(
DEFAULT_SCHEMA_NAME.to_string(),
Arc::new(MockSchemaProvider {}),
)
.unwrap();

let factory = QueryEngineFactory::new(catalog_list.clone());
let query_engine = factory.query_engine();
let sql_handler = SqlHandler::new(table_engine, catalog_list);
let sql_handler = SqlHandler::new(table_engine, catalog_list.clone(), query_engine.clone());

let stmt = match query_engine.sql_to_statement(sql).unwrap() {
Statement::Insert(i) => i,
@@ -224,9 +256,8 @@ mod tests {
unreachable!()
}
};
let schema_provider = Arc::new(MockSchemaProvider {});
let request = sql_handler
.insert_to_request(schema_provider, *stmt)
.insert_to_request(catalog_list.clone(), *stmt, TableReference::bare("demo"))
.unwrap();

match request {

@@ -16,7 +16,7 @@ use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use snafu::prelude::*;
use sql::statements::alter::{AlterTable, AlterTableOperation};
use sql::statements::{column_def_to_schema, table_idents_to_full_name};
use sql::statements::column_def_to_schema;
use table::engine::{EngineContext, TableReference};
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest};

@@ -53,10 +53,11 @@ impl SqlHandler {
Ok(Output::AffectedRows(0))
}

pub(crate) fn alter_to_request(&self, alter_table: AlterTable) -> Result<AlterTableRequest> {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(alter_table.table_name()).context(error::ParseSqlSnafu)?;

pub(crate) fn alter_to_request(
&self,
alter_table: AlterTable,
table_ref: TableReference,
) -> Result<AlterTableRequest> {
let alter_kind = match alter_table.alter_operation() {
AlterTableOperation::AddConstraint(table_constraint) => {
return error::InvalidSqlSnafu {
@@ -77,9 +78,9 @@ impl SqlHandler {
},
};
Ok(AlterTableRequest {
catalog_name: Some(catalog_name),
schema_name: Some(schema_name),
table_name,
catalog_name: Some(table_ref.catalog.to_string()),
schema_name: Some(table_ref.schema.to_string()),
table_name: table_ref.table.to_string(),
alter_kind,
})
}
@@ -112,7 +113,9 @@ mod tests {
async fn test_alter_to_request_with_adding_column() {
let handler = create_mock_sql_handler().await;
let alter_table = parse_sql("ALTER TABLE my_metric_1 ADD tagk_i STRING Null;");
let req = handler.alter_to_request(alter_table).unwrap();
let req = handler
.alter_to_request(alter_table, TableReference::bare("my_metric_1"))
.unwrap();
assert_eq!(req.catalog_name, Some("greptime".to_string()));
assert_eq!(req.schema_name, Some("public".to_string()));
assert_eq!(req.table_name, "my_metric_1");

@@ -23,10 +23,10 @@ use common_telemetry::tracing::log::error;
use datatypes::schema::SchemaBuilder;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::TableConstraint;
use sql::statements::column_def_to_schema;
use sql::statements::create::CreateTable;
use sql::statements::{column_def_to_schema, table_idents_to_full_name};
use store_api::storage::consts::TIME_INDEX_NAME;
use table::engine::EngineContext;
use table::engine::{EngineContext, TableReference};
use table::metadata::TableId;
use table::requests::*;

@@ -84,7 +84,6 @@ impl SqlHandler {

// determine catalog and schema from the very beginning
let table_name = req.table_name.clone();
let table_id = req.id;
let table = self
.table_engine
.create_table(&ctx, req)
@@ -97,7 +96,7 @@ impl SqlHandler {
catalog: table.table_info().catalog_name.clone(),
schema: table.table_info().schema_name.clone(),
table_name: table_name.clone(),
table_id,
table_id: table.table_info().ident.table_id,
table,
};

@@ -115,13 +114,11 @@ impl SqlHandler {
&self,
table_id: TableId,
stmt: CreateTable,
table_ref: TableReference,
) -> Result<CreateTableRequest> {
let mut ts_index = usize::MAX;
let mut primary_keys = vec![];

let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(&stmt.name).context(error::ParseSqlSnafu)?;

let col_map = stmt
.columns
.iter()
@@ -172,7 +169,7 @@ impl SqlHandler {
return ConstraintNotSupportedSnafu {
constraint: format!("{:?}", c),
}
.fail()
.fail();
}
}
}
@@ -186,14 +183,6 @@ impl SqlHandler {

ensure!(ts_index != usize::MAX, error::MissingTimestampColumnSnafu);

if primary_keys.is_empty() {
info!(
"Creating table: {:?}.{:?}.{} but primary key not set, use time index column: {}",
catalog_name, schema_name, table_name, ts_index
);
primary_keys.push(ts_index);
}

let columns_schemas: Vec<_> = stmt
.columns
.iter()
@@ -212,9 +201,9 @@ impl SqlHandler {

let request = CreateTableRequest {
id: table_id,
catalog_name,
schema_name,
table_name,
catalog_name: table_ref.catalog.to_string(),
schema_name: table_ref.schema.to_string(),
table_name: table_ref.table.to_string(),
desc: None,
schema,
region_numbers: vec![0],
@@ -262,7 +251,9 @@ mod tests {
TIME INDEX (ts),
PRIMARY KEY(host)) engine=mito with(regions=1);"#,
);
let c = handler.create_to_request(42, parsed_stmt).unwrap();
let c = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.unwrap();
assert_eq!("demo_table", c.table_name);
assert_eq!(42, c.id);
assert!(!c.create_if_not_exists);
@@ -283,11 +274,12 @@ mod tests {
memory double,
PRIMARY KEY(host)) engine=mito with(regions=1);"#,
);
let error = handler.create_to_request(42, parsed_stmt).unwrap_err();
let error = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.unwrap_err();
assert_matches!(error, Error::MissingTimestampColumn { .. });
}

/// If primary key is not specified, time index should be used as primary key.
#[tokio::test]
pub async fn test_primary_key_not_specified() {
let handler = create_mock_sql_handler().await;
@@ -300,12 +292,11 @@ mod tests {
memory double,
TIME INDEX (ts)) engine=mito with(regions=1);"#,
);
let c = handler.create_to_request(42, parsed_stmt).unwrap();
assert_eq!(1, c.primary_key_indices.len());
assert_eq!(
c.schema.timestamp_index().unwrap(),
c.primary_key_indices[0]
);
let c = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.unwrap();
assert!(c.primary_key_indices.is_empty());
assert_eq!(c.schema.timestamp_index(), Some(1));
}

/// Constraints specified, but the column cannot be found.
@@ -319,7 +310,9 @@ mod tests {
TIME INDEX (ts)) engine=mito with(regions=1);"#,
);

let error = handler.create_to_request(42, parsed_stmt).unwrap_err();
let error = handler
.create_to_request(42, parsed_stmt, TableReference::bare("demo_table"))
.unwrap_err();
assert_matches!(error, Error::KeyColumnNotFound { .. });
}

@@ -339,7 +332,9 @@ mod tests {

let handler = create_mock_sql_handler().await;

let error = handler.create_to_request(42, create_table).unwrap_err();
let error = handler
.create_to_request(42, create_table, TableReference::full("c", "s", "demo"))
.unwrap_err();
assert_matches!(error, Error::InvalidPrimaryKey { .. });
}

@@ -359,7 +354,9 @@ mod tests {

let handler = create_mock_sql_handler().await;

let request = handler.create_to_request(42, create_table).unwrap();
let request = handler
.create_to_request(42, create_table, TableReference::full("c", "s", "demo"))
.unwrap();

assert_eq!(42, request.id);
assert_eq!("c".to_string(), request.catalog_name);

71
src/datanode/src/sql/drop_table.rs
Normal file
@@ -0,0 +1,71 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use catalog::DeregisterTableRequest;
use common_error::prelude::BoxedError;
use common_query::Output;
use common_telemetry::info;
use snafu::ResultExt;
use sql::statements::drop::DropTable;
use table::engine::{EngineContext, TableReference};
use table::requests::DropTableRequest;

use crate::error::{self, Result};
use crate::sql::SqlHandler;

impl SqlHandler {
pub async fn drop_table(&self, req: DropTableRequest) -> Result<Output> {
let deregister_table_req = DeregisterTableRequest {
catalog: req.catalog_name.clone(),
schema: req.schema_name.clone(),
table_name: req.table_name.clone(),
};

let table_reference = TableReference {
catalog: &req.catalog_name,
schema: &req.schema_name,
table: &req.table_name,
};
let table_full_name = table_reference.to_string();

self.catalog_manager
.deregister_table(deregister_table_req)
.await
.map_err(BoxedError::new)
.context(error::DropTableSnafu {
table_name: table_full_name.clone(),
})?;

let ctx = EngineContext {};
self.table_engine()
.drop_table(&ctx, req)
.await
.map_err(BoxedError::new)
.context(error::DropTableSnafu {
table_name: table_full_name.clone(),
})?;

info!("Successfully dropped table: {}", table_full_name);

Ok(Output::AffectedRows(1))
}

pub fn drop_table_to_request(&self, drop_table: DropTable) -> DropTableRequest {
DropTableRequest {
catalog_name: drop_table.catalog_name,
schema_name: drop_table.schema_name,
table_name: drop_table.table_name,
}
}
}

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use catalog::SchemaProviderRef;
use catalog::CatalogManagerRef;
use common_query::Output;
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use snafu::{ensure, OptionExt, ResultExt};
@@ -23,7 +23,7 @@ use table::engine::TableReference;
use table::requests::*;

use crate::error::{
CatalogSnafu, ColumnNotFoundSnafu, ColumnValuesNumberMismatchSnafu, InsertSnafu, ParseSqlSnafu,
CatalogSnafu, ColumnNotFoundSnafu, ColumnValuesNumberMismatchSnafu, InsertSnafu,
ParseSqlValueSnafu, Result, TableNotFoundSnafu,
};
use crate::sql::{SqlHandler, SqlRequest};
@@ -49,19 +49,18 @@ impl SqlHandler {

pub(crate) fn insert_to_request(
&self,
schema_provider: SchemaProviderRef,
catalog_manager: CatalogManagerRef,
stmt: Insert,
table_ref: TableReference,
) -> Result<SqlRequest> {
let columns = stmt.columns();
let values = stmt.values().context(ParseSqlValueSnafu)?;
let (catalog_name, schema_name, table_name) =
stmt.full_table_name().context(ParseSqlSnafu)?;

let table = schema_provider
.table(&table_name)
let table = catalog_manager
.table(table_ref.catalog, table_ref.schema, table_ref.table)
.context(CatalogSnafu)?
.context(TableNotFoundSnafu {
table_name: &table_name,
table_name: table_ref.table,
})?;
let schema = table.schema();
let columns_num = if columns.is_empty() {
@@ -88,7 +87,7 @@ impl SqlHandler {
let column_schema =
schema.column_schema_by_name(column_name).with_context(|| {
ColumnNotFoundSnafu {
table_name: &table_name,
table_name: table_ref.table,
column_name: column_name.to_string(),
}
})?;
@@ -119,9 +118,9 @@ impl SqlHandler {
}

Ok(SqlRequest::Insert(InsertRequest {
catalog_name,
schema_name,
table_name,
catalog_name: table_ref.catalog.to_string(),
schema_name: table_ref.schema.to_string(),
table_name: table_ref.table.to_string(),
columns_values: columns_builders
.into_iter()
.map(|(c, _, mut b)| (c.to_owned(), b.finish()))

@@ -12,7 +12,5 @@
// See the License for the specific language governing permissions and
// limitations under the License.

mod grpc_test;
mod http_test;
mod instance_test;
pub(crate) mod test_util;

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use common_catalog::consts::DEFAULT_SCHEMA_NAME;
use common_query::Output;
use common_recordbatch::util;
use datafusion::arrow_print;
@@ -19,6 +22,7 @@ use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::{Int64Array, UInt64Array, Utf8Array};
use datatypes::arrow_array::StringArray;
use datatypes::prelude::ConcreteDataType;
use session::context::QueryContext;

use crate::instance::Instance;
use crate::tests::test_util;
@@ -32,39 +36,33 @@ async fn test_create_database_and_insert_query() {
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = instance.execute_sql("create database test").await.unwrap();
let output = execute_sql(&instance, "create database test").await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql(
r#"create table greptime.test.demo(
let output = execute_sql(
&instance,
r#"create table greptime.test.demo(
host STRING,
cpu DOUBLE,
memory DOUBLE,
ts bigint,
TIME INDEX(ts)
)"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql(
r#"insert into test.demo(host, cpu, memory, ts) values
let output = execute_sql(
&instance,
r#"insert into test.demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));

let query_output = instance
.execute_sql("select ts from test.demo order by ts")
.await
.unwrap();

let query_output = execute_sql(&instance, "select ts from test.demo order by ts").await;
match query_output {
Output::Stream(s) => {
let batches = util::collect(s).await.unwrap();
@@ -88,54 +86,50 @@ async fn test_issue477_same_table_name_in_different_databases() {
instance.start().await.unwrap();

// Create database a and b
let output = instance.execute_sql("create database a").await.unwrap();
let output = execute_sql(&instance, "create database a").await;
assert!(matches!(output, Output::AffectedRows(1)));
let output = instance.execute_sql("create database b").await.unwrap();
let output = execute_sql(&instance, "create database b").await;
assert!(matches!(output, Output::AffectedRows(1)));

// Create table a.demo and b.demo
let output = instance
.execute_sql(
r#"create table a.demo(
let output = execute_sql(
&instance,
r#"create table a.demo(
host STRING,
ts bigint,
TIME INDEX(ts)
)"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql(
r#"create table b.demo(
let output = execute_sql(
&instance,
r#"create table b.demo(
host STRING,
ts bigint,
TIME INDEX(ts)
)"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

// Insert different data into a.demo and b.demo
let output = instance
.execute_sql(
r#"insert into a.demo(host, ts) values
let output = execute_sql(
&instance,
r#"insert into a.demo(host, ts) values
('host1', 1655276557000)
"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));
let output = instance
.execute_sql(
r#"insert into b.demo(host, ts) values
let output = execute_sql(
&instance,
r#"insert into b.demo(host, ts) values
('host2',1655276558000)
"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

// Query data and assert
@@ -157,7 +151,7 @@ async fn test_issue477_same_table_name_in_different_databases() {
}

async fn assert_query_result(instance: &Instance, sql: &str, ts: i64, host: &str) {
let query_output = instance.execute_sql(sql).await.unwrap();
let query_output = execute_sql(instance, sql).await;
match query_output {
Output::Stream(s) => {
let batches = util::collect(s).await.unwrap();
@@ -200,15 +194,14 @@ async fn setup_test_instance() -> Instance {
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert() {
let instance = setup_test_instance().await;
let output = instance
.execute_sql(
r#"insert into demo(host, cpu, memory, ts) values
let output = execute_sql(
&instance,
r#"insert into demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));
}

@@ -228,22 +221,17 @@ async fn test_execute_insert_query_with_i64_timestamp() {
.await
.unwrap();

let output = instance
.execute_sql(
r#"insert into demo(host, cpu, memory, ts) values
let output = execute_sql(
&instance,
r#"insert into demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));

let query_output = instance
.execute_sql("select ts from demo order by ts")
.await
.unwrap();

let query_output = execute_sql(&instance, "select ts from demo order by ts").await;
match query_output {
Output::Stream(s) => {
let batches = util::collect(s).await.unwrap();
@@ -257,11 +245,7 @@ async fn test_execute_insert_query_with_i64_timestamp() {
_ => unreachable!(),
}

let query_output = instance
.execute_sql("select ts as time from demo order by ts")
.await
.unwrap();

let query_output = execute_sql(&instance, "select ts as time from demo order by ts").await;
match query_output {
Output::Stream(s) => {
let batches = util::collect(s).await.unwrap();
@@ -282,10 +266,7 @@ async fn test_execute_query() {
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = instance
.execute_sql("select sum(number) from numbers limit 20")
.await
.unwrap();
let output = execute_sql(&instance, "select sum(number) from numbers limit 20").await;
match output {
Output::Stream(recordbatch) => {
let numbers = util::collect(recordbatch).await.unwrap();
@@ -309,7 +290,7 @@ async fn test_execute_show_databases_tables() {
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = instance.execute_sql("show databases").await.unwrap();
let output = execute_sql(&instance, "show databases").await;
match output {
Output::RecordBatches(databases) => {
let databases = databases.take();
@@ -325,10 +306,7 @@ async fn test_execute_show_databases_tables() {
_ => unreachable!(),
}

let output = instance
.execute_sql("show databases like '%bl%'")
.await
.unwrap();
let output = execute_sql(&instance, "show databases like '%bl%'").await;
match output {
Output::RecordBatches(databases) => {
let databases = databases.take();
@@ -344,7 +322,7 @@ async fn test_execute_show_databases_tables() {
_ => unreachable!(),
}

let output = instance.execute_sql("show tables").await.unwrap();
let output = execute_sql(&instance, "show tables").await;
match output {
Output::RecordBatches(databases) => {
let databases = databases.take();
@@ -364,7 +342,7 @@ async fn test_execute_show_databases_tables() {
.await
.unwrap();

let output = instance.execute_sql("show tables").await.unwrap();
let output = execute_sql(&instance, "show tables").await;
match output {
Output::RecordBatches(databases) => {
let databases = databases.take();
@@ -376,10 +354,7 @@ async fn test_execute_show_databases_tables() {
}

// show tables like [string]
let output = instance
.execute_sql("show tables like 'de%'")
.await
.unwrap();
let output = execute_sql(&instance, "show tables like 'de%'").await;
match output {
Output::RecordBatches(databases) => {
let databases = databases.take();
@@ -404,9 +379,9 @@ pub async fn test_execute_create() {
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = instance
.execute_sql(
r#"create table test_table(
let output = execute_sql(
&instance,
r#"create table test_table(
host string,
ts timestamp,
cpu double default 0,
@@ -414,56 +389,24 @@ pub async fn test_execute_create() {
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await
.unwrap();
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));
}

#[tokio::test(flavor = "multi_thread")]
pub async fn test_create_table_illegal_timestamp_type() {
common_telemetry::init_default_ut_logging();

let (opts, _guard) =
test_util::create_tmp_dir_and_datanode_opts("create_table_illegal_timestamp_type");
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = instance
.execute_sql(
r#"create table test_table(
host string,
ts bigint,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await
.unwrap();
match output {
Output::AffectedRows(rows) => {
assert_eq!(1, rows);
}
_ => unreachable!(),
}
}

async fn check_output_stream(output: Output, expected: Vec<&str>) {
match output {
Output::Stream(stream) => {
let recordbatches = util::collect(stream).await.unwrap();
let recordbatch = recordbatches
.into_iter()
.map(|r| r.df_recordbatch)
.collect::<Vec<DfRecordBatch>>();
let pretty_print = arrow_print::write(&recordbatch);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
assert_eq!(pretty_print, expected);
}
let recordbatches = match output {
Output::Stream(stream) => util::collect(stream).await.unwrap(),
Output::RecordBatches(recordbatches) => recordbatches.take(),
_ => unreachable!(),
}
};
let recordbatches = recordbatches
.into_iter()
.map(|r| r.df_recordbatch)
.collect::<Vec<DfRecordBatch>>();
let pretty_print = arrow_print::write(&recordbatches);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
assert_eq!(pretty_print, expected);
}

#[tokio::test]
@@ -479,35 +422,30 @@ async fn test_alter_table() {
.await
.unwrap();
// make sure table insertion is ok before altering table
instance
.execute_sql("insert into demo(host, cpu, memory, ts) values ('host1', 1.1, 100, 1000)")
.await
.unwrap();
execute_sql(
&instance,
"insert into demo(host, cpu, memory, ts) values ('host1', 1.1, 100, 1000)",
)
.await;

// Add column
let output = instance
.execute_sql("alter table demo add my_tag string null")
.await
.unwrap();
let output = execute_sql(&instance, "alter table demo add my_tag string null").await;
assert!(matches!(output, Output::AffectedRows(0)));

let output = instance
.execute_sql(
"insert into demo(host, cpu, memory, ts, my_tag) values ('host2', 2.2, 200, 2000, 'hello')",
)
.await
.unwrap();
let output = execute_sql(
&instance,
"insert into demo(host, cpu, memory, ts, my_tag) values ('host2', 2.2, 200, 2000, 'hello')",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));
let output = instance
.execute_sql("insert into demo(host, cpu, memory, ts) values ('host3', 3.3, 300, 3000)")
.await
.unwrap();
let output = execute_sql(
&instance,
"insert into demo(host, cpu, memory, ts) values ('host3', 3.3, 300, 3000)",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql("select * from demo order by ts")
.await
.unwrap();
let output = execute_sql(&instance, "select * from demo order by ts").await;
let expected = vec![
"+-------+-----+--------+---------------------+--------+",
"| host | cpu | memory | ts | my_tag |",
@@ -520,16 +458,10 @@ async fn test_alter_table() {
check_output_stream(output, expected).await;

// Drop a column
let output = instance
.execute_sql("alter table demo drop column memory")
.await
.unwrap();
let output = execute_sql(&instance, "alter table demo drop column memory").await;
assert!(matches!(output, Output::AffectedRows(0)));

let output = instance
.execute_sql("select * from demo order by ts")
.await
.unwrap();
let output = execute_sql(&instance, "select * from demo order by ts").await;
let expected = vec![
"+-------+-----+---------------------+--------+",
"| host | cpu | ts | my_tag |",
@@ -542,16 +474,14 @@ async fn test_alter_table() {
check_output_stream(output, expected).await;

// insert a new row
let output = instance
.execute_sql("insert into demo(host, cpu, ts, my_tag) values ('host4', 400, 4000, 'world')")
.await
.unwrap();
let output = execute_sql(
&instance,
"insert into demo(host, cpu, ts, my_tag) values ('host4', 400, 4000, 'world')",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql("select * from demo order by ts")
.await
.unwrap();
let output = execute_sql(&instance, "select * from demo order by ts").await;
let expected = vec![
"+-------+-----+---------------------+--------+",
"| host | cpu | ts | my_tag |",
@@ -580,27 +510,26 @@ async fn test_insert_with_default_value_for_type(type_name: &str) {
) engine=mito with(regions=1);"#,
type_name
);
let output = instance.execute_sql(&create_sql).await.unwrap();
let output = execute_sql(&instance, &create_sql).await;
assert!(matches!(output, Output::AffectedRows(1)));

// Insert with ts.
instance
.execute_sql("insert into test_table(host, cpu, ts) values ('host1', 1.1, 1000)")
.await
.unwrap();
let output = execute_sql(
&instance,
"insert into test_table(host, cpu, ts) values ('host1', 1.1, 1000)",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

// Insert without ts, so it should be filled by default value.
let output = instance
.execute_sql("insert into test_table(host, cpu) values ('host2', 2.2)")
.await
.unwrap();
let output = execute_sql(
&instance,
"insert into test_table(host, cpu) values ('host2', 2.2)",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = instance
.execute_sql("select host, cpu from test_table")
.await
.unwrap();
let output = execute_sql(&instance, "select host, cpu from test_table").await;
let expected = vec![
"+-------+-----+",
"| host | cpu |",
@@ -619,3 +548,70 @@ async fn test_insert_with_default_value() {
test_insert_with_default_value_for_type("timestamp").await;
test_insert_with_default_value_for_type("bigint").await;
}

#[tokio::test(flavor = "multi_thread")]
async fn test_use_database() {
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("use_database");
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();

let output = execute_sql(&instance, "create database db1").await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = execute_sql_in_db(
&instance,
"create table tb1(col_i32 int, ts bigint, TIME INDEX(ts))",
"db1",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = execute_sql_in_db(&instance, "show tables", "db1").await;
let expected = vec![
"+--------+",
"| Tables |",
"+--------+",
"| tb1 |",
"+--------+",
];
check_output_stream(output, expected).await;

let output = execute_sql_in_db(
&instance,
r#"insert into tb1(col_i32, ts) values (1, 1655276557000)"#,
"db1",
)
.await;
assert!(matches!(output, Output::AffectedRows(1)));

let output = execute_sql_in_db(&instance, "select col_i32 from tb1", "db1").await;
let expected = vec![
"+---------+",
"| col_i32 |",
"+---------+",
"| 1 |",
"+---------+",
];
check_output_stream(output, expected).await;

// Making a particular database the default by means of the USE statement does not preclude
// accessing tables in other databases.
let output = execute_sql(&instance, "select number from public.numbers limit 1").await;
let expected = vec![
"+--------+",
"| number |",
"+--------+",
"| 0 |",
"+--------+",
];
check_output_stream(output, expected).await;
}

async fn execute_sql(instance: &Instance, sql: &str) -> Output {
execute_sql_in_db(instance, sql, DEFAULT_SCHEMA_NAME).await
}

async fn execute_sql_in_db(instance: &Instance, sql: &str, db: &str) -> Output {
let query_ctx = Arc::new(QueryContext::with_current_schema(db.to_string()));
instance.execute_sql(sql, query_ctx).await.unwrap()
}

@@ -21,6 +21,7 @@ use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use mito::config::EngineConfig;
use mito::table::test_util::{new_test_object_store, MockEngine, MockMitoEngine};
use query::QueryEngineFactory;
use servers::Mode;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
@@ -88,7 +89,7 @@ pub async fn create_test_table(
.expect("ts is expected to be timestamp column"),
),
create_if_not_exists: true,
primary_key_indices: vec![3, 0], // "host" and "ts" are primary keys
primary_key_indices: vec![0], // "host" is in primary keys
table_options: HashMap::new(),
region_numbers: vec![0],
},
@@ -121,5 +122,9 @@ pub async fn create_mock_sql_handler() -> SqlHandler {
.await
.unwrap(),
);
SqlHandler::new(mock_engine, catalog_manager)

let catalog_list = catalog::local::new_memory_catalog_list().unwrap();
let factory = QueryEngineFactory::new(catalog_list);

SqlHandler::new(mock_engine, catalog_manager, factory.query_engine())
}

24
src/datatypes2/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
[package]
name = "datatypes2"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"

[features]
default = []
test = []

[dependencies]
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datafusion-common = "14.0"
enum_dispatch = "0.3"
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }
paste = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
arrow = "26.0"
242
src/datatypes2/src/arrow_array.rs
Normal file
@@ -0,0 +1,242 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use arrow::array::{
Array, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
Int32Array, Int64Array, Int8Array, ListArray, UInt16Array, UInt32Array, UInt64Array,
UInt8Array,
};
use arrow::datatypes::DataType;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use snafu::OptionExt;

use crate::data_type::ConcreteDataType;
use crate::error::{ConversionSnafu, Result};
use crate::value::{ListValue, Value};

pub type BinaryArray = arrow::array::LargeBinaryArray;
pub type MutableBinaryArray = arrow::array::LargeBinaryBuilder;
pub type StringArray = arrow::array::StringArray;
pub type MutableStringArray = arrow::array::StringBuilder;

macro_rules! cast_array {
($arr: ident, $CastType: ty) => {
$arr.as_any()
.downcast_ref::<$CastType>()
.with_context(|| ConversionSnafu {
from: format!("{:?}", $arr.data_type()),
})?
};
}

// TODO(yingwen): Remove this function.
pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
if array.is_null(idx) {
return Ok(Value::Null);
}

let result = match array.data_type() {
DataType::Null => Value::Null,
DataType::Boolean => Value::Boolean(cast_array!(array, BooleanArray).value(idx)),
DataType::Binary => Value::Binary(cast_array!(array, BinaryArray).value(idx).into()),
DataType::Int8 => Value::Int8(cast_array!(array, Int8Array).value(idx)),
DataType::Int16 => Value::Int16(cast_array!(array, Int16Array).value(idx)),
DataType::Int32 => Value::Int32(cast_array!(array, Int32Array).value(idx)),
DataType::Int64 => Value::Int64(cast_array!(array, Int64Array).value(idx)),
DataType::UInt8 => Value::UInt8(cast_array!(array, UInt8Array).value(idx)),
DataType::UInt16 => Value::UInt16(cast_array!(array, UInt16Array).value(idx)),
DataType::UInt32 => Value::UInt32(cast_array!(array, UInt32Array).value(idx)),
DataType::UInt64 => Value::UInt64(cast_array!(array, UInt64Array).value(idx)),
DataType::Float32 => Value::Float32(cast_array!(array, Float32Array).value(idx).into()),
DataType::Float64 => Value::Float64(cast_array!(array, Float64Array).value(idx).into()),
DataType::Utf8 => Value::String(cast_array!(array, StringArray).value(idx).into()),
DataType::Date32 => Value::Date(cast_array!(array, Date32Array).value(idx).into()),
DataType::Date64 => Value::DateTime(cast_array!(array, Date64Array).value(idx).into()),
DataType::Timestamp(t, _) => match t {
arrow::datatypes::TimeUnit::Second => Value::Timestamp(Timestamp::new(
cast_array!(array, arrow::array::TimestampSecondArray).value(idx),
TimeUnit::Second,
)),
arrow::datatypes::TimeUnit::Millisecond => Value::Timestamp(Timestamp::new(
cast_array!(array, arrow::array::TimestampMillisecondArray).value(idx),
TimeUnit::Millisecond,
)),
arrow::datatypes::TimeUnit::Microsecond => Value::Timestamp(Timestamp::new(
cast_array!(array, arrow::array::TimestampMicrosecondArray).value(idx),
TimeUnit::Microsecond,
)),
arrow::datatypes::TimeUnit::Nanosecond => Value::Timestamp(Timestamp::new(
cast_array!(array, arrow::array::TimestampNanosecondArray).value(idx),
TimeUnit::Nanosecond,
)),
},
DataType::List(_) => {
let array = cast_array!(array, ListArray).value(idx);
let item_type = ConcreteDataType::try_from(array.data_type())?;
let values = (0..array.len())
.map(|i| arrow_array_get(&*array, i))
.collect::<Result<Vec<Value>>>()?;
Value::List(ListValue::new(Some(Box::new(values)), item_type))
}
_ => unimplemented!("Arrow array datatype: {:?}", array.data_type()),
};

Ok(result)
}

#[cfg(test)]
mod test {
use std::sync::Arc;

use arrow::array::{
BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
LargeBinaryArray, TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array,
UInt8Array,
};
use arrow::datatypes::Int32Type;
use common_time::timestamp::{TimeUnit, Timestamp};
use paste::paste;

use super::*;
use crate::data_type::ConcreteDataType;
use crate::types::TimestampType;

macro_rules! test_arrow_array_get_for_timestamps {
( $($unit: ident), *) => {
$(
paste! {
let mut builder = arrow::array::[<Timestamp $unit Array>]::builder(3);
builder.append_value(1);
builder.append_value(0);
builder.append_value(-1);
let ts_array = Arc::new(builder.finish()) as Arc<dyn Array>;
let v = arrow_array_get(&ts_array, 1).unwrap();
assert_eq!(
ConcreteDataType::Timestamp(TimestampType::$unit(
$crate::types::[<Timestamp $unit Type>]::default(),
)),
v.data_type()
);
}
)*
};
}

#[test]
fn test_timestamp_array() {
test_arrow_array_get_for_timestamps![Second, Millisecond, Microsecond, Nanosecond];
}

#[test]
fn test_arrow_array_access() {
let array1 = BooleanArray::from(vec![true, true, false, false]);
assert_eq!(Value::Boolean(true), arrow_array_get(&array1, 1).unwrap());
let array1 = Int8Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::Int8(2), arrow_array_get(&array1, 1).unwrap());
let array1 = UInt8Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::UInt8(2), arrow_array_get(&array1, 1).unwrap());
let array1 = Int16Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::Int16(2), arrow_array_get(&array1, 1).unwrap());
let array1 = UInt16Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::UInt16(2), arrow_array_get(&array1, 1).unwrap());
let array1 = Int32Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::Int32(2), arrow_array_get(&array1, 1).unwrap());
let array1 = UInt32Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::UInt32(2), arrow_array_get(&array1, 1).unwrap());
let array = Int64Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::Int64(2), arrow_array_get(&array, 1).unwrap());
let array1 = UInt64Array::from(vec![1, 2, 3, 4]);
assert_eq!(Value::UInt64(2), arrow_array_get(&array1, 1).unwrap());
let array1 = Float32Array::from(vec![1f32, 2f32, 3f32, 4f32]);
assert_eq!(
Value::Float32(2f32.into()),
arrow_array_get(&array1, 1).unwrap()
);
let array1 = Float64Array::from(vec![1f64, 2f64, 3f64, 4f64]);
assert_eq!(
Value::Float64(2f64.into()),
arrow_array_get(&array1, 1).unwrap()
);

let array2 = StringArray::from(vec![Some("hello"), None, Some("world")]);
assert_eq!(
Value::String("hello".into()),
arrow_array_get(&array2, 0).unwrap()
);
assert_eq!(Value::Null, arrow_array_get(&array2, 1).unwrap());

let array3 = LargeBinaryArray::from(vec![
Some("hello".as_bytes()),
None,
Some("world".as_bytes()),
]);
assert_eq!(Value::Null, arrow_array_get(&array3, 1).unwrap());

let array = TimestampSecondArray::from(vec![1, 2, 3]);
let value = arrow_array_get(&array, 1).unwrap();
assert_eq!(value, Value::Timestamp(Timestamp::new(2, TimeUnit::Second)));
let array = TimestampMillisecondArray::from(vec![1, 2, 3]);
let value = arrow_array_get(&array, 1).unwrap();
assert_eq!(
value,
Value::Timestamp(Timestamp::new(2, TimeUnit::Millisecond))
);
let array = TimestampMicrosecondArray::from(vec![1, 2, 3]);
let value = arrow_array_get(&array, 1).unwrap();
assert_eq!(
value,
Value::Timestamp(Timestamp::new(2, TimeUnit::Microsecond))
);
let array = TimestampNanosecondArray::from(vec![1, 2, 3]);
let value = arrow_array_get(&array, 1).unwrap();
assert_eq!(
value,
Value::Timestamp(Timestamp::new(2, TimeUnit::Nanosecond))
);

// test list array
let data = vec![
Some(vec![Some(1), Some(2), Some(3)]),
None,
Some(vec![Some(4), None, Some(6)]),
];
let arrow_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);

let v0 = arrow_array_get(&arrow_array, 0).unwrap();
match v0 {
Value::List(list) => {
assert!(matches!(list.datatype(), ConcreteDataType::Int32(_)));
let items = list.items().as_ref().unwrap();
assert_eq!(
**items,
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)]
);
}
_ => unreachable!(),
}

assert_eq!(Value::Null, arrow_array_get(&arrow_array, 1).unwrap());
let v2 = arrow_array_get(&arrow_array, 2).unwrap();
match v2 {
Value::List(list) => {
assert!(matches!(list.datatype(), ConcreteDataType::Int32(_)));
let items = list.items().as_ref().unwrap();
assert_eq!(**items, vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
}
_ => unreachable!(),
}
}
}
486
src/datatypes2/src/data_type.rs
Normal file
@@ -0,0 +1,486 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
use common_time::timestamp::TimeUnit;
use paste::paste;
use serde::{Deserialize, Serialize};

use crate::error::{self, Error, Result};
use crate::type_id::LogicalTypeId;
use crate::types::{
BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use crate::value::Value;
use crate::vectors::MutableVector;

#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[enum_dispatch::enum_dispatch(DataType)]
pub enum ConcreteDataType {
Null(NullType),
Boolean(BooleanType),

// Numeric types:
Int8(Int8Type),
Int16(Int16Type),
Int32(Int32Type),
Int64(Int64Type),
UInt8(UInt8Type),
UInt16(UInt16Type),
UInt32(UInt32Type),
UInt64(UInt64Type),
Float32(Float32Type),
Float64(Float64Type),

// String types:
Binary(BinaryType),
String(StringType),

// Date types:
Date(DateType),
DateTime(DateTimeType),
Timestamp(TimestampType),

// Compound types:
List(ListType),
}

// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
// returning all these properties to the `DataType` trait
impl ConcreteDataType {
pub fn is_float(&self) -> bool {
matches!(
self,
ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
)
}

pub fn is_boolean(&self) -> bool {
matches!(self, ConcreteDataType::Boolean(_))
}

pub fn is_stringifiable(&self) -> bool {
matches!(
self,
ConcreteDataType::String(_)
| ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_)
)
}

pub fn is_signed(&self) -> bool {
matches!(
self,
ConcreteDataType::Int8(_)
| ConcreteDataType::Int16(_)
| ConcreteDataType::Int32(_)
| ConcreteDataType::Int64(_)
| ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_)
)
}

pub fn is_unsigned(&self) -> bool {
matches!(
self,
ConcreteDataType::UInt8(_)
| ConcreteDataType::UInt16(_)
| ConcreteDataType::UInt32(_)
| ConcreteDataType::UInt64(_)
)
}

pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
]
}

/// Convert arrow data type to [ConcreteDataType].
///
/// # Panics
/// Panic if given arrow data type is not supported.
pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
ConcreteDataType::try_from(dt).expect("Unimplemented type")
}

pub fn is_null(&self) -> bool {
matches!(self, ConcreteDataType::Null(NullType))
}
}

impl TryFrom<&ArrowDataType> for ConcreteDataType {
type Error = Error;

fn try_from(dt: &ArrowDataType) -> Result<ConcreteDataType> {
let concrete_type = match dt {
ArrowDataType::Null => Self::null_datatype(),
ArrowDataType::Boolean => Self::boolean_datatype(),
ArrowDataType::UInt8 => Self::uint8_datatype(),
ArrowDataType::UInt16 => Self::uint16_datatype(),
ArrowDataType::UInt32 => Self::uint32_datatype(),
ArrowDataType::UInt64 => Self::uint64_datatype(),
ArrowDataType::Int8 => Self::int8_datatype(),
ArrowDataType::Int16 => Self::int16_datatype(),
ArrowDataType::Int32 => Self::int32_datatype(),
ArrowDataType::Int64 => Self::int64_datatype(),
ArrowDataType::Float32 => Self::float32_datatype(),
ArrowDataType::Float64 => Self::float64_datatype(),
ArrowDataType::Date32 => Self::date_datatype(),
ArrowDataType::Date64 => Self::datetime_datatype(),
ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
ArrowDataType::List(field) => Self::List(ListType::new(
ConcreteDataType::from_arrow_type(field.data_type()),
)),
_ => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: dt.clone(),
}
.fail()
}
};

Ok(concrete_type)
}
}

macro_rules! impl_new_concrete_type_functions {
($($Type: ident), +) => {
paste! {
impl ConcreteDataType {
$(
pub fn [<$Type:lower _datatype>]() -> ConcreteDataType {
ConcreteDataType::$Type([<$Type Type>]::default())
}
)+
}
}
}
}

impl_new_concrete_type_functions!(
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
Binary, Date, DateTime, String
);

impl ConcreteDataType {
pub fn timestamp_second_datatype() -> Self {
ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType::default()))
}

pub fn timestamp_millisecond_datatype() -> Self {
ConcreteDataType::Timestamp(TimestampType::Millisecond(
TimestampMillisecondType::default(),
))
}

pub fn timestamp_microsecond_datatype() -> Self {
ConcreteDataType::Timestamp(TimestampType::Microsecond(
TimestampMicrosecondType::default(),
))
}

pub fn timestamp_nanosecond_datatype() -> Self {
ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType::default()))
}

pub fn timestamp_datatype(unit: TimeUnit) -> Self {
match unit {
TimeUnit::Second => Self::timestamp_second_datatype(),
TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
}
}

/// Converts from arrow timestamp unit to
|
||||
pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
|
||||
match t {
|
||||
ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(item_type))
|
||||
}
|
||||
}
|
||||
|
||||
/// Data type abstraction.
|
||||
#[enum_dispatch::enum_dispatch]
|
||||
pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Name of this data type.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// Returns id of the Logical data type.
|
||||
fn logical_type_id(&self) -> LogicalTypeId;
|
||||
|
||||
/// Returns the default value of this type.
|
||||
fn default_value(&self) -> Value;
|
||||
|
||||
/// Convert this type as [arrow::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
|
||||
/// Creates a mutable vector with given `capacity` of this type.
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
|
||||
|
||||
/// Returns true if the data type is compatible with timestamp type so we can
|
||||
/// use it as a timestamp.
|
||||
fn is_timestamp_compatible(&self) -> bool;
|
||||
}
|
||||
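
// Editor's sketch of typical use of the trait (mirrors the test below):
//
//     let t = ConcreteDataType::boolean_datatype();
//     assert_eq!("Boolean", t.name());
//     assert_eq!(ArrowDataType::Boolean, t.as_arrow_type());
//     let mut vector = t.create_mutable_vector(16); // reserves 16 slots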

pub type DataTypeRef = Arc<dyn DataType>;

#[cfg(test)]
mod tests {
    use arrow::datatypes::Field;

    use super::*;

    #[test]
    fn test_concrete_type_as_datatype_trait() {
        let concrete_type = ConcreteDataType::boolean_datatype();

        assert_eq!("Boolean", concrete_type.name());
        assert_eq!(Value::Boolean(false), concrete_type.default_value());
        assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
        assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
    }

    #[test]
    fn test_from_arrow_type() {
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Null),
            ConcreteDataType::Null(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean),
            ConcreteDataType::Boolean(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Binary),
            ConcreteDataType::Binary(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary),
            ConcreteDataType::Binary(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int8),
            ConcreteDataType::Int8(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int16),
            ConcreteDataType::Int16(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int32),
            ConcreteDataType::Int32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Int64),
            ConcreteDataType::Int64(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8),
            ConcreteDataType::UInt8(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16),
            ConcreteDataType::UInt16(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32),
            ConcreteDataType::UInt32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64),
            ConcreteDataType::UInt64(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Float32),
            ConcreteDataType::Float32(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Float64),
            ConcreteDataType::Float64(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
            ConcreteDataType::String(_)
        ));
        assert_eq!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Box::new(Field::new(
                "item",
                ArrowDataType::Int32,
                true,
            )))),
            ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
        );
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
            ConcreteDataType::Date(_)
        ));
    }

    #[test]
    fn test_from_arrow_timestamp() {
        assert_eq!(
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
        );
    }

    #[test]
    fn test_is_timestamp_compatible() {
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
        );
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
        );
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
        );
        assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
    }

    #[test]
    fn test_is_null() {
        assert!(ConcreteDataType::null_datatype().is_null());
        assert!(!ConcreteDataType::int32_datatype().is_null());
    }

    #[test]
    fn test_is_float() {
        assert!(!ConcreteDataType::int32_datatype().is_float());
        assert!(ConcreteDataType::float32_datatype().is_float());
        assert!(ConcreteDataType::float64_datatype().is_float());
    }

    #[test]
    fn test_is_boolean() {
        assert!(!ConcreteDataType::int32_datatype().is_boolean());
        assert!(!ConcreteDataType::float32_datatype().is_boolean());
        assert!(ConcreteDataType::boolean_datatype().is_boolean());
    }

    #[test]
    fn test_is_stringifiable() {
        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
        assert!(ConcreteDataType::string_datatype().is_stringifiable());
        assert!(ConcreteDataType::date_datatype().is_stringifiable());
        assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
    }

    #[test]
    fn test_is_signed() {
        assert!(ConcreteDataType::int8_datatype().is_signed());
        assert!(ConcreteDataType::int16_datatype().is_signed());
        assert!(ConcreteDataType::int32_datatype().is_signed());
        assert!(ConcreteDataType::int64_datatype().is_signed());
        assert!(ConcreteDataType::date_datatype().is_signed());
        assert!(ConcreteDataType::datetime_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());

        assert!(!ConcreteDataType::uint8_datatype().is_signed());
        assert!(!ConcreteDataType::uint16_datatype().is_signed());
        assert!(!ConcreteDataType::uint32_datatype().is_signed());
        assert!(!ConcreteDataType::uint64_datatype().is_signed());

        assert!(!ConcreteDataType::float32_datatype().is_signed());
        assert!(!ConcreteDataType::float64_datatype().is_signed());
    }

    #[test]
    fn test_is_unsigned() {
        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
        assert!(!ConcreteDataType::date_datatype().is_unsigned());
        assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());

        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
        assert!(ConcreteDataType::uint64_datatype().is_unsigned());

        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
    }

    #[test]
    fn test_numerics() {
        let nums = ConcreteDataType::numerics();
        assert_eq!(10, nums.len());
    }
}
144
src/datatypes2/src/error.rs
Normal file
144
src/datatypes2/src/error.rs
Normal file
@@ -0,0 +1,144 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;

use common_error::prelude::{ErrorCompat, ErrorExt, Snafu, StatusCode};
use snafu::Backtrace;

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Failed to serialize data, source: {}", source))]
    Serialize {
        source: serde_json::Error,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to deserialize data, source: {}, json: {}", source, json))]
    Deserialize {
        source: serde_json::Error,
        backtrace: Backtrace,
        json: String,
    },

    #[snafu(display("Failed to convert datafusion type: {}", from))]
    Conversion { from: String, backtrace: Backtrace },

    #[snafu(display("Bad array access, index out of bounds: {}, size: {}", index, size))]
    BadArrayAccess {
        index: usize,
        size: usize,
        backtrace: Backtrace,
    },

    #[snafu(display("Unknown vector, {}", msg))]
    UnknownVector { msg: String, backtrace: Backtrace },

    #[snafu(display("Unsupported arrow data type, type: {:?}", arrow_type))]
    UnsupportedArrowType {
        arrow_type: arrow::datatypes::DataType,
        backtrace: Backtrace,
    },

    #[snafu(display("Timestamp column {} not found", name))]
    TimestampNotFound { name: String, backtrace: Backtrace },

    #[snafu(display(
        "Failed to parse version in schema meta, value: {}, source: {}",
        value,
        source
    ))]
    ParseSchemaVersion {
        value: String,
        source: std::num::ParseIntError,
        backtrace: Backtrace,
    },

    #[snafu(display("Invalid timestamp index: {}", index))]
    InvalidTimestampIndex { index: usize, backtrace: Backtrace },

    #[snafu(display("Duplicate timestamp index, exists: {}, new: {}", exists, new))]
    DuplicateTimestampIndex {
        exists: usize,
        new: usize,
        backtrace: Backtrace,
    },

    #[snafu(display("{}", msg))]
    CastType { msg: String, backtrace: Backtrace },

    #[snafu(display("Arrow failed to compute, source: {}", source))]
    ArrowCompute {
        source: arrow::error::ArrowError,
        backtrace: Backtrace,
    },

    #[snafu(display("Unsupported column default constraint expression: {}", expr))]
    UnsupportedDefaultExpr { expr: String, backtrace: Backtrace },

    #[snafu(display("Default value should not be null for a non-null column"))]
    NullDefault { backtrace: Backtrace },

    #[snafu(display("Incompatible default value type, reason: {}", reason))]
    DefaultValueType {
        reason: String,
        backtrace: Backtrace,
    },

    #[snafu(display("Duplicated metadata for {}", key))]
    DuplicateMeta { key: String, backtrace: Backtrace },
}

impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        // Inner encoding and decoding errors should not be exposed to users.
        StatusCode::Internal
    }

    fn backtrace_opt(&self) -> Option<&Backtrace> {
        ErrorCompat::backtrace(self)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}
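
// Editor's sketch: call sites attach these variants through snafu context
// selectors, exactly as the test below does. `data` here is hypothetical:
//
//     use snafu::ResultExt;
//     let json = serde_json::to_string(&data).context(SerializeSnafu)?;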

pub type Result<T> = std::result::Result<T, Error>;

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use snafu::ResultExt;

    use super::*;

    #[test]
    pub fn test_error() {
        let mut map = HashMap::new();
        map.insert(true, 1);
        map.insert(false, 2);

        let result = serde_json::to_string(&map).context(SerializeSnafu);
        assert!(result.is_err(), "serialize result is: {:?}", result);
        let err = serde_json::to_string(&map)
            .context(SerializeSnafu)
            .err()
            .unwrap();
        assert!(err.backtrace_opt().is_some());
        assert_eq!(StatusCode::Internal, err.status_code());
    }
}
33
src/datatypes2/src/lib.rs
Normal file
33
src/datatypes2/src/lib.rs
Normal file
@@ -0,0 +1,33 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(generic_associated_types)]
#![feature(assert_matches)]

pub mod arrow_array;
pub mod data_type;
pub mod error;
pub mod macros;
pub mod prelude;
mod scalars;
pub mod schema;
pub mod serialize;
mod timestamp;
pub mod type_id;
pub mod types;
pub mod value;
pub mod vectors;

pub use arrow;
pub use error::{Error, Result};
68
src/datatypes2/src/macros.rs
Normal file
68
src/datatypes2/src/macros.rs
Normal file
@@ -0,0 +1,68 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Some helper macros for datatypes, copied from databend.

/// Apply the macro rules to all primitive types.
#[macro_export]
macro_rules! for_all_primitive_types {
    ($macro:tt $(, $x:tt)*) => {
        $macro! {
            [$($x),*],
            { i8 },
            { i16 },
            { i32 },
            { i64 },
            { u8 },
            { u16 },
            { u32 },
            { u64 },
            { f32 },
            { f64 }
        }
    };
}

/// Match the logical type and apply `$body` to all primitive types and
/// `$nbody` to other types.
#[macro_export]
macro_rules! with_match_primitive_type_id {
    ($key_type:expr, | $_:tt $T:ident | $body:tt, $nbody:tt) => {{
        macro_rules! __with_ty__ {
            ( $_ $T:ident ) => {
                $body
            };
        }

        use $crate::type_id::LogicalTypeId;
        use $crate::types::{
            Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
            UInt32Type, UInt64Type, UInt8Type,
        };
        match $key_type {
            LogicalTypeId::Int8 => __with_ty__! { Int8Type },
            LogicalTypeId::Int16 => __with_ty__! { Int16Type },
            LogicalTypeId::Int32 => __with_ty__! { Int32Type },
            LogicalTypeId::Int64 => __with_ty__! { Int64Type },
            LogicalTypeId::UInt8 => __with_ty__! { UInt8Type },
            LogicalTypeId::UInt16 => __with_ty__! { UInt16Type },
            LogicalTypeId::UInt32 => __with_ty__! { UInt32Type },
            LogicalTypeId::UInt64 => __with_ty__! { UInt64Type },
            LogicalTypeId::Float32 => __with_ty__! { Float32Type },
            LogicalTypeId::Float64 => __with_ty__! { Float64Type },

            _ => $nbody,
        }
    }};
}
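
// Editor's usage sketch (hypothetical caller): dispatch on a runtime
// `LogicalTypeId`, with `$T` bound to the matching marker type such as
// `Int32Type`; `build_primitive_vector` is an assumed helper.
//
//     with_match_primitive_type_id!(type_id, |$T| {
//         build_primitive_vector::<$T>(capacity)
//     }, {
//         unreachable!("non-primitive logical type")
//     })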
20
src/datatypes2/src/prelude.rs
Normal file
20
src/datatypes2/src/prelude.rs
Normal file
@@ -0,0 +1,20 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
pub use crate::macros::*;
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
pub use crate::type_id::LogicalTypeId;
pub use crate::value::{Value, ValueRef};
pub use crate::vectors::{MutableVector, Validity, Vector, VectorRef};
443
src/datatypes2/src/scalars.rs
Normal file
443
src/datatypes2/src/scalars.rs
Normal file
@@ -0,0 +1,443 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;

use common_time::{Date, DateTime};

use crate::types::{
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
    UInt64Type, UInt8Type,
};
use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{
    BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, MutableVector,
    PrimitiveVector, StringVector, Vector,
};

fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    match iter.size_hint() {
        (_lower, Some(upper)) => upper,
        (0, None) => 1024,
        (lower, None) => lower,
    }
}
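
// Editor's note: the heuristic above trusts `Iterator::size_hint`. For
// example, `(0..10).size_hint()` is `(10, Some(10))`, so ten slots are
// reserved, while an adapter reporting `(0, None)` falls back to 1024.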

/// Owned scalar value, e.g. primitive types, bool, `Vec<u8>`...
pub trait Scalar: 'static + Sized + Default + Any
where
    for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
{
    type VectorType: ScalarVector<OwnedItem = Self>;
    type RefType<'a>: ScalarRef<'a, ScalarType = Self>
    where
        Self: 'a;
    /// Get a reference of the current value.
    fn as_scalar_ref(&self) -> Self::RefType<'_>;

    /// Upcast GAT type's lifetime.
    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short>;
}

pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
    /// The corresponding [`Scalar`] type.
    type ScalarType: Scalar<RefType<'a> = Self>;

    /// Convert the reference into an owned value.
    fn to_owned_scalar(&self) -> Self::ScalarType;
}
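
// Editor's sketch of the owned/borrowed round trip these two traits encode
// (using the `String`/`&str` impls defined later in this file):
//
//     let owned = String::from("hello");
//     let r: &str = owned.as_scalar_ref();
//     assert_eq!(owned, r.to_owned_scalar());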

/// A sub trait of Vector to add scalar operation support.
// This implementation refers to Databend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
pub trait ScalarVector: Vector + Send + Sync + Sized + 'static
where
    for<'a> Self::OwnedItem: Scalar<RefType<'a> = Self::RefItem<'a>>,
{
    type OwnedItem: Scalar<VectorType = Self>;
    /// The reference item of this vector.
    type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
    where
        Self: 'a;

    /// Iterator type of this vector.
    type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
    where
        Self: 'a;

    /// Builder type to build this vector.
    type Builder: ScalarVectorBuilder<VectorType = Self>;

    /// Returns the reference to an element at given position.
    ///
    /// Note: `get()` has bad performance, avoid calling this function inside a loop.
    ///
    /// # Panics
    /// Panics if `idx >= self.len()`.
    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;

    /// Returns an iterator over the current vector.
    fn iter_data(&self) -> Self::Iter<'_>;

    fn from_slice(data: &[Self::RefItem<'_>]) -> Self {
        let mut builder = Self::Builder::with_capacity(data.len());
        for item in data {
            builder.push(Some(*item));
        }
        builder.finish()
    }

    fn from_iterator<'a>(it: impl Iterator<Item = Self::RefItem<'a>>) -> Self {
        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
        for item in it {
            builder.push(Some(item));
        }
        builder.finish()
    }

    fn from_owned_iterator(it: impl Iterator<Item = Option<Self::OwnedItem>>) -> Self {
        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
        for item in it {
            match item {
                Some(item) => builder.push(Some(item.as_scalar_ref())),
                None => builder.push(None),
            }
        }
        builder.finish()
    }

    fn from_vec<I: Into<Self::OwnedItem>>(values: Vec<I>) -> Self {
        let it = values.into_iter();
        let mut builder = Self::Builder::with_capacity(get_iter_capacity(&it));
        for item in it {
            builder.push(Some(item.into().as_scalar_ref()));
        }
        builder.finish()
    }
}

/// A trait over all vector builders.
pub trait ScalarVectorBuilder: MutableVector {
    type VectorType: ScalarVector<Builder = Self>;

    /// Create a new builder with initial `capacity`.
    fn with_capacity(capacity: usize) -> Self;

    /// Push a value into the builder.
    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);

    /// Finish building and return a new vector.
    fn finish(&mut self) -> Self::VectorType;
}
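
// Editor's sketch of the builder protocol (mirrors the generic test helper at
// the bottom of this file): push `Option` items, then `finish` into a vector.
//
//     fn build_small<T: ScalarVector>(item: T::RefItem<'_>) -> T {
//         let mut builder = T::Builder::with_capacity(2);
//         builder.push(Some(item));
//         builder.push(None);
//         builder.finish()
//     }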

macro_rules! impl_scalar_for_native {
    ($Native: ident, $DataType: ident) => {
        impl Scalar for $Native {
            type VectorType = PrimitiveVector<$DataType>;
            type RefType<'a> = $Native;

            #[inline]
            fn as_scalar_ref(&self) -> $Native {
                *self
            }

            #[allow(clippy::needless_lifetimes)]
            #[inline]
            fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
                long
            }
        }

        /// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
        impl<'a> ScalarRef<'a> for $Native {
            type ScalarType = $Native;

            #[inline]
            fn to_owned_scalar(&self) -> $Native {
                *self
            }
        }
    };
}

impl_scalar_for_native!(u8, UInt8Type);
impl_scalar_for_native!(u16, UInt16Type);
impl_scalar_for_native!(u32, UInt32Type);
impl_scalar_for_native!(u64, UInt64Type);
impl_scalar_for_native!(i8, Int8Type);
impl_scalar_for_native!(i16, Int16Type);
impl_scalar_for_native!(i32, Int32Type);
impl_scalar_for_native!(i64, Int64Type);
impl_scalar_for_native!(f32, Float32Type);
impl_scalar_for_native!(f64, Float64Type);

impl Scalar for bool {
    type VectorType = BooleanVector;
    type RefType<'a> = bool;

    #[inline]
    fn as_scalar_ref(&self) -> bool {
        *self
    }

    #[allow(clippy::needless_lifetimes)]
    #[inline]
    fn upcast_gat<'short, 'long: 'short>(long: bool) -> bool {
        long
    }
}

impl<'a> ScalarRef<'a> for bool {
    type ScalarType = bool;

    #[inline]
    fn to_owned_scalar(&self) -> bool {
        *self
    }
}

impl Scalar for String {
    type VectorType = StringVector;
    type RefType<'a> = &'a str;

    #[inline]
    fn as_scalar_ref(&self) -> &str {
        self
    }

    #[inline]
    fn upcast_gat<'short, 'long: 'short>(long: &'long str) -> &'short str {
        long
    }
}

impl<'a> ScalarRef<'a> for &'a str {
    type ScalarType = String;

    #[inline]
    fn to_owned_scalar(&self) -> String {
        self.to_string()
    }
}

impl Scalar for Vec<u8> {
    type VectorType = BinaryVector;
    type RefType<'a> = &'a [u8];

    #[inline]
    fn as_scalar_ref(&self) -> &[u8] {
        self
    }

    #[inline]
    fn upcast_gat<'short, 'long: 'short>(long: &'long [u8]) -> &'short [u8] {
        long
    }
}

impl<'a> ScalarRef<'a> for &'a [u8] {
    type ScalarType = Vec<u8>;

    #[inline]
    fn to_owned_scalar(&self) -> Vec<u8> {
        self.to_vec()
    }
}

impl Scalar for Date {
    type VectorType = DateVector;
    type RefType<'a> = Date;

    fn as_scalar_ref(&self) -> Self::RefType<'_> {
        *self
    }

    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
        long
    }
}

impl<'a> ScalarRef<'a> for Date {
    type ScalarType = Date;

    fn to_owned_scalar(&self) -> Self::ScalarType {
        *self
    }
}

impl Scalar for DateTime {
    type VectorType = DateTimeVector;
    type RefType<'a> = DateTime;

    fn as_scalar_ref(&self) -> Self::RefType<'_> {
        *self
    }

    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
        long
    }
}

impl<'a> ScalarRef<'a> for DateTime {
    type ScalarType = DateTime;

    fn to_owned_scalar(&self) -> Self::ScalarType {
        *self
    }
}

// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.

impl Scalar for ListValue {
    type VectorType = ListVector;
    type RefType<'a> = ListValueRef<'a>;

    fn as_scalar_ref(&self) -> Self::RefType<'_> {
        ListValueRef::Ref { val: self }
    }

    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
        long
    }
}

impl<'a> ScalarRef<'a> for ListValueRef<'a> {
    type ScalarType = ListValue;

    fn to_owned_scalar(&self) -> Self::ScalarType {
        match self {
            ListValueRef::Indexed { vector, idx } => match vector.get(*idx) {
                // Normally we should not get `Value::Null` if the `ListValueRef` comes
                // from the iterator of the ListVector, but we avoid panicking and just
                // return a default list value in such a case, since `ListValueRef` may
                // be constructed manually.
                Value::Null => ListValue::default(),
                Value::List(v) => v,
                _ => unreachable!(),
            },
            ListValueRef::Ref { val } => (*val).clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;
    use crate::timestamp::TimestampSecond;
    use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};

    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for item in items {
            builder.push(*item);
        }
        builder.finish()
    }

    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        for (a, b) in expect.iter().zip(vector.iter_data()) {
            assert_eq!(*a, b);
        }
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_date_vector() {
        let expect: Vec<Option<Date>> = vec![
            Some(Date::new(0)),
            Some(Date::new(-1)),
            None,
            Some(Date::new(1)),
        ];
        let vector: DateVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_date_scalar() {
        let date = Date::new(1);
        assert_eq!(date, date.as_scalar_ref());
        assert_eq!(date, date.to_owned_scalar());
    }

    #[test]
    fn test_datetime_scalar() {
        let dt = DateTime::new(123);
        assert_eq!(dt, dt.as_scalar_ref());
        assert_eq!(dt, dt.to_owned_scalar());
    }

    #[test]
    fn test_list_value_scalar() {
        let list_value = ListValue::new(
            Some(Box::new(vec![Value::Int32(123)])),
            ConcreteDataType::int32_datatype(),
        );
        let list_ref = ListValueRef::Ref { val: &list_value };
        assert_eq!(list_ref, list_value.as_scalar_ref());
        assert_eq!(list_value, list_ref.to_owned_scalar());

        let mut builder =
            ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
        builder.push(None);
        builder.push(Some(list_value.as_scalar_ref()));
        let vector = builder.finish();

        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 0,
        };
        assert_eq!(ListValue::default(), ref_on_vec.to_owned_scalar());
        let ref_on_vec = ListValueRef::Indexed {
            vector: &vector,
            idx: 1,
        };
        assert_eq!(list_value, ref_on_vec.to_owned_scalar());
    }

    #[test]
    fn test_build_timestamp_vector() {
        let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampSecondVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
        let val = vector.get_data(0).unwrap();
        assert_eq!(val, val.as_scalar_ref());
        assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
    }
}
430
src/datatypes2/src/schema.rs
Normal file
430
src/datatypes2/src/schema.rs
Normal file
@@ -0,0 +1,430 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod column_schema;
mod constraint;
mod raw;

use std::collections::HashMap;
use std::sync::Arc;

use arrow::datatypes::{Field, Schema as ArrowSchema};
use snafu::{ensure, ResultExt};

use crate::data_type::DataType;
use crate::error::{self, Error, Result};
pub use crate::schema::column_schema::{ColumnSchema, Metadata};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;

/// Key used to store version number of the schema in metadata.
const VERSION_KEY: &str = "greptime:version";

/// A common schema, should be immutable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Schema {
    column_schemas: Vec<ColumnSchema>,
    name_to_index: HashMap<String, usize>,
    arrow_schema: Arc<ArrowSchema>,
    /// Index of the timestamp key column.
    ///
    /// The timestamp key column is the column that holds the timestamp and forms
    /// part of the primary key. None means there is no timestamp key column.
    timestamp_index: Option<usize>,
    /// Version of the schema.
    ///
    /// Initial value is zero. The version should be bumped after altering the schema.
    version: u32,
}

impl Schema {
    /// Initial version of the schema.
    pub const INITIAL_VERSION: u32 = 0;

    /// Create a schema from a vector of [ColumnSchema].
    ///
    /// # Panics
    /// Panics when a ColumnSchema's `default_constraint` can't be serialized into json.
    pub fn new(column_schemas: Vec<ColumnSchema>) -> Schema {
        // Builder won't fail in this case
        SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .build()
            .unwrap()
    }

    /// Try to create a schema from a vector of [ColumnSchema].
    pub fn try_new(column_schemas: Vec<ColumnSchema>) -> Result<Schema> {
        SchemaBuilder::try_from(column_schemas)?.build()
    }

    #[inline]
    pub fn arrow_schema(&self) -> &Arc<ArrowSchema> {
        &self.arrow_schema
    }

    #[inline]
    pub fn column_schemas(&self) -> &[ColumnSchema] {
        &self.column_schemas
    }

    pub fn column_schema_by_name(&self, name: &str) -> Option<&ColumnSchema> {
        self.name_to_index
            .get(name)
            .map(|index| &self.column_schemas[*index])
    }

    /// Retrieve the column's name by index.
    ///
    /// # Panics
    /// This method **may** panic if the index is out of range of column schemas.
    #[inline]
    pub fn column_name_by_index(&self, idx: usize) -> &str {
        &self.column_schemas[idx].name
    }

    #[inline]
    pub fn column_index_by_name(&self, name: &str) -> Option<usize> {
        self.name_to_index.get(name).copied()
    }

    #[inline]
    pub fn contains_column(&self, name: &str) -> bool {
        self.name_to_index.contains_key(name)
    }

    #[inline]
    pub fn num_columns(&self) -> usize {
        self.column_schemas.len()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.column_schemas.is_empty()
    }

    /// Returns index of the timestamp key column.
    #[inline]
    pub fn timestamp_index(&self) -> Option<usize> {
        self.timestamp_index
    }

    #[inline]
    pub fn timestamp_column(&self) -> Option<&ColumnSchema> {
        self.timestamp_index.map(|idx| &self.column_schemas[idx])
    }

    #[inline]
    pub fn version(&self) -> u32 {
        self.version
    }

    #[inline]
    pub fn metadata(&self) -> &HashMap<String, String> {
        &self.arrow_schema.metadata
    }
}

#[derive(Default)]
pub struct SchemaBuilder {
    column_schemas: Vec<ColumnSchema>,
    name_to_index: HashMap<String, usize>,
    fields: Vec<Field>,
    timestamp_index: Option<usize>,
    version: u32,
    metadata: HashMap<String, String>,
}

impl TryFrom<Vec<ColumnSchema>> for SchemaBuilder {
    type Error = Error;

    fn try_from(column_schemas: Vec<ColumnSchema>) -> Result<SchemaBuilder> {
        SchemaBuilder::try_from_columns(column_schemas)
    }
}

impl SchemaBuilder {
    pub fn try_from_columns(column_schemas: Vec<ColumnSchema>) -> Result<Self> {
        let FieldsAndIndices {
            fields,
            name_to_index,
            timestamp_index,
        } = collect_fields(&column_schemas)?;

        Ok(Self {
            column_schemas,
            name_to_index,
            fields,
            timestamp_index,
            ..Default::default()
        })
    }

    pub fn version(mut self, version: u32) -> Self {
        self.version = version;
        self
    }

    /// Add a key-value pair to the metadata.
    ///
    /// Old metadata with the same key will be overwritten.
    pub fn add_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.metadata.insert(key.into(), value.into());
        self
    }

    pub fn build(mut self) -> Result<Schema> {
        if let Some(timestamp_index) = self.timestamp_index {
            validate_timestamp_index(&self.column_schemas, timestamp_index)?;
        }

        self.metadata
            .insert(VERSION_KEY.to_string(), self.version.to_string());

        let arrow_schema = ArrowSchema::new(self.fields).with_metadata(self.metadata);

        Ok(Schema {
            column_schemas: self.column_schemas,
            name_to_index: self.name_to_index,
            arrow_schema: Arc::new(arrow_schema),
            timestamp_index: self.timestamp_index,
            version: self.version,
        })
    }
}
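
// Editor's sketch of the builder flow (mirrors the tests below): columns in,
// optional version and metadata, then a validated `Schema` out.
//
//     let schema = SchemaBuilder::try_from(column_schemas)?
//         .version(1)
//         .add_metadata("k1", "v1")
//         .build()?;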

struct FieldsAndIndices {
    fields: Vec<Field>,
    name_to_index: HashMap<String, usize>,
    timestamp_index: Option<usize>,
}

fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
    let mut fields = Vec::with_capacity(column_schemas.len());
    let mut name_to_index = HashMap::with_capacity(column_schemas.len());
    let mut timestamp_index = None;
    for (index, column_schema) in column_schemas.iter().enumerate() {
        if column_schema.is_time_index() {
            ensure!(
                timestamp_index.is_none(),
                error::DuplicateTimestampIndexSnafu {
                    exists: timestamp_index.unwrap(),
                    new: index,
                }
            );
            timestamp_index = Some(index);
        }
        let field = Field::try_from(column_schema)?;
        fields.push(field);
        name_to_index.insert(column_schema.name.clone(), index);
    }

    Ok(FieldsAndIndices {
        fields,
        name_to_index,
        timestamp_index,
    })
}

fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: usize) -> Result<()> {
    ensure!(
        timestamp_index < column_schemas.len(),
        error::InvalidTimestampIndexSnafu {
            index: timestamp_index,
        }
    );

    let column_schema = &column_schemas[timestamp_index];
    ensure!(
        column_schema.data_type.is_timestamp_compatible(),
        error::InvalidTimestampIndexSnafu {
            index: timestamp_index,
        }
    );
    ensure!(
        column_schema.is_time_index(),
        error::InvalidTimestampIndexSnafu {
            index: timestamp_index,
        }
    );

    Ok(())
}

pub type SchemaRef = Arc<Schema>;

impl TryFrom<Arc<ArrowSchema>> for Schema {
    type Error = Error;

    fn try_from(arrow_schema: Arc<ArrowSchema>) -> Result<Schema> {
        let mut column_schemas = Vec::with_capacity(arrow_schema.fields.len());
        let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
        for field in &arrow_schema.fields {
            let column_schema = ColumnSchema::try_from(field)?;
            name_to_index.insert(field.name().to_string(), column_schemas.len());
            column_schemas.push(column_schema);
        }

        let mut timestamp_index = None;
        for (index, column_schema) in column_schemas.iter().enumerate() {
            if column_schema.is_time_index() {
                validate_timestamp_index(&column_schemas, index)?;
                ensure!(
                    timestamp_index.is_none(),
                    error::DuplicateTimestampIndexSnafu {
                        exists: timestamp_index.unwrap(),
                        new: index,
                    }
                );
                timestamp_index = Some(index);
            }
        }

        let version = try_parse_version(&arrow_schema.metadata, VERSION_KEY)?;

        Ok(Self {
            column_schemas,
            name_to_index,
            arrow_schema,
            timestamp_index,
            version,
        })
    }
}

impl TryFrom<ArrowSchema> for Schema {
    type Error = Error;

    fn try_from(arrow_schema: ArrowSchema) -> Result<Schema> {
        let arrow_schema = Arc::new(arrow_schema);

        Schema::try_from(arrow_schema)
    }
}

fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u32> {
    if let Some(value) = metadata.get(key) {
        let version = value
            .parse()
            .context(error::ParseSchemaVersionSnafu { value })?;

        Ok(version)
    } else {
        Ok(Schema::INITIAL_VERSION)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;

    #[test]
    fn test_build_empty_schema() {
        let schema = SchemaBuilder::default().build().unwrap();
        assert_eq!(0, schema.num_columns());
        assert!(schema.is_empty());
    }

    #[test]
    fn test_schema_no_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), false),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), true),
        ];
        let schema = Schema::new(column_schemas.clone());

        assert_eq!(2, schema.num_columns());
        assert!(!schema.is_empty());
        assert!(schema.timestamp_index().is_none());
        assert!(schema.timestamp_column().is_none());
        assert_eq!(Schema::INITIAL_VERSION, schema.version());

        for column_schema in &column_schemas {
            let found = schema.column_schema_by_name(&column_schema.name).unwrap();
            assert_eq!(column_schema, found);
        }
        assert!(schema.column_schema_by_name("col3").is_none());

        let new_schema = Schema::try_from(schema.arrow_schema().clone()).unwrap();

        assert_eq!(schema, new_schema);
        assert_eq!(column_schemas, schema.column_schemas());
    }

    #[test]
    fn test_metadata() {
        let column_schemas = vec![ColumnSchema::new(
            "col1",
            ConcreteDataType::int32_datatype(),
            false,
        )];
        let schema = SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .add_metadata("k1", "v1")
            .build()
            .unwrap();

        assert_eq!("v1", schema.metadata().get("k1").unwrap());
    }

    #[test]
    fn test_schema_with_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            )
            .with_time_index(true),
        ];
        let schema = SchemaBuilder::try_from(column_schemas.clone())
            .unwrap()
            .version(123)
            .build()
            .unwrap();

        assert_eq!(1, schema.timestamp_index().unwrap());
        assert_eq!(&column_schemas[1], schema.timestamp_column().unwrap());
        assert_eq!(123, schema.version());

        let new_schema = Schema::try_from(schema.arrow_schema().clone()).unwrap();
        assert_eq!(1, schema.timestamp_index().unwrap());
        assert_eq!(schema, new_schema);
    }

    #[test]
    fn test_schema_wrong_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true)
                .with_time_index(true),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), false),
        ];
        assert!(SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .build()
            .is_err());

        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new("col2", ConcreteDataType::float64_datatype(), false)
                .with_time_index(true),
        ];

        assert!(SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .build()
            .is_err());
    }
}
305
src/datatypes2/src/schema/column_schema.rs
Normal file
305
src/datatypes2/src/schema/column_schema.rs
Normal file
@@ -0,0 +1,305 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;

use arrow::datatypes::Field;
use serde::{Deserialize, Serialize};
use snafu::{ensure, ResultExt};

use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, Result};
use crate::schema::constraint::ColumnDefaultConstraint;
use crate::vectors::VectorRef;

pub type Metadata = BTreeMap<String, String>;

/// Key used to store whether the column is time index in arrow field's metadata.
const TIME_INDEX_KEY: &str = "greptime:time_index";
/// Key used to store default constraint in arrow field's metadata.
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";

/// Schema of a column, used as an immutable struct.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
    pub name: String,
    pub data_type: ConcreteDataType,
    is_nullable: bool,
    is_time_index: bool,
    default_constraint: Option<ColumnDefaultConstraint>,
    metadata: Metadata,
}

impl ColumnSchema {
    pub fn new<T: Into<String>>(
        name: T,
        data_type: ConcreteDataType,
        is_nullable: bool,
    ) -> ColumnSchema {
        ColumnSchema {
            name: name.into(),
            data_type,
            is_nullable,
            is_time_index: false,
            default_constraint: None,
            metadata: Metadata::new(),
        }
    }

    #[inline]
    pub fn is_time_index(&self) -> bool {
        self.is_time_index
    }

    #[inline]
    pub fn is_nullable(&self) -> bool {
        self.is_nullable
    }

    #[inline]
    pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
        self.default_constraint.as_ref()
    }

    #[inline]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }

    pub fn with_time_index(mut self, is_time_index: bool) -> Self {
        self.is_time_index = is_time_index;
        if is_time_index {
            self.metadata
                .insert(TIME_INDEX_KEY.to_string(), "true".to_string());
        } else {
            self.metadata.remove(TIME_INDEX_KEY);
        }
        self
    }

    pub fn with_default_constraint(
        mut self,
        default_constraint: Option<ColumnDefaultConstraint>,
    ) -> Result<Self> {
        if let Some(constraint) = &default_constraint {
            constraint.validate(&self.data_type, self.is_nullable)?;
        }

        self.default_constraint = default_constraint;
        Ok(self)
    }

    /// Creates a new [`ColumnSchema`] with given metadata.
    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
        self.metadata = metadata;
        self
    }

    pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
        match &self.default_constraint {
            Some(c) => c
                .create_default_vector(&self.data_type, self.is_nullable, num_rows)
                .map(Some),
            None => {
                if self.is_nullable {
                    // No default constraint, use null as the default value.
                    // TODO(yingwen): Use NullVector once it supports setting logical type.
                    ColumnDefaultConstraint::null_value()
                        .create_default_vector(&self.data_type, self.is_nullable, num_rows)
                        .map(Some)
                } else {
                    Ok(None)
                }
            }
        }
    }
}
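
// Editor's sketch: typical construction of a nullable value column and a
// time index column (mirrors the schema tests):
//
//     let col = ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true);
//     let ts = ColumnSchema::new(
//         "ts",
//         ConcreteDataType::timestamp_millisecond_datatype(),
//         false,
//     )
//     .with_time_index(true);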
|
||||
impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut metadata = field.metadata().cloned().unwrap_or_default();
|
||||
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => {
|
||||
                Some(serde_json::from_str(&json).context(error::DeserializeSnafu { json })?)
            }
            None => None,
        };
        let is_time_index = metadata.contains_key(TIME_INDEX_KEY);

        Ok(ColumnSchema {
            name: field.name().clone(),
            data_type,
            is_nullable: field.is_nullable(),
            is_time_index,
            default_constraint,
            metadata,
        })
    }
}

impl TryFrom<&ColumnSchema> for Field {
    type Error = Error;

    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
        let mut metadata = column_schema.metadata.clone();
        if let Some(value) = &column_schema.default_constraint {
            // Add an extra metadata entry to store the default constraint.
            let old = metadata.insert(
                DEFAULT_CONSTRAINT_KEY.to_string(),
                serde_json::to_string(&value).context(error::SerializeSnafu)?,
            );

            ensure!(
                old.is_none(),
                error::DuplicateMetaSnafu {
                    key: DEFAULT_CONSTRAINT_KEY,
                }
            );
        }

        Ok(Field::new(
            &column_schema.name,
            column_schema.data_type.as_arrow_type(),
            column_schema.is_nullable(),
        )
        .with_metadata(Some(metadata)))
    }
}

#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;

    use super::*;
    use crate::value::Value;

    #[test]
    fn test_column_schema() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_default_constraint() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
            .unwrap();
        assert!(column_schema
            .metadata()
            .get(DEFAULT_CONSTRAINT_KEY)
            .is_none());

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name());
        assert_eq!(ArrowDataType::Int32, *field.data_type());
        assert!(field.is_nullable());
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field
                .metadata()
                .unwrap()
                .get(DEFAULT_CONSTRAINT_KEY)
                .unwrap()
        );

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_metadata() {
        let mut metadata = Metadata::new();
        metadata.insert("k1".to_string(), "v1".to_string());
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
        assert!(column_schema
            .metadata()
            .get(DEFAULT_CONSTRAINT_KEY)
            .is_none());

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("v1", field.metadata().unwrap().get("k1").unwrap());
        assert!(field
            .metadata()
            .unwrap()
            .get(DEFAULT_CONSTRAINT_KEY)
            .is_some());

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        let mut metadata = Metadata::new();
        metadata.insert(DEFAULT_CONSTRAINT_KEY.to_string(), "v1".to_string());
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        Field::try_from(&column_schema).unwrap_err();
    }

    #[test]
    fn test_column_schema_invalid_default_constraint() {
        ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap_err();
    }

    #[test]
    fn test_column_default_constraint_try_into_from() {
        let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
        let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();

        assert_eq!(default_constraint, from_value);
    }

    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());
    }

    #[test]
    fn test_column_schema_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default_vector(5).unwrap().is_none());
    }
}
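The two `TryFrom` impls above give a lossless round trip between a ColumnSchema and an Arrow Field: the default constraint rides along in the Field's metadata map under DEFAULT_CONSTRAINT_KEY, since Arrow has no native notion of a default value. A minimal sketch of that round trip, using only constructors exercised by the tests above (the "host" column name is illustrative):

// Sketch: ColumnSchema -> Field -> ColumnSchema round trip.
let column_schema = ColumnSchema::new("host", ConcreteDataType::int32_datatype(), true)
    .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(0))))
    .unwrap();
// The constraint is serialized as JSON into the Field's metadata.
let field = Field::try_from(&column_schema).unwrap();
let recovered = ColumnSchema::try_from(&field).unwrap();
assert_eq!(column_schema, recovered);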
306
src/datatypes2/src/schema/constraint.rs
Normal file
@@ -0,0 +1,306 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{Display, Formatter};
use std::sync::Arc;

use common_time::util;
use serde::{Deserialize, Serialize};
use snafu::{ensure, ResultExt};

use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::value::Value;
use crate::vectors::{Int64Vector, TimestampMillisecondVector, VectorRef};

const CURRENT_TIMESTAMP: &str = "current_timestamp()";

/// Column's default constraint.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ColumnDefaultConstraint {
    // A function invocation.
    // TODO(dennis): we save the function expression here; maybe use a struct in the future.
    Function(String),
    // A value.
    Value(Value),
}

impl TryFrom<&[u8]> for ColumnDefaultConstraint {
    type Error = error::Error;

    fn try_from(bytes: &[u8]) -> Result<Self> {
        let json = String::from_utf8_lossy(bytes);
        serde_json::from_str(&json).context(error::DeserializeSnafu { json })
    }
}

impl TryFrom<ColumnDefaultConstraint> for Vec<u8> {
    type Error = error::Error;

    fn try_from(value: ColumnDefaultConstraint) -> std::result::Result<Self, Self::Error> {
        let s = serde_json::to_string(&value).context(error::SerializeSnafu)?;
        Ok(s.into_bytes())
    }
}

impl Display for ColumnDefaultConstraint {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            ColumnDefaultConstraint::Function(expr) => write!(f, "{}", expr),
            ColumnDefaultConstraint::Value(v) => write!(f, "{}", v),
        }
    }
}

impl ColumnDefaultConstraint {
    /// Returns a default null constraint.
    pub fn null_value() -> ColumnDefaultConstraint {
        ColumnDefaultConstraint::Value(Value::Null)
    }

    /// Check whether the constraint is valid for columns with given `data_type`
    /// and `is_nullable` attributes.
    pub fn validate(&self, data_type: &ConcreteDataType, is_nullable: bool) -> Result<()> {
        ensure!(is_nullable || !self.maybe_null(), error::NullDefaultSnafu);

        match self {
            ColumnDefaultConstraint::Function(expr) => {
                ensure!(
                    expr == CURRENT_TIMESTAMP,
                    error::UnsupportedDefaultExprSnafu { expr }
                );
                ensure!(
                    data_type.is_timestamp_compatible(),
                    error::DefaultValueTypeSnafu {
                        reason: "return value of the function must have a timestamp type",
                    }
                );
            }
            ColumnDefaultConstraint::Value(v) => {
                if !v.is_null() {
                    // Whether the value could be nullable has been checked before; we only need
                    // to check the type compatibility here.
                    ensure!(
                        data_type.logical_type_id() == v.logical_type_id(),
                        error::DefaultValueTypeSnafu {
                            reason: format!(
                                "column has type {:?} but default value has type {:?}",
                                data_type.logical_type_id(),
                                v.logical_type_id()
                            ),
                        }
                    );
                }
            }
        }

        Ok(())
    }

    /// Create a vector that contains `num_rows` default values for given `data_type`.
    ///
    /// If `is_nullable` is `false`, this method returns an error if the created
    /// default value is null.
    ///
    /// # Panics
    /// Panics if `num_rows == 0`.
    pub fn create_default_vector(
        &self,
        data_type: &ConcreteDataType,
        is_nullable: bool,
        num_rows: usize,
    ) -> Result<VectorRef> {
        assert!(num_rows > 0);

        match self {
            ColumnDefaultConstraint::Function(expr) => {
                // Functions should also ensure their return values are not null when
                // `is_nullable` is false.
                match &expr[..] {
                    // TODO(dennis): we only support current_timestamp right now;
                    // it's better to use an expression framework in the future.
                    CURRENT_TIMESTAMP => create_current_timestamp_vector(data_type, num_rows),
                    _ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
                }
            }
            ColumnDefaultConstraint::Value(v) => {
                ensure!(is_nullable || !v.is_null(), error::NullDefaultSnafu);

                // TODO(yingwen):
                // 1. For null values, we could use NullVector once it supports custom logical types.
                // 2. For non-null values, we could use ConstantVector, but it would make all code
                //    that attempts to downcast the vector fail unless it first checks whether the
                //    vector is const.
                let mut mutable_vector = data_type.create_mutable_vector(1);
                mutable_vector.push_value_ref(v.as_value_ref())?;
                let base_vector = mutable_vector.to_vector();
                Ok(base_vector.replicate(&[num_rows]))
            }
        }
    }

    /// Returns true if this constraint might create NULL.
    fn maybe_null(&self) -> bool {
        // Once we support more functions, we may return true if the given function
        // could return null.
        matches!(self, ColumnDefaultConstraint::Value(Value::Null))
    }
}

fn create_current_timestamp_vector(
    data_type: &ConcreteDataType,
    num_rows: usize,
) -> Result<VectorRef> {
    // FIXME(yingwen): We should implement cast in VectorOp so we could cast the millisecond
    // vector to other data types and avoid this match.
    match data_type {
        ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampMillisecondVector::from_values(
            std::iter::repeat(util::current_time_millis()).take(num_rows),
        ))),
        ConcreteDataType::Int64(_) => Ok(Arc::new(Int64Vector::from_values(
            std::iter::repeat(util::current_time_millis()).take(num_rows),
        ))),
        _ => error::DefaultValueTypeSnafu {
            reason: format!(
                "assigning current timestamp to {:?} type is not supported",
                data_type
            ),
        }
        .fail(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Error;
    use crate::vectors::Int32Vector;

    #[test]
    fn test_null_default_constraint() {
        let constraint = ColumnDefaultConstraint::null_value();
        assert!(constraint.maybe_null());
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        assert!(!constraint.maybe_null());
    }

    #[test]
    fn test_validate_null_constraint() {
        let constraint = ColumnDefaultConstraint::null_value();
        let data_type = ConcreteDataType::int32_datatype();
        constraint.validate(&data_type, false).unwrap_err();
        constraint.validate(&data_type, true).unwrap();
    }

    #[test]
    fn test_validate_value_constraint() {
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        let data_type = ConcreteDataType::int32_datatype();
        constraint.validate(&data_type, false).unwrap();
        constraint.validate(&data_type, true).unwrap();

        constraint
            .validate(&ConcreteDataType::uint32_datatype(), true)
            .unwrap_err();
    }

    #[test]
    fn test_validate_function_constraint() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap();
        constraint
            .validate(&ConcreteDataType::boolean_datatype(), false)
            .unwrap_err();

        let constraint = ColumnDefaultConstraint::Function("hello()".to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap_err();
    }

    #[test]
    fn test_create_default_vector_by_null() {
        let constraint = ColumnDefaultConstraint::null_value();
        let data_type = ConcreteDataType::int32_datatype();
        constraint
            .create_default_vector(&data_type, false, 10)
            .unwrap_err();

        let constraint = ColumnDefaultConstraint::null_value();
        let v = constraint
            .create_default_vector(&data_type, true, 3)
            .unwrap();
        assert_eq!(3, v.len());
        for i in 0..v.len() {
            assert_eq!(Value::Null, v.get(i));
        }
    }

    #[test]
    fn test_create_default_vector_by_value() {
        let constraint = ColumnDefaultConstraint::Value(Value::Int32(10));
        let data_type = ConcreteDataType::int32_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        let expect: VectorRef = Arc::new(Int32Vector::from_values(vec![10; 4]));
        assert_eq!(expect, v);
    }

    #[test]
    fn test_create_default_vector_by_func() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        // Timestamp type.
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        assert_eq!(4, v.len());
        assert!(
            matches!(v.get(0), Value::Timestamp(_)),
            "v {:?} is not timestamp",
            v.get(0)
        );

        // Int64 type.
        let data_type = ConcreteDataType::int64_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
        assert_eq!(4, v.len());
        assert!(
            matches!(v.get(0), Value::Int64(_)),
            "v {:?} is not int64",
            v.get(0)
        );

        let constraint = ColumnDefaultConstraint::Function("no".to_string());
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap_err();
    }

    #[test]
    fn test_create_by_func_and_invalid_type() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        let data_type = ConcreteDataType::boolean_datatype();
        let err = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap_err();
        assert!(matches!(err, Error::DefaultValueType { .. }), "{:?}", err);
    }
}
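Taken together, validate and create_default_vector cover the two moments a default constraint matters: checking it when the column is defined, and materializing values when a write omits the column. A minimal sketch under that assumption, using only APIs defined in this file:

// Sketch: fill 8 missing rows of a timestamp column with current_timestamp().
let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
// Reject the constraint up front if it can't produce values of this type.
constraint.validate(&data_type, false).unwrap();
// Then materialize one default value per missing row.
let v = constraint.create_default_vector(&data_type, false, 8).unwrap();
assert_eq!(8, v.len());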
77
src/datatypes2/src/schema/raw.rs
Normal file
@@ -0,0 +1,77 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::error::{Error, Result};
use crate::schema::{ColumnSchema, Schema, SchemaBuilder};

/// Struct used to serialize and deserialize [`Schema`](crate::schema::Schema).
///
/// This struct only contains the data necessary to recover the Schema.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RawSchema {
    pub column_schemas: Vec<ColumnSchema>,
    pub timestamp_index: Option<usize>,
    pub version: u32,
}

impl TryFrom<RawSchema> for Schema {
    type Error = Error;

    fn try_from(raw: RawSchema) -> Result<Schema> {
        SchemaBuilder::try_from(raw.column_schemas)?
            .version(raw.version)
            .build()
    }
}

impl From<&Schema> for RawSchema {
    fn from(schema: &Schema) -> RawSchema {
        RawSchema {
            column_schemas: schema.column_schemas.clone(),
            timestamp_index: schema.timestamp_index,
            version: schema.version,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::ConcreteDataType;

    #[test]
    fn test_raw_convert() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            )
            .with_time_index(true),
        ];
        let schema = SchemaBuilder::try_from(column_schemas)
            .unwrap()
            .version(123)
            .build()
            .unwrap();

        let raw = RawSchema::from(&schema);
        let schema_new = Schema::try_from(raw).unwrap();

        assert_eq!(schema, schema_new);
    }
}
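Because RawSchema derives Serialize and Deserialize, persisting a schema is plain serde plus the two conversions above. A minimal sketch, assuming `schema` was built as in test_raw_convert:

// Sketch: persist a Schema as JSON and recover it.
let raw = RawSchema::from(&schema);
let json = serde_json::to_string(&raw).unwrap();

let decoded: RawSchema = serde_json::from_str(&json).unwrap();
let recovered = Schema::try_from(decoded).unwrap();
assert_eq!(schema, recovered);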
20
src/datatypes2/src/serialize.rs
Normal file
@@ -0,0 +1,20 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::error::Result;

pub trait Serializable: Send + Sync {
    /// Serialize a column of values with the given type to JSON values.
    fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>>;
}
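The contract here is one JSON value per row of the column. A minimal sketch of an implementation for a hypothetical in-memory column (DummyColumn is illustrative only, not part of the crate):

// Hypothetical column type, for illustration only.
struct DummyColumn(Vec<i64>);

impl Serializable for DummyColumn {
    fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
        // One JSON value per row.
        Ok(self.0.iter().map(|v| serde_json::Value::from(*v)).collect())
    }
}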
135
src/datatypes2/src/timestamp.rs
Normal file
@@ -0,0 +1,135 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use paste::paste;
use serde::{Deserialize, Serialize};

use crate::prelude::{Scalar, Value, ValueRef};
use crate::scalars::ScalarRef;
use crate::types::{
    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
    TimestampSecondType, WrapperType,
};
use crate::vectors::{
    TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
    TimestampSecondVector,
};

macro_rules! define_timestamp_with_unit {
    ($unit: ident) => {
        paste! {
            #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
            pub struct [<Timestamp $unit>](pub Timestamp);

            impl [<Timestamp $unit>] {
                pub fn new(val: i64) -> Self {
                    Self(Timestamp::new(val, TimeUnit::$unit))
                }
            }

            impl Default for [<Timestamp $unit>] {
                fn default() -> Self {
                    Self::new(0)
                }
            }

            impl From<[<Timestamp $unit>]> for Value {
                fn from(t: [<Timestamp $unit>]) -> Value {
                    Value::Timestamp(t.0)
                }
            }

            impl From<[<Timestamp $unit>]> for serde_json::Value {
                fn from(t: [<Timestamp $unit>]) -> Self {
                    t.0.into()
                }
            }

            impl From<[<Timestamp $unit>]> for ValueRef<'static> {
                fn from(t: [<Timestamp $unit>]) -> Self {
                    ValueRef::Timestamp(t.0)
                }
            }

            impl Scalar for [<Timestamp $unit>] {
                type VectorType = [<Timestamp $unit Vector>];
                type RefType<'a> = [<Timestamp $unit>];

                fn as_scalar_ref(&self) -> Self::RefType<'_> {
                    *self
                }

                fn upcast_gat<'short, 'long: 'short>(
                    long: Self::RefType<'long>,
                ) -> Self::RefType<'short> {
                    long
                }
            }

            impl<'a> ScalarRef<'a> for [<Timestamp $unit>] {
                type ScalarType = [<Timestamp $unit>];

                fn to_owned_scalar(&self) -> Self::ScalarType {
                    *self
                }
            }

            impl WrapperType for [<Timestamp $unit>] {
                type LogicalType = [<Timestamp $unit Type>];
                type Native = i64;

                fn from_native(value: Self::Native) -> Self {
                    Self::new(value)
                }

                fn into_native(self) -> Self::Native {
                    self.0.into()
                }
            }

            impl From<i64> for [<Timestamp $unit>] {
                fn from(val: i64) -> Self {
                    [<Timestamp $unit>]::from_native(val)
                }
            }
        }
    };
}

define_timestamp_with_unit!(Second);
define_timestamp_with_unit!(Millisecond);
define_timestamp_with_unit!(Microsecond);
define_timestamp_with_unit!(Nanosecond);

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_timestamp_scalar() {
        let ts = TimestampSecond::new(123);
        assert_eq!(ts, ts.as_scalar_ref());
        assert_eq!(ts, ts.to_owned_scalar());
        let ts = TimestampMillisecond::new(123);
        assert_eq!(ts, ts.as_scalar_ref());
        assert_eq!(ts, ts.to_owned_scalar());
        let ts = TimestampMicrosecond::new(123);
        assert_eq!(ts, ts.as_scalar_ref());
        assert_eq!(ts, ts.to_owned_scalar());
        let ts = TimestampNanosecond::new(123);
        assert_eq!(ts, ts.as_scalar_ref());
        assert_eq!(ts, ts.to_owned_scalar());
    }
}
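Each macro invocation generates a unit-tagged wrapper, e.g. TimestampMillisecond, that converts freely between the raw i64 and the crate's value types. A minimal usage sketch based only on the impls generated above:

// Sketch: a millisecond timestamp wrapper and its conversions.
let ts = TimestampMillisecond::new(1_000);
let v: Value = ts.into(); // becomes Value::Timestamp(...)
assert!(matches!(v, Value::Timestamp(_)));
let raw: i64 = ts.into_native(); // WrapperType: back to the raw i64
assert_eq!(1_000, raw);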
93
src/datatypes2/src/type_id.rs
Normal file
@@ -0,0 +1,93 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/// Unique identifier for logical data type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LogicalTypeId {
    Null,

    // Numeric types:
    Boolean,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Float32,
    Float64,

    // String types:
    String,
    Binary,

    // Date & Time types:
    /// Date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
    /// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
    DateTime,

    TimestampSecond,
    TimestampMillisecond,
    TimestampMicrosecond,
    TimestampNanosecond,

    List,
}

impl LogicalTypeId {
    /// Create a ConcreteDataType based on this id. This method is for tests only,
    /// as it would lose some info.
    ///
    /// # Panics
    /// Panics if the data type is not supported.
    #[cfg(any(test, feature = "test"))]
    pub fn data_type(&self) -> crate::data_type::ConcreteDataType {
        use crate::data_type::ConcreteDataType;

        match self {
            LogicalTypeId::Null => ConcreteDataType::null_datatype(),
            LogicalTypeId::Boolean => ConcreteDataType::boolean_datatype(),
            LogicalTypeId::Int8 => ConcreteDataType::int8_datatype(),
            LogicalTypeId::Int16 => ConcreteDataType::int16_datatype(),
            LogicalTypeId::Int32 => ConcreteDataType::int32_datatype(),
            LogicalTypeId::Int64 => ConcreteDataType::int64_datatype(),
            LogicalTypeId::UInt8 => ConcreteDataType::uint8_datatype(),
            LogicalTypeId::UInt16 => ConcreteDataType::uint16_datatype(),
            LogicalTypeId::UInt32 => ConcreteDataType::uint32_datatype(),
            LogicalTypeId::UInt64 => ConcreteDataType::uint64_datatype(),
            LogicalTypeId::Float32 => ConcreteDataType::float32_datatype(),
            LogicalTypeId::Float64 => ConcreteDataType::float64_datatype(),
            LogicalTypeId::String => ConcreteDataType::string_datatype(),
            LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
            LogicalTypeId::Date => ConcreteDataType::date_datatype(),
            LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
            LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
            LogicalTypeId::TimestampMillisecond => {
                ConcreteDataType::timestamp_millisecond_datatype()
            }
            LogicalTypeId::TimestampMicrosecond => {
                ConcreteDataType::timestamp_microsecond_datatype()
            }
            LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
            LogicalTypeId::List => {
                ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
            }
        }
    }
}
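In tests this makes it easy to go from an id back to a concrete type and confirm the mapping is self-consistent. A minimal sketch, valid only inside a #[cfg(test)] context since data_type is gated on the test feature, and assuming ConcreteDataType exposes logical_type_id through the DataType trait as the earlier files use it:

// Sketch (test-only): ids and concrete types round-trip.
let data_type = LogicalTypeId::TimestampMillisecond.data_type();
assert_eq!(
    LogicalTypeId::TimestampMillisecond,
    data_type.logical_type_id()
);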
37
src/datatypes2/src/types.rs
Normal file
@@ -0,0 +1,37 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod binary_type;
mod boolean_type;
mod date_type;
mod datetime_type;
mod list_type;
mod null_type;
mod primitive_type;
mod string_type;

mod timestamp_type;

pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use list_type::ListType;
pub use null_type::NullType;
pub use primitive_type::{
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
    NativeType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
};
pub use string_type::StringType;
pub use timestamp_type::*;
60
src/datatypes2/src/types/binary_type.rs
Normal file
@@ -0,0 +1,60 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use common_base::bytes::StringBytes;
use serde::{Deserialize, Serialize};

use crate::data_type::{DataType, DataTypeRef};
use crate::scalars::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
use crate::vectors::{BinaryVectorBuilder, MutableVector};

#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BinaryType;

impl BinaryType {
    pub fn arc() -> DataTypeRef {
        Arc::new(Self)
    }
}

impl DataType for BinaryType {
    fn name(&self) -> &str {
        "Binary"
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::Binary
    }

    fn default_value(&self) -> Value {
        StringBytes::default().into()
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::LargeBinary
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(BinaryVectorBuilder::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}
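Every concrete type implements this same DataType surface, so callers can stay fully generic over a DataTypeRef. A minimal sketch of what the trait exposes for BinaryType; the assertions simply mirror the impl above:

// Sketch: inspect BinaryType through the DataType trait object.
let t = BinaryType::arc();
assert_eq!("Binary", t.name());
assert_eq!(LogicalTypeId::Binary, t.logical_type_id());
assert_eq!(ArrowDataType::LargeBinary, t.as_arrow_type());
// Builders are created through the same trait; capacity is just a hint.
let _builder = t.create_mutable_vector(1024);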
59
src/datatypes2/src/types/boolean_type.rs
Normal file
@@ -0,0 +1,59 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use serde::{Deserialize, Serialize};

use crate::data_type::{DataType, DataTypeRef};
use crate::scalars::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
use crate::vectors::{BooleanVectorBuilder, MutableVector};

#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BooleanType;

impl BooleanType {
    pub fn arc() -> DataTypeRef {
        Arc::new(Self)
    }
}

impl DataType for BooleanType {
    fn name(&self) -> &str {
        "Boolean"
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::Boolean
    }

    fn default_value(&self) -> Value {
        bool::default().into()
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Boolean
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(BooleanVectorBuilder::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}
90
src/datatypes2/src/types/date_type.rs
Normal file
@@ -0,0 +1,90 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use arrow::datatypes::{DataType as ArrowDataType, Date32Type};
use common_time::Date;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;

use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::scalars::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::types::LogicalPrimitiveType;
use crate::value::{Value, ValueRef};
use crate::vectors::{DateVector, DateVectorBuilder, MutableVector, Vector};

/// Data type for Date (YYYY-MM-DD).
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DateType;

impl DataType for DateType {
    fn name(&self) -> &str {
        "Date"
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::Date
    }

    fn default_value(&self) -> Value {
        Value::Date(Default::default())
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Date32
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(DateVectorBuilder::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}

impl LogicalPrimitiveType for DateType {
    type ArrowPrimitive = Date32Type;
    type Native = i32;
    type Wrapper = Date;

    fn build_data_type() -> ConcreteDataType {
        ConcreteDataType::date_datatype()
    }

    fn type_name() -> &'static str {
        "Date"
    }

    fn cast_vector(vector: &dyn Vector) -> Result<&DateVector> {
        vector
            .as_any()
            .downcast_ref::<DateVector>()
            .with_context(|| error::CastTypeSnafu {
                msg: format!("Failed to cast {} to DateVector", vector.vector_type_name()),
            })
    }

    fn cast_value_ref(value: ValueRef) -> Result<Option<Date>> {
        match value {
            ValueRef::Null => Ok(None),
            ValueRef::Date(v) => Ok(Some(v)),
            other => error::CastTypeSnafu {
                msg: format!("Failed to cast value {:?} to Date", other),
            }
            .fail(),
        }
    }
}
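LogicalPrimitiveType is what lets generic vector code recover a strongly typed view of its data. A minimal sketch of the value-level cast for DateType, assuming common_time's Date implements Default and PartialEq as its use in Value suggests:

// Sketch: typed casts through LogicalPrimitiveType.
let casted = DateType::cast_value_ref(ValueRef::Date(Date::default())).unwrap();
assert_eq!(Some(Date::default()), casted);
// Nulls cast to None rather than an error.
assert!(DateType::cast_value_ref(ValueRef::Null).unwrap().is_none());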
Some files were not shown because too many files have changed in this diff.