Compare commits


2 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Ruihang Xia | 94409967be | Merge branch 'main' into create-view | 2024-04-22 21:08:22 +08:00 |
| Ruihang Xia | 7503992d61 | add statement (Signed-off-by: Ruihang Xia <waynestxia@gmail.com>) | 2024-04-17 19:13:54 +08:00 |
160 changed files with 2754 additions and 6591 deletions

View File

@@ -22,15 +22,15 @@ inputs:
build-dev-builder-ubuntu: build-dev-builder-ubuntu:
description: Build dev-builder-ubuntu image description: Build dev-builder-ubuntu image
required: false required: false
default: "true" default: 'true'
build-dev-builder-centos: build-dev-builder-centos:
description: Build dev-builder-centos image description: Build dev-builder-centos image
required: false required: false
default: "true" default: 'true'
build-dev-builder-android: build-dev-builder-android:
description: Build dev-builder-android image description: Build dev-builder-android image
required: false required: false
default: "true" default: 'true'
runs: runs:
using: composite using: composite
steps: steps:
@@ -47,7 +47,7 @@ runs:
run: | run: |
make dev-builder \ make dev-builder \
BASE_IMAGE=ubuntu \ BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=all \ BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \ IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \ IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} IMAGE_TAG=${{ inputs.version }}
@@ -58,7 +58,7 @@ runs:
run: | run: |
make dev-builder \ make dev-builder \
BASE_IMAGE=centos \ BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \ BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \ IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \ IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} IMAGE_TAG=${{ inputs.version }}

View File

@@ -16,7 +16,7 @@ inputs:
dev-mode: dev-mode:
description: Enable dev mode, only build standard greptime description: Enable dev mode, only build standard greptime
required: false required: false
default: "false" default: 'false'
working-dir: working-dir:
description: Working directory to build the artifacts description: Working directory to build the artifacts
required: false required: false
@@ -68,7 +68,7 @@ runs:
- name: Build greptime on centos base image - name: Build greptime on centos base image
uses: ./.github/actions/build-greptime-binary uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64. if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build centos7 base image for amd64.
with: with:
base-image: centos base-image: centos
features: servers/dashboard features: servers/dashboard
@@ -79,7 +79,7 @@ runs:
- name: Build greptime on android base image - name: Build greptime on android base image
uses: ./.github/actions/build-greptime-binary uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds arm64 greptime binary for android if the host machine amd64. if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build android base image on amd64.
with: with:
base-image: android base-image: android
artifacts-dir: greptime-android-arm64-${{ inputs.version }} artifacts-dir: greptime-android-arm64-${{ inputs.version }}

View File

@@ -26,6 +26,8 @@ runs:
using: composite using: composite
steps: steps:
- uses: arduino/setup-protoc@v3 - uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install rust toolchain - name: Install rust toolchain
uses: dtolnay/rust-toolchain@master uses: dtolnay/rust-toolchain@master

View File

@@ -147,9 +147,8 @@ jobs:
- name: Set Rust Fuzz - name: Set Rust Fuzz
shell: bash shell: bash
run: | run: |
sudo apt-get install -y libfuzzer-14-dev sudo apt update && sudo apt install -y libfuzzer-14-dev
rustup install nightly cargo install cargo-fuzz
cargo +nightly install cargo-fuzz
- name: Download pre-built binaries - name: Download pre-built binaries
uses: actions/download-artifact@v4 uses: actions/download-artifact@v4
with: with:
@@ -185,13 +184,13 @@ jobs:
- name: Unzip binaries - name: Unzip binaries
run: tar -xvf ./bins.tar.gz run: tar -xvf ./bins.tar.gz
- name: Run sqlness - name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins --preserve-state run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins
- name: Upload sqlness logs - name: Upload sqlness logs
if: always() if: always()
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: sqlness-logs name: sqlness-logs
path: /tmp/sqlness-* path: /tmp/greptime-*.log
retention-days: 3 retention-days: 3
sqlness-kafka-wal: sqlness-kafka-wal:
@@ -215,13 +214,13 @@ jobs:
working-directory: tests-integration/fixtures/kafka working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness - name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins --preserve-state run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins
- name: Upload sqlness logs - name: Upload sqlness logs
if: always() if: always()
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: sqlness-logs-with-kafka-wal name: sqlness-logs-with-kafka-wal
path: /tmp/sqlness-* path: /tmp/greptime-*.log
retention-days: 3 retention-days: 3
fmt: fmt:
@@ -331,20 +330,20 @@ jobs:
fail_ci_if_error: false fail_ci_if_error: false
verbose: true verbose: true
# compat: compat:
# name: Compatibility Test name: Compatibility Test
# needs: build needs: build
# runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
# timeout-minutes: 60 timeout-minutes: 60
# steps: steps:
# - uses: actions/checkout@v4 - uses: actions/checkout@v4
# - name: Download pre-built binaries - name: Download pre-built binaries
# uses: actions/download-artifact@v4 uses: actions/download-artifact@v4
# with: with:
# name: bins name: bins
# path: . path: .
# - name: Unzip binaries - name: Unzip binaries
# run: | run: |
# mkdir -p ./bins/current mkdir -p ./bins/current
# tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current
# - run: ./tests/compat/test-compat.sh 0.6.0 - run: ./tests/compat/test-compat.sh 0.6.0

Cargo.lock generated
View File

@@ -4150,8 +4150,8 @@ dependencies = [
[[package]] [[package]]
name = "hydroflow" name = "hydroflow"
version = "0.6.2" version = "0.6.0"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@@ -4183,7 +4183,7 @@ dependencies = [
[[package]] [[package]]
name = "hydroflow_datalog" name = "hydroflow_datalog"
version = "0.6.0" version = "0.6.0"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"hydroflow_datalog_core", "hydroflow_datalog_core",
"proc-macro-crate 1.3.1", "proc-macro-crate 1.3.1",
@@ -4194,8 +4194,8 @@ dependencies = [
[[package]] [[package]]
name = "hydroflow_datalog_core" name = "hydroflow_datalog_core"
version = "0.6.1" version = "0.6.0"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"hydroflow_lang", "hydroflow_lang",
"proc-macro-crate 1.3.1", "proc-macro-crate 1.3.1",
@@ -4209,8 +4209,8 @@ dependencies = [
[[package]] [[package]]
name = "hydroflow_lang" name = "hydroflow_lang"
version = "0.6.2" version = "0.6.0"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"auto_impl", "auto_impl",
"clap 4.5.4", "clap 4.5.4",
@@ -4230,7 +4230,7 @@ dependencies = [
[[package]] [[package]]
name = "hydroflow_macro" name = "hydroflow_macro"
version = "0.6.0" version = "0.6.0"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"hydroflow_lang", "hydroflow_lang",
"itertools 0.10.5", "itertools 0.10.5",
@@ -4610,9 +4610,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.31" version = "0.1.30"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" checksum = "685a7d121ee3f65ae4fddd72b25a04bb36b6af81bc0828f7d5434c0fe60fa3a2"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@@ -4723,8 +4723,8 @@ dependencies = [
[[package]] [[package]]
name = "lattices" name = "lattices"
version = "0.5.4" version = "0.5.3"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"cc-traits", "cc-traits",
"sealed", "sealed",
@@ -5416,7 +5416,6 @@ dependencies = [
"common-wal", "common-wal",
"crc32fast", "crc32fast",
"criterion", "criterion",
"crossbeam-utils",
"datafusion", "datafusion",
"datafusion-common", "datafusion-common",
"datafusion-expr", "datafusion-expr",
@@ -6299,6 +6298,7 @@ dependencies = [
"sql", "sql",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"store-api", "store-api",
"substrait 0.7.2",
"table", "table",
"tokio", "tokio",
"tonic 0.11.0", "tonic 0.11.0",
@@ -7377,7 +7377,7 @@ checksum = "3b7e158a385023d209d6d5f2585c4b468f6dcb3dd5aca9b75c4f1678c05bb375"
[[package]] [[package]]
name = "pusherator" name = "pusherator"
version = "0.0.5" version = "0.0.5"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"either", "either",
"variadics", "variadics",
@@ -9540,7 +9540,6 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"sqlness", "sqlness",
"tempfile",
"tinytemplate", "tinytemplate",
"tokio", "tokio",
] ]
@@ -11149,7 +11148,7 @@ version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [ dependencies = [
"cfg-if 0.1.10", "cfg-if 1.0.0",
"rand", "rand",
"static_assertions", "static_assertions",
] ]
@@ -11563,7 +11562,7 @@ checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]] [[package]]
name = "variadics" name = "variadics"
version = "0.0.4" version = "0.0.4"
source = "git+https://github.com/GreptimeTeam/hydroflow.git?branch=main#b072ee026f97f8537165e1fb247101e0ab2fb320" source = "git+https://github.com/GreptimeTeam/hydroflow.git?rev=ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94#ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94"
dependencies = [ dependencies = [
"sealed", "sealed",
] ]

View File

@@ -98,7 +98,6 @@ bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] } bytes = { version = "1.5", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] } clap = { version = "4.4", features = ["derive"] }
crossbeam-utils = "0.8"
dashmap = "5.4" dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }

View File

@@ -54,10 +54,8 @@ ifneq ($(strip $(RELEASE)),)
CARGO_BUILD_OPTS += --release CARGO_BUILD_OPTS += --release
endif endif
ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all) ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), true)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif endif

View File

@@ -1,136 +0,0 @@
# How to write fuzz tests
This document introduces how to write fuzz tests in GreptimeDB.
## What is a fuzz test
A fuzz test is a tool that leverages deterministic random generation to assist in finding bugs. The goal of fuzz tests is to identify fuzzer-generated inputs that cause panics, crashes, or other unexpected behavior. We use [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz) to run our fuzz test targets.
## Why we need them
- Find bugs by leveraging random generation
- Integrate with other tests (e.g., e2e)
## Resources
All fuzz test-related resources are located in the `/tests-fuzz` directory.
There are two types of resources: (1) fundamental components and (2) test targets.
### Fundamental components
They are located in the `/tests-fuzz/src` directory. The fundamental components define how to generate SQLs (including dialects for different protocols) and validate execution results (e.g., column attribute validation), etc.
### Test targets
They are located in the `/tests-fuzz/targets` directory, with each file representing an independent fuzz test case. Each target uses the fundamental components to generate SQL, sends the generated statements via the specified protocol, and validates the results of the execution.
Figure 1 illustrates how the fundamental components provide the ability to generate random SQL. A Random Number Generator (Rng) drives an ExprGenerator to produce the Intermediate Representation (IR); a DialectTranslator then renders the IR into the dialect of a specific protocol. Finally, the fuzz test sends the generated SQL via that protocol and verifies that the execution results meet expectations.
```
Rng
|
|
v
ExprGenerator
|
|
v
Intermediate representation (IR)
|
|
+----------------------+----------------------+
| | |
v v v
MySQLTranslator PostgreSQLTranslator OtherDialectTranslator
| | |
| | |
v v v
SQL(MySQL Dialect) ..... .....
|
|
v
Fuzz Test
```
(Figure 1: Overview of fuzz tests)
For more details about fuzz targets and fundamental components, please refer to this [tracking issue](https://github.com/GreptimeTeam/greptimedb/issues/3174).
## How to add a fuzz test target
1. Create an empty Rust source file at `/tests-fuzz/targets/<fuzz-target>.rs`.
2. Register the fuzz test target in the `/tests-fuzz/Cargo.toml` file.
```toml
[[bin]]
name = "<fuzz-target>"
path = "targets/<fuzz-target>.rs"
test = false
bench = false
doc = false
```
3. Define the `FuzzInput` type in `/tests-fuzz/targets/<fuzz-target>.rs`.
```rust
#![no_main]
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
#[derive(Clone, Debug)]
struct FuzzInput {
seed: u64,
}
impl Arbitrary<'_> for FuzzInput {
fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
Ok(FuzzInput { seed })
}
}
```
4. Write your first fuzz test target in `/tests-fuzz/targets/<fuzz-target>.rs`.
```rust
use libfuzzer_sys::fuzz_target;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use snafu::ResultExt;
use sqlx::{MySql, Pool};
use tests_fuzz::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::CreateTableExpr;
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
fuzz_target!(|input: FuzzInput| {
common_telemetry::init_default_ut_logging();
common_runtime::block_on_write(async {
let Connections { mysql } = init_greptime_connections().await;
let mut rng = ChaChaRng::seed_from_u64(input.seed);
let columns = rng.gen_range(2..30);
let create_table_generator = CreateTableExprGeneratorBuilder::default()
.name_generator(Box::new(MappedGenerator::new(
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.columns(columns)
.engine("mito")
.if_not_exists(true)
.build()
.unwrap();
let ir = create_table_generator.generate(&mut rng);
let translator = CreateTableExprTranslator;
let sql = translator.translate(&ir).unwrap();
mysql.execute(&sql).await
})
});
```
5. Run your fuzz test target
```bash
cargo fuzz run <fuzz-target> --fuzz-dir tests-fuzz
```
For more details, please refer to this [document](/tests-fuzz/README.md).

View File

@@ -73,7 +73,7 @@ CREATE TABLE cpu (
usage_system DOUBLE, usage_system DOUBLE,
datacenter STRING, datacenter STRING,
TIME INDEX (ts), TIME INDEX (ts),
PRIMARY KEY(datacenter, host)) ENGINE=mito; PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
``` ```
Then the table's `TableMeta` may look like this: Then the table's `TableMeta` may look like this:
@@ -249,7 +249,7 @@ CREATE TABLE cpu (
usage_system DOUBLE, usage_system DOUBLE,
datacenter STRING, datacenter STRING,
TIME INDEX (ts), TIME INDEX (ts),
PRIMARY KEY(datacenter, host)) ENGINE=mito; PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
select ts, usage_system from cpu; select ts, usage_system from cpu;
``` ```

View File

@@ -36,7 +36,6 @@ common-telemetry = { workspace = true, features = [
"deadlock_detection", "deadlock_detection",
] } ] }
common-time.workspace = true common-time.workspace = true
common-version.workspace = true
common-wal.workspace = true common-wal.workspace = true
config = "0.13" config = "0.13"
datanode.workspace = true datanode.workspace = true

View File

@@ -22,7 +22,6 @@ use cmd::options::{CliOptions, Options};
use cmd::{ use cmd::{
cli, datanode, frontend, greptimedb_cli, log_versions, metasrv, standalone, start_app, App, cli, datanode, frontend, greptimedb_cli, log_versions, metasrv, standalone, start_app, App,
}; };
use common_version::{short_version, version};
#[derive(Parser)] #[derive(Parser)]
enum SubCommand { enum SubCommand {
@@ -106,8 +105,7 @@ async fn main() -> Result<()> {
common_telemetry::set_panic_hook(); common_telemetry::set_panic_hook();
let version = version!(); let cli = greptimedb_cli();
let cli = greptimedb_cli().version(version);
let cli = SubCommand::augment_subcommands(cli); let cli = SubCommand::augment_subcommands(cli);
@@ -131,7 +129,7 @@ async fn main() -> Result<()> {
opts.node_id(), opts.node_id(),
); );
log_versions(version, short_version!()); log_versions();
let app = subcmd.build(opts).await?; let app = subcmd.build(opts).await?;

View File

@@ -492,7 +492,9 @@ mod tests {
) )
ENGINE=mito ENGINE=mito
; WITH(
regions = 1
);
"#; "#;
assert_eq!(res.trim(), expect.trim()); assert_eq!(res.trim(), expect.trim());

View File

@@ -192,10 +192,10 @@ impl MigrateTableMetadata {
let key = v1SchemaKey::parse(key_str) let key = v1SchemaKey::parse(key_str)
.unwrap_or_else(|e| panic!("schema key is corrupted: {e}, key: {key_str}")); .unwrap_or_else(|e| panic!("schema key is corrupted: {e}, key: {key_str}"));
Ok(key) Ok((key, ()))
}), }),
); );
while let Some(key) = stream.try_next().await.context(error::IterStreamSnafu)? { while let Some((key, _)) = stream.try_next().await.context(error::IterStreamSnafu)? {
let _ = self.migrate_schema_key(&key).await; let _ = self.migrate_schema_key(&key).await;
keys.push(key.to_string().as_bytes().to_vec()); keys.push(key.to_string().as_bytes().to_vec());
} }
@@ -244,10 +244,10 @@ impl MigrateTableMetadata {
let key = v1CatalogKey::parse(key_str) let key = v1CatalogKey::parse(key_str)
.unwrap_or_else(|e| panic!("catalog key is corrupted: {e}, key: {key_str}")); .unwrap_or_else(|e| panic!("catalog key is corrupted: {e}, key: {key_str}"));
Ok(key) Ok((key, ()))
}), }),
); );
while let Some(key) = stream.try_next().await.context(error::IterStreamSnafu)? { while let Some((key, _)) = stream.try_next().await.context(error::IterStreamSnafu)? {
let _ = self.migrate_catalog_key(&key).await; let _ = self.migrate_catalog_key(&key).await;
keys.push(key.to_string().as_bytes().to_vec()); keys.push(key.to_string().as_bytes().to_vec());
} }

View File

@@ -64,23 +64,26 @@ pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
Ok(()) Ok(())
} }
/// Log the versions of the application, and the arguments passed to the cli. pub fn log_versions() {
/// `version_string` should be the same as the output of cli "--version";
/// and the `app_version` is the short version of the codes, often consist of git branch and commit.
pub fn log_versions(version_string: &str, app_version: &str) {
// Report app version as gauge. // Report app version as gauge.
APP_VERSION APP_VERSION
.with_label_values(&[env!("CARGO_PKG_VERSION"), app_version]) .with_label_values(&[short_version(), full_version()])
.inc(); .inc();
// Log version and argument flags. // Log version and argument flags.
info!("GreptimeDB version: {}", version_string); info!(
"short_version: {}, full_version: {}",
short_version(),
full_version()
);
log_env_flags(); log_env_flags();
} }
pub fn greptimedb_cli() -> clap::Command { pub fn greptimedb_cli() -> clap::Command {
let cmd = clap::Command::new("greptimedb").subcommand_required(true); let cmd = clap::Command::new("greptimedb")
.version(print_version())
.subcommand_required(true);
#[cfg(feature = "tokio-console")] #[cfg(feature = "tokio-console")]
let cmd = cmd.arg(arg!(--"tokio-console-addr"[TOKIO_CONSOLE_ADDR])); let cmd = cmd.arg(arg!(--"tokio-console-addr"[TOKIO_CONSOLE_ADDR]));
@@ -88,6 +91,35 @@ pub fn greptimedb_cli() -> clap::Command {
cmd.args([arg!(--"log-dir"[LOG_DIR]), arg!(--"log-level"[LOG_LEVEL])]) cmd.args([arg!(--"log-dir"[LOG_DIR]), arg!(--"log-level"[LOG_LEVEL])])
} }
fn print_version() -> &'static str {
concat!(
"\nbranch: ",
env!("GIT_BRANCH"),
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
}
fn short_version() -> &'static str {
env!("CARGO_PKG_VERSION")
}
// {app_name}-{branch_name}-{commit_short}
// The branch name (tag) of a release build should already contain the short
// version so the full version doesn't concat the short version explicitly.
fn full_version() -> &'static str {
concat!(
"greptimedb-",
env!("GIT_BRANCH"),
"-",
env!("GIT_COMMIT_SHORT")
)
}
fn log_env_flags() { fn log_env_flags() {
info!("command line arguments"); info!("command line arguments");
for argument in std::env::args() { for argument in std::env::args() {
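The `print_version()` and `full_version()` helpers above read `GIT_BRANCH`, `GIT_COMMIT`, `GIT_COMMIT_SHORT`, and `GIT_DIRTY` through `env!`, so those variables must be provided at compile time. The diff does not show how the repository injects them; the sketch below is a hypothetical build script illustrating one way such values could be exported from plain `git` commands.

```rust
// build.rs (hypothetical sketch): export git metadata as compile-time env vars so
// that env!("GIT_BRANCH"), env!("GIT_COMMIT"), env!("GIT_COMMIT_SHORT") and
// env!("GIT_DIRTY") resolve in print_version() / full_version().
use std::process::Command;

// Runs `git <args>` and returns its trimmed stdout, or "unknown" on any failure.
fn git(args: &[&str]) -> String {
    Command::new("git")
        .args(args)
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .map(|s| s.trim().to_string())
        .unwrap_or_else(|| "unknown".to_string())
}

fn main() {
    println!("cargo:rustc-env=GIT_BRANCH={}", git(&["rev-parse", "--abbrev-ref", "HEAD"]));
    println!("cargo:rustc-env=GIT_COMMIT={}", git(&["rev-parse", "HEAD"]));
    println!("cargo:rustc-env=GIT_COMMIT_SHORT={}", git(&["rev-parse", "--short", "HEAD"]));
    // A non-empty `git status --porcelain` output means the working tree is dirty.
    let dirty = !git(&["status", "--porcelain"]).is_empty();
    println!("cargo:rustc-env=GIT_DIRTY={}", dirty);
}
```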

View File

@@ -59,7 +59,6 @@ pub enum StatusCode {
RegionNotFound = 4005, RegionNotFound = 4005,
RegionAlreadyExists = 4006, RegionAlreadyExists = 4006,
RegionReadonly = 4007, RegionReadonly = 4007,
/// Region is not in a proper state to handle specific request.
RegionNotReady = 4008, RegionNotReady = 4008,
// If mutually exclusive operations are reached at the same time, // If mutually exclusive operations are reached at the same time,
// only one can be executed, another one will get region busy. // only one can be executed, another one will get region busy.

View File

@@ -15,7 +15,7 @@
pub mod channel_manager; pub mod channel_manager;
pub mod error; pub mod error;
pub mod flight; pub mod flight;
pub mod precision;
pub mod select; pub mod select;
pub mod writer;
pub use error::Error; pub use error::Error;

View File

@@ -1,141 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use common_time::timestamp::TimeUnit;
use crate::Error;
/// Precision represents the precision of a timestamp.
/// It is used to convert timestamps between different precisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Precision {
Nanosecond,
Microsecond,
Millisecond,
Second,
Minute,
Hour,
}
impl Precision {
pub fn to_nanos(&self, amount: i64) -> Option<i64> {
match self {
Precision::Nanosecond => Some(amount),
Precision::Microsecond => amount.checked_mul(1_000),
Precision::Millisecond => amount.checked_mul(1_000_000),
Precision::Second => amount.checked_mul(1_000_000_000),
Precision::Minute => amount
.checked_mul(60)
.and_then(|a| a.checked_mul(1_000_000_000)),
Precision::Hour => amount
.checked_mul(3600)
.and_then(|a| a.checked_mul(1_000_000_000)),
}
}
pub fn to_millis(&self, amount: i64) -> Option<i64> {
match self {
Precision::Nanosecond => amount.checked_div(1_000_000),
Precision::Microsecond => amount.checked_div(1_000),
Precision::Millisecond => Some(amount),
Precision::Second => amount.checked_mul(1_000),
Precision::Minute => amount.checked_mul(60_000),
Precision::Hour => amount.checked_mul(3_600_000),
}
}
}
impl Display for Precision {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Precision::Nanosecond => write!(f, "Precision::Nanosecond"),
Precision::Microsecond => write!(f, "Precision::Microsecond"),
Precision::Millisecond => write!(f, "Precision::Millisecond"),
Precision::Second => write!(f, "Precision::Second"),
Precision::Minute => write!(f, "Precision::Minute"),
Precision::Hour => write!(f, "Precision::Hour"),
}
}
}
impl TryFrom<Precision> for TimeUnit {
type Error = Error;
fn try_from(precision: Precision) -> Result<Self, Self::Error> {
Ok(match precision {
Precision::Second => TimeUnit::Second,
Precision::Millisecond => TimeUnit::Millisecond,
Precision::Microsecond => TimeUnit::Microsecond,
Precision::Nanosecond => TimeUnit::Nanosecond,
_ => {
return Err(Error::NotSupported {
feat: format!("convert {precision} into TimeUnit"),
})
}
})
}
}
#[cfg(test)]
mod tests {
use crate::precision::Precision;
#[test]
fn test_to_nanos() {
assert_eq!(Precision::Nanosecond.to_nanos(1).unwrap(), 1);
assert_eq!(Precision::Microsecond.to_nanos(1).unwrap(), 1_000);
assert_eq!(Precision::Millisecond.to_nanos(1).unwrap(), 1_000_000);
assert_eq!(Precision::Second.to_nanos(1).unwrap(), 1_000_000_000);
assert_eq!(Precision::Minute.to_nanos(1).unwrap(), 60 * 1_000_000_000);
assert_eq!(
Precision::Hour.to_nanos(1).unwrap(),
60 * 60 * 1_000_000_000
);
}
#[test]
fn test_to_millis() {
assert_eq!(Precision::Nanosecond.to_millis(1_000_000).unwrap(), 1);
assert_eq!(Precision::Microsecond.to_millis(1_000).unwrap(), 1);
assert_eq!(Precision::Millisecond.to_millis(1).unwrap(), 1);
assert_eq!(Precision::Second.to_millis(1).unwrap(), 1_000);
assert_eq!(Precision::Minute.to_millis(1).unwrap(), 60 * 1_000);
assert_eq!(Precision::Hour.to_millis(1).unwrap(), 60 * 60 * 1_000);
}
#[test]
fn test_to_nanos_basic() {
assert_eq!(Precision::Second.to_nanos(1), Some(1_000_000_000));
assert_eq!(Precision::Minute.to_nanos(1), Some(60 * 1_000_000_000));
}
#[test]
fn test_to_millis_basic() {
assert_eq!(Precision::Second.to_millis(1), Some(1_000));
assert_eq!(Precision::Minute.to_millis(1), Some(60_000));
}
#[test]
fn test_to_nanos_overflow() {
assert_eq!(Precision::Hour.to_nanos(i64::MAX / 100), None);
}
#[test]
fn test_zero_input() {
assert_eq!(Precision::Second.to_nanos(0), Some(0));
assert_eq!(Precision::Minute.to_millis(0), Some(0));
}
}

View File

@@ -0,0 +1,441 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::Display;
use api::helper::values_with_capacity;
use api::v1::{Column, ColumnDataType, ColumnDataTypeExtension, SemanticType};
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use snafu::ensure;
use crate::error::{Result, TypeMismatchSnafu};
use crate::Error;
type ColumnName = String;
type RowCount = u32;
// TODO(fys): will remove in the future.
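/// Assembles gRPC `Column`s from row-oriented writes: values are appended per column,
/// and a per-column null mask records which rows did not set that column.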
#[derive(Default)]
pub struct LinesWriter {
column_name_index: HashMap<ColumnName, usize>,
null_masks: Vec<BitVec>,
batch: (Vec<Column>, RowCount),
lines: usize,
}
impl LinesWriter {
pub fn with_lines(lines: usize) -> Self {
Self {
lines,
..Default::default()
}
}
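/// Writes a timestamp value for `column_name`; the raw value is converted to
/// milliseconds according to its `Precision` before being stored.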
pub fn write_ts(&mut self, column_name: &str, value: (i64, Precision)) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::TimestampMillisecond,
SemanticType::Timestamp,
None,
);
ensure!(
column.datatype == ColumnDataType::TimestampMillisecond as i32,
TypeMismatchSnafu {
column_name,
expected: "timestamp",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values
.timestamp_millisecond_values
.push(to_ms_ts(value.1, value.0));
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_tag(&mut self, column_name: &str, value: &str) -> Result<()> {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::String, SemanticType::Tag, None);
ensure!(
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.string_values.push(value.to_string());
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_u64(&mut self, column_name: &str, value: u64) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::Uint64,
SemanticType::Field,
None,
);
ensure!(
column.datatype == ColumnDataType::Uint64 as i32,
TypeMismatchSnafu {
column_name,
expected: "u64",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.u64_values.push(value);
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_i64(&mut self, column_name: &str, value: i64) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::Int64,
SemanticType::Field,
None,
);
ensure!(
column.datatype == ColumnDataType::Int64 as i32,
TypeMismatchSnafu {
column_name,
expected: "i64",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.i64_values.push(value);
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_f64(&mut self, column_name: &str, value: f64) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::Float64,
SemanticType::Field,
None,
);
ensure!(
column.datatype == ColumnDataType::Float64 as i32,
TypeMismatchSnafu {
column_name,
expected: "f64",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.f64_values.push(value);
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_string(&mut self, column_name: &str, value: &str) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::String,
SemanticType::Field,
None,
);
ensure!(
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.string_values.push(value.to_string());
self.null_masks[idx].push(false);
Ok(())
}
pub fn write_bool(&mut self, column_name: &str, value: bool) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::Boolean,
SemanticType::Field,
None,
);
ensure!(
column.datatype == ColumnDataType::Boolean as i32,
TypeMismatchSnafu {
column_name,
expected: "boolean",
actual: format!("{:?}", column.datatype)
}
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.bool_values.push(value);
self.null_masks[idx].push(false);
Ok(())
}
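/// Finalizes the current row: increments the row count and appends a null bit to
/// every column that was not written in this row.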
pub fn commit(&mut self) {
let batch = &mut self.batch;
batch.1 += 1;
for i in 0..batch.0.len() {
let null_mask = &mut self.null_masks[i];
if batch.1 as usize > null_mask.len() {
null_mask.push(true);
}
}
}
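/// Moves the accumulated null masks into their columns and returns the columns
/// together with the total row count.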
pub fn finish(mut self) -> (Vec<Column>, RowCount) {
let null_masks = self.null_masks;
for (i, null_mask) in null_masks.into_iter().enumerate() {
let columns = &mut self.batch.0;
columns[i].null_mask = null_mask.into_vec();
}
self.batch
}
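/// Returns the index and a mutable reference to the named column, creating it on
/// first use (pre-filled with null bits for rows committed before the column appeared).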
fn mut_column(
&mut self,
column_name: &str,
datatype: ColumnDataType,
semantic_type: SemanticType,
datatype_extension: Option<ColumnDataTypeExtension>,
) -> (usize, &mut Column) {
let column_names = &mut self.column_name_index;
let column_idx = match column_names.get(column_name) {
Some(i) => *i,
None => {
let new_idx = column_names.len();
let batch = &mut self.batch;
let to_insert = self.lines;
let mut null_mask = BitVec::with_capacity(to_insert);
null_mask.extend(BitVec::repeat(true, batch.1 as usize));
self.null_masks.push(null_mask);
batch.0.push(Column {
column_name: column_name.to_string(),
semantic_type: semantic_type.into(),
values: Some(values_with_capacity(datatype, to_insert)),
datatype: datatype as i32,
null_mask: Vec::default(),
datatype_extension,
});
let _ = column_names.insert(column_name.to_string(), new_idx);
new_idx
}
};
(column_idx, &mut self.batch.0[column_idx])
}
}
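/// Converts a timestamp in the given precision to milliseconds; sub-millisecond
/// precisions are truncated.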
pub fn to_ms_ts(p: Precision, ts: i64) -> i64 {
match p {
Precision::Nanosecond => ts / 1_000_000,
Precision::Microsecond => ts / 1000,
Precision::Millisecond => ts,
Precision::Second => ts * 1000,
Precision::Minute => ts * 1000 * 60,
Precision::Hour => ts * 1000 * 60 * 60,
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Precision {
Nanosecond,
Microsecond,
Millisecond,
Second,
Minute,
Hour,
}
impl Display for Precision {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Precision::Nanosecond => write!(f, "Precision::Nanosecond"),
Precision::Microsecond => write!(f, "Precision::Microsecond"),
Precision::Millisecond => write!(f, "Precision::Millisecond"),
Precision::Second => write!(f, "Precision::Second"),
Precision::Minute => write!(f, "Precision::Minute"),
Precision::Hour => write!(f, "Precision::Hour"),
}
}
}
impl TryFrom<Precision> for TimeUnit {
type Error = Error;
fn try_from(precision: Precision) -> std::result::Result<Self, Self::Error> {
Ok(match precision {
Precision::Second => TimeUnit::Second,
Precision::Millisecond => TimeUnit::Millisecond,
Precision::Microsecond => TimeUnit::Microsecond,
Precision::Nanosecond => TimeUnit::Nanosecond,
_ => {
return Err(Error::NotSupported {
feat: format!("convert {precision} into TimeUnit"),
})
}
})
}
}
#[cfg(test)]
mod tests {
use api::v1::{ColumnDataType, SemanticType};
use common_base::BitVec;
use super::LinesWriter;
use crate::writer::{to_ms_ts, Precision};
#[test]
fn test_lines_writer() {
let mut writer = LinesWriter::with_lines(3);
writer.write_tag("host", "host1").unwrap();
writer.write_f64("cpu", 0.5).unwrap();
writer.write_f64("memory", 0.4).unwrap();
writer.write_string("name", "name1").unwrap();
writer
.write_ts("ts", (101011000, Precision::Millisecond))
.unwrap();
writer.commit();
writer.write_tag("host", "host2").unwrap();
writer
.write_ts("ts", (102011001, Precision::Millisecond))
.unwrap();
writer.write_bool("enable_reboot", true).unwrap();
writer.write_u64("year_of_service", 2).unwrap();
writer.write_i64("temperature", 4).unwrap();
writer.commit();
writer.write_tag("host", "host3").unwrap();
writer.write_f64("cpu", 0.4).unwrap();
writer.write_u64("cpu_core_num", 16).unwrap();
writer
.write_ts("ts", (103011002, Precision::Millisecond))
.unwrap();
writer.commit();
let insert_batch = writer.finish();
assert_eq!(3, insert_batch.1);
let columns = insert_batch.0;
assert_eq!(9, columns.len());
let column = &columns[0];
assert_eq!("host", columns[0].column_name);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Tag as i32, column.semantic_type);
assert_eq!(
vec!["host1", "host2", "host3"],
column.values.as_ref().unwrap().string_values
);
verify_null_mask(&column.null_mask, vec![false, false, false]);
let column = &columns[1];
assert_eq!("cpu", column.column_name);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.5, 0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, false]);
let column = &columns[2];
assert_eq!("memory", column.column_name);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[3];
assert_eq!("name", column.column_name);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec!["name1"], column.values.as_ref().unwrap().string_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[4];
assert_eq!("ts", column.column_name);
assert_eq!(ColumnDataType::TimestampMillisecond as i32, column.datatype);
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
assert_eq!(
vec![101011000, 102011001, 103011002],
column.values.as_ref().unwrap().timestamp_millisecond_values
);
verify_null_mask(&column.null_mask, vec![false, false, false]);
let column = &columns[5];
assert_eq!("enable_reboot", column.column_name);
assert_eq!(ColumnDataType::Boolean as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![true], column.values.as_ref().unwrap().bool_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[6];
assert_eq!("year_of_service", column.column_name);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![2], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[7];
assert_eq!("temperature", column.column_name);
assert_eq!(ColumnDataType::Int64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![4], column.values.as_ref().unwrap().i64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[8];
assert_eq!("cpu_core_num", column.column_name);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![16], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, true, false]);
}
fn verify_null_mask(data: &[u8], expected: Vec<bool>) {
let bitvec = BitVec::from_slice(data);
for (idx, b) in expected.iter().enumerate() {
assert_eq!(b, bitvec.get(idx).unwrap())
}
}
#[test]
fn test_to_ms() {
assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100110000));
assert_eq!(100110, to_ms_ts(Precision::Microsecond, 100110000));
assert_eq!(100110000, to_ms_ts(Precision::Millisecond, 100110000));
assert_eq!(
100110000 * 1000 * 60,
to_ms_ts(Precision::Minute, 100110000)
);
assert_eq!(
100110000 * 1000 * 60 * 60,
to_ms_ts(Precision::Hour, 100110000)
);
}
}

View File

@@ -51,7 +51,7 @@ impl AlterTableProcedure {
AlterKind::RenameTable { new_table_name } => { AlterKind::RenameTable { new_table_name } => {
new_info.name = new_table_name.to_string(); new_info.name = new_table_name.to_string();
} }
AlterKind::DropColumns { .. } | AlterKind::ChangeColumnTypes { .. } => {} AlterKind::DropColumns { .. } => {}
} }
Ok(new_info) Ok(new_info)

View File

@@ -271,7 +271,7 @@ impl CreateTableProcedure {
/// ///
/// Abort(not-retry): /// Abort(not-retry):
/// - Failed to create table metadata. /// - Failed to create table metadata.
async fn on_create_metadata(&mut self) -> Result<Status> { async fn on_create_metadata(&self) -> Result<Status> {
let table_id = self.table_id(); let table_id = self.table_id();
let manager = &self.context.table_metadata_manager; let manager = &self.context.table_metadata_manager;
@@ -285,7 +285,6 @@ impl CreateTableProcedure {
.await?; .await?;
info!("Created table metadata for table {table_id}"); info!("Created table metadata for table {table_id}");
self.creator.opening_regions.clear();
Ok(Status::done_with_output(table_id)) Ok(Status::done_with_output(table_id))
} }
} }
@@ -386,7 +385,7 @@ impl TableCreator {
} }
} }
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize, AsRefStr)]
pub enum CreateTableState { pub enum CreateTableState {
/// Prepares to create the table /// Prepares to create the table
Prepare, Prepare,

View File

@@ -165,7 +165,7 @@ mod tests {
async fn test_next_without_logical_tables() { async fn test_next_without_logical_tables() {
let datanode_manager = Arc::new(MockDatanodeManager::new(())); let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
create_physical_table(&ddl_context, 0, "phy").await; create_physical_table(ddl_context.clone(), 0, "phy").await;
// It always starts from Logical // It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical); let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
let mut ctx = DropDatabaseContext { let mut ctx = DropDatabaseContext {
@@ -199,7 +199,7 @@ mod tests {
async fn test_next_with_logical_tables() { async fn test_next_with_logical_tables() {
let datanode_manager = Arc::new(MockDatanodeManager::new(())); let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await; let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await; create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await;
// It always starts from Logical // It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical); let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);

View File

@@ -161,7 +161,7 @@ mod tests {
async fn test_next_with_physical_table() { async fn test_next_with_physical_table() {
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await; let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
let (_, table_route) = ddl_context let (_, table_route) = ddl_context
.table_metadata_manager .table_metadata_manager
.table_route_manager() .table_route_manager()
@@ -211,7 +211,7 @@ mod tests {
async fn test_next_logical_table() { async fn test_next_logical_table() {
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await; let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await; create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await;
let logical_table_id = physical_table_id + 1; let logical_table_id = physical_table_id + 1;
let (_, table_route) = ddl_context let (_, table_route) = ddl_context
@@ -315,7 +315,7 @@ mod tests {
async fn test_next_retryable_err() { async fn test_next_retryable_err() {
let datanode_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler)); let datanode_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await; let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
let (_, table_route) = ddl_context let (_, table_route) = ddl_context
.table_metadata_manager .table_metadata_manager
.table_route_manager() .table_route_manager()

View File

@@ -46,7 +46,7 @@ pub struct DropTableProcedure {
/// The serializable data. /// The serializable data.
pub data: DropTableData, pub data: DropTableData,
/// The guards of opening regions. /// The guards of opening regions.
pub(crate) dropping_regions: Vec<OperatingRegionGuard>, pub dropping_regions: Vec<OperatingRegionGuard>,
/// The drop table executor. /// The drop table executor.
executor: DropTableExecutor, executor: DropTableExecutor,
} }
@@ -153,7 +153,7 @@ impl DropTableProcedure {
} }
/// Deletes metadata tombstone. /// Deletes metadata tombstone.
async fn on_delete_metadata_tombstone(&mut self) -> Result<Status> { async fn on_delete_metadata_tombstone(&self) -> Result<Status> {
let table_route_value = &TableRouteValue::new( let table_route_value = &TableRouteValue::new(
self.data.task.table_id, self.data.task.table_id,
// Safety: checked // Safety: checked
@@ -163,8 +163,6 @@ impl DropTableProcedure {
self.executor self.executor
.on_delete_metadata_tombstone(&self.context, table_route_value) .on_delete_metadata_tombstone(&self.context, table_route_value)
.await?; .await?;
self.dropping_regions.clear();
Ok(Status::done()) Ok(Status::done())
} }
} }
@@ -268,7 +266,7 @@ impl DropTableData {
} }
/// The state of drop table. /// The state of drop table.
#[derive(Debug, Serialize, Deserialize, AsRefStr, PartialEq)] #[derive(Debug, Serialize, Deserialize, AsRefStr)]
pub enum DropTableState { pub enum DropTableState {
/// Prepares to drop the table /// Prepares to drop the table
Prepare, Prepare,

View File

@@ -52,9 +52,5 @@ pub(crate) fn build_new_physical_table_info(
columns.push(col.column_schema.clone()); columns.push(col.column_schema.clone());
} }
if let Some(time_index) = *time_index {
raw_table_info.meta.schema.column_schemas[time_index].set_time_index();
}
raw_table_info raw_table_info
} }

View File

@@ -47,7 +47,7 @@ pub async fn create_physical_table_metadata(
} }
pub async fn create_physical_table( pub async fn create_physical_table(
ddl_context: &DdlContext, ddl_context: DdlContext,
cluster_id: ClusterId, cluster_id: ClusterId,
name: &str, name: &str,
) -> TableId { ) -> TableId {
@@ -67,7 +67,7 @@ pub async fn create_physical_table(
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
create_physical_table_metadata( create_physical_table_metadata(
ddl_context, &ddl_context,
create_physical_table_task.table_info.clone(), create_physical_table_task.table_info.clone(),
TableRouteValue::Physical(table_route), TableRouteValue::Physical(table_route),
) )
@@ -81,7 +81,7 @@ pub async fn create_logical_table(
cluster_id: ClusterId, cluster_id: ClusterId,
physical_table_id: TableId, physical_table_id: TableId,
table_name: &str, table_name: &str,
) -> TableId { ) {
use std::assert_matches::assert_matches; use std::assert_matches::assert_matches;
let tasks = vec![test_create_logical_table_task(table_name)]; let tasks = vec![test_create_logical_table_task(table_name)];
@@ -91,14 +91,6 @@ pub async fn create_logical_table(
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
let status = procedure.on_create_metadata().await.unwrap(); let status = procedure.on_create_metadata().await.unwrap();
assert_matches!(status, Status::Done { .. }); assert_matches!(status, Status::Done { .. });
let Status::Done {
output: Some(output),
} = status
else {
panic!("Unexpected status: {:?}", status);
};
output.downcast_ref::<Vec<u32>>().unwrap()[0]
} }
pub fn test_create_logical_table_task(name: &str) -> CreateTableTask { pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {

View File

@@ -128,9 +128,9 @@ async fn test_on_prepare_different_physical_table() {
let datanode_manager = Arc::new(MockDatanodeManager::new(())); let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
let phy1_id = create_physical_table(&ddl_context, cluster_id, "phy1").await; let phy1_id = create_physical_table(ddl_context.clone(), cluster_id, "phy1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, cluster_id, "phy2").await; let phy2_id = create_physical_table(ddl_context.clone(), cluster_id, "phy2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await;
let tasks = vec![ let tasks = vec![
@@ -150,7 +150,7 @@ async fn test_on_prepare_logical_table_not_exists() {
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
@@ -172,7 +172,7 @@ async fn test_on_prepare() {
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
@@ -196,7 +196,7 @@ async fn test_on_update_metadata() {
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
@@ -233,7 +233,7 @@ async fn test_on_part_duplicate_alter_request() {
let ddl_context = new_ddl_context(datanode_manager); let ddl_context = new_ddl_context(datanode_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;

View File

@@ -21,12 +21,9 @@ use api::v1::{ColumnDataType, SemanticType};
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status}; use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status};
use common_procedure_test::{ use common_procedure_test::MockContextProvider;
execute_procedure_until, execute_procedure_until_done, MockContextProvider,
};
use store_api::storage::RegionId;
use crate::ddl::create_table::{CreateTableProcedure, CreateTableState}; use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::test_util::columns::TestColumnDefBuilder; use crate::ddl::test_util::columns::TestColumnDefBuilder;
use crate::ddl::test_util::create_table::{ use crate::ddl::test_util::create_table::{
build_raw_table_info_from_expr, TestCreateTableExprBuilder, build_raw_table_info_from_expr, TestCreateTableExprBuilder,
@@ -36,9 +33,8 @@ use crate::ddl::test_util::datanode_handler::{
}; };
use crate::error::Error; use crate::error::Error;
use crate::key::table_route::TableRouteValue; use crate::key::table_route::TableRouteValue;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::rpc::ddl::CreateTableTask; use crate::rpc::ddl::CreateTableTask;
use crate::test_util::{new_ddl_context, new_ddl_context_with_kv_backend, MockDatanodeManager}; use crate::test_util::{new_ddl_context, MockDatanodeManager};
fn test_create_table_task(name: &str) -> CreateTableTask { fn test_create_table_task(name: &str) -> CreateTableTask {
let create_table = TestCreateTableExprBuilder::default() let create_table = TestCreateTableExprBuilder::default()
@@ -248,39 +244,3 @@ async fn test_on_create_metadata() {
let table_id = status.downcast_output_ref::<u32>().unwrap(); let table_id = status.downcast_output_ref::<u32>().unwrap();
assert_eq!(*table_id, 1024); assert_eq!(*table_id, 1024);
} }
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(datanode_manager, kv_backend);
let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.creator.data.state == CreateTableState::CreateMetadata
})
.await;
// Ensure that after running to the state `CreateMetadata`(just past `DatanodeCreateRegions`),
// the opening regions should be recorded:
let guards = &procedure.creator.opening_regions;
assert_eq!(guards.len(), 1);
let (datanode_id, region_id) = (0, RegionId::new(procedure.table_id(), 0));
assert_eq!(guards[0].info(), (datanode_id, region_id));
assert!(ddl_context
.memory_region_keeper
.contains(datanode_id, region_id));
execute_procedure_until_done(&mut procedure).await;
// Ensure that when run to the end, the opening regions should be cleared:
let guards = &procedure.creator.opening_regions;
assert!(guards.is_empty());
assert!(!ddl_context
.memory_region_keeper
.contains(datanode_id, region_id));
}

View File

@@ -42,7 +42,7 @@ async fn test_drop_database_with_logical_tables() {
.await .await
.unwrap(); .unwrap();
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
@@ -91,7 +91,7 @@ async fn test_drop_database_retryable_error() {
.await .await
.unwrap(); .unwrap();
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await; let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;

View File

@@ -19,21 +19,17 @@ use api::v1::region::{region_request, RegionRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_procedure::Procedure; use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId};
use common_procedure_test::{ use common_procedure_test::MockContextProvider;
execute_procedure_until, execute_procedure_until_done, new_test_procedure_context,
};
use store_api::storage::RegionId; use store_api::storage::RegionId;
use table::metadata::TableId;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure; use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
use crate::ddl::drop_table::{DropTableProcedure, DropTableState}; use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::test_util::create_table::test_create_table_task; use crate::ddl::test_util::create_table::test_create_table_task;
use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, NaiveDatanodeHandler}; use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, NaiveDatanodeHandler};
use crate::ddl::test_util::{ use crate::ddl::test_util::{
create_logical_table, create_physical_table, create_physical_table_metadata, create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
test_create_logical_table_task, test_create_physical_table_task,
}; };
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext}; use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue; use crate::key::table_route::TableRouteValue;
@@ -62,7 +58,14 @@ async fn test_on_prepare_table_not_exists_err() {
.await .await
.unwrap(); .unwrap();
let task = new_drop_table_task("bar", table_id, false); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "bar".to_string(),
table_id,
drop_if_exists: false,
};
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound); assert_eq!(err.status_code(), StatusCode::TableNotFound);
@@ -87,12 +90,26 @@ async fn test_on_prepare_table() {
.await .await
.unwrap(); .unwrap();
let task = new_drop_table_task("bar", table_id, true); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "bar".to_string(),
table_id,
drop_if_exists: true,
};
// Drop if exists // Drop if exists
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let task = new_drop_table_task(table_name, table_id, false); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: table_name.to_string(),
table_id,
drop_if_exists: false,
};
// Drop table // Drop table
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
@@ -141,7 +158,13 @@ async fn test_on_datanode_drop_regions() {
.await .await
.unwrap(); .unwrap();
let task = new_drop_table_task(table_name, table_id, false); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: table_name.to_string(),
table_id,
drop_if_exists: false,
};
// Drop table // Drop table
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
@@ -211,7 +234,10 @@ async fn test_on_rollback() {
ddl_context.clone(), ddl_context.clone(),
); );
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = new_test_procedure_context(); let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
provider: Arc::new(MockContextProvider::default()),
};
procedure.execute(&ctx).await.unwrap(); procedure.execute(&ctx).await.unwrap();
// Triggers procedure to create table metadata // Triggers procedure to create table metadata
let status = procedure.execute(&ctx).await.unwrap(); let status = procedure.execute(&ctx).await.unwrap();
@@ -221,10 +247,20 @@ async fn test_on_rollback() {
let expected_kvs = kv_backend.dump(); let expected_kvs = kv_backend.dump();
// Drops the physical table // Drops the physical table
{ {
let task = new_drop_table_task("phy_table", physical_table_id, false); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "phy_table".to_string(),
table_id: physical_table_id,
drop_if_exists: false,
};
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.on_delete_metadata().await.unwrap(); procedure.on_delete_metadata().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
provider: Arc::new(MockContextProvider::default()),
};
procedure.rollback(&ctx).await.unwrap(); procedure.rollback(&ctx).await.unwrap();
// Rollback again // Rollback again
procedure.rollback(&ctx).await.unwrap(); procedure.rollback(&ctx).await.unwrap();
@@ -233,66 +269,23 @@ async fn test_on_rollback() {
} }
// Drops the logical table // Drops the logical table
let task = new_drop_table_task("foo", table_ids[0], false); let task = DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "foo".to_string(),
table_id: table_ids[0],
drop_if_exists: false,
};
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.on_delete_metadata().await.unwrap(); procedure.on_delete_metadata().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
provider: Arc::new(MockContextProvider::default()),
};
procedure.rollback(&ctx).await.unwrap(); procedure.rollback(&ctx).await.unwrap();
// Rollback again // Rollback again
procedure.rollback(&ctx).await.unwrap(); procedure.rollback(&ctx).await.unwrap();
let kvs = kv_backend.dump(); let kvs = kv_backend.dump();
assert_eq!(kvs, expected_kvs); assert_eq!(kvs, expected_kvs);
} }
fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool) -> DropTableTask {
DropTableTask {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: table_name.to_string(),
table_id,
drop_if_exists,
}
}
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(datanode_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let logical_table_id =
create_logical_table(ddl_context.clone(), cluster_id, physical_table_id, "s").await;
let inner_test = |task: DropTableTask| async {
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.data.state == DropTableState::InvalidateTableCache
})
.await;
// Ensure that after running to the state `InvalidateTableCache`(just past `DeleteMetadata`),
// the dropping regions should be recorded:
let guards = &procedure.dropping_regions;
assert_eq!(guards.len(), 1);
let (datanode_id, region_id) = (0, RegionId::new(physical_table_id, 0));
assert_eq!(guards[0].info(), (datanode_id, region_id));
assert!(ddl_context
.memory_region_keeper
.contains(datanode_id, region_id));
execute_procedure_until_done(&mut procedure).await;
// Ensure that when run to the end, the dropping regions should be cleared:
let guards = &procedure.dropping_regions;
assert!(guards.is_empty());
assert!(!ddl_context
.memory_region_keeper
.contains(datanode_id, region_id));
};
inner_test(new_drop_table_task("s", logical_table_id, false)).await;
inner_test(new_drop_table_task("t", physical_table_id, false)).await;
}
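
The test_memory_region_keeper_guard_dropped_on_procedure_done tests in this file and in the create-table test file above check the same invariant: the guards a procedure holds are RAII handles that deregister themselves from the shared keeper once the procedure runs to completion. A std-only sketch of that idea, assuming nothing about the real MemoryRegionKeeper API (the Keeper and RegionGuard names are illustrative):

use std::collections::HashSet;
use std::sync::{Arc, Mutex};

type DatanodeId = u64;
type RegionId = u64;

/// Shared registry of regions that are currently being operated on.
#[derive(Default, Clone)]
struct Keeper {
    inner: Arc<Mutex<HashSet<(DatanodeId, RegionId)>>>,
}

/// Guard returned on registration; removes its entry when dropped.
struct RegionGuard {
    keeper: Keeper,
    key: (DatanodeId, RegionId),
}

impl Keeper {
    fn register(&self, datanode: DatanodeId, region: RegionId) -> RegionGuard {
        self.inner.lock().unwrap().insert((datanode, region));
        RegionGuard { keeper: self.clone(), key: (datanode, region) }
    }

    fn contains(&self, datanode: DatanodeId, region: RegionId) -> bool {
        self.inner.lock().unwrap().contains(&(datanode, region))
    }
}

impl Drop for RegionGuard {
    fn drop(&mut self) {
        self.keeper.inner.lock().unwrap().remove(&self.key);
    }
}

fn main() {
    let keeper = Keeper::default();
    let guard = keeper.register(0, 42);
    assert!(keeper.contains(0, 42));
    drop(guard); // the procedure finishing drops its guards
    assert!(!keeper.contains(0, 42));
}

Dropping the guard is what clears the entry, which is why the tests only need to drive the procedure to done and then assert that contains() turns false.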

View File

@@ -258,7 +258,7 @@ pub enum Error {
error: Utf8Error, error: Utf8Error,
}, },
#[snafu(display("Table not found: '{}'", table_name))] #[snafu(display("Table nod found, table: {}", table_name))]
TableNotFound { TableNotFound {
table_name: String, table_name: String,
location: Location, location: Location,

View File

@@ -17,6 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::DEFAULT_CATALOG_NAME; use common_catalog::consts::DEFAULT_CATALOG_NAME;
use futures::stream::BoxStream; use futures::stream::BoxStream;
use futures::StreamExt;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
@@ -83,11 +84,11 @@ impl<'a> TryFrom<&'a str> for CatalogNameKey<'a> {
} }
/// Decoder `KeyValue` to ({catalog},()) /// Decoder `KeyValue` to ({catalog},())
pub fn catalog_decoder(kv: KeyValue) -> Result<String> { pub fn catalog_decoder(kv: KeyValue) -> Result<(String, ())> {
let str = std::str::from_utf8(&kv.key).context(error::ConvertRawKeySnafu)?; let str = std::str::from_utf8(&kv.key).context(error::ConvertRawKeySnafu)?;
let catalog_name = CatalogNameKey::try_from(str)?; let catalog_name = CatalogNameKey::try_from(str)?;
Ok(catalog_name.catalog.to_string()) Ok((catalog_name.catalog.to_string(), ()))
} }
pub struct CatalogManager { pub struct CatalogManager {
@@ -133,7 +134,7 @@ impl CatalogManager {
Arc::new(catalog_decoder), Arc::new(catalog_decoder),
); );
Box::pin(stream) Box::pin(stream.map(|kv| kv.map(|kv| kv.0)))
} }
} }
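
The two decoder signatures above differ only in shape: one returns the catalog name directly, the other returns a (name, ()) pair so it fits the two-parameter KeyValueDecoderFn<K, V> used by the pagination stream later in this diff, with the unit component stripped off again via stream.map. The schema and datanode-table decoders below get the same mechanical treatment. A self-contained sketch of the pattern, with a simplified stand-in for the real KeyValue and without the catalog key parsing:

// Simplified stand-in for the real rpc::KeyValue.
struct KeyValue {
    key: Vec<u8>,
    value: Vec<u8>,
}

type Result<T> = std::result::Result<T, String>;

// One shape: decode straight to the item the stream should yield.
fn decode_item(kv: KeyValue) -> Result<String> {
    String::from_utf8(kv.key).map_err(|e| e.to_string())
}

// The other shape: decode to a (key, value) pair; catalogs carry no value, so the value is ().
fn decode_pair(kv: KeyValue) -> Result<(String, ())> {
    Ok((decode_item(kv)?, ()))
}

fn main() {
    let kvs = vec![
        KeyValue { key: b"greptime".to_vec(), value: vec![] },
        KeyValue { key: b"system".to_vec(), value: vec![] },
    ];
    // Mirrors `stream.map(|kv| kv.map(|kv| kv.0))`: decode to pairs, then keep only the key part.
    let names: Vec<String> = kvs
        .into_iter()
        .map(decode_pair)
        .map(|r| r.map(|(name, _unit)| name))
        .collect::<Result<_>>()
        .unwrap();
    assert_eq!(names, vec!["greptime".to_string(), "system".to_string()]);
}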

View File

@@ -16,6 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use futures::stream::BoxStream; use futures::stream::BoxStream;
use futures::StreamExt;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use snafu::OptionExt; use snafu::OptionExt;
use store_api::storage::RegionNumber; use store_api::storage::RegionNumber;
@@ -125,8 +126,10 @@ impl DatanodeTableValue {
} }
/// Decodes `KeyValue` to ((),`DatanodeTableValue`) /// Decodes `KeyValue` to ((),`DatanodeTableValue`)
pub fn datanode_table_value_decoder(kv: KeyValue) -> Result<DatanodeTableValue> { pub fn datanode_table_value_decoder(kv: KeyValue) -> Result<((), DatanodeTableValue)> {
DatanodeTableValue::try_from_raw_value(&kv.value) let value = DatanodeTableValue::try_from_raw_value(&kv.value)?;
Ok(((), value))
} }
pub struct DatanodeTableManager { pub struct DatanodeTableManager {
@@ -160,7 +163,7 @@ impl DatanodeTableManager {
Arc::new(datanode_table_value_decoder), Arc::new(datanode_table_value_decoder),
); );
Box::pin(stream) Box::pin(stream.map(|kv| kv.map(|kv| kv.1)))
} }
/// Builds the create datanode table transactions. It only executes while the primary keys comparing successes. /// Builds the create datanode table transactions. It only executes while the primary keys comparing successes.

View File

@@ -19,6 +19,7 @@ use std::time::Duration;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use futures::stream::BoxStream; use futures::stream::BoxStream;
use futures::StreamExt;
use humantime_serde::re::humantime; use humantime_serde::re::humantime;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
@@ -102,11 +103,11 @@ impl TableMetaKey for SchemaNameKey<'_> {
} }
/// Decodes `KeyValue` to ({schema},()) /// Decodes `KeyValue` to ({schema},())
pub fn schema_decoder(kv: KeyValue) -> Result<String> { pub fn schema_decoder(kv: KeyValue) -> Result<(String, ())> {
let str = std::str::from_utf8(&kv.key).context(error::ConvertRawKeySnafu)?; let str = std::str::from_utf8(&kv.key).context(error::ConvertRawKeySnafu)?;
let schema_name = SchemaNameKey::try_from(str)?; let schema_name = SchemaNameKey::try_from(str)?;
Ok(schema_name.schema.to_string()) Ok((schema_name.schema.to_string(), ()))
} }
impl<'a> TryFrom<&'a str> for SchemaNameKey<'a> { impl<'a> TryFrom<&'a str> for SchemaNameKey<'a> {
@@ -192,7 +193,7 @@ impl SchemaManager {
Arc::new(schema_decoder), Arc::new(schema_decoder),
); );
Box::pin(stream) Box::pin(stream.map(|kv| kv.map(|kv| kv.0)))
} }
} }

View File

@@ -28,13 +28,13 @@ use crate::rpc::store::{RangeRequest, RangeResponse};
use crate::rpc::KeyValue; use crate::rpc::KeyValue;
use crate::util::get_next_prefix_key; use crate::util::get_next_prefix_key;
pub type KeyValueDecoderFn<T> = dyn Fn(KeyValue) -> Result<T> + Send + Sync; pub type KeyValueDecoderFn<K, V> = dyn Fn(KeyValue) -> Result<(K, V)> + Send + Sync;
enum PaginationStreamState<T> { enum PaginationStreamState<K, V> {
/// At the start of reading. /// At the start of reading.
Init, Init,
/// Decoding key value pairs. /// Decoding key value pairs.
Decoding(SimpleKeyValueDecoder<T>), Decoding(SimpleKeyValueDecoder<K, V>),
/// Retrieving data from backend. /// Retrieving data from backend.
Reading(BoxFuture<'static, Result<(PaginationStreamFactory, Option<RangeResponse>)>>), Reading(BoxFuture<'static, Result<(PaginationStreamFactory, Option<RangeResponse>)>>),
/// Error /// Error
@@ -77,7 +77,7 @@ struct PaginationStreamFactory {
} }
impl PaginationStreamFactory { impl PaginationStreamFactory {
fn new( pub fn new(
kv: &KvBackendRef, kv: &KvBackendRef,
key: Vec<u8>, key: Vec<u8>,
range_end: Vec<u8>, range_end: Vec<u8>,
@@ -137,7 +137,7 @@ impl PaginationStreamFactory {
} }
} }
async fn read_next(mut self) -> Result<(Self, Option<RangeResponse>)> { pub async fn read_next(mut self) -> Result<(Self, Option<RangeResponse>)> {
if self.more { if self.more {
let resp = self let resp = self
.adaptive_range(RangeRequest { .adaptive_range(RangeRequest {
@@ -174,19 +174,18 @@ impl PaginationStreamFactory {
} }
} }
pub struct PaginationStream<T> { pub struct PaginationStream<K, V> {
state: PaginationStreamState<T>, state: PaginationStreamState<K, V>,
decoder_fn: Arc<KeyValueDecoderFn<T>>, decoder_fn: Arc<KeyValueDecoderFn<K, V>>,
factory: Option<PaginationStreamFactory>, factory: Option<PaginationStreamFactory>,
} }
impl<T> PaginationStream<T> { impl<K, V> PaginationStream<K, V> {
/// Returns a new [PaginationStream].
pub fn new( pub fn new(
kv: KvBackendRef, kv: KvBackendRef,
req: RangeRequest, req: RangeRequest,
page_size: usize, page_size: usize,
decoder_fn: Arc<KeyValueDecoderFn<T>>, decoder_fn: Arc<KeyValueDecoderFn<K, V>>,
) -> Self { ) -> Self {
Self { Self {
state: PaginationStreamState::Init, state: PaginationStreamState::Init,
@@ -203,13 +202,13 @@ impl<T> PaginationStream<T> {
} }
} }
struct SimpleKeyValueDecoder<T> { struct SimpleKeyValueDecoder<K, V> {
kv: VecDeque<KeyValue>, kv: VecDeque<KeyValue>,
decoder: Arc<KeyValueDecoderFn<T>>, decoder: Arc<KeyValueDecoderFn<K, V>>,
} }
impl<T> Iterator for SimpleKeyValueDecoder<T> { impl<K, V> Iterator for SimpleKeyValueDecoder<K, V> {
type Item = Result<T>; type Item = Result<(K, V)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(kv) = self.kv.pop_front() { if let Some(kv) = self.kv.pop_front() {
@@ -220,8 +219,8 @@ impl<T> Iterator for SimpleKeyValueDecoder<T> {
} }
} }
impl<T> Stream for PaginationStream<T> { impl<K, V> Stream for PaginationStream<K, V> {
type Item = Result<T>; type Item = Result<(K, V)>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
loop { loop {
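
Whichever way it is parameterized, the stream does the same work underneath: page through range responses and run the decoder over every returned KeyValue. A reduced, synchronous sketch of that loop, with a plain vector in place of KvBackendRef and the Send + Sync bounds dropped:

use std::sync::Arc;

struct KeyValue {
    key: Vec<u8>,
    value: Vec<u8>,
}

type Result<T> = std::result::Result<T, String>;

// Same shape as the decoder alias above, minus the Send + Sync bounds.
type Decoder<T> = Arc<dyn Fn(KeyValue) -> Result<T>>;

// Toy "backend": a vector consumed one page of `page_size` entries at a time,
// with the decoder applied to every KeyValue, like the stream does per range response.
fn paginate<T>(mut data: Vec<KeyValue>, page_size: usize, decode: Decoder<T>) -> Result<Vec<T>> {
    let mut out = Vec::new();
    while !data.is_empty() {
        // One "range request" worth of results.
        let rest = data.split_off(page_size.min(data.len()));
        let page = std::mem::replace(&mut data, rest);
        for kv in page {
            out.push(decode(kv)?);
        }
    }
    Ok(out)
}

fn main() {
    let data = (0..5u8)
        .map(|i| KeyValue { key: vec![i], value: vec![i * 10] })
        .collect::<Vec<_>>();
    // A (key, value)-pair decoder, matching the K/V-typed variant of the stream.
    let decode: Decoder<(u8, u8)> = Arc::new(|kv: KeyValue| Ok((kv.key[0], kv.value[0])));
    let decoded = paginate(data, 2, decode).unwrap();
    assert_eq!(decoded, vec![(0, 0), (1, 10), (2, 20), (3, 30), (4, 40)]);
}

The real stream does the same thing lazily across its Init/Decoding/Reading states, issuing the next range request only once the current page is drained.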

View File

@@ -114,29 +114,3 @@ pub async fn execute_until_suspended_or_done(
None None
} }
pub fn new_test_procedure_context() -> Context {
Context {
procedure_id: ProcedureId::random(),
provider: Arc::new(MockContextProvider::default()),
}
}
pub async fn execute_procedure_until<P: Procedure>(procedure: &mut P, until: impl Fn(&P) -> bool) {
let mut reached = false;
let context = new_test_procedure_context();
while !matches!(
procedure.execute(&context).await.unwrap(),
Status::Done { .. }
) {
if until(procedure) {
reached = true;
break;
}
}
assert!(
reached,
"procedure '{}' did not reach the expected state",
procedure.type_name()
);
}
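
execute_procedure_until above drives a procedure one step at a time until a caller-supplied predicate holds, and fails the test if the procedure completes first. The same drive-until-predicate pattern, reduced to a plain state machine so it runs standalone (Proc, drive_until, and the states are illustrative, not the common_procedure API):

#[derive(Debug, PartialEq)]
enum State {
    Prepare,
    CreateMetadata,
    Done,
}

struct Proc {
    state: State,
}

impl Proc {
    /// One execution step; returns true once the procedure is done.
    fn step(&mut self) -> bool {
        self.state = match self.state {
            State::Prepare => State::CreateMetadata,
            State::CreateMetadata => State::Done,
            State::Done => return true,
        };
        self.state == State::Done
    }
}

/// Drive the procedure until `until` holds; panic if it finishes first.
fn drive_until(proc: &mut Proc, until: impl Fn(&Proc) -> bool) {
    loop {
        if until(proc) {
            return;
        }
        assert!(!proc.step(), "procedure finished before reaching the expected state");
    }
}

fn main() {
    let mut proc = Proc { state: State::Prepare };
    drive_until(&mut proc, |p| p.state == State::CreateMetadata);
    assert_eq!(proc.state, State::CreateMetadata);
}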

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::fmt::{Debug, Display, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::Arc; use std::sync::Arc;
use api::greptime_proto::v1::add_column_location::LocationType; use api::greptime_proto::v1::add_column_location::LocationType;
@@ -126,17 +126,6 @@ pub enum AddColumnLocation {
After { column_name: String }, After { column_name: String },
} }
impl Display for AddColumnLocation {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
AddColumnLocation::First => write!(f, r#"FIRST"#),
AddColumnLocation::After { column_name } => {
write!(f, r#"AFTER {column_name}"#)
}
}
}
}
impl From<&AddColumnLocation> for Location { impl From<&AddColumnLocation> for Location {
fn from(value: &AddColumnLocation) -> Self { fn from(value: &AddColumnLocation) -> Self {
match value { match value {
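
The Display impl in this hunk renders the location as the SQL fragment it corresponds to (FIRST / AFTER <column>), which is presumably what statement-building code formats into an ALTER TABLE ... ADD COLUMN clause. A usage sketch that repeats the enum and impl locally so it compiles on its own:

use std::fmt::{Display, Formatter};

// Local copy of the enum and the Display impl from the hunk above.
enum AddColumnLocation {
    First,
    After { column_name: String },
}

impl Display for AddColumnLocation {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            AddColumnLocation::First => write!(f, "FIRST"),
            AddColumnLocation::After { column_name } => write!(f, "AFTER {column_name}"),
        }
    }
}

fn main() {
    assert_eq!(AddColumnLocation::First.to_string(), "FIRST");
    let loc = AddColumnLocation::After { column_name: "host".to_string() };
    // Reads like the tail of an ALTER TABLE ... ADD COLUMN clause.
    assert_eq!(format!("ADD COLUMN mem_usage DOUBLE {loc}"), "ADD COLUMN mem_usage DOUBLE AFTER host");
}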

View File

@@ -103,28 +103,3 @@ pub fn setup_build_info() {
println!("cargo:rustc-env=RUSTC_VERSION={}", build_info.rustc); println!("cargo:rustc-env=RUSTC_VERSION={}", build_info.rustc);
println!("cargo:rustc-env=SOURCE_TIMESTAMP={}", build_info.timestamp); println!("cargo:rustc-env=SOURCE_TIMESTAMP={}", build_info.timestamp);
} }
/// Get the string for the output of cli "--version".
#[macro_export]
macro_rules! version {
() => {
concat!(
"\nbranch: ",
env!("GIT_BRANCH"),
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
};
}
/// Short version for reporting metrics.
#[macro_export]
macro_rules! short_version {
() => {
concat!(env!("GIT_BRANCH"), "-", env!("GIT_COMMIT_SHORT"))
};
}
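
Both macros assemble a &'static str at compile time from concat! and env!, relying on the GIT_* variables that setup_build_info above emits through cargo:rustc-env. A standalone sketch cannot assume that build step, so this analogue uses only the variables cargo always sets; the shape of the expansion is the same:

/// Minimal analogue of the version! macro above, restricted to variables cargo always provides
/// so it compiles without the custom build.rs step that emits the GIT_* variables.
macro_rules! version_banner {
    () => {
        concat!(
            "\nname: ",
            env!("CARGO_PKG_NAME"),
            "\nversion: ",
            env!("CARGO_PKG_VERSION")
        )
    };
}

fn main() {
    // A single &'static str assembled entirely at compile time.
    println!("{}", version_banner!());
}

Because everything is resolved at compile time, the banner costs nothing at runtime and can be embedded in "--version" output or metric labels.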

View File

@@ -54,10 +54,6 @@ impl fmt::Debug for ColumnSchema {
if self.is_nullable { "null" } else { "not null" }, if self.is_nullable { "null" } else { "not null" },
)?; )?;
if self.is_time_index {
write!(f, " time_index")?;
}
// Add default constraint if present // Add default constraint if present
if let Some(default_constraint) = &self.default_constraint { if let Some(default_constraint) = &self.default_constraint {
write!(f, " default={:?}", default_constraint)?; write!(f, " default={:?}", default_constraint)?;
@@ -163,14 +159,6 @@ impl ColumnSchema {
self.is_nullable = true; self.is_nullable = true;
} }
/// Set the `is_time_index` to `true` of the column.
/// Similar to [with_time_index] but don't take the ownership.
///
/// [with_time_index]: Self::with_time_index
pub fn set_time_index(&mut self) {
self.is_time_index = true;
}
/// Creates a new [`ColumnSchema`] with given metadata. /// Creates a new [`ColumnSchema`] with given metadata.
pub fn with_metadata(mut self, metadata: Metadata) -> Self { pub fn with_metadata(mut self, metadata: Metadata) -> Self {
self.metadata = metadata; self.metadata = metadata;

View File

@@ -207,21 +207,4 @@ mod tests {
assert!(c.is_null(2)); assert!(c.is_null(2));
} }
} }
#[test]
fn test_safe_cast_to_null() {
let string_vector = Arc::new(StringVector::from(vec![
Some("1"),
Some("hello"),
Some(&i64::MAX.to_string()),
None,
])) as VectorRef;
let to_type = ConcreteDataType::int32_datatype();
let b = string_vector.cast(&to_type).unwrap();
let c = b.as_any().downcast_ref::<Int32Vector>().unwrap();
assert_eq!(Value::Int32(1), c.get(0));
assert_eq!(Value::Null, c.get(1));
assert_eq!(Value::Null, c.get(2));
assert_eq!(Value::Null, c.get(3));
}
} }

View File

@@ -14,14 +14,14 @@ common-error.workspace = true
common-macro.workspace = true common-macro.workspace = true
common-telemetry.workspace = true common-telemetry.workspace = true
common-time.workspace = true common-time.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datafusion-substrait.workspace = true datafusion-substrait.workspace = true
datatypes.workspace = true datatypes.workspace = true
enum_dispatch = "0.3" enum_dispatch = "0.3"
# This fork is simply for keeping our dependency in our org, and pin the version # This fork is simply for keeping our dependency in our org, and pin the version
# it is the same with upstream repo # it is the same with upstream repo
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" } datafusion-common.workspace = true
datafusion-expr.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", rev = "ba2df44efd42b7c4d37ebefbf82e77c6f1d4cb94" }
itertools.workspace = true itertools.workspace = true
num-traits = "0.2" num-traits = "0.2"
serde.workspace = true serde.workspace = true

View File

@@ -18,33 +18,25 @@
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::{BTreeMap, VecDeque}; use std::collections::{BTreeMap, VecDeque};
use std::ops::Range;
use std::rc::Rc; use std::rc::Rc;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use hydroflow::futures::SinkExt;
use hydroflow::lattices::cc_traits::Get; use hydroflow::lattices::cc_traits::Get;
use hydroflow::scheduled::graph::Hydroflow; use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::graph_ext::GraphExt; use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND}; use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools; use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use super::state::Scheduler; use super::state::Scheduler;
use crate::adapter::error::{Error, EvalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu}; use crate::adapter::error::{Error, EvalSnafu, InvalidQuerySnafu};
use crate::compute::state::DataflowState; use crate::compute::state::DataflowState;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff}; use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
use crate::expr::{ use crate::expr::{
self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr, self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr,
}; };
use crate::plan::{AccumulablePlan, KeyValPlan, Plan, ReducePlan}; use crate::plan::Plan;
use crate::repr::{self, DiffRow, KeyValDiffRow, Row}; use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, Arrangement}; use crate::utils::{ArrangeHandler, Arrangement};
mod map;
mod reduce;
/// The Context for build a Operator with id of `GlobalId` /// The Context for build a Operator with id of `GlobalId`
pub struct Context<'referred, 'df> { pub struct Context<'referred, 'df> {
@@ -94,6 +86,8 @@ impl<'referred, 'df> Context<'referred, 'df> {
} }
} }
// There is a false positive in using `Vec<ScalarExpr>` as key
#[allow(clippy::mutable_key_type)]
impl<'referred, 'df> Context<'referred, 'df> { impl<'referred, 'df> Context<'referred, 'df> {
/// Interpret and execute plan /// Interpret and execute plan
/// ///
@@ -103,59 +97,26 @@ impl<'referred, 'df> Context<'referred, 'df> {
Plan::Constant { rows } => Ok(self.render_constant(rows)), Plan::Constant { rows } => Ok(self.render_constant(rows)),
Plan::Get { id } => self.get_by_id(id), Plan::Get { id } => self.get_by_id(id),
Plan::Let { id, value, body } => self.eval_let(id, value, body), Plan::Let { id, value, body } => self.eval_let(id, value, body),
Plan::Mfp { input, mfp } => self.render_mfp(input, mfp), Plan::Mfp { input, mfp } => {
Plan::Reduce { self.render_map_filter_project_into_executable_dataflow(input, mfp)
input,
key_val_plan,
reduce_plan,
} => self.render_reduce(input, key_val_plan, reduce_plan),
Plan::Join { .. } => NotImplementedSnafu {
reason: "Join is still WIP".to_string(),
} }
.fail(), Plan::Reduce { .. } => todo!(),
Plan::Union { .. } => NotImplementedSnafu { Plan::Join { .. } => todo!(),
reason: "Union is still WIP".to_string(), Plan::Union { .. } => todo!(),
}
.fail(),
} }
} }
/// render Constant, take all rows that have a timestamp not greater than the current time /// render Constant, will only emit the `rows` once.
/// pub fn render_constant(&mut self, mut rows: Vec<DiffRow>) -> CollectionBundle {
/// Always assume input is sorted by timestamp
pub fn render_constant(&mut self, rows: Vec<DiffRow>) -> CollectionBundle {
let (send_port, recv_port) = self.df.make_edge::<_, Toff>("constant"); let (send_port, recv_port) = self.df.make_edge::<_, Toff>("constant");
let mut per_time: BTreeMap<repr::Timestamp, Vec<DiffRow>> = rows
.into_iter()
.group_by(|(_row, ts, _diff)| *ts)
.into_iter()
.map(|(k, v)| (k, v.into_iter().collect_vec()))
.collect();
let now = self.compute_state.current_time_ref();
// TODO(discord9): better way to schedule future run
let scheduler = self.compute_state.get_scheduler();
let scheduler_inner = scheduler.clone();
let subgraph_id =
self.df self.df
.add_subgraph_source("Constant", send_port, move |_ctx, send_port| { .add_subgraph_source("Constant", send_port, move |_ctx, send_port| {
// find the first timestamp that is greater than now if rows.is_empty() {
// use filter_map return;
let mut after = per_time.split_off(&(*now.borrow() + 1));
// swap
std::mem::swap(&mut per_time, &mut after);
let not_great_than_now = after;
not_great_than_now.into_iter().for_each(|(_ts, rows)| {
send_port.give(rows);
});
// schedule the next run
if let Some(next_run_time) = per_time.keys().next().copied() {
scheduler_inner.schedule_at(next_run_time);
} }
send_port.give(std::mem::take(&mut rows));
}); });
scheduler.set_cur_subgraph(subgraph_id);
CollectionBundle::from_collection(Collection::from_port(recv_port)) CollectionBundle::from_collection(Collection::from_port(recv_port))
} }
@@ -200,14 +161,144 @@ impl<'referred, 'df> Context<'referred, 'df> {
let ret = self.render_plan(*body)?; let ret = self.render_plan(*body)?;
Ok(ret) Ok(ret)
} }
/// render MapFilterProject, will only emit the `rows` once. Assume all incoming row's sys time being `now`` and ignore the row's stated sys time
/// TODO(discord9): schedule mfp operator to run when temporal filter need
///
/// `MapFilterProject`(`mfp` for short) is scheduled to run when there is enough amount of input updates
/// ***or*** when a future update in it's output buffer(a `Arrangement`) is supposed to emit now.
pub fn render_map_filter_project_into_executable_dataflow(
&mut self,
input: Box<Plan>,
mfp: MapFilterProject,
) -> Result<CollectionBundle, Error> {
let input = self.render_plan(*input)?;
// TODO(discord9): consider if check if contain temporal to determine if
// need arrange or not, or does this added complexity worth it
let (out_send_port, out_recv_port) = self.df.make_edge::<_, Toff>("mfp");
let input_arity = mfp.input_arity;
// default to have a arrange with only future updates, so it can be empty if no temporal filter is applied
// as stream only sends current updates and etc.
let arrange = Arrangement::new();
let arrange_handler = ArrangeHandler::from(arrange.clone());
let arrange_handler_inner = ArrangeHandler::from(arrange);
// This closure capture following variables:
let mfp_plan = MfpPlan::create_from(mfp)?;
let now = self.compute_state.current_time_ref();
let err_collector = self.err_collector.clone();
// TODO(discord9): better way to schedule future run
let scheduler = self.compute_state.get_scheduler();
let scheduler_inner = scheduler.clone();
let subgraph = self.df.add_subgraph_in_out(
"mfp",
input.collection.into_inner(),
out_send_port,
move |_ctx, recv, send| {
// mfp only need to passively receive updates from recvs
let data = recv.take_inner().into_iter().flat_map(|v| v.into_iter());
mfp_subgraph(
&arrange_handler_inner,
data,
&mfp_plan,
*now.borrow(),
&err_collector,
&scheduler_inner,
send,
);
},
);
// register current subgraph in scheduler for future scheduling
scheduler.set_cur_subgraph(subgraph);
let arranged = BTreeMap::from([(
(0..input_arity).map(ScalarExpr::Column).collect_vec(),
Arranged::new(arrange_handler),
)]);
let bundle = CollectionBundle {
collection: Collection::from_port(out_recv_port),
arranged,
};
Ok(bundle)
}
} }
/// The Common argument for all `Subgraph` in the render process fn mfp_subgraph(
struct SubgraphArg<'a> { arrange: &ArrangeHandler,
input: impl IntoIterator<Item = DiffRow>,
mfp_plan: &MfpPlan,
now: repr::Timestamp, now: repr::Timestamp,
err_collector: &'a ErrCollector, err_collector: &ErrCollector,
scheduler: &'a Scheduler, scheduler: &Scheduler,
send: &'a PortCtx<SEND, Toff>, send: &PortCtx<SEND, Toff>,
) {
let run_mfp = || {
let all_updates = eval_mfp_core(input, mfp_plan, now, err_collector);
arrange.write().apply_updates(now, all_updates)?;
Ok(())
};
err_collector.run(run_mfp);
// Deal with output:
// 1. Read all updates that were emitted between the last time this arrangement had updates and the current time.
// 2. Output the updates.
// 3. Truncate all updates within that range.
let from = arrange.read().last_compaction_time().map(|n| n + 1);
let from = from.unwrap_or(repr::Timestamp::MIN);
let output_kv = arrange.read().get_updates_in_range(from..=now);
// the output is expected to be key -> empty val
let output = output_kv
.into_iter()
.map(|((key, _v), ts, diff)| (key, ts, diff))
.collect_vec();
send.give(output);
let run_compaction = || {
arrange.write().compaction_to(now)?;
Ok(())
};
err_collector.run(run_compaction);
// schedule the next time this operator should run
if let Some(i) = arrange.read().get_next_update_time(&now) {
scheduler.schedule_at(i)
}
}
/// The core of evaluating MFP operator, given a MFP and a input, evaluate the MFP operator,
/// return the output updates **And** possibly any number of errors that occurred during the evaluation
fn eval_mfp_core(
input: impl IntoIterator<Item = DiffRow>,
mfp_plan: &MfpPlan,
now: repr::Timestamp,
err_collector: &ErrCollector,
) -> Vec<KeyValDiffRow> {
let mut all_updates = Vec::new();
for (mut row, _sys_time, diff) in input.into_iter() {
// this updates is expected to be only zero to two rows
let updates = mfp_plan.evaluate::<EvalError>(&mut row.inner, now, diff);
// TODO(discord9): refactor error handling
// Expect error in a single row to not interrupt the whole evaluation
let updates = updates
.filter_map(|r| match r {
Ok((key, ts, diff)) => Some(((key, Row::empty()), ts, diff)),
Err((err, _ts, _diff)) => {
err_collector.push_err(err);
None
}
})
.collect_vec();
all_updates.extend(updates);
}
all_updates
} }
#[cfg(test)] #[cfg(test)]
@@ -225,30 +316,64 @@ mod test {
use crate::expr::BinaryFunc; use crate::expr::BinaryFunc;
use crate::repr::Row; use crate::repr::Row;
pub fn run_and_check( fn harness_test_ctx<'r, 'h>(
state: &mut DataflowState, df: &'r mut Hydroflow<'h>,
df: &mut Hydroflow, state: &'r mut DataflowState,
time_range: Range<i64>, ) -> Context<'r, 'h> {
expected: BTreeMap<i64, Vec<DiffRow>>, let err_collector = state.get_err_collector();
output: Rc<RefCell<Vec<DiffRow>>>, Context {
) { id: GlobalId::User(0),
for now in time_range { df,
state.set_current_ts(now); compute_state: state,
state.run_available_with_schedule(df); input_collection: BTreeMap::new(),
assert!(state.get_err_collector().inner.borrow().is_empty()); local_scope: Default::default(),
if let Some(expected) = expected.get(&now) { err_collector,
assert_eq!(*output.borrow(), *expected, "at ts={}", now);
} else {
assert_eq!(*output.borrow(), vec![], "at ts={}", now);
};
output.borrow_mut().clear();
} }
} }
pub fn get_output_handle( /// test if temporal filter works properly
ctx: &mut Context, /// namely: if mfp operator can schedule a delete at the correct time
mut bundle: CollectionBundle, #[test]
) -> Rc<RefCell<Vec<DiffRow>>> { fn test_render_mfp_with_temporal() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
let rows = vec![
(Row::new(vec![1i64.into()]), 1, 1),
(Row::new(vec![2i64.into()]), 2, 1),
(Row::new(vec![3i64.into()]), 3, 1),
];
let collection = ctx.render_constant(rows);
ctx.insert_global(GlobalId::User(1), collection);
let input_plan = Plan::Get {
id: expr::Id::Global(GlobalId::User(1)),
};
// temporal filter: now <= col(0) < now + 4
let mfp = MapFilterProject::new(1)
.filter(vec![
ScalarExpr::Column(0)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Gte,
),
ScalarExpr::Column(0)
.call_binary(
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
BinaryFunc::SubInt64,
)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Lt,
),
])
.unwrap();
let mut bundle = ctx
.render_map_filter_project_into_executable_dataflow(Box::new(input_plan), mfp)
.unwrap();
let collection = bundle.collection; let collection = bundle.collection;
let _arranged = bundle.arranged.pop_first().unwrap().1; let _arranged = bundle.arranged.pop_first().unwrap().1;
let output = Rc::new(RefCell::new(vec![])); let output = Rc::new(RefCell::new(vec![]));
@@ -263,22 +388,93 @@ mod test {
output_inner.borrow_mut().extend(res); output_inner.borrow_mut().extend(res);
}, },
); );
output // drop ctx here to simulate actual process of compile first, run later scenario
drop(ctx);
// expected output at given time
let expected_output = BTreeMap::from([
(
0, // time
vec![
(Row::new(vec![1i64.into()]), 0, 1),
(Row::new(vec![2i64.into()]), 0, 1),
(Row::new(vec![3i64.into()]), 0, 1),
],
),
(
2, // time
vec![(Row::new(vec![1i64.into()]), 2, -1)],
),
(
3, // time
vec![(Row::new(vec![2i64.into()]), 3, -1)],
),
(
4, // time
vec![(Row::new(vec![3i64.into()]), 4, -1)],
),
]);
for now in 0i64..5 {
state.set_current_ts(now);
state.run_available_with_schedule(&mut df);
assert!(state.get_err_collector().inner.borrow().is_empty());
if let Some(expected) = expected_output.get(&now) {
assert_eq!(*output.borrow(), *expected);
} else {
assert_eq!(*output.borrow(), vec![]);
};
output.borrow_mut().clear();
}
} }
pub fn harness_test_ctx<'r, 'h>( /// test if mfp operator without temporal filter works properly
df: &'r mut Hydroflow<'h>, /// that is it filter the rows correctly
state: &'r mut DataflowState, #[test]
) -> Context<'r, 'h> { fn test_render_mfp() {
let err_collector = state.get_err_collector(); let mut df = Hydroflow::new();
Context { let mut state = DataflowState::default();
id: GlobalId::User(0), let mut ctx = harness_test_ctx(&mut df, &mut state);
df,
compute_state: state, let rows = vec![
input_collection: BTreeMap::new(), (Row::new(vec![1.into()]), 1, 1),
local_scope: Default::default(), (Row::new(vec![2.into()]), 2, 1),
err_collector, (Row::new(vec![3.into()]), 3, 1),
} ];
let collection = ctx.render_constant(rows);
ctx.insert_global(GlobalId::User(1), collection);
let input_plan = Plan::Get {
id: expr::Id::Global(GlobalId::User(1)),
};
// filter: col(0)>1
let mfp = MapFilterProject::new(1)
.filter(vec![ScalarExpr::Column(0).call_binary(
ScalarExpr::literal(1.into(), ConcreteDataType::int32_datatype()),
BinaryFunc::Gt,
)])
.unwrap();
let bundle = ctx
.render_map_filter_project_into_executable_dataflow(Box::new(input_plan), mfp)
.unwrap();
let collection = bundle.collection.clone(ctx.df);
ctx.df.add_subgraph_sink(
"test_render_constant",
collection.into_inner(),
move |_ctx, recv| {
let data = recv.take_inner();
let res = data.into_iter().flat_map(|v| v.into_iter()).collect_vec();
assert_eq!(
res,
vec![
(Row::new(vec![2.into()]), 0, 1),
(Row::new(vec![3.into()]), 0, 1),
]
)
},
);
drop(ctx);
df.run_available();
} }
/// test if constant operator works properly /// test if constant operator works properly
@@ -298,7 +494,7 @@ mod test {
let collection = collection.collection.clone(ctx.df); let collection = collection.collection.clone(ctx.df);
let cnt = Rc::new(RefCell::new(0)); let cnt = Rc::new(RefCell::new(0));
let cnt_inner = cnt.clone(); let cnt_inner = cnt.clone();
let res_subgraph_id = ctx.df.add_subgraph_sink( ctx.df.add_subgraph_sink(
"test_render_constant", "test_render_constant",
collection.into_inner(), collection.into_inner(),
move |_ctx, recv| { move |_ctx, recv| {
@@ -306,16 +502,9 @@ mod test {
*cnt_inner.borrow_mut() += data.iter().map(|v| v.len()).sum::<usize>(); *cnt_inner.borrow_mut() += data.iter().map(|v| v.len()).sum::<usize>();
}, },
); );
ctx.compute_state.set_current_ts(2);
ctx.compute_state.run_available_with_schedule(ctx.df);
assert_eq!(*cnt.borrow(), 2);
ctx.compute_state.set_current_ts(3);
ctx.compute_state.run_available_with_schedule(ctx.df);
// to get output
ctx.df.schedule_subgraph(res_subgraph_id);
ctx.df.run_available(); ctx.df.run_available();
assert_eq!(*cnt.borrow(), 3);
ctx.df.run_available();
assert_eq!(*cnt.borrow(), 3); assert_eq!(*cnt.borrow(), 3);
} }
@@ -344,33 +533,4 @@ mod test {
assert_eq!(sum.borrow().to_owned(), 45); assert_eq!(sum.borrow().to_owned(), 45);
} }
#[test]
fn test_tee_auto_schedule() {
use hydroflow::scheduled::handoff::TeeingHandoff as Toff;
let mut df = Hydroflow::new();
let (send_port, recv_port) = df.make_edge::<_, Toff<i32>>("test_handoff");
let source = df.add_subgraph_source("test_handoff_source", send_port, move |_ctx, send| {
for i in 0..10 {
send.give(vec![i]);
}
});
let teed_recv_port = recv_port.tee(&mut df);
let sum = Rc::new(RefCell::new(0));
let sum_move = sum.clone();
let _sink = df.add_subgraph_sink("test_handoff_sink", teed_recv_port, move |_ctx, recv| {
let data = recv.take_inner();
*sum_move.borrow_mut() += data.iter().flat_map(|i| i.iter()).sum::<i32>();
});
drop(recv_port);
df.run_available();
assert_eq!(sum.borrow().to_owned(), 45);
df.schedule_subgraph(source);
df.run_available();
assert_eq!(sum.borrow().to_owned(), 90);
}
} }
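
The reworked render_constant above no longer dumps every row on the first tick: it buckets rows by timestamp in a BTreeMap, emits only the buckets whose time is not greater than now, and asks the scheduler to wake the subgraph at the next pending timestamp. A std-only sketch of that emit-and-reschedule step (the names are illustrative; in the real code this runs inside a Hydroflow source subgraph):

use std::collections::BTreeMap;

type Timestamp = i64;
type Row = Vec<i64>;
type DiffRow = (Row, Timestamp, i64);

/// Emit every bucket with ts <= now, keep the rest buffered, and return the
/// next timestamp this operator should be scheduled at (if any).
fn emit_due(
    buffered: &mut BTreeMap<Timestamp, Vec<DiffRow>>,
    now: Timestamp,
    send: &mut impl FnMut(Vec<DiffRow>),
) -> Option<Timestamp> {
    // Everything strictly after `now` stays buffered for a later tick.
    let mut future = buffered.split_off(&(now + 1));
    std::mem::swap(buffered, &mut future);
    let due = future;
    for (_ts, rows) in due {
        send(rows);
    }
    buffered.keys().next().copied()
}

fn main() {
    let mut buffered: BTreeMap<Timestamp, Vec<DiffRow>> = BTreeMap::from([
        (1, vec![(vec![1], 1, 1)]),
        (3, vec![(vec![3], 3, 1)]),
    ]);
    let mut emitted = Vec::new();
    let next = emit_due(&mut buffered, 2, &mut |rows| emitted.extend(rows));
    assert_eq!(emitted.len(), 1); // only the ts=1 bucket was due at now=2
    assert_eq!(next, Some(3));    // the operator should be woken again at ts=3
}

The split_off/swap pair is the same trick the hunk uses to separate due buckets from future ones without rebuilding the map.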

View File

@@ -1,293 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools;
use snafu::OptionExt;
use crate::adapter::error::{Error, PlanSnafu};
use crate::compute::render::Context;
use crate::compute::state::Scheduler;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::expr::{EvalError, MapFilterProject, MfpPlan, ScalarExpr};
use crate::plan::Plan;
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::ArrangeHandler;
impl<'referred, 'df> Context<'referred, 'df> {
/// render MapFilterProject, will only emit the `rows` once. Assume all incoming row's sys time being `now`` and ignore the row's stated sys time
/// TODO(discord9): schedule mfp operator to run when temporal filter need
///
/// `MapFilterProject`(`mfp` for short) is scheduled to run when there is enough amount of input updates
/// ***or*** when a future update in it's output buffer(a `Arrangement`) is supposed to emit now.
// There is a false positive in using `Vec<ScalarExpr>` as key due to `Value` have `bytes` variant
#[allow(clippy::mutable_key_type)]
pub fn render_mfp(
&mut self,
input: Box<Plan>,
mfp: MapFilterProject,
) -> Result<CollectionBundle, Error> {
let input = self.render_plan(*input)?;
// TODO(discord9): consider if check if contain temporal to determine if
// need arrange or not, or does this added complexity worth it
let (out_send_port, out_recv_port) = self.df.make_edge::<_, Toff>("mfp");
let output_arity = mfp.output_arity();
// default to have a arrange with only future updates, so it can be empty if no temporal filter is applied
// as stream only sends current updates and etc.
let arrange_handler = self.compute_state.new_arrange(None);
let arrange_handler_inner =
arrange_handler
.clone_future_only()
.with_context(|| PlanSnafu {
reason: "No write is expected at this point",
})?;
// This closure capture following variables:
let mfp_plan = MfpPlan::create_from(mfp)?;
let now = self.compute_state.current_time_ref();
let err_collector = self.err_collector.clone();
// TODO(discord9): better way to schedule future run
let scheduler = self.compute_state.get_scheduler();
let scheduler_inner = scheduler.clone();
let subgraph = self.df.add_subgraph_in_out(
"mfp",
input.collection.into_inner(),
out_send_port,
move |_ctx, recv, send| {
// mfp only need to passively receive updates from recvs
let data = recv.take_inner().into_iter().flat_map(|v| v.into_iter());
mfp_subgraph(
&arrange_handler_inner,
data,
&mfp_plan,
*now.borrow(),
&err_collector,
&scheduler_inner,
send,
);
},
);
// register current subgraph in scheduler for future scheduling
scheduler.set_cur_subgraph(subgraph);
let arranged = BTreeMap::from([(
(0..output_arity).map(ScalarExpr::Column).collect_vec(),
Arranged::new(arrange_handler),
)]);
let bundle = CollectionBundle {
collection: Collection::from_port(out_recv_port),
arranged,
};
Ok(bundle)
}
}
fn mfp_subgraph(
arrange: &ArrangeHandler,
input: impl IntoIterator<Item = DiffRow>,
mfp_plan: &MfpPlan,
now: repr::Timestamp,
err_collector: &ErrCollector,
scheduler: &Scheduler,
send: &PortCtx<SEND, Toff>,
) {
let run_mfp = || {
let all_updates = eval_mfp_core(input, mfp_plan, now, err_collector);
arrange.write().apply_updates(now, all_updates)?;
Ok(())
};
err_collector.run(run_mfp);
// Deal with output:
// 1. Read all updates that were emitted between the last time this arrangement had updates and the current time.
// 2. Output the updates.
// 3. Truncate all updates within that range.
let from = arrange.read().last_compaction_time().map(|n| n + 1);
let from = from.unwrap_or(repr::Timestamp::MIN);
let output_kv = arrange.read().get_updates_in_range(from..=now);
// the output is expected to be key -> empty val
let output = output_kv
.into_iter()
.map(|((key, _v), ts, diff)| (key, ts, diff))
.collect_vec();
send.give(output);
let run_compaction = || {
arrange.write().compact_to(now)?;
Ok(())
};
err_collector.run(run_compaction);
// schedule next time this subgraph should run
scheduler.schedule_for_arrange(&arrange.read(), now);
}
/// The core of evaluating MFP operator, given a MFP and a input, evaluate the MFP operator,
/// return the output updates **And** possibly any number of errors that occurred during the evaluation
fn eval_mfp_core(
input: impl IntoIterator<Item = DiffRow>,
mfp_plan: &MfpPlan,
now: repr::Timestamp,
err_collector: &ErrCollector,
) -> Vec<KeyValDiffRow> {
let mut all_updates = Vec::new();
for (mut row, _sys_time, diff) in input.into_iter() {
// this updates is expected to be only zero to two rows
let updates = mfp_plan.evaluate::<EvalError>(&mut row.inner, now, diff);
// TODO(discord9): refactor error handling
// Expect error in a single row to not interrupt the whole evaluation
let updates = updates
.filter_map(|r| match r {
Ok((key, ts, diff)) => Some(((key, Row::empty()), ts, diff)),
Err((err, _ts, _diff)) => {
err_collector.push_err(err);
None
}
})
.collect_vec();
all_updates.extend(updates);
}
all_updates
}
#[cfg(test)]
mod test {
use std::cell::RefCell;
use std::rc::Rc;
use datatypes::data_type::ConcreteDataType;
use hydroflow::scheduled::graph::Hydroflow;
use super::*;
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
use crate::compute::state::DataflowState;
use crate::expr::{self, BinaryFunc, GlobalId};
/// test if temporal filter works properly
/// namely: if mfp operator can schedule a delete at the correct time
#[test]
fn test_render_mfp_with_temporal() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
let rows = vec![
(Row::new(vec![1i64.into()]), 0, 1),
(Row::new(vec![2i64.into()]), 0, 1),
(Row::new(vec![3i64.into()]), 0, 1),
];
let collection = ctx.render_constant(rows.clone());
ctx.insert_global(GlobalId::User(1), collection);
let input_plan = Plan::Get {
id: expr::Id::Global(GlobalId::User(1)),
};
// temporal filter: now <= col(0) < now + 4
let mfp = MapFilterProject::new(1)
.filter(vec![
ScalarExpr::Column(0)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Gte,
),
ScalarExpr::Column(0)
.call_binary(
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
BinaryFunc::SubInt64,
)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Lt,
),
])
.unwrap();
let bundle = ctx.render_mfp(Box::new(input_plan), mfp).unwrap();
let output = get_output_handle(&mut ctx, bundle);
// drop ctx here to simulate actual process of compile first, run later scenario
drop(ctx);
// expected output at given time
let expected_output = BTreeMap::from([
(
0, // time
vec![
(Row::new(vec![1i64.into()]), 0, 1),
(Row::new(vec![2i64.into()]), 0, 1),
(Row::new(vec![3i64.into()]), 0, 1),
],
),
(
2, // time
vec![(Row::new(vec![1i64.into()]), 2, -1)],
),
(
3, // time
vec![(Row::new(vec![2i64.into()]), 3, -1)],
),
(
4, // time
vec![(Row::new(vec![3i64.into()]), 4, -1)],
),
]);
run_and_check(&mut state, &mut df, 0..5, expected_output, output);
}
/// test if mfp operator without temporal filter works properly
/// that is it filter the rows correctly
#[test]
fn test_render_mfp() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
let rows = vec![
(Row::new(vec![1.into()]), 1, 1),
(Row::new(vec![2.into()]), 2, 1),
(Row::new(vec![3.into()]), 3, 1),
];
let collection = ctx.render_constant(rows.clone());
ctx.insert_global(GlobalId::User(1), collection);
let input_plan = Plan::Get {
id: expr::Id::Global(GlobalId::User(1)),
};
// filter: col(0)>1
let mfp = MapFilterProject::new(1)
.filter(vec![ScalarExpr::Column(0).call_binary(
ScalarExpr::literal(1.into(), ConcreteDataType::int32_datatype()),
BinaryFunc::Gt,
)])
.unwrap();
let bundle = ctx.render_mfp(Box::new(input_plan), mfp).unwrap();
let output = get_output_handle(&mut ctx, bundle);
drop(ctx);
let expected = BTreeMap::from([
(2, vec![(Row::new(vec![2.into()]), 2, 1)]),
(3, vec![(Row::new(vec![3.into()]), 3, 1)]),
]);
run_and_check(&mut state, &mut df, 1..5, expected, output);
}
}

File diff suppressed because it is too large

View File

@@ -21,7 +21,6 @@ use hydroflow::scheduled::SubgraphId;
use crate::compute::types::ErrCollector; use crate::compute::types::ErrCollector;
use crate::repr::{self, Timestamp}; use crate::repr::{self, Timestamp};
use crate::utils::{ArrangeHandler, Arrangement};
/// input/output of a dataflow /// input/output of a dataflow
/// One `ComputeState` manage the input/output/schedule of one `Hydroflow` /// One `ComputeState` manage the input/output/schedule of one `Hydroflow`
@@ -39,24 +38,9 @@ pub struct DataflowState {
/// error collector local to this `ComputeState`, /// error collector local to this `ComputeState`,
/// useful for distinguishing errors from different `Hydroflow` /// useful for distinguishing errors from different `Hydroflow`
err_collector: ErrCollector, err_collector: ErrCollector,
/// save all used arrange in this dataflow, since usually there is no delete operation
/// we can just keep track of all used arrange and schedule subgraph when they need to be updated
arrange_used: Vec<ArrangeHandler>,
} }
impl DataflowState { impl DataflowState {
pub fn new_arrange(&mut self, name: Option<Vec<String>>) -> ArrangeHandler {
let arrange = name.map(Arrangement::new_with_name).unwrap_or_default();
let arr = ArrangeHandler::from(arrange);
// mark this arrange as used in this dataflow
self.arrange_used.push(
arr.clone_future_only()
.expect("No write happening at this point"),
);
arr
}
/// schedule all subgraph that need to run with time <= `as_of` and run_available() /// schedule all subgraph that need to run with time <= `as_of` and run_available()
/// ///
/// return true if any subgraph actually executed /// return true if any subgraph actually executed
@@ -101,9 +85,8 @@ impl DataflowState {
} }
} }
#[derive(Debug, Clone)] #[derive(Clone)]
pub struct Scheduler { pub struct Scheduler {
// this scheduler is shared with `DataflowState`, so it can schedule subgraph
schedule_subgraph: Rc<RefCell<BTreeMap<Timestamp, VecDeque<SubgraphId>>>>, schedule_subgraph: Rc<RefCell<BTreeMap<Timestamp, VecDeque<SubgraphId>>>>,
cur_subgraph: Rc<RefCell<Option<SubgraphId>>>, cur_subgraph: Rc<RefCell<Option<SubgraphId>>>,
} }
@@ -117,12 +100,6 @@ impl Scheduler {
subgraph_queue.push_back(*subgraph); subgraph_queue.push_back(*subgraph);
} }
pub fn schedule_for_arrange(&self, arrange: &Arrangement, now: Timestamp) {
if let Some(i) = arrange.get_next_update_time(&now) {
self.schedule_at(i)
}
}
pub fn set_cur_subgraph(&self, subgraph: SubgraphId) { pub fn set_cur_subgraph(&self, subgraph: SubgraphId) {
self.cur_subgraph.replace(Some(subgraph)); self.cur_subgraph.replace(Some(subgraph));
} }
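
The Scheduler in this hunk keeps a BTreeMap from timestamp to a queue of subgraph ids, and run_available_with_schedule drains every entry whose time is not after the current tick before re-scheduling those subgraphs. A minimal sketch of the drain step, with plain u64 ids standing in for Hydroflow's SubgraphId:

use std::collections::{BTreeMap, VecDeque};

type Timestamp = i64;
type SubgraphId = u64; // stand-in for hydroflow::scheduled::SubgraphId

/// Pop every subgraph scheduled at a time <= now, in timestamp order.
fn take_due(
    schedule: &mut BTreeMap<Timestamp, VecDeque<SubgraphId>>,
    now: Timestamp,
) -> Vec<SubgraphId> {
    let pending = schedule.split_off(&(now + 1)); // strictly-future entries stay queued
    let due = std::mem::replace(schedule, pending);
    due.into_values().flatten().collect()
}

fn main() {
    let mut schedule = BTreeMap::new();
    schedule.entry(1).or_insert_with(VecDeque::new).push_back(7);
    schedule.entry(5).or_insert_with(VecDeque::new).push_back(8);
    assert_eq!(take_due(&mut schedule, 3), vec![7]);
    assert_eq!(schedule.len(), 1); // the ts=5 entry is still waiting
}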

View File

@@ -28,7 +28,7 @@ use crate::expr::{EvalError, ScalarExpr};
use crate::repr::DiffRow; use crate::repr::DiffRow;
use crate::utils::{ArrangeHandler, Arrangement}; use crate::utils::{ArrangeHandler, Arrangement};
pub type Toff<T = DiffRow> = TeeingHandoff<T>; pub type Toff = TeeingHandoff<DiffRow>;
/// A collection, represent a collections of data that is received from a handoff. /// A collection, represent a collections of data that is received from a handoff.
pub struct Collection<T: 'static> { pub struct Collection<T: 'static> {
@@ -107,17 +107,12 @@ impl Arranged {
/// of reading the data from the collection. /// of reading the data from the collection.
pub struct CollectionBundle { pub struct CollectionBundle {
/// This is useful for passively reading the new updates from the collection /// This is useful for passively reading the new updates from the collection
///
/// Invariant: the timestamp of the updates should always not greater than now, since future updates should be stored in the arrangement
pub collection: Collection<DiffRow>, pub collection: Collection<DiffRow>,
/// the key [`ScalarExpr`] indicate how the keys(also a [`Row`]) used in Arranged is extract from collection's [`Row`] /// the key [`ScalarExpr`] indicate how the keys(also a [`Row`]) used in Arranged is extract from collection's [`Row`]
/// So it is the "index" of the arrangement /// So it is the "index" of the arrangement
/// ///
/// The `Arranged` is the actual data source, it can be used to read the data from the collection by /// The `Arranged` is the actual data source, it can be used to read the data from the collection by
/// using the key indicated by the `Vec<ScalarExpr>` /// using the key indicated by the `Vec<ScalarExpr>`
/// There is a false positive in using `Vec<ScalarExpr>` as key due to `ScalarExpr::Literal`
/// contain a `Value` which have `bytes` variant
#[allow(clippy::mutable_key_type)]
pub arranged: BTreeMap<Vec<ScalarExpr>, Arranged>, pub arranged: BTreeMap<Vec<ScalarExpr>, Arranged>,
} }
@@ -156,16 +151,12 @@ impl ErrCollector {
self.inner.borrow_mut().push_back(err) self.inner.borrow_mut().push_back(err)
} }
pub fn run<F, R>(&self, f: F) -> Option<R> pub fn run<F>(&self, f: F)
where where
F: FnOnce() -> Result<R, EvalError>, F: FnOnce() -> Result<(), EvalError>,
{ {
match f() { if let Err(e) = f() {
Ok(r) => Some(r), self.push_err(e)
Err(e) => {
self.push_err(e);
None
}
} }
} }
} }
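
The two versions of ErrCollector::run differ in whether the closure may hand back a value: one side only accepts Result<(), EvalError> and swallows the error, the other is generic over R and returns Option<R> so callers can keep the successful result. A self-contained sketch of the generic form, with EvalError replaced by a plain String and the Rc sharing omitted:

use std::cell::RefCell;
use std::collections::VecDeque;

#[derive(Default)]
struct ErrCollector {
    inner: RefCell<VecDeque<String>>,
}

impl ErrCollector {
    /// Run a fallible closure; stash the error and return None on failure.
    fn run<F, R>(&self, f: F) -> Option<R>
    where
        F: FnOnce() -> Result<R, String>,
    {
        match f() {
            Ok(r) => Some(r),
            Err(e) => {
                self.inner.borrow_mut().push_back(e);
                None
            }
        }
    }
}

fn main() {
    let errs = ErrCollector::default();
    assert_eq!(errs.run(|| Ok::<_, String>(21 * 2)), Some(42));
    assert_eq!(errs.run(|| Err::<i32, _>("boom".to_string())), None);
    assert_eq!(errs.inner.borrow().len(), 1);
}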

View File

@@ -52,13 +52,6 @@ pub enum EvalError {
location: Location, location: Location,
}, },
#[snafu(display("{msg}"))]
DataType {
msg: String,
source: datatypes::Error,
location: Location,
},
#[snafu(display("Invalid argument: {reason}"))] #[snafu(display("Invalid argument: {reason}"))]
InvalidArgument { reason: String, location: Location }, InvalidArgument { reason: String, location: Location },

View File

@@ -89,11 +89,6 @@ impl MapFilterProject {
} }
} }
/// The number of columns expected in the output row.
pub fn output_arity(&self) -> usize {
self.projection.len()
}
/// Given two mfps, return an mfp that applies one /// Given two mfps, return an mfp that applies one
/// followed by the other. /// followed by the other.
/// Note that the arguments are in the opposite order /// Note that the arguments are in the opposite order

View File

@@ -18,9 +18,7 @@
//! So the overhead is acceptable. //! So the overhead is acceptable.
//! //!
//! Currently supports sum, count, any, all and min/max (with one caveat: min/max can't support delete with aggregate). //! Currently supports sum, count, any, all and min/max (with one caveat: min/max can't support delete with aggregate).
//! TODO: think of better ways to avoid ser/de every time an accum needs to be updated, since it's in a tight loop
use std::any::type_name;
use std::fmt::Display; use std::fmt::Display;
use common_decimal::Decimal128; use common_decimal::Decimal128;
@@ -41,7 +39,6 @@ use crate::repr::Diff;
#[enum_dispatch] #[enum_dispatch]
pub trait Accumulator: Sized { pub trait Accumulator: Sized {
fn into_state(self) -> Vec<Value>; fn into_state(self) -> Vec<Value>;
fn update( fn update(
&mut self, &mut self,
aggr_fn: &AggregateFunc, aggr_fn: &AggregateFunc,
@@ -71,21 +68,6 @@ pub struct Bool {
falses: Diff, falses: Diff,
} }
impl Bool {
/// Expect two `Diff` type values, one for `true` and one for `false`.
pub fn try_from_iter<I>(iter: &mut I) -> Result<Self, EvalError>
where
I: Iterator<Item = Value>,
{
Ok(Self {
trues: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
falses: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
})
}
}
impl TryFrom<Vec<Value>> for Bool { impl TryFrom<Vec<Value>> for Bool {
type Error = EvalError; type Error = EvalError;
@@ -96,9 +78,13 @@ impl TryFrom<Vec<Value>> for Bool {
reason: "Bool Accumulator state should have 2 values", reason: "Bool Accumulator state should have 2 values",
} }
); );
let mut iter = state.into_iter(); let mut iter = state.into_iter();
Self::try_from_iter(&mut iter) Ok(Self {
trues: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
falses: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
})
} }
} }
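
The hunk above shows two ways to rebuild the boolean accumulator state: an iterator-based constructor and a `TryFrom<Vec<Value>>` with an up-front arity check. A small sketch of both, using a simplified `Value` and plain `String` errors instead of the crate's types:

use std::convert::TryFrom;

/// Simplified stand-in for the crate's `Value`; only an integer variant is needed here.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    Int(i64),
}

type Diff = i64;

#[derive(Debug, PartialEq)]
struct Bool {
    trues: Diff,
    falses: Diff,
}

fn to_diff(v: Value) -> Result<Diff, String> {
    match v {
        Value::Int(i) => Ok(i),
    }
}

impl Bool {
    /// Iterator-based variant: fails if the iterator is exhausted early.
    fn try_from_iter<I>(iter: &mut I) -> Result<Self, String>
    where
        I: Iterator<Item = Value>,
    {
        let missing = || "state exhausted before Bool could be built".to_string();
        Ok(Self {
            trues: to_diff(iter.next().ok_or_else(missing)?)?,
            falses: to_diff(iter.next().ok_or_else(missing)?)?,
        })
    }
}

impl TryFrom<Vec<Value>> for Bool {
    type Error = String;

    /// Vec-based variant: checks the arity up front, then consumes the values.
    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        if state.len() != 2 {
            return Err("Bool accumulator state should have 2 values".to_string());
        }
        let mut iter = state.into_iter();
        Bool::try_from_iter(&mut iter)
    }
}

fn main() -> Result<(), String> {
    let state = vec![Value::Int(3), Value::Int(1)];
    let from_vec = Bool::try_from(state.clone())?;
    let from_iter = Bool::try_from_iter(&mut state.into_iter())?;
    assert_eq!(from_vec, from_iter);
    assert_eq!(from_vec, Bool { trues: 3, falses: 1 });
    Ok(())
}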
@@ -171,24 +157,6 @@ pub struct SimpleNumber {
non_nulls: Diff, non_nulls: Diff,
} }
impl SimpleNumber {
/// Expect one `Decimal128` and one `Diff` type values.
/// The `Decimal128` type is used to store the sum of all non-NULL values.
/// The `Diff` type is used to count the number of non-NULL values.
pub fn try_from_iter<I>(iter: &mut I) -> Result<Self, EvalError>
where
I: Iterator<Item = Value>,
{
Ok(Self {
accum: Decimal128::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?
.val(),
non_nulls: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
})
}
}
impl TryFrom<Vec<Value>> for SimpleNumber { impl TryFrom<Vec<Value>> for SimpleNumber {
type Error = EvalError; type Error = EvalError;
@@ -200,7 +168,13 @@ impl TryFrom<Vec<Value>> for SimpleNumber {
} }
); );
let mut iter = state.into_iter(); let mut iter = state.into_iter();
Self::try_from_iter(&mut iter)
Ok(Self {
accum: Decimal128::try_from(iter.next().unwrap())
.map_err(err_try_from_val)?
.val(),
non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
})
} }
} }
@@ -298,34 +272,6 @@ pub struct Float {
non_nulls: Diff, non_nulls: Diff,
} }
impl Float {
/// Expect first value to be `OrderedF64` and the rest four values to be `Diff` type values.
pub fn try_from_iter<I>(iter: &mut I) -> Result<Self, EvalError>
where
I: Iterator<Item = Value>,
{
let mut ret = Self {
accum: OrderedF64::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
pos_infs: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
neg_infs: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
nans: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
non_nulls: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
};
// This prevents the counter-intuitive behavior where summing over no values yields a non-zero result
if ret.non_nulls == 0 {
ret.accum = OrderedFloat::from(0.0);
}
Ok(ret)
}
}
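
The `Float::try_from_iter` body above guards against a stored sum with zero contributing values. A tiny sketch of that guard with plain `f64` (ordered floats and the infinity/NaN counters are omitted):

/// Simplified float-sum accumulator: just the running sum and the non-NULL count.
#[derive(Debug, PartialEq)]
struct FloatAccum {
    accum: f64,
    non_nulls: i64,
}

impl FloatAccum {
    /// Restores the accumulator from a stored (sum, count) pair.
    fn from_state(accum: f64, non_nulls: i64) -> Self {
        let mut ret = Self { accum, non_nulls };
        // Guard against a non-zero stored sum with zero contributing values,
        // so that SUM over no rows never reports a non-zero result.
        if ret.non_nulls == 0 {
            ret.accum = 0.0;
        }
        ret
    }
}

fn main() {
    // A stale state (a sum without any contributing rows) is normalized back to 0.0.
    assert_eq!(FloatAccum::from_state(1.5, 0), FloatAccum { accum: 0.0, non_nulls: 0 });
    // A regular state is kept as-is.
    assert_eq!(FloatAccum::from_state(4.25, 3), FloatAccum { accum: 4.25, non_nulls: 3 });
}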
impl TryFrom<Vec<Value>> for Float { impl TryFrom<Vec<Value>> for Float {
type Error = EvalError; type Error = EvalError;
@@ -439,26 +385,6 @@ pub struct OrdValue {
non_nulls: Diff, non_nulls: Diff,
} }
impl OrdValue {
pub fn try_from_iter<I>(iter: &mut I) -> Result<Self, EvalError>
where
I: Iterator<Item = Value>,
{
Ok(Self {
val: {
let v = iter.next().ok_or_else(fail_accum::<Self>)?;
if v == Value::Null {
None
} else {
Some(v)
}
},
non_nulls: Diff::try_from(iter.next().ok_or_else(fail_accum::<Self>)?)
.map_err(err_try_from_val)?,
})
}
}
impl TryFrom<Vec<Value>> for OrdValue { impl TryFrom<Vec<Value>> for OrdValue {
type Error = EvalError; type Error = EvalError;
@@ -667,37 +593,6 @@ impl Accum {
}) })
} }
pub fn try_from_iter(
aggr_fn: &AggregateFunc,
iter: &mut impl Iterator<Item = Value>,
) -> Result<Self, EvalError> {
match aggr_fn {
AggregateFunc::Any
| AggregateFunc::All
| AggregateFunc::MaxBool
| AggregateFunc::MinBool => Ok(Self::from(Bool::try_from_iter(iter)?)),
AggregateFunc::SumInt16
| AggregateFunc::SumInt32
| AggregateFunc::SumInt64
| AggregateFunc::SumUInt16
| AggregateFunc::SumUInt32
| AggregateFunc::SumUInt64 => Ok(Self::from(SimpleNumber::try_from_iter(iter)?)),
AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => {
Ok(Self::from(Float::try_from_iter(iter)?))
}
f if f.is_max() || f.is_min() || matches!(f, AggregateFunc::Count) => {
Ok(Self::from(OrdValue::try_from_iter(iter)?))
}
f => Err(InternalSnafu {
reason: format!(
"Accumulator does not support this aggregation function: {:?}",
f
),
}
.build()),
}
}
/// try to convert a vector of value into given aggregate function's accumulator /// try to convert a vector of value into given aggregate function's accumulator
pub fn try_into_accum(aggr_fn: &AggregateFunc, state: Vec<Value>) -> Result<Self, EvalError> { pub fn try_into_accum(aggr_fn: &AggregateFunc, state: Vec<Value>) -> Result<Self, EvalError> {
match aggr_fn { match aggr_fn {
@@ -728,16 +623,6 @@ impl Accum {
} }
} }
fn fail_accum<T>() -> EvalError {
InternalSnafu {
reason: format!(
"list of values exhausted before a accum of type {} can be build from it",
type_name::<T>()
),
}
.build()
}
fn err_try_from_val<T: Display>(reason: T) -> EvalError { fn err_try_from_val<T: Display>(reason: T) -> EvalError {
TryFromValueSnafu { TryFromValueSnafu {
msg: reason.to_string(), msg: reason.to_string(),
@@ -890,9 +775,7 @@ mod test {
let mut acc = Accum::new_accum(&aggr_fn)?; let mut acc = Accum::new_accum(&aggr_fn)?;
acc.update_batch(&aggr_fn, input.clone())?; acc.update_batch(&aggr_fn, input.clone())?;
let row = acc.into_state(); let row = acc.into_state();
let acc = Accum::try_into_accum(&aggr_fn, row.clone())?; let acc = Accum::try_into_accum(&aggr_fn, row)?;
let alter_acc = Accum::try_from_iter(&aggr_fn, &mut row.into_iter())?;
assert_eq!(acc, alter_acc);
Ok(acc) Ok(acc)
}; };
let acc = match create_and_insert() { let acc = match create_and_insert() {
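
The test above round-trips an accumulator through its serialized state. The same property on a toy count accumulator, as a self-contained sketch (none of the crate's types are used):

/// A toy accumulator whose state is a single-value vector, mirroring the
/// `into_state` / `try_into_accum` round trip exercised in the test above.
#[derive(Debug, PartialEq)]
struct CountAccum {
    count: i64,
}

impl CountAccum {
    fn new() -> Self {
        Self { count: 0 }
    }

    fn update_batch(&mut self, values: &[i64]) {
        self.count += values.len() as i64;
    }

    fn into_state(self) -> Vec<i64> {
        vec![self.count]
    }

    fn try_from_state(state: Vec<i64>) -> Result<Self, String> {
        match state.as_slice() {
            [count] => Ok(Self { count: *count }),
            _ => Err("Count accumulator state should have exactly 1 value".to_string()),
        }
    }
}

fn main() -> Result<(), String> {
    let mut acc = CountAccum::new();
    acc.update_batch(&[1, 2, 3]);
    let state = acc.into_state();
    // Rebuilding from the serialized state must yield an equivalent accumulator.
    let restored = CountAccum::try_from_state(state)?;
    assert_eq!(restored, CountAccum { count: 3 });
    Ok(())
}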

View File

@@ -112,21 +112,18 @@ impl AggregateFunc {
/// Expect self to be accumulable aggregate function, i.e. sum/count /// Expect self to be accumulable aggregate function, i.e. sum/count
/// ///
/// TODO(discord9): deal with overflow&better accumulator /// TODO(discord9): deal with overflow&better accumulator
pub fn eval_diff_accumulable<A, I>( pub fn eval_diff_accumulable<I>(
&self, &self,
accum: A, accum: Vec<Value>,
value_diffs: I, value_diffs: I,
) -> Result<(Value, Vec<Value>), EvalError> ) -> Result<(Value, Vec<Value>), EvalError>
where where
A: IntoIterator<Item = Value>,
I: IntoIterator<Item = (Value, Diff)>, I: IntoIterator<Item = (Value, Diff)>,
{ {
let mut accum = accum.into_iter().peekable(); let mut accum = if accum.is_empty() {
let mut accum = if accum.peek().is_none() {
Accum::new_accum(self)? Accum::new_accum(self)?
} else { } else {
Accum::try_from_iter(self, &mut accum)? Accum::try_into_accum(self, accum)?
}; };
accum.update_batch(self, value_diffs)?; accum.update_batch(self, value_diffs)?;
let res = accum.eval(self)?; let res = accum.eval(self)?;
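
A sketch of the overall shape of `eval_diff_accumulable`: reuse the stored state if present, otherwise start a fresh accumulator, then fold in the `(value, diff)` pairs. It is specialized here to a toy count accumulator with plain integers instead of the crate's `Value`/`Accum`:

/// Stand-in diff type: +1 for insert, -1 for delete.
type Diff = i64;

/// Toy count accumulator; its serialized state is a single i64.
struct Count(i64);

impl Count {
    fn new() -> Self {
        Count(0)
    }

    fn from_state(state: Vec<i64>) -> Result<Self, String> {
        match state.as_slice() {
            [c] => Ok(Count(*c)),
            _ => Err("count state should have exactly 1 value".to_string()),
        }
    }

    fn update_batch<I: IntoIterator<Item = (i64, Diff)>>(&mut self, value_diffs: I) {
        for (_value, diff) in value_diffs {
            self.0 += diff;
        }
    }
}

/// Mirrors the shape of `eval_diff_accumulable`: an empty state means "start a
/// fresh accumulator", otherwise the accumulator is rebuilt from the state.
fn eval_diff_count<I>(state: Vec<i64>, value_diffs: I) -> Result<(i64, Vec<i64>), String>
where
    I: IntoIterator<Item = (i64, Diff)>,
{
    let mut accum = if state.is_empty() {
        Count::new()
    } else {
        Count::from_state(state)?
    };
    accum.update_batch(value_diffs);
    let result = accum.0;
    Ok((result, vec![accum.0]))
}

fn main() -> Result<(), String> {
    // First call: no previous state, two inserts.
    let (res, state) = eval_diff_count(vec![], vec![(10, 1), (20, 1)])?;
    assert_eq!(res, 2);
    // Second call: previous state plus one delete.
    let (res, _state) = eval_diff_count(state, vec![(10, -1)])?;
    assert_eq!(res, 1);
    Ok(())
}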

View File

@@ -16,7 +16,6 @@
//! It can transform a substrait plan into its own plan and execute it. //! It can transform a substrait plan into its own plan and execute it.
//! It also contains definition of expression, adapter and plan, and internal state management. //! It also contains definition of expression, adapter and plan, and internal state management.
#![feature(let_chains)]
#![allow(dead_code)] #![allow(dead_code)]
#![allow(unused_imports)] #![allow(unused_imports)]
#![warn(missing_docs)] #![warn(missing_docs)]

View File

@@ -21,12 +21,12 @@ mod reduce;
use datatypes::arrow::ipc::Map; use datatypes::arrow::ipc::Map;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
pub(crate) use self::reduce::{AccumulablePlan, KeyValPlan, ReducePlan};
use crate::adapter::error::Error; use crate::adapter::error::Error;
use crate::expr::{ use crate::expr::{
AggregateExpr, EvalError, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr, TypedExpr, AggregateExpr, EvalError, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr, TypedExpr,
}; };
use crate::plan::join::JoinPlan; use crate::plan::join::JoinPlan;
pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan};
use crate::repr::{ColumnType, DiffRow, RelationType}; use crate::repr::{ColumnType, DiffRow, RelationType};
/// A plan for a dataflow component. But with type to indicate the output type of the relation. /// A plan for a dataflow component. But with type to indicate the output type of the relation.

View File

@@ -47,33 +47,7 @@ pub struct AccumulablePlan {
/// Each element represents: /// Each element represents:
/// (index of aggr output, index of value among inputs, aggr expr) /// (index of aggr output, index of value among inputs, aggr expr)
/// These will all be rendered together in one dataflow fragment. /// These will all be rendered together in one dataflow fragment.
/// pub simple_aggrs: Vec<(usize, usize, AggregateExpr)>,
/// Invariant: the output index is the index of the aggregation in `full_aggrs` /// Same as above but for all of the `DISTINCT` accumulable aggregations.
/// which means output index is always smaller than the length of `full_aggrs` pub distinct_aggrs: Vec<(usize, usize, AggregateExpr)>,
pub simple_aggrs: Vec<AggrWithIndex>,
/// Same as `simple_aggrs` but for all of the `DISTINCT` accumulable aggregations.
pub distinct_aggrs: Vec<AggrWithIndex>,
}
/// Invariant: the output index is the index of the aggregation in `full_aggrs`
/// which means output index is always smaller than the length of `full_aggrs`
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct AggrWithIndex {
/// aggregation expression
pub expr: AggregateExpr,
/// index of aggr input among input row
pub input_idx: usize,
/// index of aggr output among output row
pub output_idx: usize,
}
impl AggrWithIndex {
/// Create a new `AggrWithIndex`
pub fn new(expr: AggregateExpr, input_idx: usize, output_idx: usize) -> Self {
Self {
expr,
input_idx,
output_idx,
}
}
} }
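
The two sides of this hunk carry the same three pieces of information, either as a bare `(output, input, expr)` tuple or as the named `AggrWithIndex` struct. A small sketch with a placeholder `AggregateExpr` showing both forms side by side:

/// Placeholder for the real `AggregateExpr`; only a name is needed here.
#[derive(Clone, Debug, PartialEq)]
struct AggregateExpr {
    name: String,
}

/// Named form: field names document which index is which.
#[derive(Clone, Debug, PartialEq)]
struct AggrWithIndex {
    expr: AggregateExpr,
    /// Index of the aggregation's input among the input row.
    input_idx: usize,
    /// Index of the aggregation's output among the output row.
    output_idx: usize,
}

impl AggrWithIndex {
    fn new(expr: AggregateExpr, input_idx: usize, output_idx: usize) -> Self {
        Self { expr, input_idx, output_idx }
    }
}

fn main() {
    let sum = AggregateExpr { name: "sum".to_string() };

    // Tuple form: the reader has to remember the (output, input, expr) ordering.
    let tuple_form: Vec<(usize, usize, AggregateExpr)> = vec![(0, 1, sum.clone())];

    // Named form: the same information, self-describing at the use site.
    let named_form = vec![AggrWithIndex::new(sum.clone(), 1, 0)];

    assert_eq!(tuple_form[0].2, named_form[0].expr);
    assert_eq!(tuple_form[0].0, named_form[0].output_idx);
    assert_eq!(tuple_form[0].1, named_form[0].input_idx);
}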

View File

@@ -160,7 +160,7 @@ impl Row {
self.inner.iter() self.inner.iter()
} }
/// Returns the number of elements in the row, also known as its 'length'. /// eturns the number of elements in the row, also known as its 'length'.
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.inner.len() self.inner.len()
} }

View File

@@ -52,7 +52,7 @@ use crate::expr::{
AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, SafeMfpPlan, ScalarExpr, AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
TypedExpr, UnaryFunc, UnmaterializableFunc, VariadicFunc, TypedExpr, UnaryFunc, UnmaterializableFunc, VariadicFunc,
}; };
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan}; use crate::plan::{AccumulablePlan, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, ColumnType, RelationType}; use crate::repr::{self, ColumnType, RelationType};
use crate::transform::{DataflowContext, FunctionExtensions}; use crate::transform::{DataflowContext, FunctionExtensions};
@@ -265,17 +265,9 @@ impl TypedPlan {
reason: "Expect aggregate argument to be transformed into a column at this point", reason: "Expect aggregate argument to be transformed into a column at this point",
})?; })?;
if aggr_expr.distinct { if aggr_expr.distinct {
distinct_aggrs.push(AggrWithIndex::new( distinct_aggrs.push((output_column, input_column, aggr_expr.clone()));
aggr_expr.clone(),
input_column,
output_column,
));
} else { } else {
simple_aggrs.push(AggrWithIndex::new( simple_aggrs.push((output_column, input_column, aggr_expr.clone()));
aggr_expr.clone(),
input_column,
output_column,
));
} }
} }
let accum_plan = AccumulablePlan { let accum_plan = AccumulablePlan {
@@ -335,7 +327,7 @@ mod test {
}, },
reduce_plan: ReducePlan::Accumulable(AccumulablePlan { reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: vec![aggr_expr.clone()], full_aggrs: vec![aggr_expr.clone()],
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)], simple_aggrs: vec![(0, 0, aggr_expr.clone())],
distinct_aggrs: vec![], distinct_aggrs: vec![],
}), }),
}), }),
@@ -387,7 +379,7 @@ mod test {
}, },
reduce_plan: ReducePlan::Accumulable(AccumulablePlan { reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: vec![aggr_expr.clone()], full_aggrs: vec![aggr_expr.clone()],
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)], simple_aggrs: vec![(0, 0, aggr_expr.clone())],
distinct_aggrs: vec![], distinct_aggrs: vec![],
}), }),
}), }),
@@ -438,7 +430,7 @@ mod test {
}, },
reduce_plan: ReducePlan::Accumulable(AccumulablePlan { reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: vec![aggr_expr.clone()], full_aggrs: vec![aggr_expr.clone()],
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)], simple_aggrs: vec![(0, 0, aggr_expr.clone())],
distinct_aggrs: vec![], distinct_aggrs: vec![],
}), }),
}), }),

File diff suppressed because it is too large

View File

@@ -650,7 +650,7 @@ mod tests {
ts TIMESTAMP, ts TIMESTAMP,
TIME INDEX (ts), TIME INDEX (ts),
PRIMARY KEY(host) PRIMARY KEY(host)
) engine=mito;"#; ) engine=mito with(regions=1);"#;
replace_test(sql, plugins.clone(), &query_ctx); replace_test(sql, plugins.clone(), &query_ctx);
// test drop table // test drop table

View File

@@ -102,10 +102,15 @@ impl LeaderCachedKvBackend {
self.store.clone(), self.store.clone(),
RangeRequest::new().with_prefix(prefix.as_bytes()), RangeRequest::new().with_prefix(prefix.as_bytes()),
DEFAULT_PAGE_SIZE, DEFAULT_PAGE_SIZE,
Arc::new(Ok), Arc::new(|kv| Ok((kv, ()))),
); );
let kvs = stream.try_collect::<Vec<_>>().await?.into_iter().collect(); let kvs = stream
.try_collect::<Vec<_>>()
.await?
.into_iter()
.map(|(kv, _)| kv)
.collect();
self.cache self.cache
.batch_put(BatchPutRequest { .batch_put(BatchPutRequest {

View File

@@ -25,7 +25,7 @@ use store_api::region_request::{
AddColumn, AffectedRows, AlterKind, RegionAlterRequest, RegionPutRequest, RegionRequest, AddColumn, AffectedRows, AlterKind, RegionAlterRequest, RegionPutRequest, RegionRequest,
}; };
use store_api::storage::consts::ReservedColumnId; use store_api::storage::consts::ReservedColumnId;
use store_api::storage::{ConcreteDataType, RegionId}; use store_api::storage::RegionId;
use crate::error::{ use crate::error::{
ColumnTypeMismatchSnafu, MitoReadOperationSnafu, MitoWriteOperationSnafu, Result, ColumnTypeMismatchSnafu, MitoReadOperationSnafu, MitoWriteOperationSnafu, Result,
@@ -128,8 +128,7 @@ impl DataRegion {
if c.semantic_type == SemanticType::Tag { if c.semantic_type == SemanticType::Tag {
if !c.column_schema.data_type.is_string() { if !c.column_schema.data_type.is_string() {
return ColumnTypeMismatchSnafu { return ColumnTypeMismatchSnafu {
expect: ConcreteDataType::string_datatype(), column_type: c.column_schema.data_type.clone(),
actual: c.column_schema.data_type.clone(),
} }
.fail(); .fail();
} }

View File

@@ -43,11 +43,9 @@ use crate::engine::options::{
}; };
use crate::engine::MetricEngineInner; use crate::engine::MetricEngineInner;
use crate::error::{ use crate::error::{
AddingFieldColumnSnafu, ColumnNotFoundSnafu, ColumnTypeMismatchSnafu, ColumnNotFoundSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu,
ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu, InternalColumnOccupiedSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
InvalidMetadataSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
MultipleFieldColumnSnafu, NoFieldColumnSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu,
Result, SerializeColumnMetadataSnafu,
}; };
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT}; use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT};
use crate::utils::{to_data_region_id, to_metadata_region_id}; use crate::utils::{to_data_region_id, to_metadata_region_id};
@@ -193,14 +191,6 @@ impl MetricEngineInner {
})?; })?;
for col in &request.column_metadatas { for col in &request.column_metadatas {
if !physical_columns.contains(&col.column_schema.name) { if !physical_columns.contains(&col.column_schema.name) {
// Multi-field on the physical table is explicitly forbidden at present
// TODO(ruihang): support multi-field on both logical and physical column
ensure!(
col.semantic_type != SemanticType::Field,
AddingFieldColumnSnafu {
name: col.column_schema.name.clone()
}
);
new_columns.push(col.clone()); new_columns.push(col.clone());
} else { } else {
existing_columns.push(col.column_schema.name.clone()); existing_columns.push(col.column_schema.name.clone());
@@ -300,8 +290,6 @@ impl MetricEngineInner {
/// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or /// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or
/// [LOGICAL_TABLE_METADATA_KEY]) /// [LOGICAL_TABLE_METADATA_KEY])
fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> { fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> {
request.validate().context(InvalidMetadataSnafu)?;
let name_to_index = request let name_to_index = request
.column_metadatas .column_metadatas
.iter() .iter()
@@ -335,41 +323,6 @@ impl MetricEngineInner {
ConflictRegionOptionSnafu {} ConflictRegionOptionSnafu {}
); );
// check if only one field column is declared, and all tag columns are string
let mut field_col: Option<&ColumnMetadata> = None;
for col in &request.column_metadatas {
match col.semantic_type {
SemanticType::Tag => ensure!(
col.column_schema.data_type == ConcreteDataType::string_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::string_datatype(),
actual: col.column_schema.data_type.clone(),
}
),
SemanticType::Field => {
if field_col.is_some() {
MultipleFieldColumnSnafu {
previous: field_col.unwrap().column_schema.name.clone(),
current: col.column_schema.name.clone(),
}
.fail()?;
}
field_col = Some(col)
}
SemanticType::Timestamp => {}
}
}
let field_col = field_col.context(NoFieldColumnSnafu)?;
// make sure the field column is float64 type
ensure!(
field_col.column_schema.data_type == ConcreteDataType::float64_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::float64_datatype(),
actual: field_col.column_schema.data_type.clone(),
}
);
Ok(()) Ok(())
} }
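
One side of this hunk validates the metric engine's column layout: every tag must be a string and exactly one float64 field must exist. A standalone sketch of that check with simplified column metadata types (not the real `ColumnMetadata`/`ConcreteDataType`):

#[derive(Clone, Debug, PartialEq)]
enum DataType {
    String,
    Float64,
    TimestampMillisecond,
}

#[derive(Clone, Debug, PartialEq)]
enum SemanticType {
    Tag,
    Field,
    Timestamp,
}

#[derive(Clone, Debug)]
struct Column {
    name: String,
    semantic_type: SemanticType,
    data_type: DataType,
}

/// Checks that all tags are strings and exactly one float64 field exists.
fn verify_columns(columns: &[Column]) -> Result<(), String> {
    let mut field_col: Option<&Column> = None;
    for col in columns {
        match col.semantic_type {
            SemanticType::Tag => {
                if col.data_type != DataType::String {
                    return Err(format!("tag column {} must be a string", col.name));
                }
            }
            SemanticType::Field => {
                if let Some(previous) = field_col {
                    return Err(format!(
                        "multiple field columns found: {} and {}",
                        previous.name, col.name
                    ));
                }
                field_col = Some(col);
            }
            SemanticType::Timestamp => {}
        }
    }
    let field_col = field_col.ok_or_else(|| "no field column found".to_string())?;
    if field_col.data_type != DataType::Float64 {
        return Err(format!("field column {} must be float64", field_col.name));
    }
    Ok(())
}

fn main() {
    let ok = vec![
        Column { name: "ts".into(), semantic_type: SemanticType::Timestamp, data_type: DataType::TimestampMillisecond },
        Column { name: "host".into(), semantic_type: SemanticType::Tag, data_type: DataType::String },
        Column { name: "val".into(), semantic_type: SemanticType::Field, data_type: DataType::Float64 },
    ];
    assert!(verify_columns(&ok).is_ok());

    // A second field column is rejected.
    let extra_field = Column { name: "val2".into(), semantic_type: SemanticType::Field, data_type: DataType::Float64 };
    let two_fields = [ok.clone(), vec![extra_field]].concat();
    assert!(verify_columns(&two_fields).is_err());
}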
@@ -578,15 +531,6 @@ mod test {
false, false,
), ),
}, },
ColumnMetadata {
column_id: 2,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"column2".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
], ],
region_dir: "test_dir".to_string(), region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(), engine: METRIC_ENGINE_NAME.to_string(),
@@ -595,51 +539,37 @@ mod test {
.into_iter() .into_iter()
.collect(), .collect(),
}; };
MetricEngineInner::verify_region_create_request(&request).unwrap(); let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
} }
#[test] #[test]
fn test_verify_region_create_request_options() { fn test_verify_region_create_request_options() {
let mut request = RegionCreateRequest { let mut request = RegionCreateRequest {
column_metadatas: vec![ column_metadatas: vec![],
ColumnMetadata {
column_id: 0,
semantic_type: SemanticType::Timestamp,
column_schema: ColumnSchema::new(
METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
},
ColumnMetadata {
column_id: 1,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"val".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
],
region_dir: "test_dir".to_string(), region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(), engine: METRIC_ENGINE_NAME.to_string(),
primary_key: vec![], primary_key: vec![],
options: HashMap::new(), options: HashMap::new(),
}; };
MetricEngineInner::verify_region_create_request(&request).unwrap_err(); let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
let mut options = HashMap::new(); let mut options = HashMap::new();
options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string()); options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options); request.options.clone_from(&options);
MetricEngineInner::verify_region_create_request(&request).unwrap(); let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string()); options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options); request.options.clone_from(&options);
MetricEngineInner::verify_region_create_request(&request).unwrap_err(); let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap(); options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap();
request.options = options; request.options = options;
MetricEngineInner::verify_region_create_request(&request).unwrap(); let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
} }
#[tokio::test] #[tokio::test]

View File

@@ -133,10 +133,9 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("Column type mismatch. Expect {:?}, got {:?}", expect, actual))] #[snafu(display("Column type mismatch. Expect string, got {:?}", column_type))]
ColumnTypeMismatch { ColumnTypeMismatch {
expect: ConcreteDataType, column_type: ConcreteDataType,
actual: ConcreteDataType,
location: Location, location: Location,
}, },
@@ -170,19 +169,6 @@ pub enum Error {
request: RegionRequest, request: RegionRequest,
location: Location, location: Location,
}, },
#[snafu(display("Multiple field column found: {} and {}", previous, current))]
MultipleFieldColumn {
previous: String,
current: String,
location: Location,
},
#[snafu(display("Adding field column {} to physical table", name))]
AddingFieldColumn { name: String, location: Location },
#[snafu(display("No field column found"))]
NoFieldColumn { location: Location },
} }
pub type Result<T, E = Error> = std::result::Result<T, E>; pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -196,10 +182,7 @@ impl ErrorExt for Error {
| MissingRegionOption { .. } | MissingRegionOption { .. }
| ConflictRegionOption { .. } | ConflictRegionOption { .. }
| ColumnTypeMismatch { .. } | ColumnTypeMismatch { .. }
| PhysicalRegionBusy { .. } | PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,
| MultipleFieldColumn { .. }
| NoFieldColumn { .. }
| AddingFieldColumn { .. } => StatusCode::InvalidArguments,
ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => { ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => {
StatusCode::Unsupported StatusCode::Unsupported

View File

@@ -210,9 +210,9 @@ pub fn create_logical_region_request(
), ),
}, },
]; ];
for (bias, tag) in tags.iter().enumerate() { for tag in tags {
column_metadatas.push(ColumnMetadata { column_metadatas.push(ColumnMetadata {
column_id: 2 + bias as ColumnId, column_id: 2,
semantic_type: SemanticType::Tag, semantic_type: SemanticType::Tag,
column_schema: ColumnSchema::new( column_schema: ColumnSchema::new(
tag.to_string(), tag.to_string(),
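
The change above replaces the fixed `column_id: 2` with `2 + bias`, so each tag column receives a distinct id. A tiny sketch of the enumerate-based assignment with a simplified tag column struct:

type ColumnId = u32;

#[derive(Debug, PartialEq)]
struct TagColumn {
    column_id: ColumnId,
    name: String,
}

/// Assigns ids 2, 3, 4, ... to tag columns, assuming ids 0 and 1 are already
/// taken by the timestamp and field columns, as in the hunk above.
fn build_tag_columns(tags: &[&str]) -> Vec<TagColumn> {
    tags.iter()
        .enumerate()
        .map(|(bias, tag)| TagColumn {
            column_id: 2 + bias as ColumnId,
            name: tag.to_string(),
        })
        .collect()
}

fn main() {
    let cols = build_tag_columns(&["host", "idc", "pod"]);
    let ids: Vec<ColumnId> = cols.iter().map(|c| c.column_id).collect();
    // Every tag gets a distinct id instead of all sharing id 2.
    assert_eq!(ids, vec![2, 3, 4]);
}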

View File

@@ -32,7 +32,6 @@ common-test-util = { workspace = true, optional = true }
common-time.workspace = true common-time.workspace = true
common-wal.workspace = true common-wal.workspace = true
crc32fast = "1" crc32fast = "1"
crossbeam-utils.workspace = true
datafusion.workspace = true datafusion.workspace = true
datafusion-common.workspace = true datafusion-common.workspace = true
datafusion-expr.workspace = true datafusion-expr.workspace = true

View File

@@ -37,11 +37,9 @@ use crate::error::{
use crate::metrics::COMPACTION_STAGE_ELAPSED; use crate::metrics::COMPACTION_STAGE_ELAPSED;
use crate::region::options::CompactionOptions; use crate::region::options::CompactionOptions;
use crate::region::version::{VersionControlRef, VersionRef}; use crate::region::version::{VersionControlRef, VersionRef};
use crate::region::ManifestContextRef;
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest}; use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
use crate::schedule::scheduler::SchedulerRef; use crate::schedule::scheduler::SchedulerRef;
use crate::sst::file_purger::FilePurgerRef; use crate::sst::file_purger::FilePurgerRef;
use crate::worker::WorkerListener;
/// Region compaction request. /// Region compaction request.
pub struct CompactionRequest { pub struct CompactionRequest {
@@ -56,9 +54,6 @@ pub struct CompactionRequest {
/// Start time of compaction task. /// Start time of compaction task.
pub(crate) start_time: Instant, pub(crate) start_time: Instant,
pub(crate) cache_manager: CacheManagerRef, pub(crate) cache_manager: CacheManagerRef,
pub(crate) manifest_ctx: ManifestContextRef,
pub(crate) version_control: VersionControlRef,
pub(crate) listener: WorkerListener,
} }
impl CompactionRequest { impl CompactionRequest {
@@ -93,8 +88,6 @@ pub(crate) struct CompactionScheduler {
/// Request sender of the worker that this scheduler belongs to. /// Request sender of the worker that this scheduler belongs to.
request_sender: Sender<WorkerRequest>, request_sender: Sender<WorkerRequest>,
cache_manager: CacheManagerRef, cache_manager: CacheManagerRef,
engine_config: Arc<MitoConfig>,
listener: WorkerListener,
} }
impl CompactionScheduler { impl CompactionScheduler {
@@ -102,16 +95,12 @@ impl CompactionScheduler {
scheduler: SchedulerRef, scheduler: SchedulerRef,
request_sender: Sender<WorkerRequest>, request_sender: Sender<WorkerRequest>,
cache_manager: CacheManagerRef, cache_manager: CacheManagerRef,
engine_config: Arc<MitoConfig>,
listener: WorkerListener,
) -> Self { ) -> Self {
Self { Self {
scheduler, scheduler,
region_status: HashMap::new(), region_status: HashMap::new(),
request_sender, request_sender,
cache_manager, cache_manager,
engine_config,
listener,
} }
} }
@@ -123,7 +112,7 @@ impl CompactionScheduler {
access_layer: &AccessLayerRef, access_layer: &AccessLayerRef,
file_purger: &FilePurgerRef, file_purger: &FilePurgerRef,
waiter: OptionOutputTx, waiter: OptionOutputTx,
manifest_ctx: &ManifestContextRef, engine_config: Arc<MitoConfig>,
) -> Result<()> { ) -> Result<()> {
if let Some(status) = self.region_status.get_mut(&region_id) { if let Some(status) = self.region_status.get_mut(&region_id) {
// Region is compacting. Add the waiter to pending list. // Region is compacting. Add the waiter to pending list.
@@ -141,10 +130,8 @@ impl CompactionScheduler {
let request = status.new_compaction_request( let request = status.new_compaction_request(
self.request_sender.clone(), self.request_sender.clone(),
waiter, waiter,
self.engine_config.clone(), engine_config,
self.cache_manager.clone(), self.cache_manager.clone(),
manifest_ctx,
self.listener.clone(),
); );
self.region_status.insert(region_id, status); self.region_status.insert(region_id, status);
self.schedule_compaction_request(request) self.schedule_compaction_request(request)
@@ -154,7 +141,7 @@ impl CompactionScheduler {
pub(crate) fn on_compaction_finished( pub(crate) fn on_compaction_finished(
&mut self, &mut self,
region_id: RegionId, region_id: RegionId,
manifest_ctx: &ManifestContextRef, engine_config: Arc<MitoConfig>,
) { ) {
let Some(status) = self.region_status.get_mut(&region_id) else { let Some(status) = self.region_status.get_mut(&region_id) else {
return; return;
@@ -163,10 +150,8 @@ impl CompactionScheduler {
let request = status.new_compaction_request( let request = status.new_compaction_request(
self.request_sender.clone(), self.request_sender.clone(),
OptionOutputTx::none(), OptionOutputTx::none(),
self.engine_config.clone(), engine_config,
self.cache_manager.clone(), self.cache_manager.clone(),
manifest_ctx,
self.listener.clone(),
); );
// Try to schedule next compaction task for this region. // Try to schedule next compaction task for this region.
if let Err(e) = self.schedule_compaction_request(request) { if let Err(e) = self.schedule_compaction_request(request) {
@@ -340,8 +325,6 @@ impl CompactionStatus {
waiter: OptionOutputTx, waiter: OptionOutputTx,
engine_config: Arc<MitoConfig>, engine_config: Arc<MitoConfig>,
cache_manager: CacheManagerRef, cache_manager: CacheManagerRef,
manifest_ctx: &ManifestContextRef,
listener: WorkerListener,
) -> CompactionRequest { ) -> CompactionRequest {
let current_version = self.version_control.current().version; let current_version = self.version_control.current().version;
let start_time = Instant::now(); let start_time = Instant::now();
@@ -354,9 +337,6 @@ impl CompactionStatus {
file_purger: self.file_purger.clone(), file_purger: self.file_purger.clone(),
start_time, start_time,
cache_manager, cache_manager,
manifest_ctx: manifest_ctx.clone(),
version_control: self.version_control.clone(),
listener,
}; };
if let Some(pending) = self.pending_compaction.take() { if let Some(pending) = self.pending_compaction.take() {
@@ -391,9 +371,6 @@ mod tests {
let version_control = Arc::new(builder.build()); let version_control = Arc::new(builder.build());
let (output_tx, output_rx) = oneshot::channel(); let (output_tx, output_rx) = oneshot::channel();
let waiter = OptionOutputTx::from(output_tx); let waiter = OptionOutputTx::from(output_tx);
let manifest_ctx = env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await;
scheduler scheduler
.schedule_compaction( .schedule_compaction(
builder.region_id(), builder.region_id(),
@@ -401,7 +378,7 @@ mod tests {
&env.access_layer, &env.access_layer,
&purger, &purger,
waiter, waiter,
&manifest_ctx, Arc::new(MitoConfig::default()),
) )
.unwrap(); .unwrap();
let output = output_rx.await.unwrap().unwrap(); let output = output_rx.await.unwrap().unwrap();
@@ -419,7 +396,7 @@ mod tests {
&env.access_layer, &env.access_layer,
&purger, &purger,
waiter, waiter,
&manifest_ctx, Arc::new(MitoConfig::default()),
) )
.unwrap(); .unwrap();
let output = output_rx.await.unwrap().unwrap(); let output = output_rx.await.unwrap().unwrap();
@@ -471,9 +448,6 @@ mod tests {
.push_l0_file(90, end) .push_l0_file(90, end)
.build(), .build(),
); );
let manifest_ctx = env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await;
scheduler scheduler
.schedule_compaction( .schedule_compaction(
region_id, region_id,
@@ -481,7 +455,7 @@ mod tests {
&env.access_layer, &env.access_layer,
&purger, &purger,
OptionOutputTx::none(), OptionOutputTx::none(),
&manifest_ctx, Arc::new(MitoConfig::default()),
) )
.unwrap(); .unwrap();
// Should schedule 1 compaction. // Should schedule 1 compaction.
@@ -509,7 +483,7 @@ mod tests {
&env.access_layer, &env.access_layer,
&purger, &purger,
OptionOutputTx::none(), OptionOutputTx::none(),
&manifest_ctx, Arc::new(MitoConfig::default()),
) )
.unwrap(); .unwrap();
assert_eq!(1, scheduler.region_status.len()); assert_eq!(1, scheduler.region_status.len());
@@ -522,7 +496,7 @@ mod tests {
.is_some()); .is_some());
// On compaction finished and schedule next compaction. // On compaction finished and schedule next compaction.
scheduler.on_compaction_finished(region_id, &manifest_ctx); scheduler.on_compaction_finished(region_id, Arc::new(MitoConfig::default()));
assert_eq!(1, scheduler.region_status.len()); assert_eq!(1, scheduler.region_status.len());
assert_eq!(2, job_scheduler.num_jobs()); assert_eq!(2, job_scheduler.num_jobs());
// 5 files for next compaction. // 5 files for next compaction.
@@ -540,7 +514,7 @@ mod tests {
&env.access_layer, &env.access_layer,
&purger, &purger,
OptionOutputTx::none(), OptionOutputTx::none(),
&manifest_ctx, Arc::new(MitoConfig::default()),
) )
.unwrap(); .unwrap();
assert_eq!(2, job_scheduler.num_jobs()); assert_eq!(2, job_scheduler.num_jobs());

View File

@@ -12,8 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::collections::hash_map::Entry; use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
@@ -34,15 +33,12 @@ use crate::compaction::picker::{CompactionTask, Picker};
use crate::compaction::CompactionRequest; use crate::compaction::CompactionRequest;
use crate::config::MitoConfig; use crate::config::MitoConfig;
use crate::error::{self, CompactRegionSnafu}; use crate::error::{self, CompactRegionSnafu};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED}; use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED};
use crate::read::projection::ProjectionMapper; use crate::read::projection::ProjectionMapper;
use crate::read::scan_region::ScanInput; use crate::read::scan_region::ScanInput;
use crate::read::seq_scan::SeqScan; use crate::read::seq_scan::SeqScan;
use crate::read::{BoxedBatchReader, Source}; use crate::read::{BoxedBatchReader, Source};
use crate::region::options::IndexOptions; use crate::region::options::IndexOptions;
use crate::region::version::VersionControlRef;
use crate::region::{ManifestContextRef, RegionState};
use crate::request::{ use crate::request::{
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest, BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
}; };
@@ -50,7 +46,6 @@ use crate::sst::file::{FileHandle, FileId, FileMeta, IndexType, Level};
use crate::sst::file_purger::FilePurgerRef; use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions; use crate::sst::parquet::WriteOptions;
use crate::sst::version::LevelMeta; use crate::sst::version::LevelMeta;
use crate::worker::WorkerListener;
const MAX_PARALLEL_COMPACTION: usize = 8; const MAX_PARALLEL_COMPACTION: usize = 8;
@@ -89,41 +84,35 @@ impl TwcsPicker {
/// fragmentation. For other windows, we allow at most 1 file at each window. /// fragmentation. For other windows, we allow at most 1 file at each window.
fn build_output( fn build_output(
&self, &self,
time_windows: &BTreeMap<i64, Window>, time_windows: &BTreeMap<i64, Vec<FileHandle>>,
active_window: Option<i64>, active_window: Option<i64>,
) -> Vec<CompactionOutput> { ) -> Vec<CompactionOutput> {
let mut output = vec![]; let mut output = vec![];
for (window, files) in time_windows { for (window, files) in time_windows {
let files_in_window = &files.files;
// we only remove deletion markers once no file in current window overlaps with any other window.
let filter_deleted = !files.overlapping;
if let Some(active_window) = active_window if let Some(active_window) = active_window
&& *window == active_window && *window == active_window
{ {
if files_in_window.len() > self.max_active_window_files { if files.len() > self.max_active_window_files {
output.push(CompactionOutput { output.push(CompactionOutput {
output_file_id: FileId::random(), output_file_id: FileId::random(),
output_level: 1, // we only have two levels and always compact to l1 output_level: 1, // we only have two levels and always compact to l1
inputs: files_in_window.clone(), inputs: files.clone(),
filter_deleted,
}); });
} else { } else {
debug!("Active window not present or no enough files in active window {:?}, window: {}", active_window, *window); debug!("Active window not present or no enough files in active window {:?}, window: {}", active_window, *window);
} }
} else { } else {
// not active writing window // not active writing window
if files_in_window.len() > self.max_inactive_window_files { if files.len() > self.max_inactive_window_files {
output.push(CompactionOutput { output.push(CompactionOutput {
output_file_id: FileId::random(), output_file_id: FileId::random(),
output_level: 1, output_level: 1,
inputs: files_in_window.clone(), inputs: files.clone(),
filter_deleted,
}); });
} else { } else {
debug!( debug!(
"No enough files, current: {}, max_inactive_window_files: {}", "No enough files, current: {}, max_inactive_window_files: {}",
files_in_window.len(), files.len(),
self.max_inactive_window_files self.max_inactive_window_files
) )
} }
@@ -144,9 +133,6 @@ impl Picker for TwcsPicker {
file_purger, file_purger,
start_time, start_time,
cache_manager, cache_manager,
manifest_ctx,
version_control,
listener,
} = req; } = req;
let region_metadata = current_version.metadata.clone(); let region_metadata = current_version.metadata.clone();
@@ -204,107 +190,29 @@ impl Picker for TwcsPicker {
storage: current_version.options.storage.clone(), storage: current_version.options.storage.clone(),
index_options: current_version.options.index_options.clone(), index_options: current_version.options.index_options.clone(),
append_mode: current_version.options.append_mode, append_mode: current_version.options.append_mode,
manifest_ctx,
version_control,
listener,
}; };
Some(Box::new(task)) Some(Box::new(task))
} }
} }
struct Window {
start: Timestamp,
end: Timestamp,
files: Vec<FileHandle>,
time_window: i64,
overlapping: bool,
}
impl Window {
/// Creates a new [Window] with given file.
fn new_with_file(file: FileHandle) -> Self {
let (start, end) = file.time_range();
Self {
start,
end,
files: vec![file],
time_window: 0,
overlapping: false,
}
}
/// Returns the time range of all files in current window (inclusive).
fn range(&self) -> (Timestamp, Timestamp) {
(self.start, self.end)
}
/// Adds a new file to window and updates time range.
fn add_file(&mut self, file: FileHandle) {
let (start, end) = file.time_range();
self.start = self.start.min(start);
self.end = self.end.max(end);
self.files.push(file);
}
}
/// Assigns files to windows with predefined window size (in seconds) by their max timestamps. /// Assigns files to windows with predefined window size (in seconds) by their max timestamps.
fn assign_to_windows<'a>( fn assign_to_windows<'a>(
files: impl Iterator<Item = &'a FileHandle>, files: impl Iterator<Item = &'a FileHandle>,
time_window_size: i64, time_window_size: i64,
) -> BTreeMap<i64, Window> { ) -> BTreeMap<i64, Vec<FileHandle>> {
let mut windows: HashMap<i64, Window> = HashMap::new(); let mut windows: BTreeMap<i64, Vec<FileHandle>> = BTreeMap::new();
// Iterates all files and assigns them to time windows according to their max timestamps // Iterates all files and assigns them to time windows according to their max timestamps
for f in files { for file in files {
let (_, end) = f.time_range(); let (_, end) = file.time_range();
let time_window = end let time_window = end
.convert_to(TimeUnit::Second) .convert_to(TimeUnit::Second)
.unwrap() .unwrap()
.value() .value()
.align_to_ceil_by_bucket(time_window_size) .align_to_ceil_by_bucket(time_window_size)
.unwrap_or(i64::MIN); .unwrap_or(i64::MIN);
windows.entry(time_window).or_default().push(file.clone());
match windows.entry(time_window) {
Entry::Occupied(mut e) => {
e.get_mut().add_file(f.clone());
} }
Entry::Vacant(e) => { windows
let mut window = Window::new_with_file(f.clone());
window.time_window = time_window;
e.insert(window);
}
}
}
if windows.is_empty() {
return BTreeMap::new();
}
let mut windows = windows.into_values().collect::<Vec<_>>();
windows.sort_unstable_by(|l, r| l.start.cmp(&r.start).then(l.end.cmp(&r.end).reverse()));
let mut current_range: (Timestamp, Timestamp) = windows[0].range(); // windows cannot be empty.
for idx in 1..windows.len() {
let next_range = windows[idx].range();
if overlaps(&current_range, &next_range) {
windows[idx - 1].overlapping = true;
windows[idx].overlapping = true;
}
current_range = (
current_range.0.min(next_range.0),
current_range.1.max(next_range.1),
);
}
windows.into_iter().map(|w| (w.time_window, w)).collect()
}
/// Checks if two inclusive timestamp ranges overlap with each other.
fn overlaps(l: &(Timestamp, Timestamp), r: &(Timestamp, Timestamp)) -> bool {
let (l, r) = if l.0 <= r.0 { (l, r) } else { (r, l) };
let (_, l_end) = l;
let (r_start, _) = r;
r_start <= l_end
} }
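
The window assignment above rests on two small pieces: bucketing files by their aligned max timestamp and checking whether two inclusive time ranges overlap. A compact sketch of both with plain integer timestamps in seconds (no `FileHandle`/`Timestamp` types):

use std::collections::BTreeMap;

/// A file is reduced to its inclusive (start, end) time range in seconds.
type TimeRange = (i64, i64);

/// Aligns a timestamp up to the end of its bucket, e.g. 1200 with bucket 3600 -> 3600.
fn align_to_ceil_by_bucket(ts: i64, bucket: i64) -> i64 {
    ts.div_euclid(bucket) * bucket + if ts.rem_euclid(bucket) == 0 { 0 } else { bucket }
}

/// Checks whether two inclusive ranges overlap.
fn overlaps(l: &TimeRange, r: &TimeRange) -> bool {
    let (l, r) = if l.0 <= r.0 { (l, r) } else { (r, l) };
    r.0 <= l.1
}

/// Buckets ranges by their aligned end timestamp, keyed by window.
fn assign_to_windows(ranges: &[TimeRange], window: i64) -> BTreeMap<i64, Vec<TimeRange>> {
    let mut windows: BTreeMap<i64, Vec<TimeRange>> = BTreeMap::new();
    for range in ranges {
        let key = align_to_ceil_by_bucket(range.1, window);
        windows.entry(key).or_default().push(*range);
    }
    windows
}

fn main() {
    // Files end at 1200 and 2400 (window 3600) and at 10800 (window 10800).
    let files = [(0, 1200), (0, 2400), (3600, 10800)];
    let windows = assign_to_windows(&files, 3600);
    assert_eq!(windows[&3600].len(), 2);
    assert_eq!(windows[&10800].len(), 1);

    // The first two ranges overlap each other; the third does not overlap them.
    assert!(overlaps(&(0, 1200), &(0, 2400)));
    assert!(!overlaps(&(0, 2400), &(3600, 10800)));
}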
/// Finds the latest active writing window among all files. /// Finds the latest active writing window among all files.
@@ -351,12 +259,6 @@ pub(crate) struct TwcsCompactionTask {
pub(crate) index_options: IndexOptions, pub(crate) index_options: IndexOptions,
/// The region is using append mode. /// The region is using append mode.
pub(crate) append_mode: bool, pub(crate) append_mode: bool,
/// Manifest context.
pub(crate) manifest_ctx: ManifestContextRef,
/// Version control to update.
pub(crate) version_control: VersionControlRef,
/// Event listener.
pub(crate) listener: WorkerListener,
} }
impl Debug for TwcsCompactionTask { impl Debug for TwcsCompactionTask {
@@ -442,7 +344,6 @@ impl TwcsCompactionTask {
sst_layer.clone(), sst_layer.clone(),
&output.inputs, &output.inputs,
append_mode, append_mode,
output.filter_deleted,
) )
.await?; .await?;
let file_meta_opt = sst_layer let file_meta_opt = sst_layer
@@ -497,55 +398,18 @@ impl TwcsCompactionTask {
Ok((output_files, inputs)) Ok((output_files, inputs))
} }
async fn handle_compaction(&mut self) -> error::Result<()> { async fn handle_compaction(&mut self) -> error::Result<(Vec<FileMeta>, Vec<FileMeta>)> {
self.mark_files_compacting(true); self.mark_files_compacting(true);
let merge_timer = COMPACTION_STAGE_ELAPSED let merge_timer = COMPACTION_STAGE_ELAPSED
.with_label_values(&["merge"]) .with_label_values(&["merge"])
.start_timer(); .start_timer();
let (added, mut deleted) = match self.merge_ssts().await { let (output, mut compacted) = self.merge_ssts().await.map_err(|e| {
Ok(v) => v,
Err(e) => {
error!(e; "Failed to compact region: {}", self.region_id); error!(e; "Failed to compact region: {}", self.region_id);
merge_timer.stop_and_discard(); merge_timer.stop_and_discard();
return Err(e); e
} })?;
}; compacted.extend(self.expired_ssts.iter().map(FileHandle::meta));
deleted.extend(self.expired_ssts.iter().map(FileHandle::meta)); Ok((output, compacted))
let merge_time = merge_timer.stop_and_record();
info!(
"Compacted SST files, region_id: {}, input: {:?}, output: {:?}, window: {:?}, waiter_num: {}, merge_time: {}s",
self.region_id,
deleted,
added,
self.compaction_time_window,
self.waiters.len(),
merge_time,
);
self.listener.on_merge_ssts_finished(self.region_id).await;
let _manifest_timer = COMPACTION_STAGE_ELAPSED
.with_label_values(&["write_manifest"])
.start_timer();
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: added,
files_to_remove: deleted,
compaction_time_window: self
.compaction_time_window
.map(|seconds| Duration::from_secs(seconds as u64)),
flushed_entry_id: None,
flushed_sequence: None,
};
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
// We might leak files if we fail to update manifest. We can add a cleanup task to
// remove them later.
self.manifest_ctx
.update_manifest(RegionState::Writable, action_list, || {
self.version_control
.apply_edit(edit, &[], self.file_purger.clone());
})
.await
} }
/// Handles compaction failure, notifies all waiters. /// Handles compaction failure, notifies all waiters.
@@ -573,11 +437,27 @@ impl TwcsCompactionTask {
impl CompactionTask for TwcsCompactionTask { impl CompactionTask for TwcsCompactionTask {
async fn run(&mut self) { async fn run(&mut self) {
let notify = match self.handle_compaction().await { let notify = match self.handle_compaction().await {
Ok(()) => BackgroundNotify::CompactionFinished(CompactionFinished { Ok((added, deleted)) => {
info!(
"Compacted SST files, input: {:?}, output: {:?}, window: {:?}, waiter_num: {}",
deleted,
added,
self.compaction_time_window,
self.waiters.len(),
);
BackgroundNotify::CompactionFinished(CompactionFinished {
region_id: self.region_id, region_id: self.region_id,
compaction_outputs: added,
compacted_files: deleted,
senders: std::mem::take(&mut self.waiters), senders: std::mem::take(&mut self.waiters),
file_purger: self.file_purger.clone(),
compaction_time_window: self
.compaction_time_window
.map(|seconds| Duration::from_secs(seconds as u64)),
start_time: self.start_time, start_time: self.start_time,
}), })
}
Err(e) => { Err(e) => {
error!(e; "Failed to compact region, region id: {}", self.region_id); error!(e; "Failed to compact region, region id: {}", self.region_id);
let err = Arc::new(e); let err = Arc::new(e);
@@ -692,8 +572,6 @@ pub(crate) struct CompactionOutput {
pub output_level: Level, pub output_level: Level,
/// Compaction input files. /// Compaction input files.
pub inputs: Vec<FileHandle>, pub inputs: Vec<FileHandle>,
/// Whether to remove deletion markers.
pub filter_deleted: bool,
} }
/// Builds [BoxedBatchReader] that reads all SST files and yields batches in primary key order. /// Builds [BoxedBatchReader] that reads all SST files and yields batches in primary key order.
@@ -702,12 +580,10 @@ async fn build_sst_reader(
sst_layer: AccessLayerRef, sst_layer: AccessLayerRef,
inputs: &[FileHandle], inputs: &[FileHandle],
append_mode: bool, append_mode: bool,
filter_deleted: bool,
) -> error::Result<BoxedBatchReader> { ) -> error::Result<BoxedBatchReader> {
let scan_input = ScanInput::new(sst_layer, ProjectionMapper::all(&metadata)?) let scan_input = ScanInput::new(sst_layer, ProjectionMapper::all(&metadata)?)
.with_files(inputs.to_vec()) .with_files(inputs.to_vec())
.with_append_mode(append_mode) .with_append_mode(append_mode)
.with_filter_deleted(filter_deleted)
// We ignore file not found error during compaction. // We ignore file not found error during compaction.
.with_ignore_file_not_found(true); .with_ignore_file_not_found(true);
SeqScan::new(scan_input).build_reader().await SeqScan::new(scan_input).build_reader().await
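
One side of this hunk threads a `filter_deleted` flag from the compaction output into the reader builder. A sketch of that builder-style option with a hypothetical `ScanOptions` struct; the real `ScanInput`/`SeqScan` API is not reproduced here:

/// Hypothetical, simplified scan options mirroring the builder calls above.
#[derive(Debug, Default, PartialEq)]
struct ScanOptions {
    append_mode: bool,
    filter_deleted: bool,
    ignore_file_not_found: bool,
}

impl ScanOptions {
    fn new() -> Self {
        Self::default()
    }

    fn with_append_mode(mut self, append_mode: bool) -> Self {
        self.append_mode = append_mode;
        self
    }

    /// Only remove deletion markers when the compaction window does not
    /// overlap any other window; otherwise deletes must be kept.
    fn with_filter_deleted(mut self, filter_deleted: bool) -> Self {
        self.filter_deleted = filter_deleted;
        self
    }

    fn with_ignore_file_not_found(mut self, ignore: bool) -> Self {
        self.ignore_file_not_found = ignore;
        self
    }
}

fn main() {
    // A window with no overlaps may drop deletion markers during compaction.
    let non_overlapping = ScanOptions::new()
        .with_append_mode(false)
        .with_filter_deleted(true)
        .with_ignore_file_not_found(true);
    assert!(non_overlapping.filter_deleted);

    // An overlapping window keeps them, so deletes still shadow older data.
    let overlapping = ScanOptions::new()
        .with_append_mode(false)
        .with_filter_deleted(false)
        .with_ignore_file_not_found(true);
    assert!(!overlapping.filter_deleted);
}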
@@ -766,7 +642,7 @@ mod tests {
.iter(), .iter(),
3, 3,
); );
assert_eq!(5, windows.get(&0).unwrap().files.len()); assert_eq!(5, windows.get(&0).unwrap().len());
let files = [FileId::random(); 3]; let files = [FileId::random(); 3];
let windows = assign_to_windows( let windows = assign_to_windows(
@@ -780,148 +656,15 @@ mod tests {
); );
assert_eq!( assert_eq!(
files[0], files[0],
windows.get(&0).unwrap().files.first().unwrap().file_id() windows.get(&0).unwrap().first().unwrap().file_id()
); );
assert_eq!( assert_eq!(
files[1], files[1],
windows.get(&3).unwrap().files.first().unwrap().file_id() windows.get(&3).unwrap().first().unwrap().file_id()
); );
assert_eq!( assert_eq!(
files[2], files[2],
windows.get(&12).unwrap().files.first().unwrap().file_id() windows.get(&12).unwrap().first().unwrap().file_id()
);
}
/// (Window value, overlapping, files' time ranges in window)
type ExpectedWindowSpec = (i64, bool, Vec<(i64, i64)>);
fn check_assign_to_windows_with_overlapping(
file_time_ranges: &[(i64, i64)],
time_window: i64,
expected_files: &[ExpectedWindowSpec],
) {
let files: Vec<_> = (0..file_time_ranges.len())
.map(|_| FileId::random())
.collect();
let file_handles = files
.iter()
.zip(file_time_ranges.iter())
.map(|(file_id, range)| new_file_handle(*file_id, range.0, range.1, 0))
.collect::<Vec<_>>();
let windows = assign_to_windows(file_handles.iter(), time_window);
for (expected_window, overlapping, window_files) in expected_files {
let actual_window = windows.get(expected_window).unwrap();
assert_eq!(*overlapping, actual_window.overlapping);
let mut file_ranges = actual_window
.files
.iter()
.map(|f| {
let (s, e) = f.time_range();
(s.value(), e.value())
})
.collect::<Vec<_>>();
file_ranges.sort_unstable_by(|l, r| l.0.cmp(&r.0).then(l.1.cmp(&r.1)));
assert_eq!(window_files, &file_ranges);
}
}
#[test]
fn test_assign_to_windows_with_overlapping() {
check_assign_to_windows_with_overlapping(
&[(0, 999), (1000, 1999), (2000, 2999)],
2,
&[
(0, false, vec![(0, 999)]),
(2, false, vec![(1000, 1999), (2000, 2999)]),
],
);
check_assign_to_windows_with_overlapping(
&[(0, 1), (0, 999), (100, 2999)],
2,
&[
(0, true, vec![(0, 1), (0, 999)]),
(2, true, vec![(100, 2999)]),
],
);
check_assign_to_windows_with_overlapping(
&[(0, 999), (1000, 1999), (2000, 2999), (3000, 3999)],
2,
&[
(0, false, vec![(0, 999)]),
(2, false, vec![(1000, 1999), (2000, 2999)]),
(4, false, vec![(3000, 3999)]),
],
);
check_assign_to_windows_with_overlapping(
&[
(0, 999),
(1000, 1999),
(2000, 2999),
(3000, 3999),
(0, 3999),
],
2,
&[
(0, true, vec![(0, 999)]),
(2, true, vec![(1000, 1999), (2000, 2999)]),
(4, true, vec![(0, 3999), (3000, 3999)]),
],
);
check_assign_to_windows_with_overlapping(
&[
(0, 999),
(1000, 1999),
(2000, 2999),
(3000, 3999),
(1999, 3999),
],
2,
&[
(0, false, vec![(0, 999)]),
(2, true, vec![(1000, 1999), (2000, 2999)]),
(4, true, vec![(1999, 3999), (3000, 3999)]),
],
);
check_assign_to_windows_with_overlapping(
&[
(0, 999), // window 0
(1000, 1999), // window 2
(2000, 2999), // window 2
(3000, 3999), // window 4
(2999, 3999), // window 4
],
2,
&[
// window 2 overlaps with window 4
(0, false, vec![(0, 999)]),
(2, true, vec![(1000, 1999), (2000, 2999)]),
(4, true, vec![(2999, 3999), (3000, 3999)]),
],
);
check_assign_to_windows_with_overlapping(
&[
(0, 999), // window 0
(1000, 1999), // window 2
(2000, 2999), // window 2
(3000, 3999), // window 4
(0, 1000), // // window 2
],
2,
&[
// only window 0 overlaps with window 2.
(0, true, vec![(0, 999)]),
(2, true, vec![(0, 1000), (1000, 1999), (2000, 2999)]),
(4, false, vec![(3000, 3999)]),
],
); );
} }

View File

@@ -345,7 +345,7 @@ async fn test_catchup_with_manifest_update() {
// Ensures the mutable is empty. // Ensures the mutable is empty.
assert!(region.version().memtables.mutable.is_empty()); assert!(region.version().memtables.mutable.is_empty());
let manifest = region.manifest_ctx.manifest().await; let manifest = region.manifest_manager.read().await.manifest();
assert_eq!(manifest.manifest_version, 0); assert_eq!(manifest.manifest_version, 0);
let resp = follower_engine let resp = follower_engine
@@ -361,7 +361,7 @@ async fn test_catchup_with_manifest_update() {
// The inner region was replaced. We must get it again. // The inner region was replaced. We must get it again.
let region = follower_engine.get_region(region_id).unwrap(); let region = follower_engine.get_region(region_id).unwrap();
let manifest = region.manifest_ctx.manifest().await; let manifest = region.manifest_manager.read().await.manifest();
assert_eq!(manifest.manifest_version, 2); assert_eq!(manifest.manifest_version, 2);
assert!(!region.is_writable()); assert!(!region.is_writable());

View File

@@ -149,102 +149,6 @@ async fn test_compaction_region() {
assert_eq!((0..25).map(|v| v * 1000).collect::<Vec<_>>(), vec); assert_eq!((0..25).map(|v| v * 1000).collect::<Vec<_>>(), vec);
} }
#[tokio::test]
async fn test_compaction_region_with_overlapping() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.insert_option("compaction.twcs.max_active_window_files", "2")
.insert_option("compaction.twcs.max_inactive_window_files", "2")
.insert_option("compaction.twcs.time_window", "1h")
.build();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Flush 4 SSTs for compaction.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 0..2400).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 3600..10800).await; // window 10800
delete_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
let result = engine
.handle_request(region_id, RegionRequest::Compact(RegionCompactRequest {}))
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
2,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
let stream = scanner.scan().await.unwrap();
let vec = collect_stream_ts(stream).await;
assert_eq!((3600..10800).map(|i| { i * 1000 }).collect::<Vec<_>>(), vec);
}
#[tokio::test]
async fn test_compaction_region_with_overlapping_delete_all() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.insert_option("compaction.twcs.max_active_window_files", "2")
.insert_option("compaction.twcs.max_inactive_window_files", "2")
.insert_option("compaction.twcs.time_window", "1h")
.build();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Flush 4 SSTs for compaction.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 0..2400).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
delete_and_flush(&engine, region_id, &column_schemas, 0..10800).await; // window 10800
let result = engine
.handle_request(region_id, RegionRequest::Compact(RegionCompactRequest {}))
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
4,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
let stream = scanner.scan().await.unwrap();
let vec = collect_stream_ts(stream).await;
assert!(vec.is_empty());
}
// For issue https://github.com/GreptimeTeam/greptimedb/issues/3633 // For issue https://github.com/GreptimeTeam/greptimedb/issues/3633
#[tokio::test] #[tokio::test]
async fn test_readonly_during_compaction() { async fn test_readonly_during_compaction() {

View File

@@ -51,9 +51,9 @@ pub trait EventListener: Send + Sync {
let _ = removed; let _ = removed;
} }
/// Notifies the listener that SSTs have been merged and the region /// Notifies the listener that the region is going to handle the compaction
/// is going to update its manifest. /// finished request.
async fn on_merge_ssts_finished(&self, region_id: RegionId) { async fn on_handle_compaction_finished(&self, region_id: RegionId) {
let _ = region_id; let _ = region_id;
} }
} }
@@ -201,7 +201,7 @@ impl CompactionListener {
#[async_trait] #[async_trait]
impl EventListener for CompactionListener { impl EventListener for CompactionListener {
async fn on_merge_ssts_finished(&self, region_id: RegionId) { async fn on_handle_compaction_finished(&self, region_id: RegionId) {
info!("Handle compaction finished request, region {region_id}"); info!("Handle compaction finished request, region {region_id}");
self.handle_finished_notify.notify_one(); self.handle_finished_notify.notify_one();

View File

@@ -127,7 +127,7 @@ async fn test_engine_open_readonly() {
) )
.await .await
.unwrap_err(); .unwrap_err();
assert_eq!(StatusCode::RegionNotReady, err.status_code()); assert_eq!(StatusCode::RegionReadonly, err.status_code());
assert_eq!(Some(RegionRole::Follower), engine.role(region_id)); assert_eq!(Some(RegionRole::Follower), engine.role(region_id));
// Set writable and write. // Set writable and write.

View File

@@ -66,7 +66,7 @@ async fn test_set_readonly_gracefully() {
.await .await
.unwrap_err(); .unwrap_err();
assert_eq!(error.status_code(), StatusCode::RegionNotReady); assert_eq!(error.status_code(), StatusCode::RegionReadonly);
engine.set_writable(region_id, true).unwrap(); engine.set_writable(region_id, true).unwrap();

View File

@@ -29,7 +29,6 @@ use store_api::manifest::ManifestVersion;
 use store_api::storage::RegionId;

 use crate::cache::file_cache::FileType;
-use crate::region::RegionState;
 use crate::sst::file::FileId;
 use crate::worker::WorkerId;
@@ -396,11 +395,9 @@ pub enum Error {
         location: Location,
     },

-    #[snafu(display("Region {} is in {:?} state, expect: {:?}", region_id, state, expect))]
-    RegionState {
+    #[snafu(display("Region {} is read only", region_id))]
+    RegionReadonly {
         region_id: RegionId,
-        state: RegionState,
-        expect: RegionState,
         location: Location,
     },
@@ -672,7 +669,7 @@ impl ErrorExt for Error {
             CompactRegion { source, .. } => source.status_code(),
             CompatReader { .. } => StatusCode::Unexpected,
             InvalidRegionRequest { source, .. } => source.status_code(),
-            RegionState { .. } => StatusCode::RegionNotReady,
+            RegionReadonly { .. } => StatusCode::RegionReadonly,
             JsonOptions { .. } => StatusCode::InvalidArguments,
             EmptyRegionDir { .. } | EmptyManifestDir { .. } => StatusCode::RegionNotFound,
             ArrowReader { .. } => StatusCode::StorageUnavailable,
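
For readers unfamiliar with snafu, the variant above comes with a generated RegionReadonlySnafu selector, so a call site can guard writes in one line. A hedged sketch only (the helper name is made up; Result is the crate's alias, and it mirrors the writable_region check that appears later in this diff):

use snafu::ensure;

/// Sketch: reject writes when the region is read only.
fn check_writable(region: &MitoRegion, region_id: RegionId) -> Result<()> {
    ensure!(region.is_writable(), RegionReadonlySnafu { region_id });
    Ok(())
}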


@@ -31,12 +31,10 @@ use crate::config::MitoConfig;
use crate::error::{ use crate::error::{
Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result, Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result,
}; };
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL}; use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL};
use crate::read::Source; use crate::read::Source;
use crate::region::options::IndexOptions; use crate::region::options::IndexOptions;
use crate::region::version::{VersionControlData, VersionControlRef}; use crate::region::version::{VersionControlData, VersionControlRef, VersionRef};
use crate::region::{ManifestContextRef, RegionState};
use crate::request::{ use crate::request::{
BackgroundNotify, FlushFailed, FlushFinished, OptionOutputTx, OutputTx, SenderDdlRequest, BackgroundNotify, FlushFailed, FlushFinished, OptionOutputTx, OutputTx, SenderDdlRequest,
SenderWriteRequest, WorkerRequest, SenderWriteRequest, WorkerRequest,
@@ -206,7 +204,6 @@ pub(crate) struct RegionFlushTask {
pub(crate) engine_config: Arc<MitoConfig>, pub(crate) engine_config: Arc<MitoConfig>,
pub(crate) row_group_size: Option<usize>, pub(crate) row_group_size: Option<usize>,
pub(crate) cache_manager: CacheManagerRef, pub(crate) cache_manager: CacheManagerRef,
pub(crate) manifest_ctx: ManifestContextRef,
/// Index options for the region. /// Index options for the region.
pub(crate) index_options: IndexOptions, pub(crate) index_options: IndexOptions,
@@ -243,30 +240,36 @@ impl RegionFlushTask {
// Get a version of this region before creating a job to get current // Get a version of this region before creating a job to get current
// wal entry id, sequence and immutable memtables. // wal entry id, sequence and immutable memtables.
let version_data = version_control.current(); let version_data = version_control.current();
// This is used to update the version.
let version_control = version_control.clone();
Box::pin(async move { Box::pin(async move {
self.do_flush(version_data, &version_control).await; self.do_flush(version_data).await;
}) })
} }
/// Runs the flush task. /// Runs the flush task.
async fn do_flush( async fn do_flush(&mut self, version_data: VersionControlData) {
&mut self,
version_data: VersionControlData,
version_control: &VersionControlRef,
) {
let timer = FLUSH_ELAPSED.with_label_values(&["total"]).start_timer(); let timer = FLUSH_ELAPSED.with_label_values(&["total"]).start_timer();
self.listener.on_flush_begin(self.region_id).await; self.listener.on_flush_begin(self.region_id).await;
let worker_request = match self.flush_memtables(&version_data, version_control).await { let worker_request = match self.flush_memtables(&version_data.version).await {
Ok(()) => { Ok(file_metas) => {
let memtables_to_remove = version_data
.version
.memtables
.immutables()
.iter()
.map(|m| m.id())
.collect();
let flush_finished = FlushFinished { let flush_finished = FlushFinished {
region_id: self.region_id, region_id: self.region_id,
file_metas,
// The last entry has been flushed. // The last entry has been flushed.
flushed_entry_id: version_data.last_entry_id, flushed_entry_id: version_data.last_entry_id,
flushed_sequence: version_data.committed_sequence,
memtables_to_remove,
senders: std::mem::take(&mut self.senders), senders: std::mem::take(&mut self.senders),
file_purger: self.file_purger.clone(),
_timer: timer, _timer: timer,
}; };
WorkerRequest::Background { WorkerRequest::Background {
@@ -290,13 +293,8 @@ impl RegionFlushTask {
self.send_worker_request(worker_request).await; self.send_worker_request(worker_request).await;
} }
/// Flushes memtables to level 0 SSTs and updates the manifest. /// Flushes memtables to level 0 SSTs.
async fn flush_memtables( async fn flush_memtables(&self, version: &VersionRef) -> Result<Vec<FileMeta>> {
&self,
version_data: &VersionControlData,
version_control: &VersionControlRef,
) -> Result<()> {
let version = &version_data.version;
let timer = FLUSH_ELAPSED let timer = FLUSH_ELAPSED
.with_label_values(&["flush_memtables"]) .with_label_values(&["flush_memtables"])
.start_timer(); .start_timer();
@@ -384,31 +382,7 @@ impl RegionFlushTask {
timer.stop_and_record(), timer.stop_and_record(),
); );
let memtables_to_remove: SmallVec<[_; 2]> = version_data Ok(file_metas)
.version
.memtables
.immutables()
.iter()
.map(|m| m.id())
.collect();
let edit = RegionEdit {
files_to_add: file_metas,
files_to_remove: Vec::new(),
compaction_time_window: None,
// The last entry has been flushed.
flushed_entry_id: Some(version_data.last_entry_id),
flushed_sequence: Some(version_data.committed_sequence),
};
info!("Applying {edit:?} to region {}", self.region_id);
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
// We will leak files if the manifest update fails, but we ignore them for simplicity. We can
// add a cleanup job to remove them later.
self.manifest_ctx
.update_manifest(RegionState::Writable, action_list, || {
version_control.apply_edit(edit, &memtables_to_remove, self.file_purger.clone());
})
.await
} }
/// Notify flush job status. /// Notify flush job status.
@@ -801,9 +775,6 @@ mod tests {
engine_config: Arc::new(MitoConfig::default()), engine_config: Arc::new(MitoConfig::default()),
row_group_size: None, row_group_size: None,
cache_manager: Arc::new(CacheManager::default()), cache_manager: Arc::new(CacheManager::default()),
manifest_ctx: env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await,
index_options: IndexOptions::default(), index_options: IndexOptions::default(),
}; };
task.push_sender(OptionOutputTx::from(output_tx)); task.push_sender(OptionOutputTx::from(output_tx));


@@ -257,8 +257,9 @@ impl RegionManifestManager {
     }

     /// Stops the manager.
-    pub async fn stop(&mut self) {
+    pub async fn stop(&mut self) -> Result<()> {
         self.stopped = true;
+        Ok(())
     }

     /// Updates the manifest. Returns the current manifest version number.
@@ -523,7 +524,7 @@ mod test {
         .unwrap()
         .unwrap();
     // Stops it.
-    manager.stop().await;
+    manager.stop().await.unwrap();

     // Open it.
     let manager = env
@@ -563,7 +564,7 @@ mod test {
     manager.validate_manifest(&new_metadata, 1);

     // Reopen the manager.
-    manager.stop().await;
+    manager.stop().await.unwrap();
     let manager = env
         .create_manifest_manager(CompressionType::Uncompressed, 10, None)
         .await
@@ -650,7 +651,7 @@ mod test {
     // Reopen the manager,
     // we just calculate the size from the latest checkpoint file
-    manager.stop().await;
+    manager.stop().await.unwrap();
     let manager = env
         .create_manifest_manager(CompressionType::Uncompressed, 10, None)
         .await
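
Because stop() is now fallible, non-test callers would propagate the error instead of unwrapping. A minimal sketch (the wrapper function is illustrative, not part of this diff):

/// Sketch: shut the manifest manager down and bubble the error up with `?`.
async fn shutdown_manifest(manager: &mut RegionManifestManager) -> Result<()> {
    manager.stop().await?;
    Ok(())
}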


@@ -152,7 +152,7 @@ async fn manager_with_checkpoint_distance_1() {
     assert_eq!(expected_json, raw_json);

     // reopen the manager
-    manager.stop().await;
+    manager.stop().await.unwrap();
     let manager = reopen_manager(&env, 1, CompressionType::Uncompressed).await;
     assert_eq!(10, manager.manifest().manifest_version);
 }


@@ -382,17 +382,17 @@ impl Batch {
         self.take_in_place(&indices)
     }

-    /// Returns ids and datatypes of fields in the [Batch] after applying the `projection`.
+    /// Returns ids of fields in the [Batch] after applying the `projection`.
     pub(crate) fn projected_fields(
         metadata: &RegionMetadata,
         projection: &[ColumnId],
-    ) -> Vec<(ColumnId, ConcreteDataType)> {
+    ) -> Vec<ColumnId> {
         let projected_ids: HashSet<_> = projection.iter().copied().collect();
         metadata
             .field_columns()
             .filter_map(|column| {
                 if projected_ids.contains(&column.column_id) {
-                    Some((column.column_id, column.column_schema.data_type.clone()))
+                    Some(column.column_id)
                 } else {
                     None
                 }


@@ -16,7 +16,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::VectorRef; use datatypes::vectors::VectorRef;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
@@ -86,7 +85,7 @@ pub(crate) fn has_same_columns(left: &RegionMetadata, right: &RegionMetadata) ->
} }
for (left_col, right_col) in left.column_metadatas.iter().zip(&right.column_metadatas) { for (left_col, right_col) in left.column_metadatas.iter().zip(&right.column_metadatas) {
if left_col.column_id != right_col.column_id || !left_col.is_same_datatype(right_col) { if left_col.column_id != right_col.column_id {
return false; return false;
} }
debug_assert_eq!( debug_assert_eq!(
@@ -135,8 +134,8 @@ impl CompatPrimaryKey {
/// Helper to make fields compatible. /// Helper to make fields compatible.
#[derive(Debug)] #[derive(Debug)]
struct CompatFields { struct CompatFields {
/// Column Ids and DataTypes the reader actually returns. /// Column Ids the reader actually returns.
actual_fields: Vec<(ColumnId, ConcreteDataType)>, actual_fields: Vec<ColumnId>,
/// Indices to convert actual fields to expect fields. /// Indices to convert actual fields to expect fields.
index_or_defaults: Vec<IndexOrDefault>, index_or_defaults: Vec<IndexOrDefault>,
} }
@@ -150,28 +149,14 @@ impl CompatFields {
.actual_fields .actual_fields
.iter() .iter()
.zip(batch.fields()) .zip(batch.fields())
.all(|((id, _), batch_column)| *id == batch_column.column_id)); .all(|(id, batch_column)| *id == batch_column.column_id));
let len = batch.num_rows(); let len = batch.num_rows();
let fields = self let fields = self
.index_or_defaults .index_or_defaults
.iter() .iter()
.map(|index_or_default| match index_or_default { .map(|index_or_default| match index_or_default {
IndexOrDefault::Index { pos, cast_type } => { IndexOrDefault::Index(index) => batch.fields()[*index].clone(),
let old_column = &batch.fields()[*pos];
let data = if let Some(ty) = cast_type {
// Safety: We ensure type can be converted and the new batch should be valid.
// Tips: `safe` must be true in `CastOptions`, which will replace the specific value with null when it cannot be converted.
old_column.data.cast(ty).unwrap()
} else {
old_column.data.clone()
};
BatchColumn {
column_id: old_column.column_id,
data,
}
}
IndexOrDefault::DefaultValue { IndexOrDefault::DefaultValue {
column_id, column_id,
default_vector, default_vector,
@@ -263,23 +248,15 @@ fn may_compat_fields(
let source_field_index: HashMap<_, _> = actual_fields let source_field_index: HashMap<_, _> = actual_fields
.iter() .iter()
.enumerate() .enumerate()
.map(|(idx, (column_id, data_type))| (*column_id, (idx, data_type))) .map(|(idx, column_id)| (*column_id, idx))
.collect(); .collect();
let index_or_defaults = expect_fields let index_or_defaults = expect_fields
.iter() .iter()
.map(|(column_id, expect_data_type)| { .map(|column_id| {
if let Some((index, actual_data_type)) = source_field_index.get(column_id) { if let Some(index) = source_field_index.get(column_id) {
let mut cast_type = None;
if expect_data_type != *actual_data_type {
cast_type = Some(expect_data_type.clone())
}
// Source has this field. // Source has this field.
Ok(IndexOrDefault::Index { Ok(IndexOrDefault::Index(*index))
pos: *index,
cast_type,
})
} else { } else {
// Safety: mapper must have this column. // Safety: mapper must have this column.
let column = mapper.metadata().column_by_id(*column_id).unwrap(); let column = mapper.metadata().column_by_id(*column_id).unwrap();
@@ -316,10 +293,7 @@ fn may_compat_fields(
#[derive(Debug)] #[derive(Debug)]
enum IndexOrDefault { enum IndexOrDefault {
/// Index of the column in source batch. /// Index of the column in source batch.
Index { Index(usize),
pos: usize,
cast_type: Option<ConcreteDataType>,
},
/// Default value for the column. /// Default value for the column.
DefaultValue { DefaultValue {
/// Id of the column. /// Id of the column.
@@ -346,19 +320,27 @@ mod tests {
/// Creates a new [RegionMetadata]. /// Creates a new [RegionMetadata].
fn new_metadata( fn new_metadata(
semantic_types: &[(ColumnId, SemanticType, ConcreteDataType)], semantic_types: &[(ColumnId, SemanticType)],
primary_key: &[ColumnId], primary_key: &[ColumnId],
) -> RegionMetadata { ) -> RegionMetadata {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1)); let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
for (id, semantic_type, data_type) in semantic_types { for (id, semantic_type) in semantic_types {
let column_schema = match semantic_type { let column_schema = match semantic_type {
SemanticType::Tag => { SemanticType::Tag => ColumnSchema::new(
ColumnSchema::new(format!("tag_{id}"), data_type.clone(), true) format!("tag_{id}"),
} ConcreteDataType::string_datatype(),
SemanticType::Field => { true,
ColumnSchema::new(format!("field_{id}"), data_type.clone(), true) ),
} SemanticType::Field => ColumnSchema::new(
SemanticType::Timestamp => ColumnSchema::new("ts", data_type.clone(), false), format!("field_{id}"),
ConcreteDataType::int64_datatype(),
true,
),
SemanticType::Timestamp => ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
}; };
builder.push_column_metadata(ColumnMetadata { builder.push_column_metadata(ColumnMetadata {
@@ -427,26 +409,18 @@ mod tests {
fn test_invalid_pk_len() { fn test_invalid_pk_len() {
let reader_meta = new_metadata( let reader_meta = new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Tag),
ConcreteDataType::timestamp_millisecond_datatype(), (3, SemanticType::Field),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Tag, ConcreteDataType::string_datatype()),
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1, 2], &[1, 2],
); );
let expect_meta = new_metadata( let expect_meta = new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
); );
@@ -457,28 +431,20 @@ mod tests {
fn test_different_pk() { fn test_different_pk() {
let reader_meta = new_metadata( let reader_meta = new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Tag),
ConcreteDataType::timestamp_millisecond_datatype(), (3, SemanticType::Field),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Tag, ConcreteDataType::string_datatype()),
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[2, 1], &[2, 1],
); );
let expect_meta = new_metadata( let expect_meta = new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Tag),
ConcreteDataType::timestamp_millisecond_datatype(), (3, SemanticType::Field),
), (4, SemanticType::Tag),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Tag, ConcreteDataType::string_datatype()),
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
(4, SemanticType::Tag, ConcreteDataType::string_datatype()),
], ],
&[1, 2, 4], &[1, 2, 4],
); );
@@ -489,13 +455,9 @@ mod tests {
fn test_same_pk() { fn test_same_pk() {
let reader_meta = new_metadata( let reader_meta = new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
); );
@@ -508,13 +470,9 @@ mod tests {
fn test_same_fields() { fn test_same_fields() {
let reader_meta = Arc::new(new_metadata( let reader_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));
@@ -526,27 +484,19 @@ mod tests {
async fn test_compat_reader() { async fn test_compat_reader() {
let reader_meta = Arc::new(new_metadata( let reader_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));
let expect_meta = Arc::new(new_metadata( let expect_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(), (3, SemanticType::Tag),
), (4, SemanticType::Field),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
(3, SemanticType::Tag, ConcreteDataType::string_datatype()),
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1, 3], &[1, 3],
)); ));
@@ -575,27 +525,19 @@ mod tests {
async fn test_compat_reader_different_order() { async fn test_compat_reader_different_order() {
let reader_meta = Arc::new(new_metadata( let reader_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));
let expect_meta = Arc::new(new_metadata( let expect_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (3, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(), (2, SemanticType::Field),
), (4, SemanticType::Field),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));
@@ -618,85 +560,23 @@ mod tests {
.await; .await;
} }
#[tokio::test]
async fn test_compat_reader_different_types() {
let actual_meta = Arc::new(new_metadata(
&[
(
0,
SemanticType::Timestamp,
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
],
&[1],
));
let expect_meta = Arc::new(new_metadata(
&[
(
0,
SemanticType::Timestamp,
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::string_datatype()),
],
&[1],
));
let mapper = ProjectionMapper::all(&expect_meta).unwrap();
let k1 = encode_key(&[Some("a")]);
let k2 = encode_key(&[Some("b")]);
let source_reader = VecBatchReader::new(&[
new_batch(&k1, &[(2, false)], 1000, 3),
new_batch(&k2, &[(2, false)], 1000, 3),
]);
let fn_batch_cast = |batch: Batch| {
let mut new_fields = batch.fields.clone();
new_fields[0].data = new_fields[0]
.data
.cast(&ConcreteDataType::string_datatype())
.unwrap();
batch.with_fields(new_fields).unwrap()
};
let mut compat_reader = CompatReader::new(&mapper, actual_meta, source_reader).unwrap();
check_reader_result(
&mut compat_reader,
&[
fn_batch_cast(new_batch(&k1, &[(2, false)], 1000, 3)),
fn_batch_cast(new_batch(&k2, &[(2, false)], 1000, 3)),
],
)
.await;
}
#[tokio::test] #[tokio::test]
async fn test_compat_reader_projection() { async fn test_compat_reader_projection() {
let reader_meta = Arc::new(new_metadata( let reader_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (2, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(),
),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));
let expect_meta = Arc::new(new_metadata( let expect_meta = Arc::new(new_metadata(
&[ &[
( (0, SemanticType::Timestamp),
0, (1, SemanticType::Tag),
SemanticType::Timestamp, (3, SemanticType::Field),
ConcreteDataType::timestamp_millisecond_datatype(), (2, SemanticType::Field),
), (4, SemanticType::Field),
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
], ],
&[1], &[1],
)); ));


@@ -53,8 +53,8 @@ pub struct ProjectionMapper {
     /// Ids of columns to project. It keeps ids in the same order as the `projection`
     /// indices to build the mapper.
     column_ids: Vec<ColumnId>,
-    /// Ids and DataTypes of field columns in the [Batch].
-    batch_fields: Vec<(ColumnId, ConcreteDataType)>,
+    /// Ids of field columns in the [Batch].
+    batch_fields: Vec<ColumnId>,
 }

 impl ProjectionMapper {
@@ -95,7 +95,7 @@ impl ProjectionMapper {
         let field_id_to_index: HashMap<_, _> = batch_fields
             .iter()
             .enumerate()
-            .map(|(index, (column_id, _))| (*column_id, index))
+            .map(|(index, column_id)| (*column_id, index))
             .collect();
         // For each projected column, compute its index in batches.
         let mut batch_indices = Vec::with_capacity(projection.len());
@@ -151,7 +151,7 @@ impl ProjectionMapper {
     }

     /// Returns ids of fields in [Batch]es the mapper expects to convert.
-    pub(crate) fn batch_fields(&self) -> &[(ColumnId, ConcreteDataType)] {
+    pub(crate) fn batch_fields(&self) -> &[ColumnId] {
         &self.batch_fields
     }
@@ -173,7 +173,7 @@ impl ProjectionMapper {
             .batch_fields
             .iter()
             .zip(batch.fields())
-            .all(|((id, _), batch_col)| *id == batch_col.column_id));
+            .all(|(id, batch_col)| *id == batch_col.column_id));

         // Skips decoding pk if we don't need to output it.
         let pk_values = if self.has_tags {
@@ -344,13 +344,7 @@ mod tests {
         );
         let mapper = ProjectionMapper::all(&metadata).unwrap();
         assert_eq!([0, 1, 2, 3, 4], mapper.column_ids());
-        assert_eq!(
-            [
-                (3, ConcreteDataType::int64_datatype()),
-                (4, ConcreteDataType::int64_datatype())
-            ],
-            mapper.batch_fields()
-        );
+        assert_eq!([3, 4], mapper.batch_fields());

         // With vector cache.
         let cache = CacheManager::builder().vector_cache_size(1024).build();
@@ -384,10 +378,7 @@ mod tests {
         // Columns v1, k0
         let mapper = ProjectionMapper::new(&metadata, [4, 1].into_iter()).unwrap();
         assert_eq!([4, 1], mapper.column_ids());
-        assert_eq!(
-            [(4, ConcreteDataType::int64_datatype())],
-            mapper.batch_fields()
-        );
+        assert_eq!([4], mapper.batch_fields());
         let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
         let record_batch = mapper.convert(&batch, None).unwrap();


@@ -19,21 +19,21 @@ pub mod options;
pub(crate) mod version; pub(crate) mod version;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use common_telemetry::{error, info, warn}; use common_telemetry::info;
use common_wal::options::WalOptions; use common_wal::options::WalOptions;
use crossbeam_utils::atomic::AtomicCell;
use snafu::{ensure, OptionExt}; use snafu::{ensure, OptionExt};
use store_api::metadata::RegionMetadataRef; use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId; use store_api::storage::RegionId;
use tokio::sync::RwLock as TokioRwLock;
use crate::access_layer::AccessLayerRef; use crate::access_layer::AccessLayerRef;
use crate::error::{RegionNotFoundSnafu, RegionStateSnafu, RegionTruncatedSnafu, Result}; use crate::error::{RegionNotFoundSnafu, RegionReadonlySnafu, Result};
use crate::manifest::action::{RegionMetaAction, RegionMetaActionList}; use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::manifest::manager::RegionManifestManager; use crate::manifest::manager::RegionManifestManager;
use crate::memtable::MemtableBuilderRef; use crate::memtable::{MemtableBuilderRef, MemtableId};
use crate::region::version::{VersionControlRef, VersionRef}; use crate::region::version::{VersionControlRef, VersionRef};
use crate::request::OnFailure; use crate::request::OnFailure;
use crate::sst::file_purger::FilePurgerRef; use crate::sst::file_purger::FilePurgerRef;
@@ -57,23 +57,6 @@ impl RegionUsage {
} }
} }
/// State of the region.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionState {
/// The region is opened but is still read-only.
ReadOnly,
/// The region is opened and is writable.
Writable,
/// The region is altering.
Altering,
/// The region is dropping.
Dropping,
/// The region is truncating.
Truncating,
/// The region is handling a region edit.
Editing,
}
/// Metadata and runtime status of a region. /// Metadata and runtime status of a region.
/// ///
/// Writing and reading a region follow a single-writer-multi-reader rule: /// Writing and reading a region follow a single-writer-multi-reader rule:
@@ -88,19 +71,19 @@ pub(crate) struct MitoRegion {
pub(crate) region_id: RegionId, pub(crate) region_id: RegionId,
/// Version controller for this region. /// Version controller for this region.
///
/// We MUST update the version control inside the write lock of the region manifest manager.
pub(crate) version_control: VersionControlRef, pub(crate) version_control: VersionControlRef,
/// SSTs accessor for this region. /// SSTs accessor for this region.
pub(crate) access_layer: AccessLayerRef, pub(crate) access_layer: AccessLayerRef,
/// Context to maintain manifest for this region. /// Manager to maintain manifest for this region.
pub(crate) manifest_ctx: ManifestContextRef, pub(crate) manifest_manager: TokioRwLock<RegionManifestManager>,
/// SST file purger. /// SST file purger.
pub(crate) file_purger: FilePurgerRef, pub(crate) file_purger: FilePurgerRef,
/// Wal options of this region. /// Wal options of this region.
pub(crate) wal_options: WalOptions, pub(crate) wal_options: WalOptions,
/// Last flush time in millis. /// Last flush time in millis.
last_flush_millis: AtomicI64, last_flush_millis: AtomicI64,
/// Whether the region is writable.
writable: AtomicBool,
/// Provider to get current time. /// Provider to get current time.
time_provider: TimeProviderRef, time_provider: TimeProviderRef,
/// Memtable builder for the region. /// Memtable builder for the region.
@@ -111,18 +94,15 @@ pub(crate) type MitoRegionRef = Arc<MitoRegion>;
impl MitoRegion { impl MitoRegion {
/// Stop background managers for this region. /// Stop background managers for this region.
pub(crate) async fn stop(&self) { pub(crate) async fn stop(&self) -> Result<()> {
self.manifest_ctx self.manifest_manager.write().await.stop().await?;
.manifest_manager
.write()
.await
.stop()
.await;
info!( info!(
"Stopped region manifest manager, region_id: {}", "Stopped region manifest manager, region_id: {}",
self.region_id self.region_id
); );
Ok(())
} }
/// Returns current metadata of the region. /// Returns current metadata of the region.
@@ -148,73 +128,19 @@ impl MitoRegion {
self.last_flush_millis.store(now, Ordering::Relaxed); self.last_flush_millis.store(now, Ordering::Relaxed);
} }
/// Returns whether the region is writable.
pub(crate) fn is_writable(&self) -> bool {
self.writable.load(Ordering::Relaxed)
}
/// Returns the region dir. /// Returns the region dir.
pub(crate) fn region_dir(&self) -> &str { pub(crate) fn region_dir(&self) -> &str {
self.access_layer.region_dir() self.access_layer.region_dir()
} }
/// Returns whether the region is writable. /// Sets the writable flag.
pub(crate) fn is_writable(&self) -> bool {
self.manifest_ctx.state.load() == RegionState::Writable
}
/// Returns the state of the region.
pub(crate) fn state(&self) -> RegionState {
self.manifest_ctx.state.load()
}
/// Sets the writable state.
pub(crate) fn set_writable(&self, writable: bool) { pub(crate) fn set_writable(&self, writable: bool) {
if writable { self.writable.store(writable, Ordering::Relaxed);
// Only sets the region to writable if it is read only.
// This prevents others updating the manifest.
let _ = self
.manifest_ctx
.state
.compare_exchange(RegionState::ReadOnly, RegionState::Writable);
} else {
self.manifest_ctx.state.store(RegionState::ReadOnly);
}
}
/// Sets the altering state.
/// You should call this method in the worker loop.
pub(crate) fn set_altering(&self) -> Result<()> {
self.compare_exchange_state(RegionState::Writable, RegionState::Altering)
}
/// Sets the dropping state.
/// You should call this method in the worker loop.
pub(crate) fn set_dropping(&self) -> Result<()> {
self.compare_exchange_state(RegionState::Writable, RegionState::Dropping)
}
/// Sets the truncating state.
/// You should call this method in the worker loop.
pub(crate) fn set_truncating(&self) -> Result<()> {
self.compare_exchange_state(RegionState::Writable, RegionState::Truncating)
}
/// Sets the editing state.
/// You should call this method in the worker loop.
pub(crate) fn set_editing(&self) -> Result<()> {
self.compare_exchange_state(RegionState::Writable, RegionState::Editing)
}
/// Sets the region to readonly gracefully. This acquires the manifest write lock.
pub(crate) async fn set_readonly_gracefully(&self) {
let _manager = self.manifest_ctx.manifest_manager.write().await;
// We acquires the write lock of the manifest manager to ensure that no one is updating the manifest.
// Then we change the state.
self.set_writable(false);
}
/// Switches the region state to `RegionState::Writable` if the current state is `expect`.
/// Otherwise, logs an error.
pub(crate) fn switch_state_to_writable(&self, expect: RegionState) {
if let Err(e) = self.compare_exchange_state(expect, RegionState::Writable) {
error!(e; "failed to switch region state to writable, expect state is {:?}", expect);
}
} }
/// Returns the region usage in bytes. /// Returns the region usage in bytes.
@@ -229,12 +155,7 @@ impl MitoRegion {
let wal_usage = self.estimated_wal_usage(memtable_usage); let wal_usage = self.estimated_wal_usage(memtable_usage);
let manifest_usage = self let manifest_usage = self.manifest_manager.read().await.manifest_usage();
.manifest_ctx
.manifest_manager
.read()
.await
.manifest_usage();
RegionUsage { RegionUsage {
region_id, region_id,
@@ -250,133 +171,28 @@ impl MitoRegion {
((memtable_usage as f32) * ESTIMATED_WAL_FACTOR) as u64 ((memtable_usage as f32) * ESTIMATED_WAL_FACTOR) as u64
} }
/// Sets the state of the region to given state if the current state equals to pub(crate) async fn apply_edit(
/// the expected.
fn compare_exchange_state(&self, expect: RegionState, state: RegionState) -> Result<()> {
self.manifest_ctx
.state
.compare_exchange(expect, state)
.map_err(|actual| {
RegionStateSnafu {
region_id: self.region_id,
state: actual,
expect,
}
.build()
})?;
Ok(())
}
}
/// Context to update the region manifest.
#[derive(Debug)]
pub(crate) struct ManifestContext {
/// Manager to maintain manifest for this region.
manifest_manager: tokio::sync::RwLock<RegionManifestManager>,
/// The state of the region. The region checks the state before updating
/// manifest.
state: AtomicCell<RegionState>,
}
impl ManifestContext {
pub(crate) fn new(manager: RegionManifestManager, state: RegionState) -> Self {
ManifestContext {
manifest_manager: tokio::sync::RwLock::new(manager),
state: AtomicCell::new(state),
}
}
pub(crate) async fn has_update(&self) -> Result<bool> {
self.manifest_manager.read().await.has_update().await
}
/// Updates the manifest if current state is `expect_state` and executes
/// the `applier` if the manifest is updated.
pub(crate) async fn update_manifest(
&self, &self,
expect_state: RegionState, edit: RegionEdit,
action_list: RegionMetaActionList, memtables_to_remove: &[MemtableId],
applier: impl FnOnce(),
) -> Result<()> { ) -> Result<()> {
// Acquires the write lock of the manifest manager. info!("Applying {edit:?} to region {}", self.region_id);
let mut manager = self.manifest_manager.write().await;
// Gets current manifest.
let manifest = manager.manifest();
// Checks state inside the lock. This is to ensure that we won't update the manifest
// after `set_readonly_gracefully()` is called.
let current_state = self.state.load();
ensure!(
current_state == expect_state,
RegionStateSnafu {
region_id: manifest.metadata.region_id,
state: current_state,
expect: expect_state,
}
);
for action in &action_list.actions { self.manifest_manager
// Checks whether the edit is still applicable. .write()
let RegionMetaAction::Edit(edit) = &action else { .await
continue; .update(RegionMetaActionList::with_action(RegionMetaAction::Edit(
}; edit.clone(),
)))
// Checks whether the region is truncated. .await?;
let Some(truncated_entry_id) = manifest.truncated_entry_id else {
continue;
};
// This is an edit from flush.
if let Some(flushed_entry_id) = edit.flushed_entry_id {
ensure!(
truncated_entry_id < flushed_entry_id,
RegionTruncatedSnafu {
region_id: manifest.metadata.region_id,
}
);
}
// This is an edit from compaction.
if !edit.files_to_remove.is_empty() {
// Input files of the compaction task has been truncated.
for file in &edit.files_to_remove {
ensure!(
manifest.files.contains_key(&file.file_id),
RegionTruncatedSnafu {
region_id: manifest.metadata.region_id,
}
);
}
}
}
// Now we can update the manifest.
manager.update(action_list).await.inspect_err(
|e| error!(e; "Failed to update manifest, region_id: {}", manifest.metadata.region_id),
)?;
// Executes the applier. We MUST hold the write lock.
applier();
if self.state.load() == RegionState::ReadOnly {
warn!(
"Region {} becomes read-only while updating manifest which may cause inconsistency",
manifest.metadata.region_id
);
}
// Apply edit to region's version.
self.version_control
.apply_edit(edit, memtables_to_remove, self.file_purger.clone());
Ok(()) Ok(())
} }
} }
#[cfg(test)]
impl ManifestContext {
pub(crate) async fn manifest(&self) -> Arc<crate::manifest::action::RegionManifest> {
self.manifest_manager.read().await.manifest()
}
}
pub(crate) type ManifestContextRef = Arc<ManifestContext>;
/// Regions indexed by ids. /// Regions indexed by ids.
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub(crate) struct RegionMap { pub(crate) struct RegionMap {
@@ -409,14 +225,7 @@ impl RegionMap {
let region = self let region = self
.get_region(region_id) .get_region(region_id)
.context(RegionNotFoundSnafu { region_id })?; .context(RegionNotFoundSnafu { region_id })?;
ensure!( ensure!(region.is_writable(), RegionReadonlySnafu { region_id });
region.is_writable(),
RegionStateSnafu {
region_id,
state: region.state(),
expect: RegionState::Writable,
}
);
Ok(region) Ok(region)
} }
@@ -456,15 +265,3 @@ impl RegionMap {
} }
pub(crate) type RegionMapRef = Arc<RegionMap>; pub(crate) type RegionMapRef = Arc<RegionMap>;
#[cfg(test)]
mod tests {
use crossbeam_utils::atomic::AtomicCell;
use crate::region::RegionState;
#[test]
fn test_region_state_lock_free() {
assert!(AtomicCell::<RegionState>::is_lock_free());
}
}
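
One side of the region.rs hunk above guards region state with crossbeam's AtomicCell<RegionState>, the other with a plain AtomicBool writable flag. As a standalone illustration of the compare-exchange guard, here is a reduced sketch (simplified types, not the project's exact code):

use crossbeam_utils::atomic::AtomicCell;

/// Reduced copy of the state enum from the hunk above.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RegionState {
    ReadOnly,
    Writable,
    Dropping,
}

/// Sketch of the lock-free guard: state only moves from `expect` to `next`;
/// any other observed state is handed back as the error value.
struct StateGuard {
    state: AtomicCell<RegionState>,
}

impl StateGuard {
    fn new() -> Self {
        Self {
            state: AtomicCell::new(RegionState::ReadOnly),
        }
    }

    fn compare_exchange_state(
        &self,
        expect: RegionState,
        next: RegionState,
    ) -> Result<(), RegionState> {
        // `AtomicCell::compare_exchange` returns Err(actual) when the current
        // value is not `expect`, which is what the region-state error reports.
        self.state.compare_exchange(expect, next).map(|_| ())
    }

    fn is_writable(&self) -> bool {
        self.state.load() == RegionState::Writable
    }
}

A test like the removed test_region_state_lock_free then only needs to assert AtomicCell::<RegionState>::is_lock_free() so the guard never degrades to a spinlock.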


@@ -15,7 +15,7 @@
 //! Region opener.

 use std::collections::HashMap;
-use std::sync::atomic::AtomicI64;
+use std::sync::atomic::{AtomicBool, AtomicI64};
 use std::sync::Arc;

 use common_telemetry::{debug, error, info, warn};
@@ -27,6 +27,7 @@ use snafu::{ensure, OptionExt};
 use store_api::logstore::LogStore;
 use store_api::metadata::{ColumnMetadata, RegionMetadata};
 use store_api::storage::{ColumnId, RegionId};
+use tokio::sync::RwLock;

 use crate::access_layer::AccessLayer;
 use crate::cache::CacheManagerRef;
@@ -40,7 +41,7 @@ use crate::memtable::time_partition::TimePartitions;
 use crate::memtable::MemtableBuilderProvider;
 use crate::region::options::RegionOptions;
 use crate::region::version::{VersionBuilder, VersionControl, VersionControlRef};
-use crate::region::{ManifestContext, MitoRegion, RegionState};
+use crate::region::MitoRegion;
 use crate::region_write_ctx::RegionWriteCtx;
 use crate::request::OptionOutputTx;
 use crate::schedule::scheduler::SchedulerRef;
@@ -202,11 +203,7 @@ impl RegionOpener {
             region_id,
             version_control,
             access_layer: access_layer.clone(),
-            // Region is writable after it is created.
-            manifest_ctx: Arc::new(ManifestContext::new(
-                manifest_manager,
-                RegionState::Writable,
-            )),
+            manifest_manager: RwLock::new(manifest_manager),
             file_purger: Arc::new(LocalFilePurger::new(
                 self.purge_scheduler,
                 access_layer,
@@ -214,6 +211,8 @@ impl RegionOpener {
             )),
             wal_options,
             last_flush_millis: AtomicI64::new(time_provider.current_time_millis()),
+            // Region is writable after it is created.
+            writable: AtomicBool::new(true),
             time_provider,
             memtable_builder,
         })
@@ -332,14 +331,12 @@ impl RegionOpener {
             region_id: self.region_id,
             version_control,
             access_layer,
-            // Region is always opened in read only mode.
-            manifest_ctx: Arc::new(ManifestContext::new(
-                manifest_manager,
-                RegionState::ReadOnly,
-            )),
+            manifest_manager: RwLock::new(manifest_manager),
             file_purger,
             wal_options,
             last_flush_millis: AtomicI64::new(time_provider.current_time_millis()),
+            // Region is always opened in read only mode.
+            writable: AtomicBool::new(false),
             time_provider,
             memtable_builder,
         };
@@ -441,11 +438,13 @@ pub(crate) async fn replay_memtable<S: LogStore>(
     // data in the WAL.
     let mut last_entry_id = flushed_entry_id;
     let replay_from_entry_id = flushed_entry_id + 1;
+    let mut stale_entry_found = false;
     let mut wal_stream = wal.scan(region_id, replay_from_entry_id, wal_options)?;
     while let Some(res) = wal_stream.next().await {
         let (entry_id, entry) = res?;
         if entry_id <= flushed_entry_id {
+            stale_entry_found = true;
             warn!("Stale WAL entries read during replay, region id: {}, flushed entry id: {}, entry id read: {}", region_id, flushed_entry_id, entry_id);
             ensure!(
                 allow_stale_entries,
@@ -474,8 +473,11 @@ pub(crate) async fn replay_memtable<S: LogStore>(
         region_write_ctx.write_memtable();
     }

+    if allow_stale_entries && stale_entry_found {
         wal.obsolete(region_id, flushed_entry_id, wal_options)
             .await?;
+        info!("Force obsolete WAL entries, region id: {}, flushed entry id: {}, last entry id read: {}", region_id, flushed_entry_id, last_entry_id);
+    }

     info!(
         "Replay WAL for region: {}, rows recovered: {}, last entry id: {}",


@@ -16,17 +16,18 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::Instant; use std::time::{Duration, Instant};
use api::helper::{ use api::helper::{
is_column_type_value_eq, is_semantic_type_eq, proto_value_type, to_proto_value, is_column_type_value_eq, is_semantic_type_eq, proto_value_type, to_proto_value,
ColumnDataTypeWrapper, ColumnDataTypeWrapper,
}; };
use api::v1::{ColumnDataType, ColumnSchema, OpType, Rows, SemanticType, Value}; use api::v1::{ColumnDataType, ColumnSchema, OpType, Rows, SemanticType, Value};
use common_telemetry::info; use common_telemetry::{info, warn};
use datatypes::prelude::DataType; use datatypes::prelude::DataType;
use prometheus::HistogramTimer; use prometheus::HistogramTimer;
use prost::Message; use prost::Message;
use smallvec::SmallVec;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use store_api::metadata::{ColumnMetadata, RegionMetadata}; use store_api::metadata::{ColumnMetadata, RegionMetadata};
use store_api::region_engine::SetReadonlyResponse; use store_api::region_engine::SetReadonlyResponse;
@@ -43,7 +44,10 @@ use crate::error::{
FlushRegionSnafu, InvalidRequestSnafu, Result, FlushRegionSnafu, InvalidRequestSnafu, Result,
}; };
use crate::manifest::action::RegionEdit; use crate::manifest::action::RegionEdit;
use crate::memtable::MemtableId;
use crate::metrics::COMPACTION_ELAPSED_TOTAL; use crate::metrics::COMPACTION_ELAPSED_TOTAL;
use crate::sst::file::FileMeta;
use crate::sst::file_purger::{FilePurgerRef, PurgeRequest};
use crate::wal::EntryId; use crate::wal::EntryId;
/// Request to write a region. /// Request to write a region.
@@ -616,8 +620,6 @@ pub(crate) enum BackgroundNotify {
CompactionFinished(CompactionFinished), CompactionFinished(CompactionFinished),
/// Compaction has failed. /// Compaction has failed.
CompactionFailed(CompactionFailed), CompactionFailed(CompactionFailed),
/// Truncate result.
Truncate(TruncateResult),
} }
/// Notifies a flush job is finished. /// Notifies a flush job is finished.
@@ -625,10 +627,18 @@ pub(crate) enum BackgroundNotify {
pub(crate) struct FlushFinished { pub(crate) struct FlushFinished {
/// Region id. /// Region id.
pub(crate) region_id: RegionId, pub(crate) region_id: RegionId,
/// Meta of the flushed SSTs.
pub(crate) file_metas: Vec<FileMeta>,
/// Entry id of flushed data. /// Entry id of flushed data.
pub(crate) flushed_entry_id: EntryId, pub(crate) flushed_entry_id: EntryId,
/// Sequence of flushed data.
pub(crate) flushed_sequence: SequenceNumber,
/// Id of memtables to remove.
pub(crate) memtables_to_remove: SmallVec<[MemtableId; 2]>,
/// Flush result senders. /// Flush result senders.
pub(crate) senders: Vec<OutputTx>, pub(crate) senders: Vec<OutputTx>,
/// File purger for cleaning files on failure.
pub(crate) file_purger: FilePurgerRef,
/// Flush timer. /// Flush timer.
pub(crate) _timer: HistogramTimer, pub(crate) _timer: HistogramTimer,
} }
@@ -650,6 +660,12 @@ impl OnFailure for FlushFinished {
region_id: self.region_id, region_id: self.region_id,
})); }));
} }
// Clean flushed files.
for file in &self.file_metas {
self.file_purger.send_request(PurgeRequest {
file_meta: file.clone(),
});
}
} }
} }
@@ -665,8 +681,16 @@ pub(crate) struct FlushFailed {
pub(crate) struct CompactionFinished { pub(crate) struct CompactionFinished {
/// Region id. /// Region id.
pub(crate) region_id: RegionId, pub(crate) region_id: RegionId,
/// Compaction output files that are to be added to region version.
pub(crate) compaction_outputs: Vec<FileMeta>,
/// Compacted files that are to be removed from region version.
pub(crate) compacted_files: Vec<FileMeta>,
/// Compaction result senders. /// Compaction result senders.
pub(crate) senders: Vec<OutputTx>, pub(crate) senders: Vec<OutputTx>,
/// File purger for cleaning files on failure.
pub(crate) file_purger: FilePurgerRef,
/// Inferred Compaction time window.
pub(crate) compaction_time_window: Option<Duration>,
/// Start time of compaction task. /// Start time of compaction task.
pub(crate) start_time: Instant, pub(crate) start_time: Instant,
} }
@@ -684,7 +708,8 @@ impl CompactionFinished {
} }
impl OnFailure for CompactionFinished { impl OnFailure for CompactionFinished {
/// Compaction succeeded but failed to update manifest or region's already been dropped. /// Compaction succeeded but failed to update manifest or region's already been dropped,
/// clean compaction output files.
fn on_failure(&mut self, err: Error) { fn on_failure(&mut self, err: Error) {
let err = Arc::new(err); let err = Arc::new(err);
for sender in self.senders.drain(..) { for sender in self.senders.drain(..) {
@@ -692,6 +717,15 @@ impl OnFailure for CompactionFinished {
region_id: self.region_id, region_id: self.region_id,
})); }));
} }
for file in &self.compaction_outputs {
warn!(
"Cleaning region {} compaction output file: {}",
self.region_id, file.file_id
);
self.file_purger.send_request(PurgeRequest {
file_meta: file.clone(),
});
}
} }
} }
@@ -703,21 +737,6 @@ pub(crate) struct CompactionFailed {
pub(crate) err: Arc<Error>, pub(crate) err: Arc<Error>,
} }
/// Notifies the truncate result of a region.
#[derive(Debug)]
pub(crate) struct TruncateResult {
/// Region id.
pub(crate) region_id: RegionId,
/// Result sender.
pub(crate) sender: OptionOutputTx,
/// Truncate result.
pub(crate) result: Result<()>,
/// Truncated entry id.
pub(crate) truncated_entry_id: EntryId,
/// Truncated sequence.
pub(crate) truncated_sequence: SequenceNumber,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use api::v1::value::ValueData; use api::v1::value::ValueData;


@@ -396,7 +396,6 @@ pub struct CreateRequestBuilder {
     primary_key: Option<Vec<ColumnId>>,
     all_not_null: bool,
     engine: String,
-    ts_type: ConcreteDataType,
 }

 impl Default for CreateRequestBuilder {
@@ -409,7 +408,6 @@ impl Default for CreateRequestBuilder {
             primary_key: None,
             all_not_null: false,
             engine: MITO_ENGINE_NAME.to_string(),
-            ts_type: ConcreteDataType::timestamp_millisecond_datatype(),
         }
     }
 }
@@ -456,12 +454,6 @@ impl CreateRequestBuilder {
         self
     }

-    #[must_use]
-    pub fn with_ts_type(mut self, ty: ConcreteDataType) -> Self {
-        self.ts_type = ty;
-        self
-    }
-
     pub fn build(&self) -> RegionCreateRequest {
         let mut column_id = 0;
         let mut column_metadatas = Vec::with_capacity(self.tag_num + self.field_num + 1);
@@ -495,7 +487,7 @@ impl CreateRequestBuilder {
         column_metadatas.push(ColumnMetadata {
             column_schema: ColumnSchema::new(
                 "ts",
-                self.ts_type.clone(),
+                ConcreteDataType::timestamp_millisecond_datatype(),
                 // Time index is always not null.
                 false,
             ),


@@ -16,25 +16,19 @@
 use std::sync::Arc;

-use common_datasource::compression::CompressionType;
 use common_test_util::temp_dir::{create_temp_dir, TempDir};
 use object_store::services::Fs;
 use object_store::util::join_dir;
 use object_store::ObjectStore;
-use store_api::metadata::RegionMetadataRef;
 use tokio::sync::mpsc::Sender;

 use crate::access_layer::{AccessLayer, AccessLayerRef};
 use crate::cache::CacheManager;
 use crate::compaction::CompactionScheduler;
-use crate::config::MitoConfig;
 use crate::flush::FlushScheduler;
-use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
-use crate::region::{ManifestContext, ManifestContextRef, RegionState};
 use crate::request::WorkerRequest;
 use crate::schedule::scheduler::{LocalScheduler, SchedulerRef};
 use crate::sst::index::intermediate::IntermediateManager;
-use crate::worker::WorkerListener;

 /// Scheduler mocker.
 pub(crate) struct SchedulerEnv {
@@ -79,13 +73,7 @@ impl SchedulerEnv {
     ) -> CompactionScheduler {
         let scheduler = self.get_scheduler();

-        CompactionScheduler::new(
-            scheduler,
-            request_sender,
-            Arc::new(CacheManager::default()),
-            Arc::new(MitoConfig::default()),
-            WorkerListener::default(),
-        )
+        CompactionScheduler::new(scheduler, request_sender, Arc::new(CacheManager::default()))
     }

     /// Creates a new flush scheduler.
@@ -95,27 +83,6 @@ impl SchedulerEnv {
         FlushScheduler::new(scheduler)
     }

-    /// Creates a new manifest context.
-    pub(crate) async fn mock_manifest_context(
-        &self,
-        metadata: RegionMetadataRef,
-    ) -> ManifestContextRef {
-        Arc::new(ManifestContext::new(
-            RegionManifestManager::new(
-                metadata,
-                RegionManifestOptions {
-                    manifest_dir: "".to_string(),
-                    object_store: self.access_layer.object_store().clone(),
-                    compress_type: CompressionType::Uncompressed,
-                    checkpoint_distance: 10,
-                },
-            )
-            .await
-            .unwrap(),
-            RegionState::Writable,
-        ))
-    }
-
     fn get_scheduler(&self) -> SchedulerRef {
         self.scheduler
             .clone()


@@ -21,7 +21,6 @@ mod handle_compaction;
mod handle_create; mod handle_create;
mod handle_drop; mod handle_drop;
mod handle_flush; mod handle_flush;
mod handle_manifest;
mod handle_open; mod handle_open;
mod handle_truncate; mod handle_truncate;
mod handle_write; mod handle_write;
@@ -46,8 +45,9 @@ use crate::cache::write_cache::{WriteCache, WriteCacheRef};
use crate::cache::{CacheManager, CacheManagerRef}; use crate::cache::{CacheManager, CacheManagerRef};
use crate::compaction::CompactionScheduler; use crate::compaction::CompactionScheduler;
use crate::config::MitoConfig; use crate::config::MitoConfig;
use crate::error::{JoinSnafu, Result, WorkerStoppedSnafu}; use crate::error::{InvalidRequestSnafu, JoinSnafu, Result, WorkerStoppedSnafu};
use crate::flush::{FlushScheduler, WriteBufferManagerImpl, WriteBufferManagerRef}; use crate::flush::{FlushScheduler, WriteBufferManagerImpl, WriteBufferManagerRef};
use crate::manifest::action::RegionEdit;
use crate::memtable::MemtableBuilderProvider; use crate::memtable::MemtableBuilderProvider;
use crate::region::{MitoRegionRef, RegionMap, RegionMapRef}; use crate::region::{MitoRegionRef, RegionMap, RegionMapRef};
use crate::request::{ use crate::request::{
@@ -367,7 +367,7 @@ impl<S: LogStore> WorkerStarter<S> {
running: running.clone(), running: running.clone(),
memtable_builder_provider: MemtableBuilderProvider::new( memtable_builder_provider: MemtableBuilderProvider::new(
Some(self.write_buffer_manager.clone()), Some(self.write_buffer_manager.clone()),
self.config.clone(), self.config,
), ),
purge_scheduler: self.purge_scheduler.clone(), purge_scheduler: self.purge_scheduler.clone(),
write_buffer_manager: self.write_buffer_manager, write_buffer_manager: self.write_buffer_manager,
@@ -376,8 +376,6 @@ impl<S: LogStore> WorkerStarter<S> {
self.scheduler, self.scheduler,
sender.clone(), sender.clone(),
self.cache_manager.clone(), self.cache_manager.clone(),
self.config,
self.listener.clone(),
), ),
stalled_requests: StalledRequests::default(), stalled_requests: StalledRequests::default(),
listener: self.listener, listener: self.listener,
@@ -624,7 +622,10 @@ impl<S: LogStore> RegionWorkerLoop<S> {
edit, edit,
tx, tx,
} => { } => {
self.handle_region_edit(region_id, edit, tx).await; let result = self.edit_region(region_id, edit).await;
if let Err(Err(e)) = tx.send(result) {
warn!("Failed to send edit region error to caller, error: {e:?}");
}
} }
// We receive a stop signal, but we still want to process remaining // We receive a stop signal, but we still want to process remaining
// requests. The worker thread will then check the running flag and // requests. The worker thread will then check the running flag and
@@ -668,11 +669,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
self.handle_compaction_request(ddl.region_id, ddl.sender); self.handle_compaction_request(ddl.region_id, ddl.sender);
continue; continue;
} }
DdlRequest::Truncate(_) => { DdlRequest::Truncate(_) => self.handle_truncate_request(ddl.region_id).await,
self.handle_truncate_request(ddl.region_id, ddl.sender)
.await;
continue;
}
DdlRequest::Catchup(req) => self.handle_catchup_request(ddl.region_id, req).await, DdlRequest::Catchup(req) => self.handle_catchup_request(ddl.region_id, req).await,
}; };
@@ -709,7 +706,6 @@ impl<S: LogStore> RegionWorkerLoop<S> {
self.handle_compaction_finished(region_id, req).await self.handle_compaction_finished(region_id, req).await
} }
BackgroundNotify::CompactionFailed(req) => self.handle_compaction_failure(req).await, BackgroundNotify::CompactionFailed(req) => self.handle_compaction_failure(req).await,
BackgroundNotify::Truncate(req) => self.handle_truncate_result(req).await,
} }
} }
@@ -720,17 +716,35 @@ impl<S: LogStore> RegionWorkerLoop<S> {
sender: oneshot::Sender<SetReadonlyResponse>, sender: oneshot::Sender<SetReadonlyResponse>,
) { ) {
if let Some(region) = self.regions.get_region(region_id) { if let Some(region) = self.regions.get_region(region_id) {
// We need to do this in background as we need the manifest lock. region.set_writable(false);
common_runtime::spawn_bg(async move {
region.set_readonly_gracefully().await;
let last_entry_id = region.version_control.current().last_entry_id; let last_entry_id = region.version_control.current().last_entry_id;
let _ = sender.send(SetReadonlyResponse::success(Some(last_entry_id))); let _ = sender.send(SetReadonlyResponse::success(Some(last_entry_id)));
});
} else { } else {
let _ = sender.send(SetReadonlyResponse::NotFound); let _ = sender.send(SetReadonlyResponse::NotFound);
} }
} }
async fn edit_region(&self, region_id: RegionId, edit: RegionEdit) -> Result<()> {
let region = self.regions.writable_region(region_id)?;
for file_meta in &edit.files_to_add {
let is_exist = region.access_layer.is_exist(file_meta).await?;
ensure!(
is_exist,
InvalidRequestSnafu {
region_id,
reason: format!(
"trying to add a not exist file '{}' when editing region",
file_meta.file_id
)
}
);
}
// Applying region edit directly has nothing to do with memtables (at least for now).
region.apply_edit(edit, &[]).await
}
} }
impl<S> RegionWorkerLoop<S> { impl<S> RegionWorkerLoop<S> {
@@ -739,7 +753,9 @@ impl<S> RegionWorkerLoop<S> {
// Closes remaining regions. // Closes remaining regions.
let regions = self.regions.list_regions(); let regions = self.regions.list_regions();
for region in regions { for region in regions {
region.stop().await; if let Err(e) = region.stop().await {
error!(e; "Failed to stop region {}", region.region_id);
}
} }
self.regions.clear(); self.regions.clear();
@@ -809,10 +825,10 @@ impl WorkerListener {
let _ = removed; let _ = removed;
} }
pub(crate) async fn on_merge_ssts_finished(&self, region_id: RegionId) { pub(crate) async fn on_handle_compaction_finished(&self, region_id: RegionId) {
#[cfg(any(test, feature = "test"))] #[cfg(any(test, feature = "test"))]
if let Some(listener) = &self.listener { if let Some(listener) = &self.listener {
listener.on_merge_ssts_finished(region_id).await; listener.on_handle_compaction_finished(region_id).await;
} }
// Avoid compiler warning. // Avoid compiler warning.
let _ = region_id; let _ = region_id;
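The `edit_region` path above is a validate-before-apply step: every SST file the edit wants to add must already exist in object storage, otherwise the request is rejected before anything is written. Below is a minimal, self-contained sketch of that pattern; `AccessLayer`, `FileMeta`, and `RegionEdit` here are simplified stand-ins for illustration, not the actual mito2 types.

```rust
// Sketch of the validate-before-apply check, with stand-in types.
use std::collections::HashSet;

struct FileMeta {
    file_id: String,
}

struct RegionEdit {
    files_to_add: Vec<FileMeta>,
}

trait AccessLayer {
    /// Reports whether the SST file described by `meta` exists in storage.
    fn is_exist(&self, meta: &FileMeta) -> Result<bool, String>;
}

/// Rejects the edit if any file it wants to add is missing from storage.
fn validate_edit(layer: &dyn AccessLayer, edit: &RegionEdit) -> Result<(), String> {
    for meta in &edit.files_to_add {
        if !layer.is_exist(meta)? {
            return Err(format!(
                "trying to add a non-existent file '{}' when editing region",
                meta.file_id
            ));
        }
    }
    Ok(())
}

/// Toy access layer backed by an in-memory set of file ids.
struct InMemoryLayer(HashSet<String>);

impl AccessLayer for InMemoryLayer {
    fn is_exist(&self, meta: &FileMeta) -> Result<bool, String> {
        Ok(self.0.contains(&meta.file_id))
    }
}

fn main() {
    let layer = InMemoryLayer(["a.parquet".to_string()].into_iter().collect());
    let ok = RegionEdit {
        files_to_add: vec![FileMeta { file_id: "a.parquet".into() }],
    };
    let missing = RegionEdit {
        files_to_add: vec![FileMeta { file_id: "b.parquet".into() }],
    };
    assert!(validate_edit(&layer, &ok).is_ok());
    assert!(validate_edit(&layer, &missing).is_err());
}
```

Usage mirrors the handler: the validation runs before `apply_edit`, so a bad request never reaches the manifest.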

View File

@@ -16,7 +16,7 @@
use std::sync::Arc; use std::sync::Arc;
use common_telemetry::{debug, info}; use common_telemetry::{debug, error, info};
use snafu::ResultExt; use snafu::ResultExt;
use store_api::metadata::{RegionMetadata, RegionMetadataBuilder, RegionMetadataRef}; use store_api::metadata::{RegionMetadata, RegionMetadataBuilder, RegionMetadataRef};
use store_api::region_request::RegionAlterRequest; use store_api::region_request::RegionAlterRequest;
@@ -26,7 +26,9 @@ use crate::error::{
InvalidMetadataSnafu, InvalidRegionRequestSchemaVersionSnafu, InvalidRegionRequestSnafu, Result, InvalidMetadataSnafu, InvalidRegionRequestSchemaVersionSnafu, InvalidRegionRequestSnafu, Result,
}; };
use crate::flush::FlushReason; use crate::flush::FlushReason;
use crate::manifest::action::RegionChange; use crate::manifest::action::{RegionChange, RegionMetaAction, RegionMetaActionList};
use crate::region::version::Version;
use crate::region::MitoRegionRef;
use crate::request::{DdlRequest, OptionOutputTx, SenderDdlRequest}; use crate::request::{DdlRequest, OptionOutputTx, SenderDdlRequest};
use crate::worker::RegionWorkerLoop; use crate::worker::RegionWorkerLoop;
@@ -105,26 +107,49 @@ impl<S> RegionWorkerLoop<S> {
return; return;
} }
// Now we can alter the region directly.
if let Err(e) = alter_region_schema(&region, &version, request).await {
error!(e; "Failed to alter region schema, region_id: {}", region_id);
sender.send(Err(e));
return;
}
info!( info!(
"Try to alter region {} from version {} to {}", "Schema of region {} is altered from {} to {}",
region_id, region_id,
version.metadata.schema_version, version.metadata.schema_version,
region.metadata().schema_version region.metadata().schema_version
); );
let new_meta = match metadata_after_alteration(&version.metadata, request) { // Notifies waiters.
Ok(new_meta) => new_meta, sender.send(Ok(0));
Err(e) => {
sender.send(Err(e));
return;
} }
}; }
/// Alter the schema of the region.
async fn alter_region_schema(
region: &MitoRegionRef,
version: &Version,
request: RegionAlterRequest,
) -> Result<()> {
let new_meta = metadata_after_alteration(&version.metadata, request)?;
// Persist the metadata to region's manifest. // Persist the metadata to region's manifest.
let change = RegionChange { let change = RegionChange {
metadata: new_meta.clone(), metadata: new_meta.clone(),
}; };
self.handle_manifest_region_change(region, change, sender) let action_list = RegionMetaActionList::with_action(RegionMetaAction::Change(change));
} region
.manifest_manager
.write()
.await
.update(action_list)
.await?;
// Apply the metadata to region's version.
region
.version_control
.alter_schema(new_meta, &region.memtable_builder);
Ok(())
} }
/// Creates a metadata after applying the alter `request` to the old `metadata`. /// Creates a metadata after applying the alter `request` to the old `metadata`.
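`alter_region_schema` above follows a persist-then-apply ordering: the `RegionChange` is written to the manifest first, and the in-memory version is updated only after that write succeeds. Here is a minimal sketch of that ordering, assuming simplified `Manifest` and `VersionControl` stand-ins rather than the real mito2 types.

```rust
// Sketch of persist-then-apply: durable write first, in-memory update second.
struct Manifest {
    entries: Vec<String>,
}

impl Manifest {
    fn update(&mut self, action: String) -> Result<(), String> {
        // A real implementation writes to durable storage; a failure here must
        // leave the in-memory state untouched.
        self.entries.push(action);
        Ok(())
    }
}

struct VersionControl {
    schema_version: u64,
}

fn alter_schema(
    manifest: &mut Manifest,
    version: &mut VersionControl,
    new_schema_version: u64,
) -> Result<(), String> {
    // 1. Persist the change.
    manifest.update(format!("change schema to v{new_schema_version}"))?;
    // 2. Apply it in memory only after persistence succeeded.
    version.schema_version = new_schema_version;
    Ok(())
}

fn main() {
    let mut manifest = Manifest { entries: vec![] };
    let mut version = VersionControl { schema_version: 1 };
    alter_schema(&mut manifest, &mut version, 2).unwrap();
    assert_eq!(version.schema_version, 2);
    assert_eq!(manifest.entries.len(), 1);
}
```

The same ordering appears in the compaction-finished and flush-finished handlers in this diff: the manifest write happens before the edit is applied to the region's version.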

View File

@@ -45,7 +45,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
let is_mutable_empty = region.version().memtables.mutable.is_empty(); let is_mutable_empty = region.version().memtables.mutable.is_empty();
// Utilizes the short circuit evaluation. // Utilizes the short circuit evaluation.
let region = if !is_mutable_empty || region.manifest_ctx.has_update().await? { let region =
if !is_mutable_empty || region.manifest_manager.read().await.has_update().await? {
info!("Reopening the region: {region_id}, empty mutable: {is_mutable_empty}"); info!("Reopening the region: {region_id}, empty mutable: {is_mutable_empty}");
let reopened_region = Arc::new( let reopened_region = Arc::new(
RegionOpener::new( RegionOpener::new(

View File

@@ -33,7 +33,7 @@ impl<S> RegionWorkerLoop<S> {
info!("Try to close region {}", region_id); info!("Try to close region {}", region_id);
region.stop().await; region.stop().await?;
self.regions.remove_region(region_id); self.regions.remove_region(region_id);
// Clean flush status. // Clean flush status.
self.flush_scheduler.on_region_closed(region_id); self.flush_scheduler.on_region_closed(region_id);

View File

@@ -16,8 +16,9 @@ use common_telemetry::{error, info, warn};
use store_api::logstore::LogStore; use store_api::logstore::LogStore;
use store_api::storage::RegionId; use store_api::storage::RegionId;
use crate::metrics::COMPACTION_REQUEST_COUNT; use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::request::{CompactionFailed, CompactionFinished, OptionOutputTx}; use crate::metrics::{COMPACTION_REQUEST_COUNT, COMPACTION_STAGE_ELAPSED};
use crate::request::{CompactionFailed, CompactionFinished, OnFailure, OptionOutputTx};
use crate::worker::RegionWorkerLoop; use crate::worker::RegionWorkerLoop;
impl<S: LogStore> RegionWorkerLoop<S> { impl<S: LogStore> RegionWorkerLoop<S> {
@@ -37,7 +38,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
&region.access_layer, &region.access_layer,
&region.file_purger, &region.file_purger,
sender, sender,
&region.manifest_ctx, self.config.clone(),
) { ) {
error!(e; "Failed to schedule compaction task for region: {}", region_id); error!(e; "Failed to schedule compaction task for region: {}", region_id);
} else { } else {
@@ -54,6 +55,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
region_id: RegionId, region_id: RegionId,
mut request: CompactionFinished, mut request: CompactionFinished,
) { ) {
self.listener.on_handle_compaction_finished(region_id).await;
let Some(region) = self.regions.writable_region_or(region_id, &mut request) else { let Some(region) = self.regions.writable_region_or(region_id, &mut request) else {
warn!( warn!(
"Unable to finish the compaction task for a read only region {}", "Unable to finish the compaction task for a read only region {}",
@@ -62,12 +65,44 @@ impl<S: LogStore> RegionWorkerLoop<S> {
return; return;
}; };
{
let manifest_timer = COMPACTION_STAGE_ELAPSED
.with_label_values(&["write_manifest"])
.start_timer();
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: std::mem::take(&mut request.compaction_outputs),
files_to_remove: std::mem::take(&mut request.compacted_files),
compaction_time_window: request.compaction_time_window,
flushed_entry_id: None,
flushed_sequence: None,
};
let action_list =
RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
if let Err(e) = region
.manifest_manager
.write()
.await
.update(action_list)
.await
{
error!(e; "Failed to update manifest, region: {}", region_id);
manifest_timer.stop_and_discard();
request.on_failure(e);
return;
}
// Apply edit to region's version.
region
.version_control
.apply_edit(edit, &[], region.file_purger.clone());
}
// compaction finished. // compaction finished.
request.on_success(); request.on_success();
// Schedule next compaction if necessary. // Schedule next compaction if necessary.
self.compaction_scheduler self.compaction_scheduler
.on_compaction_finished(region_id, &region.manifest_ctx); .on_compaction_finished(region_id, self.config.clone());
} }
/// When compaction fails, we simply log the error. /// When compaction fails, we simply log the error.

View File

@@ -16,7 +16,7 @@
use std::time::Duration; use std::time::Duration;
use common_telemetry::{error, info, warn}; use common_telemetry::{info, warn};
use futures::TryStreamExt; use futures::TryStreamExt;
use object_store::util::join_path; use object_store::util::join_path;
use object_store::{EntryMode, ObjectStore}; use object_store::{EntryMode, ObjectStore};
@@ -27,7 +27,7 @@ use tokio::time::sleep;
use crate::error::{OpenDalSnafu, Result}; use crate::error::{OpenDalSnafu, Result};
use crate::metrics::REGION_COUNT; use crate::metrics::REGION_COUNT;
use crate::region::{RegionMapRef, RegionState}; use crate::region::RegionMapRef;
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE}; use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};
const GC_TASK_INTERVAL_SEC: u64 = 5 * 60; // 5 minutes const GC_TASK_INTERVAL_SEC: u64 = 5 * 60; // 5 minutes
@@ -42,27 +42,17 @@ impl<S> RegionWorkerLoop<S> {
info!("Try to drop region: {}", region_id); info!("Try to drop region: {}", region_id);
// Marks the region as dropping. // write dropping marker
region.set_dropping()?;
// Writes dropping marker
// We rarely drop a region so we still operate in the worker loop.
let marker_path = join_path(region.access_layer.region_dir(), DROPPING_MARKER_FILE); let marker_path = join_path(region.access_layer.region_dir(), DROPPING_MARKER_FILE);
region region
.access_layer .access_layer
.object_store() .object_store()
.write(&marker_path, vec![]) .write(&marker_path, vec![])
.await .await
.context(OpenDalSnafu) .context(OpenDalSnafu)?;
.inspect_err(|e| {
error!(e; "Failed to write the drop marker file for region {}", region_id);
// Sets the state back to writable. It's possible that the marker file has been written. region.stop().await?;
// We sets the state back to writable so we can retry the drop operation. // remove this region from region map to prevent other requests from accessing this region
region.switch_state_to_writable(RegionState::Dropping);
})?;
region.stop().await;
// Removes this region from region map to prevent other requests from accessing this region
self.regions.remove_region(region_id); self.regions.remove_region(region_id);
self.dropping_regions.insert_region(region.clone()); self.dropping_regions.insert_region(region.clone());
// Notifies flush scheduler. // Notifies flush scheduler.
@@ -70,7 +60,7 @@ impl<S> RegionWorkerLoop<S> {
// Notifies compaction scheduler. // Notifies compaction scheduler.
self.compaction_scheduler.on_region_dropped(region_id); self.compaction_scheduler.on_region_dropped(region_id);
// Marks region version as dropped // mark region version as dropped
region region
.version_control .version_control
.mark_dropped(&region.memtable_builder); .mark_dropped(&region.memtable_builder);
@@ -81,7 +71,7 @@ impl<S> RegionWorkerLoop<S> {
REGION_COUNT.dec(); REGION_COUNT.dec();
// Detaches a background task to delete the region dir // detach a background task to delete the region dir
let region_dir = region.access_layer.region_dir().to_owned(); let region_dir = region.access_layer.region_dir().to_owned();
let object_store = region.access_layer.object_store().clone(); let object_store = region.access_layer.object_store().clone();
let dropping_regions = self.dropping_regions.clone(); let dropping_regions = self.dropping_regions.clone();

View File

@@ -22,8 +22,9 @@ use store_api::region_request::RegionFlushRequest;
use store_api::storage::RegionId; use store_api::storage::RegionId;
use crate::config::MitoConfig; use crate::config::MitoConfig;
use crate::error::Result; use crate::error::{RegionTruncatedSnafu, Result};
use crate::flush::{FlushReason, RegionFlushTask}; use crate::flush::{FlushReason, RegionFlushTask};
use crate::manifest::action::RegionEdit;
use crate::region::MitoRegionRef; use crate::region::MitoRegionRef;
use crate::request::{FlushFailed, FlushFinished, OnFailure, OptionOutputTx}; use crate::request::{FlushFailed, FlushFinished, OnFailure, OptionOutputTx};
use crate::worker::RegionWorkerLoop; use crate::worker::RegionWorkerLoop;
@@ -177,7 +178,6 @@ impl<S> RegionWorkerLoop<S> {
engine_config, engine_config,
row_group_size, row_group_size,
cache_manager: self.cache_manager.clone(), cache_manager: self.cache_manager.clone(),
manifest_ctx: region.manifest_ctx.clone(),
index_options: region.version().options.index_options.clone(), index_options: region.version().options.index_options.clone(),
} }
} }
@@ -198,6 +198,29 @@ impl<S: LogStore> RegionWorkerLoop<S> {
return; return;
}; };
// The flush task before truncating the region fails immediately.
let version_data = region.version_control.current();
if let Some(truncated_entry_id) = version_data.version.truncated_entry_id {
if truncated_entry_id >= request.flushed_entry_id {
request.on_failure(RegionTruncatedSnafu { region_id }.build());
return;
}
}
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: std::mem::take(&mut request.file_metas),
files_to_remove: Vec::new(),
compaction_time_window: None,
flushed_entry_id: Some(request.flushed_entry_id),
flushed_sequence: Some(request.flushed_sequence),
};
if let Err(e) = region.apply_edit(edit, &request.memtables_to_remove).await {
error!(e; "Failed to write manifest, region: {}", region_id);
request.on_failure(e);
return;
}
region.update_flush_millis(); region.update_flush_millis();
// Delete wal. // Delete wal.
@@ -240,7 +263,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
&region.access_layer, &region.access_layer,
&region.file_purger, &region.file_purger,
OptionOutputTx::none(), OptionOutputTx::none(),
&region.manifest_ctx, self.config.clone(),
) { ) {
warn!( warn!(
"Failed to schedule compaction after flush, region: {}, err: {}", "Failed to schedule compaction after flush, region: {}, err: {}",

View File

@@ -1,200 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Handles manifest.
//!
//! It updates the manifest and applies the changes to the region in background.
use common_telemetry::{info, warn};
use snafu::ensure;
use store_api::storage::RegionId;
use tokio::sync::oneshot::Sender;
use crate::error::{InvalidRequestSnafu, Result};
use crate::manifest::action::{
RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionTruncate,
};
use crate::region::{MitoRegionRef, RegionState};
use crate::request::{BackgroundNotify, OptionOutputTx, TruncateResult, WorkerRequest};
use crate::worker::RegionWorkerLoop;
impl<S> RegionWorkerLoop<S> {
/// Handles region edit request.
pub(crate) async fn handle_region_edit(
&self,
region_id: RegionId,
edit: RegionEdit,
sender: Sender<Result<()>>,
) {
let region = match self.regions.writable_region(region_id) {
Ok(region) => region,
Err(e) => {
let _ = sender.send(Err(e));
return;
}
};
// Marks the region as editing.
if let Err(e) = region.set_editing() {
let _ = sender.send(Err(e));
return;
}
// Now the region is in editing state.
// Updates manifest in background.
common_runtime::spawn_bg(async move {
let result = edit_region(&region, edit).await;
if let Err(res) = sender.send(result) {
warn!(
"Failed to send result back to the worker, region_id: {}, res: {:?}",
region_id, res
);
}
// Sets the region as writable. For simplicity, we don't send the result
// back to the worker.
region.switch_state_to_writable(RegionState::Editing);
});
}
/// Writes truncate action to the manifest and then applies it to the region in background.
pub(crate) fn handle_manifest_truncate_action(
&self,
region: MitoRegionRef,
truncate: RegionTruncate,
sender: OptionOutputTx,
) {
// Marks the region as truncating.
// This prevents the region from being accessed by other write requests.
if let Err(e) = region.set_truncating() {
sender.send(Err(e));
return;
}
// Now the region is in truncating state.
let request_sender = self.sender.clone();
let manifest_ctx = region.manifest_ctx.clone();
let version_control = region.version_control.clone();
let memtable_builder = region.memtable_builder.clone();
// Updates manifest in background.
common_runtime::spawn_bg(async move {
// Write region truncated to manifest.
let action_list =
RegionMetaActionList::with_action(RegionMetaAction::Truncate(truncate.clone()));
let result = manifest_ctx
.update_manifest(RegionState::Truncating, action_list, || {
// Applies the truncate action to the region.
version_control.truncate(
truncate.truncated_entry_id,
truncate.truncated_sequence,
&memtable_builder,
);
})
.await;
// Sends the result back to the request sender.
let truncate_result = TruncateResult {
region_id: truncate.region_id,
sender,
result,
truncated_entry_id: truncate.truncated_entry_id,
truncated_sequence: truncate.truncated_sequence,
};
let _ = request_sender
.send(WorkerRequest::Background {
region_id: truncate.region_id,
notify: BackgroundNotify::Truncate(truncate_result),
})
.await
.inspect_err(|_| warn!("failed to send truncate result"));
});
}
/// Writes region change action to the manifest and then applies it to the region in background.
pub(crate) fn handle_manifest_region_change(
&self,
region: MitoRegionRef,
change: RegionChange,
sender: OptionOutputTx,
) {
// Marks the region as altering.
if let Err(e) = region.set_altering() {
sender.send(Err(e));
return;
}
// Now the region is in altering state.
common_runtime::spawn_bg(async move {
let new_meta = change.metadata.clone();
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Change(change));
let result = region
.manifest_ctx
.update_manifest(RegionState::Altering, action_list, || {
// Apply the metadata to region's version.
region
.version_control
.alter_schema(new_meta, &region.memtable_builder);
})
.await;
// Sets the region as writable.
region.switch_state_to_writable(RegionState::Altering);
if result.is_ok() {
info!(
"Region {} is altered, schema version is {}",
region.region_id,
region.metadata().schema_version
);
}
sender.send(result.map(|_| 0));
});
}
}
/// Checks the edit, writes and applies it.
async fn edit_region(region: &MitoRegionRef, edit: RegionEdit) -> Result<()> {
let region_id = region.region_id;
for file_meta in &edit.files_to_add {
let is_exist = region.access_layer.is_exist(file_meta).await?;
ensure!(
is_exist,
InvalidRequestSnafu {
region_id,
reason: format!(
"trying to add a not exist file '{}' when editing region",
file_meta.file_id
)
}
);
}
info!("Applying {edit:?} to region {}", region_id);
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
region
.manifest_ctx
.update_manifest(RegionState::Editing, action_list, || {
// Applies the edit to the region.
region
.version_control
.apply_edit(edit, &[], region.file_purger.clone());
})
.await
}

View File

@@ -16,23 +16,19 @@
use common_telemetry::info; use common_telemetry::info;
use store_api::logstore::LogStore; use store_api::logstore::LogStore;
use store_api::region_request::AffectedRows;
use store_api::storage::RegionId; use store_api::storage::RegionId;
use crate::error::RegionNotFoundSnafu; use crate::error::Result;
use crate::manifest::action::RegionTruncate; use crate::manifest::action::{RegionMetaAction, RegionMetaActionList, RegionTruncate};
use crate::region::RegionState;
use crate::request::{OptionOutputTx, TruncateResult};
use crate::worker::RegionWorkerLoop; use crate::worker::RegionWorkerLoop;
impl<S: LogStore> RegionWorkerLoop<S> { impl<S: LogStore> RegionWorkerLoop<S> {
pub(crate) async fn handle_truncate_request( pub(crate) async fn handle_truncate_request(
&mut self, &mut self,
region_id: RegionId, region_id: RegionId,
mut sender: OptionOutputTx, ) -> Result<AffectedRows> {
) { let region = self.regions.writable_region(region_id)?;
let Some(region) = self.regions.writable_region_or(region_id, &mut sender) else {
return;
};
info!("Try to truncate region {}", region_id); info!("Try to truncate region {}", region_id);
@@ -46,55 +42,36 @@ impl<S: LogStore> RegionWorkerLoop<S> {
truncated_entry_id, truncated_entry_id,
truncated_sequence, truncated_sequence,
}; };
self.handle_manifest_truncate_action(region, truncate, sender); let action_list =
} RegionMetaActionList::with_action(RegionMetaAction::Truncate(truncate.clone()));
region
/// Handles truncate result. .manifest_manager
pub(crate) async fn handle_truncate_result(&mut self, truncate_result: TruncateResult) { .write()
let region_id = truncate_result.region_id; .await
let Some(region) = self.regions.get_region(region_id) else { .update(action_list)
truncate_result.sender.send( .await?;
RegionNotFoundSnafu {
region_id: truncate_result.region_id,
}
.fail(),
);
return;
};
// We are already in the worker loop so we can set the state first.
region.switch_state_to_writable(RegionState::Truncating);
if let Err(e) = truncate_result.result {
// Unable to truncate the region.
truncate_result.sender.send(Err(e));
return;
}
// Notifies flush scheduler. // Notifies flush scheduler.
self.flush_scheduler.on_region_truncated(region_id); self.flush_scheduler.on_region_truncated(region_id);
// Notifies compaction scheduler. // Notifies compaction scheduler.
self.compaction_scheduler.on_region_truncated(region_id); self.compaction_scheduler.on_region_truncated(region_id);
// Make all data obsolete. // Reset region's version and mark all SSTs deleted.
if let Err(e) = self region.version_control.truncate(
.wal truncated_entry_id,
.obsolete( truncated_sequence,
region_id, &region.memtable_builder,
truncate_result.truncated_entry_id,
&region.wal_options,
)
.await
{
truncate_result.sender.send(Err(e));
return;
}
info!(
"Complete truncating region: {}, entry id: {} and sequence: {}.",
region_id, truncate_result.truncated_entry_id, truncate_result.truncated_sequence
); );
truncate_result.sender.send(Ok(0)); // Make all data obsolete.
self.wal
.obsolete(region_id, truncated_entry_id, &region.wal_options)
.await?;
info!(
"Complete truncating region: {}, entry id: {} and sequence: {}.",
region_id, truncated_entry_id, truncated_sequence
);
Ok(0)
} }
} }

View File

@@ -52,6 +52,7 @@ snafu.workspace = true
sql.workspace = true sql.workspace = true
sqlparser.workspace = true sqlparser.workspace = true
store-api.workspace = true store-api.workspace = true
substrait.workspace = true
table.workspace = true table.workspace = true
tokio.workspace = true tokio.workspace = true
tonic.workspace = true tonic.workspace = true

View File

@@ -541,6 +541,12 @@ pub enum Error {
end: String, end: String,
location: Location, location: Location,
}, },
#[snafu(display("Failed to convert between logical plan and substrait plan"))]
SubstraitCodec {
location: Location,
source: substrait::error::Error,
},
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
@@ -597,6 +603,7 @@ impl ErrorExt for Error {
Error::RequestInserts { source, .. } => source.status_code(), Error::RequestInserts { source, .. } => source.status_code(),
Error::RequestRegion { source, .. } => source.status_code(), Error::RequestRegion { source, .. } => source.status_code(),
Error::RequestDeletes { source, .. } => source.status_code(), Error::RequestDeletes { source, .. } => source.status_code(),
Error::SubstraitCodec { source, .. } => source.status_code(),
Error::ColumnDataType { source, .. } | Error::InvalidColumnDef { source, .. } => { Error::ColumnDataType { source, .. } | Error::InvalidColumnDef { source, .. } => {
source.status_code() source.status_code()

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use api::helper::ColumnDataTypeWrapper; use api::helper::ColumnDataTypeWrapper;
use api::v1::alter_expr::Kind; use api::v1::alter_expr::Kind;
@@ -31,11 +31,12 @@ use query::sql::{
}; };
use session::context::QueryContextRef; use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name; use session::table_name::table_idents_to_full_name;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, ResultExt};
use sql::ast::{ColumnDef, ColumnOption, TableConstraint}; use sql::ast::{ColumnDef, ColumnOption, TableConstraint};
use sql::statements::alter::{AlterTable, AlterTableOperation}; use sql::statements::alter::{AlterTable, AlterTableOperation};
use sql::statements::create::{CreateExternalTable, CreateTable, TIME_INDEX}; use sql::statements::create::{CreateExternalTable, CreateTable, TIME_INDEX};
use sql::statements::{column_def_to_schema, sql_column_def_to_grpc_column_def}; use sql::statements::{column_def_to_schema, sql_column_def_to_grpc_column_def};
use sql::util::to_lowercase_options_map;
use table::requests::{TableOptions, FILE_TABLE_META_KEY}; use table::requests::{TableOptions, FILE_TABLE_META_KEY};
use table::table_reference::TableReference; use table::table_reference::TableReference;
@@ -189,7 +190,8 @@ pub fn create_to_expr(create: &CreateTable, query_ctx: QueryContextRef) -> Resul
let time_index = find_time_index(&create.constraints)?; let time_index = find_time_index(&create.constraints)?;
let table_options = HashMap::from( let table_options = HashMap::from(
&TableOptions::try_from(create.options.as_ref()).context(UnrecognizedTableOptionSnafu)?, &TableOptions::try_from(&to_lowercase_options_map(&create.options))
.context(UnrecognizedTableOptionSnafu)?,
); );
let primary_keys = find_primary_keys(&create.columns, &create.constraints)?; let primary_keys = find_primary_keys(&create.columns, &create.constraints)?;
@@ -212,72 +214,9 @@ pub fn create_to_expr(create: &CreateTable, query_ctx: QueryContextRef) -> Resul
table_id: None, table_id: None,
engine: create.engine.to_string(), engine: create.engine.to_string(),
}; };
validate_create_expr(&expr)?;
Ok(expr) Ok(expr)
} }
/// Validate the [`CreateTableExpr`] request.
pub fn validate_create_expr(create: &CreateTableExpr) -> Result<()> {
// construct column list
let mut column_to_indices = HashMap::with_capacity(create.column_defs.len());
for (idx, column) in create.column_defs.iter().enumerate() {
if let Some(indices) = column_to_indices.get(&column.name) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is duplicated at index {} and {}",
column.name, indices, idx
),
}
.fail();
}
column_to_indices.insert(&column.name, idx);
}
// verify time_index exists
let _ = column_to_indices
.get(&create.time_index)
.with_context(|| InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is not found in column list",
create.time_index
),
})?;
// verify primary_key exists
for pk in &create.primary_keys {
let _ = column_to_indices
.get(&pk)
.with_context(|| InvalidSqlSnafu {
err_msg: format!("column name `{}` is not found in column list", pk),
})?;
}
// construct primary_key set
let mut pk_set = HashSet::new();
for pk in &create.primary_keys {
if !pk_set.insert(pk) {
return InvalidSqlSnafu {
err_msg: format!("column name `{}` is duplicated in primary keys", pk),
}
.fail();
}
}
// verify time index is not primary key
if pk_set.contains(&create.time_index) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is both primary key and time index",
create.time_index
),
}
.fail();
}
Ok(())
}
fn find_primary_keys( fn find_primary_keys(
columns: &[ColumnDef], columns: &[ColumnDef],
constraints: &[TableConstraint], constraints: &[TableConstraint],
@@ -500,7 +439,7 @@ mod tests {
#[test] #[test]
fn test_create_to_expr() { fn test_create_to_expr() {
let sql = "CREATE TABLE monitor (host STRING,ts TIMESTAMP,TIME INDEX (ts),PRIMARY KEY(host)) ENGINE=mito WITH(ttl='3days', write_buffer_size='1024KB');"; let sql = "CREATE TABLE monitor (host STRING,ts TIMESTAMP,TIME INDEX (ts),PRIMARY KEY(host)) ENGINE=mito WITH(regions=1, ttl='3days', write_buffer_size='1024KB');";
let stmt = let stmt =
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
.unwrap() .unwrap()
@@ -518,33 +457,6 @@ mod tests {
); );
} }
#[test]
fn test_invalid_create_to_expr() {
let cases = [
// duplicate column declaration
"CREATE TABLE monitor (host STRING primary key, ts TIMESTAMP TIME INDEX, some_column text, some_column string);",
// duplicate primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, some_column STRING, PRIMARY KEY (some_column, host, some_column));",
// time index is primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, PRIMARY KEY (host, ts));"
];
for sql in cases {
let stmt = ParserContext::create_with_dialect(
sql,
&GreptimeDbDialect {},
ParseOptions::default(),
)
.unwrap()
.pop()
.unwrap();
let Statement::CreateTable(create_table) = stmt else {
unreachable!()
};
create_to_expr(&create_table, QueryContext::arc()).unwrap_err();
}
}
#[test] #[test]
fn test_create_to_expr_with_default_timestamp_value() { fn test_create_to_expr_with_default_timestamp_value() {
let sql = "CREATE TABLE monitor (v double,ts TIMESTAMP default '2024-01-30T00:01:01',TIME INDEX (ts)) engine=mito;"; let sql = "CREATE TABLE monitor (v double,ts TIMESTAMP default '2024-01-30T00:01:01',TIME INDEX (ts)) engine=mito;";

View File

@@ -164,6 +164,10 @@ impl StatementExecutor {
let _ = self.create_external_table(stmt, query_ctx).await?; let _ = self.create_external_table(stmt, query_ctx).await?;
Ok(Output::new_with_affected_rows(0)) Ok(Output::new_with_affected_rows(0))
} }
Statement::CreateView(stmt) => {
let _ = self.create_view(stmt, query_ctx).await?;
Ok(Output::new_with_affected_rows(0))
}
Statement::Alter(alter_table) => self.alter_table(alter_table, query_ctx).await, Statement::Alter(alter_table) => self.alter_table(alter_table, query_ctx).await,
Statement::DropTable(stmt) => { Statement::DropTable(stmt) => {
let (catalog, schema, table) = let (catalog, schema, table) =
@@ -256,6 +260,13 @@ impl StatementExecutor {
.context(PlanStatementSnafu) .context(PlanStatementSnafu)
} }
pub fn optimize_logical_plan(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
self.query_engine
.planner()
.optimize(plan)
.context(PlanStatementSnafu)
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn plan_exec(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Output> { async fn plan_exec(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Output> {
let plan = self.plan(stmt, query_ctx.clone()).await?; let plan = self.plan(stmt, query_ctx.clone()).await?;

View File

@@ -39,16 +39,21 @@ use datatypes::value::Value;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use partition::expr::{Operand, PartitionExpr, RestrictedOp}; use partition::expr::{Operand, PartitionExpr, RestrictedOp};
use partition::partition::{PartitionBound, PartitionDef}; use partition::partition::{PartitionBound, PartitionDef};
use query::parser::QueryStatement;
use query::sql::create_table_stmt; use query::sql::create_table_stmt;
use regex::Regex; use regex::Regex;
use session::context::QueryContextRef; use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name; use session::table_name::table_idents_to_full_name;
use snafu::{ensure, IntoError, OptionExt, ResultExt}; use snafu::{ensure, IntoError, OptionExt, ResultExt};
use sql::statements::alter::AlterTable; use sql::statements::alter::AlterTable;
use sql::statements::create::{CreateExternalTable, CreateTable, CreateTableLike, Partitions}; use sql::statements::create::{
CreateExternalTable, CreateTable, CreateTableLike, CreateView, Partitions,
};
use sql::statements::sql_value_to_value; use sql::statements::sql_value_to_value;
use sql::statements::statement::Statement;
use sqlparser::ast::{Expr, Ident, Value as ParserValue}; use sqlparser::ast::{Expr, Ident, Value as ParserValue};
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME}; use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::dist_table::DistTable; use table::dist_table::DistTable;
use table::metadata::{self, RawTableInfo, RawTableMeta, TableId, TableInfo, TableType}; use table::metadata::{self, RawTableInfo, RawTableMeta, TableId, TableInfo, TableType};
use table::requests::{AlterKind, AlterTableRequest, TableOptions}; use table::requests::{AlterKind, AlterTableRequest, TableOptions};
@@ -60,7 +65,7 @@ use crate::error::{
CreateLogicalTablesSnafu, CreateTableInfoSnafu, DdlWithMultiCatalogsSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, DdlWithMultiCatalogsSnafu,
DdlWithMultiSchemasSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu, DdlWithMultiSchemasSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu,
InvalidPartitionColumnsSnafu, InvalidPartitionRuleSnafu, InvalidTableNameSnafu, InvalidPartitionColumnsSnafu, InvalidPartitionRuleSnafu, InvalidTableNameSnafu,
ParseSqlValueSnafu, Result, SchemaNotFoundSnafu, TableAlreadyExistsSnafu, ParseSqlValueSnafu, Result, SchemaNotFoundSnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu,
TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu,
}; };
use crate::expr_factory; use crate::expr_factory;
@@ -320,6 +325,33 @@ impl StatementExecutor {
.collect()) .collect())
} }
#[tracing::instrument(skip_all)]
pub async fn create_view(
&self,
create_view: CreateView,
ctx: QueryContextRef,
) -> Result<TableRef> {
// convert input into logical plan
let logical_plan = match *create_view.input {
Statement::Query(query) => {
self.plan(QueryStatement::Sql(Statement::Query(query)), ctx)
.await?
}
Statement::Tql(query) => self.plan_tql(query, &ctx).await?,
_ => {
todo!("throw an error")
}
};
let optimized_plan = self.optimize_logical_plan(logical_plan)?;
// encode logical plan
let encoded_plan = DFLogicalSubstraitConvertor
.encode(&optimized_plan.unwrap_df_plan())
.context(SubstraitCodecSnafu)?;
todo!()
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn alter_logical_tables(&self, alter_table_exprs: Vec<AlterExpr>) -> Result<Output> { pub async fn alter_logical_tables(&self, alter_table_exprs: Vec<AlterExpr>) -> Result<Output> {
let _timer = crate::metrics::DIST_ALTER_TABLES.start_timer(); let _timer = crate::metrics::DIST_ALTER_TABLES.start_timer();

View File

@@ -20,6 +20,7 @@ use query::parser::{
PromQuery, QueryLanguageParser, ANALYZE_NODE_NAME, ANALYZE_VERBOSE_NODE_NAME, PromQuery, QueryLanguageParser, ANALYZE_NODE_NAME, ANALYZE_VERBOSE_NODE_NAME,
DEFAULT_LOOKBACK_STRING, EXPLAIN_NODE_NAME, EXPLAIN_VERBOSE_NODE_NAME, DEFAULT_LOOKBACK_STRING, EXPLAIN_NODE_NAME, EXPLAIN_VERBOSE_NODE_NAME,
}; };
use query::plan::LogicalPlan;
use session::context::QueryContextRef; use session::context::QueryContextRef;
use snafu::ResultExt; use snafu::ResultExt;
use sql::statements::tql::Tql; use sql::statements::tql::Tql;
@@ -28,8 +29,9 @@ use crate::error::{ExecLogicalPlanSnafu, ParseQuerySnafu, PlanStatementSnafu, Re
use crate::statement::StatementExecutor; use crate::statement::StatementExecutor;
impl StatementExecutor { impl StatementExecutor {
/// Plan the given [Tql] query and return the [LogicalPlan].
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub(super) async fn execute_tql(&self, tql: Tql, query_ctx: QueryContextRef) -> Result<Output> { pub async fn plan_tql(&self, tql: Tql, query_ctx: &QueryContextRef) -> Result<LogicalPlan> {
let stmt = match tql { let stmt = match tql {
Tql::Eval(eval) => { Tql::Eval(eval) => {
let promql = PromQuery { let promql = PromQuery {
@@ -86,12 +88,17 @@ impl StatementExecutor {
.unwrap() .unwrap()
} }
}; };
let plan = self self.query_engine
.query_engine
.planner() .planner()
.plan(stmt, query_ctx.clone()) .plan(stmt, query_ctx.clone())
.await .await
.context(PlanStatementSnafu)?; .context(PlanStatementSnafu)
}
/// Execute the given [Tql] query and return the result.
#[tracing::instrument(skip_all)]
pub(super) async fn execute_tql(&self, tql: Tql, query_ctx: QueryContextRef) -> Result<Output> {
let plan = self.plan_tql(tql, &query_ctx).await?;
self.query_engine self.query_engine
.execute(plan, query_ctx) .execute(plan, query_ctx)
.await .await

View File

@@ -381,8 +381,8 @@ impl RecordBatchStream for ScalarCalculateStream {
impl ScalarCalculateStream { impl ScalarCalculateStream {
fn update_batch(&mut self, batch: RecordBatch) -> DataFusionResult<()> { fn update_batch(&mut self, batch: RecordBatch) -> DataFusionResult<()> {
let _timer = self.metric.elapsed_compute(); let _timer = self.metric.elapsed_compute();
// if have multi time series or empty batch, scalar will return NaN // if have multi time series, scalar will return NaN
if self.have_multi_series || batch.num_rows() == 0 { if self.have_multi_series {
return Ok(()); return Ok(());
} }
// fast path: no tag columns means all data belongs to the same series. // fast path: no tag columns means all data belongs to the same series.
@@ -493,18 +493,51 @@ mod test {
use super::*; use super::*;
fn prepare_test_data(series: Vec<RecordBatch>) -> MemoryExec { fn prepare_test_data(diff_series: bool) -> MemoryExec {
let schema = Arc::new(Schema::new(vec![ let schema = Arc::new(Schema::new(vec![
Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true), Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("tag1", DataType::Utf8, true), Field::new("tag1", DataType::Utf8, true),
Field::new("tag2", DataType::Utf8, true), Field::new("tag2", DataType::Utf8, true),
Field::new("val", DataType::Float64, true), Field::new("val", DataType::Float64, true),
])); ]));
MemoryExec::try_new(&[series], schema, None).unwrap() let batch_1 = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(TimestampMillisecondArray::from(vec![0, 5_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "🥺"])),
Arc::new(Float64Array::from(vec![1.0, 2.0])),
],
)
.unwrap();
let batch_2 = if diff_series {
RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(TimestampMillisecondArray::from(vec![10_000, 15_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "😝"])),
Arc::new(Float64Array::from(vec![3.0, 4.0])),
],
)
.unwrap()
} else {
RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(TimestampMillisecondArray::from(vec![10_000, 15_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "🥺"])),
Arc::new(Float64Array::from(vec![3.0, 4.0])),
],
)
.unwrap()
};
MemoryExec::try_new(&[vec![batch_1, batch_2]], schema, None).unwrap()
} }
async fn run_test(series: Vec<RecordBatch>, expected: &str) { async fn run_test(diff_series: bool, expected: &str) {
let memory_exec = Arc::new(prepare_test_data(series)); let memory_exec = Arc::new(prepare_test_data(diff_series));
let schema = Arc::new(Schema::new(vec![ let schema = Arc::new(Schema::new(vec![
Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true), Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("val", DataType::Float64, true), Field::new("val", DataType::Float64, true),
@@ -537,35 +570,8 @@ mod test {
#[tokio::test] #[tokio::test]
async fn same_series() { async fn same_series() {
let schema = Arc::new(Schema::new(vec![
Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("tag1", DataType::Utf8, true),
Field::new("tag2", DataType::Utf8, true),
Field::new("val", DataType::Float64, true),
]));
run_test( run_test(
vec![ false,
RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(TimestampMillisecondArray::from(vec![0, 5_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "🥺"])),
Arc::new(Float64Array::from(vec![1.0, 2.0])),
],
)
.unwrap(),
RecordBatch::try_new(
schema,
vec![
Arc::new(TimestampMillisecondArray::from(vec![10_000, 15_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "🥺"])),
Arc::new(Float64Array::from(vec![3.0, 4.0])),
],
)
.unwrap(),
],
"+---------------------+-----+\ "+---------------------+-----+\
\n| ts | val |\ \n| ts | val |\
\n+---------------------+-----+\ \n+---------------------+-----+\
@@ -580,66 +586,8 @@ mod test {
#[tokio::test] #[tokio::test]
async fn diff_series() { async fn diff_series() {
let schema = Arc::new(Schema::new(vec![
Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("tag1", DataType::Utf8, true),
Field::new("tag2", DataType::Utf8, true),
Field::new("val", DataType::Float64, true),
]));
run_test( run_test(
vec![ true,
RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(TimestampMillisecondArray::from(vec![0, 5_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "🥺"])),
Arc::new(Float64Array::from(vec![1.0, 2.0])),
],
)
.unwrap(),
RecordBatch::try_new(
schema,
vec![
Arc::new(TimestampMillisecondArray::from(vec![10_000, 15_000])),
Arc::new(StringArray::from(vec!["foo", "foo"])),
Arc::new(StringArray::from(vec!["🥺", "😝"])),
Arc::new(Float64Array::from(vec![3.0, 4.0])),
],
)
.unwrap(),
],
"+---------------------+-----+\
\n| ts | val |\
\n+---------------------+-----+\
\n| 1970-01-01T00:00:00 | NaN |\
\n| 1970-01-01T00:00:05 | NaN |\
\n| 1970-01-01T00:00:10 | NaN |\
\n| 1970-01-01T00:00:15 | NaN |\
\n+---------------------+-----+",
)
.await
}
#[tokio::test]
async fn empty_series() {
let schema = Arc::new(Schema::new(vec![
Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("tag1", DataType::Utf8, true),
Field::new("tag2", DataType::Utf8, true),
Field::new("val", DataType::Float64, true),
]));
run_test(
vec![RecordBatch::try_new(
schema,
vec![
Arc::new(TimestampMillisecondArray::new_null(0)),
Arc::new(StringArray::new_null(0)),
Arc::new(StringArray::new_null(0)),
Arc::new(Float64Array::new_null(0)),
],
)
.unwrap()],
"+---------------------+-----+\ "+---------------------+-----+\
\n| ts | val |\ \n| ts | val |\
\n+---------------------+-----+\ \n+---------------------+-----+\
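The comment in `ScalarCalculateStream::update_batch` above refers to PromQL's `scalar()` rule: a value is produced only when exactly one time series is present; otherwise the result is NaN. A tiny illustrative sketch of that rule (not the stream implementation itself):

```rust
// scalar() semantics: exactly one series yields its value, anything else NaN.
fn scalar_at_step(series_values: &[f64]) -> f64 {
    if series_values.len() == 1 {
        series_values[0]
    } else {
        f64::NAN
    }
}

fn main() {
    assert_eq!(scalar_at_step(&[42.0]), 42.0);
    assert!(scalar_at_step(&[1.0, 2.0]).is_nan());
    assert!(scalar_at_step(&[]).is_nan());
}
```

Zero series fails the "exactly one element" requirement just as multiple series do, which is what the `empty_series` test in this diff exercises.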

View File

@@ -87,6 +87,13 @@ impl LogicalPlan {
.context(DataFusionSnafu) .context(DataFusionSnafu)
.map(LogicalPlan::DfPlan) .map(LogicalPlan::DfPlan)
} }
/// Unwrap the logical plan into a DataFusion logical plan
pub fn unwrap_df_plan(self) -> DfLogicalPlan {
match self {
LogicalPlan::DfPlan(plan) => plan,
}
}
} }
impl From<DfLogicalPlan> for LogicalPlan { impl From<DfLogicalPlan> for LogicalPlan {

View File

@@ -42,6 +42,8 @@ use crate::{DfContextProviderAdapter, QueryEngineContext};
pub trait LogicalPlanner: Send + Sync { pub trait LogicalPlanner: Send + Sync {
async fn plan(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan>; async fn plan(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan>;
fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan>;
fn as_any(&self) -> &dyn Any; fn as_any(&self) -> &dyn Any;
} }
@@ -145,6 +147,14 @@ impl DfLogicalPlanner {
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(QueryPlanSnafu) .context(QueryPlanSnafu)
} }
#[tracing::instrument(skip_all)]
fn optimize_logical_plan(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
self.engine_state
.optimize_logical_plan(plan.unwrap_df_plan())
.context(DataFusionSnafu)
.map(Into::into)
}
} }
#[async_trait] #[async_trait]
@@ -157,6 +167,10 @@ impl LogicalPlanner for DfLogicalPlanner {
} }
} }
fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
self.optimize_logical_plan(plan)
}
fn as_any(&self) -> &dyn Any { fn as_any(&self) -> &dyn Any {
self self
} }

View File

@@ -142,6 +142,11 @@ impl QueryEngineState {
}) })
} }
/// Run the full logical plan optimize phase for the given plan.
pub fn optimize_logical_plan(&self, plan: DfLogicalPlan) -> DfResult<DfLogicalPlan> {
self.session_state().optimize(&plan)
}
/// Register an udf function. /// Register an udf function.
/// Will override if the function with same name is already registered. /// Will override if the function with same name is already registered.
pub fn register_function(&self, func: FunctionRef) { pub fn register_function(&self, func: FunctionRef) {

Some files were not shown because too many files have changed in this diff.