Compare commits


3 Commits

Author       SHA1        Date                        Message
Ruihang Xia  1bfba48755  2024-06-03 20:28:59 +08:00  Revert "build(deps): upgrade opendal to 0.46 (#4037)"
                                                     This reverts commit f9db5ff0d6.
Ruihang Xia  457998f0fe  2024-05-31 18:16:36 +08:00  Merge branch 'main' into avoid-query-meta
                                                     Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Ruihang Xia  b02c256157  2024-05-31 18:16:08 +08:00  perf: use memory state to check if a logical region exists
                                                     Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
211 changed files with 2369 additions and 10091 deletions

View File

@@ -57,7 +57,6 @@ runs:
greptime/greptimedb-cluster \
--create-namespace \
-n my-greptimedb \
--values ./.github/actions/setup-greptimedb-cluster/values.yaml \
--wait \
--wait-for-jobs
- name: Wait for GreptimeDB

View File

@@ -1,18 +0,0 @@
meta:
config: |-
[runtime]
read_rt_size = 8
write_rt_size = 8
bg_rt_size = 8
datanode:
config: |-
[runtime]
read_rt_size = 8
write_rt_size = 8
bg_rt_size = 8
frontend:
config: |-
[runtime]
read_rt_size = 8
write_rt_size = 8
bg_rt_size = 8

View File

@@ -155,22 +155,19 @@ jobs:
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
cache-targets: "false"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz cargo-gc-bin
cargo +nightly install cargo-fuzz
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: |
tar -xvf ./bins.tar.gz
rm ./bins.tar.gz
run: tar -xvf ./bins.tar.gz
- name: Run GreptimeDB
run: |
./bins/greptime standalone start&
@@ -185,7 +182,7 @@ jobs:
unstable-fuzztest:
name: Unstable Fuzz Test
needs: build-greptime-ci
needs: build
runs-on: ubuntu-latest
strategy:
matrix:
@@ -203,27 +200,31 @@ jobs:
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
cache-targets: "false"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt update && sudo apt install -y libfuzzer-14-dev
cargo install cargo-fuzz cargo-gc-bin
- name: Download pre-built binariy
cargo install cargo-fuzz
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bin
name: bins
path: .
- name: Unzip bianry
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- name: Build Fuzz Test
shell: bash
run: |
tar -xvf ./bin.tar.gz
rm ./bin.tar.gz
cd tests-fuzz &
cargo install cargo-gc-bin &
cargo gc &
cd ..
- name: Run Fuzz Test
uses: ./.github/actions/fuzz-test
env:
CUSTOM_LIBFUZZER_PATH: /usr/lib/llvm-14/lib/libFuzzer.a
GT_MYSQL_ADDR: 127.0.0.1:4002
GT_FUZZ_BINARY_PATH: ./bin/greptime
GT_FUZZ_BINARY_PATH: ./bins/greptime
GT_FUZZ_INSTANCE_ROOT_DIR: /tmp/unstable-greptime/
with:
target: ${{ matrix.target }}
@@ -262,7 +263,7 @@ jobs:
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime
run: cargo build --bin greptime --profile ci
- name: Pack greptime binary
shell: bash
run: |
@@ -301,13 +302,12 @@ jobs:
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
cache-targets: "false"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz cargo-gc-bin
cargo +nightly install cargo-fuzz
# Downloads ci image
- name: Download pre-built binariy
uses: actions/download-artifact@v4
@@ -315,9 +315,7 @@ jobs:
name: bin
path: .
- name: Unzip binary
run: |
tar -xvf ./bin.tar.gz
rm ./bin.tar.gz
run: tar -xvf ./bin.tar.gz
- name: Build and push GreptimeDB image
uses: ./.github/actions/build-and-push-ci-image
- name: Wait for etcd
@@ -363,29 +361,15 @@ jobs:
name: fuzz-tests-kind-logs-${{ matrix.target }}
path: /tmp/kind
retention-days: 3
- name: Delete cluster
if: success()
shell: bash
run: |
kind delete cluster
docker stop $(docker ps -a -q)
docker rm $(docker ps -a -q)
docker system prune -f
sqlness:
name: Sqlness Test (${{ matrix.mode.name }})
name: Sqlness Test
needs: build
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
opts: ""
kafka: false
- name: "Remote WAL"
opts: "-w kafka -k 127.0.0.1:9092"
kafka: true
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -396,17 +380,43 @@ jobs:
path: .
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- if: matrix.mode.kafka
name: Setup kafka server
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins --preserve-state
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: /tmp/sqlness*
retention-days: 3
sqlness-kafka-wal:
name: Sqlness Test with Kafka Wal
needs: build
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner ${{ matrix.mode.opts }} -c ./tests/cases --bins-dir ./bins --preserve-state
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins --preserve-state
- name: Upload sqlness logs
if: failure()
if: always()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs-${{ matrix.mode.name }}
name: sqlness-logs-with-kafka-wal
path: /tmp/sqlness*
retention-days: 3
@@ -495,9 +505,6 @@ jobs:
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Setup minio
working-directory: tests-integration/fixtures/minio
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
env:
@@ -508,11 +515,6 @@ jobs:
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
GT_MINIO_BUCKET: greptime
GT_MINIO_ACCESS_KEY_ID: superpower_ci_user
GT_MINIO_ACCESS_KEY: superpower_password
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
UNITTEST_LOG_DIR: "__unittest_logs"

Cargo.lock (generated, 1488): file diff suppressed because it is too large.

View File

@@ -104,15 +104,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "08e19f4956d32164be6fc66eb5a4c080eb0023d1" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-physical-plan = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
derive_builder = "0.12"
dotenv = "0.15"
# TODO(LFC): Wait for https://github.com/etcdv3/etcd-client/pull/76
@@ -146,15 +146,13 @@ raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.4" }
reqwest = { version = "0.12", default-features = false, features = [
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
"stream",
"multipart",
] }
rskafka = "0.5"
rstest = "0.21"
rstest_reuse = "0.7"
rust_decimal = "1.33"
schemars = "0.8"
serde = { version = "1.0", features = ["derive"] }
@@ -164,7 +162,7 @@ smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e4e496b8d62416ad50ce70a1b460c7313610cf5d", features = [
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }
@@ -252,12 +250,9 @@ incremental = false
[profile.ci]
inherits = "dev"
debug = false
strip = true
[profile.dev.package.sqlness-runner]
debug = false
strip = true
[profile.dev.package.tests-fuzz]
debug = false
strip = true

View File

@@ -163,13 +163,6 @@ nextest: ## Install nextest tools.
sqlness-test: ## Run sqlness test.
cargo sqlness
# Run fuzz test ${FUZZ_TARGET}.
RUNS ?= 1
FUZZ_TARGET ?= fuzz_alter_table
.PHONY: fuzz
fuzz:
cargo fuzz run ${FUZZ_TARGET} --fuzz-dir tests-fuzz -D -s none -- -runs=${RUNS}
.PHONY: check
check: ## Cargo check all the targets.
cargo check --workspace --all-targets --all-features
@@ -201,10 +194,6 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make test sqlness-test BUILD_JOBS=${BUILD_JOBS}
.PHONY: run-cluster-with-etcd
run-cluster-with-etcd: ## Run greptime cluster with etcd in docker-compose.
docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml up
##@ Docs
config-docs: ## Generate configuration documentation from toml files.
docker run --rm \

View File

@@ -12,6 +12,7 @@ api.workspace = true
arrow.workspace = true
chrono.workspace = true
clap.workspace = true
client = { workspace = true, features = ["testing"] }
common-base.workspace = true
common-telemetry.workspace = true
common-wal.workspace = true

View File

@@ -13,10 +13,6 @@
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.read_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.write_rt_size` | Integer | `8` | The number of threads to execute the runtime for global write operations. |
| `runtime.bg_rt_size` | Integer | `8` | The number of threads to execute the runtime for global background operations. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. |
@@ -158,10 +154,6 @@
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.read_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.write_rt_size` | Integer | `8` | The number of threads to execute the runtime for global write operations. |
| `runtime.bg_rt_size` | Integer | `8` | The number of threads to execute the runtime for global background operations. |
| `heartbeat` | -- | -- | The heartbeat options. |
| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
@@ -248,10 +240,6 @@
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.read_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.write_rt_size` | Integer | `8` | The number of threads to execute the runtime for global write operations. |
| `runtime.bg_rt_size` | Integer | `8` | The number of threads to execute the runtime for global background operations. |
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
@@ -312,10 +300,6 @@
| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.read_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.write_rt_size` | Integer | `8` | The number of threads to execute the runtime for global write operations. |
| `runtime.bg_rt_size` | Integer | `8` | The number of threads to execute the runtime for global background operations. |
| `heartbeat` | -- | -- | The heartbeat options. |
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |

View File

@@ -32,15 +32,6 @@ rpc_max_send_message_size = "512MB"
## Enable telemetry to collect anonymous usage data.
enable_telemetry = true
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
read_rt_size = 8
## The number of threads to execute the runtime for global write operations.
write_rt_size = 8
## The number of threads to execute the runtime for global background operations.
bg_rt_size = 8
## The heartbeat options.
[heartbeat]
## Interval for sending heartbeat messages to the metasrv.

View File

@@ -5,15 +5,6 @@ mode = "standalone"
## +toml2docs:none-default
default_timezone = "UTC"
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
read_rt_size = 8
## The number of threads to execute the runtime for global write operations.
write_rt_size = 8
## The number of threads to execute the runtime for global background operations.
bg_rt_size = 8
## The heartbeat options.
[heartbeat]
## Interval for sending heartbeat messages to the metasrv.

View File

@@ -25,15 +25,6 @@ enable_telemetry = true
## If it's not empty, the metasrv will store all data with this key prefix.
store_key_prefix = ""
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
read_rt_size = 8
## The number of threads to execute the runtime for global write operations.
write_rt_size = 8
## The number of threads to execute the runtime for global background operations.
bg_rt_size = 8
## Procedure storage options.
[procedure]

View File

@@ -8,15 +8,6 @@ enable_telemetry = true
## +toml2docs:none-default
default_timezone = "UTC"
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
read_rt_size = 8
## The number of threads to execute the runtime for global write operations.
write_rt_size = 8
## The number of threads to execute the runtime for global background operations.
bg_rt_size = 8
## The HTTP server options.
[http]
## The address to bind the HTTP server.

View File

@@ -1,102 +0,0 @@
x-custom:
initial_cluster_token: &initial_cluster_token "--initial-cluster-token=etcd-cluster"
common_settings: &common_settings
image: quay.io/coreos/etcd:v3.5.10
entrypoint: /usr/local/bin/etcd
services:
etcd0:
<<: *common_settings
container_name: etcd0
ports:
- 2379:2379
- 2380:2380
command:
- --name=etcd0
- --data-dir=/var/lib/etcd
- --initial-advertise-peer-urls=http://etcd0:2380
- --listen-peer-urls=http://0.0.0.0:2380
- --listen-client-urls=http://0.0.0.0:2379
- --advertise-client-urls=http://etcd0:2379
- --heartbeat-interval=250
- --election-timeout=1250
- --initial-cluster=etcd0=http://etcd0:2380
- --initial-cluster-state=new
- *initial_cluster_token
volumes:
- /tmp/greptimedb-cluster-docker-compose/etcd0:/var/lib/etcd
healthcheck:
test: [ "CMD", "etcdctl", "--endpoints=http://etcd0:2379", "endpoint", "health" ]
interval: 5s
timeout: 3s
retries: 5
networks:
- greptimedb
metasrv:
image: docker.io/greptime/greptimedb:latest
container_name: metasrv
ports:
- 3002:3002
command:
- metasrv
- start
- --bind-addr=0.0.0.0:3002
- --server-addr=metasrv:3002
- --store-addrs=etcd0:2379
healthcheck:
test: [ "CMD", "curl", "-f", "http://metasrv:3002/health" ]
interval: 5s
timeout: 3s
retries: 5
depends_on:
etcd0:
condition: service_healthy
networks:
- greptimedb
datanode0:
image: docker.io/greptime/greptimedb:latest
container_name: datanode0
ports:
- 3001:3001
command:
- datanode
- start
- --node-id=0
- --rpc-addr=0.0.0.0:3001
- --rpc-hostname=datanode0:3001
- --metasrv-addr=metasrv:3002
volumes:
- /tmp/greptimedb-cluster-docker-compose/datanode0:/tmp/greptimedb
depends_on:
metasrv:
condition: service_healthy
networks:
- greptimedb
frontend0:
image: docker.io/greptime/greptimedb:latest
container_name: frontend0
ports:
- 4000:4000
- 4001:4001
- 4002:4002
- 4003:4003
command:
- frontend
- start
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:4000
- --rpc-addr=0.0.0.0:4001
- --mysql-addr=0.0.0.0:4002
- --postgres-addr=0.0.0.0:4003
depends_on:
metasrv:
condition: service_healthy
networks:
- greptimedb
networks:
greptimedb:
name: greptimedb

View File

@@ -23,6 +23,7 @@ use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use common_wal::config::DatanodeWalConfig;
use datanode::config::DatanodeOptions;
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::service::DatanodeServiceBuilder;
use meta_client::MetaClientOptions;
@@ -33,13 +34,11 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
LoadLayeredConfigSnafu, MissingConfigSnafu, Result, ShutdownDatanodeSnafu, StartDatanodeSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::options::GlobalOptions;
use crate::{log_versions, App};
pub const APP_NAME: &str = "greptime-datanode";
type DatanodeOptions = GreptimeOptions<datanode::config::DatanodeOptions>;
pub struct Instance {
datanode: Datanode,
@@ -98,9 +97,7 @@ impl Command {
}
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
match &self.subcmd {
SubCommand::Start(cmd) => cmd.load_options(global_options),
}
self.subcmd.load_options(global_options)
}
}
@@ -115,6 +112,12 @@ impl SubCommand {
SubCommand::Start(cmd) => cmd.build(opts).await,
}
}
fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
match self {
SubCommand::Start(cmd) => cmd.load_options(global_options),
}
}
}
#[derive(Debug, Parser, Default)]
@@ -143,25 +146,22 @@ struct StartCommand {
impl StartCommand {
fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
let mut opts = DatanodeOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
self.merge_with_cli_options(
global_options,
DatanodeOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?,
)
.context(LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts)?;
Ok(opts)
}
// The precedence order is: cli > config file > environment variables > default values.
fn merge_with_cli_options(
&self,
global_options: &GlobalOptions,
opts: &mut DatanodeOptions,
) -> Result<()> {
let opts = &mut opts.component;
mut opts: DatanodeOptions,
) -> Result<DatanodeOptions> {
if let Some(dir) = &global_options.log_dir {
opts.logging.dir.clone_from(dir);
}
@@ -231,28 +231,25 @@ impl StartCommand {
// Disable dashboard in datanode.
opts.http.disable_dashboard = true;
Ok(())
Ok(opts)
}
async fn build(&self, opts: DatanodeOptions) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);
async fn build(&self, mut opts: DatanodeOptions) -> Result<Instance> {
let guard = common_telemetry::init_global_logging(
APP_NAME,
&opts.component.logging,
&opts.component.tracing,
opts.component.node_id.map(|x| x.to_string()),
&opts.logging,
&opts.tracing,
opts.node_id.map(|x| x.to_string()),
);
log_versions(version!(), short_version!());
info!("Datanode start command: {:#?}", self);
info!("Datanode options: {:#?}", opts);
let mut opts = opts.component;
let plugins = plugins::setup_datanode_plugins(&mut opts)
.await
.context(StartDatanodeSnafu)?;
info!("Datanode start command: {:#?}", self);
info!("Datanode options: {:#?}", opts);
let node_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -356,7 +353,7 @@ mod tests {
..Default::default()
};
let options = cmd.load_options(&Default::default()).unwrap().component;
let options = cmd.load_options(&GlobalOptions::default()).unwrap();
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
assert_eq!(Some(42), options.node_id);
@@ -417,8 +414,7 @@ mod tests {
fn test_try_from_cmd() {
let opt = StartCommand::default()
.load_options(&GlobalOptions::default())
.unwrap()
.component;
.unwrap();
assert_eq!(Mode::Standalone, opt.mode);
let opt = (StartCommand {
@@ -427,8 +423,7 @@ mod tests {
..Default::default()
})
.load_options(&GlobalOptions::default())
.unwrap()
.component;
.unwrap();
assert_eq!(Mode::Distributed, opt.mode);
assert!((StartCommand {
@@ -459,8 +454,7 @@ mod tests {
#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap()
.component;
.unwrap();
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
@@ -542,7 +536,7 @@ mod tests {
..Default::default()
};
let opts = command.load_options(&Default::default()).unwrap().component;
let opts = command.load_options(&GlobalOptions::default()).unwrap();
// Should be read from env, env > default values.
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
@@ -568,10 +562,7 @@ mod tests {
assert_eq!(raft_engine_config.dir.unwrap(), "/other/wal/dir");
// Should be default value.
assert_eq!(
opts.http.addr,
DatanodeOptions::default().component.http.addr
);
assert_eq!(opts.http.addr, DatanodeOptions::default().http.addr);
},
);
}
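
The hunks above change `merge_with_cli_options` from mutating a borrowed options struct (and returning `Ok(())`) to consuming the struct and returning it, so `load_options` can chain the layered load and the CLI merge in one expression. A minimal standalone sketch of the two shapes, with the `Opts` type and its field invented purely for illustration:

#[derive(Default, Debug)]
struct Opts {
    log_dir: String, // hypothetical field, for illustration only
}

// Mutate-in-place style: the caller loads the options, then patches them.
fn merge_in_place(opts: &mut Opts, cli_log_dir: Option<&str>) {
    if let Some(dir) = cli_log_dir {
        opts.log_dir = dir.to_string();
    }
}

// Take-and-return style: the merge becomes the last step of a chained expression.
fn merge_by_value(mut opts: Opts, cli_log_dir: Option<&str>) -> Opts {
    if let Some(dir) = cli_log_dir {
        opts.log_dir = dir.to_string();
    }
    opts
}

fn main() {
    let mut a = Opts::default();
    merge_in_place(&mut a, Some("/tmp/logs"));
    let b = merge_by_value(Opts::default(), Some("/tmp/logs"));
    println!("{a:?} {b:?}");
}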

View File

@@ -29,6 +29,7 @@ use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_time::timezone::set_default_timezone;
use common_version::{short_version, version};
use frontend::frontend::FrontendOptions;
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
@@ -43,11 +44,9 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
self, InitTimezoneSnafu, LoadLayeredConfigSnafu, MissingConfigSnafu, Result, StartFrontendSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::options::GlobalOptions;
use crate::{log_versions, App};
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
pub struct Instance {
frontend: FeInstance,
@@ -165,25 +164,22 @@ pub struct StartCommand {
impl StartCommand {
fn load_options(&self, global_options: &GlobalOptions) -> Result<FrontendOptions> {
let mut opts = FrontendOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
self.merge_with_cli_options(
global_options,
FrontendOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?,
)
.context(LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts)?;
Ok(opts)
}
// The precedence order is: cli > config file > environment variables > default values.
fn merge_with_cli_options(
&self,
global_options: &GlobalOptions,
opts: &mut FrontendOptions,
) -> Result<()> {
let opts = &mut opts.component;
mut opts: FrontendOptions,
) -> Result<FrontendOptions> {
if let Some(dir) = &global_options.log_dir {
opts.logging.dir.clone_from(dir);
}
@@ -246,29 +242,26 @@ impl StartCommand {
opts.user_provider.clone_from(&self.user_provider);
Ok(())
Ok(opts)
}
async fn build(&self, opts: FrontendOptions) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);
async fn build(&self, mut opts: FrontendOptions) -> Result<Instance> {
let guard = common_telemetry::init_global_logging(
APP_NAME,
&opts.component.logging,
&opts.component.tracing,
opts.component.node_id.clone(),
&opts.logging,
&opts.tracing,
opts.node_id.clone(),
);
log_versions(version!(), short_version!());
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);
let mut opts = opts.component;
#[allow(clippy::unnecessary_mut_passed)]
let plugins = plugins::setup_frontend_plugins(&mut opts)
.await
.context(StartFrontendSnafu)?;
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);
set_default_timezone(opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;
let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
@@ -387,14 +380,14 @@ mod tests {
..Default::default()
};
let opts = command.load_options(&Default::default()).unwrap().component;
let opts = command.load_options(&GlobalOptions::default()).unwrap();
assert_eq!(opts.http.addr, "127.0.0.1:1234");
assert_eq!(ReadableSize::mb(64), opts.http.body_limit);
assert_eq!(opts.mysql.addr, "127.0.0.1:5678");
assert_eq!(opts.postgres.addr, "127.0.0.1:5432");
let default_opts = FrontendOptions::default().component;
let default_opts = FrontendOptions::default();
assert_eq!(opts.grpc.addr, default_opts.grpc.addr);
assert!(opts.mysql.enable);
@@ -435,8 +428,7 @@ mod tests {
..Default::default()
};
let fe_opts = command.load_options(&Default::default()).unwrap().component;
let fe_opts = command.load_options(&GlobalOptions::default()).unwrap();
assert_eq!(Mode::Distributed, fe_opts.mode);
assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
assert_eq!(Duration::from_secs(30), fe_opts.http.timeout);
@@ -450,7 +442,7 @@ mod tests {
#[tokio::test]
async fn test_try_from_start_command_to_anymap() {
let mut fe_opts = frontend::frontend::FrontendOptions {
let mut fe_opts = FrontendOptions {
http: HttpOptions {
disable_dashboard: false,
..Default::default()
@@ -487,8 +479,7 @@ mod tests {
#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap()
.component;
.unwrap();
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
@@ -566,7 +557,7 @@ mod tests {
..Default::default()
};
let fe_opts = command.load_options(&Default::default()).unwrap().component;
let fe_opts = command.load_options(&GlobalOptions::default()).unwrap();
// Should be read from env, env > default values.
assert_eq!(fe_opts.mysql.runtime_size, 11);

View File

@@ -21,15 +21,14 @@ use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use meta_srv::bootstrap::MetasrvInstance;
use meta_srv::metasrv::MetasrvOptions;
use snafu::ResultExt;
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::options::GlobalOptions;
use crate::{log_versions, App};
type MetasrvOptions = GreptimeOptions<meta_srv::metasrv::MetasrvOptions>;
pub const APP_NAME: &str = "greptime-metasrv";
pub struct Instance {
@@ -140,25 +139,22 @@ struct StartCommand {
impl StartCommand {
fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
let mut opts = MetasrvOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
self.merge_with_cli_options(
global_options,
MetasrvOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?,
)
.context(LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts)?;
Ok(opts)
}
// The precedence order is: cli > config file > environment variables > default values.
fn merge_with_cli_options(
&self,
global_options: &GlobalOptions,
opts: &mut MetasrvOptions,
) -> Result<()> {
let opts = &mut opts.component;
mut opts: MetasrvOptions,
) -> Result<MetasrvOptions> {
if let Some(dir) = &global_options.log_dir {
opts.logging.dir.clone_from(dir);
}
@@ -221,28 +217,21 @@ impl StartCommand {
// Disable dashboard in metasrv.
opts.http.disable_dashboard = true;
Ok(())
Ok(opts)
}
async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);
let guard = common_telemetry::init_global_logging(
APP_NAME,
&opts.component.logging,
&opts.component.tracing,
None,
);
async fn build(&self, mut opts: MetasrvOptions) -> Result<Instance> {
let guard =
common_telemetry::init_global_logging(APP_NAME, &opts.logging, &opts.tracing, None);
log_versions(version!(), short_version!());
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let mut opts = opts.component;
let plugins = plugins::setup_metasrv_plugins(&mut opts)
.await
.context(StartMetaServerSnafu)?;
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
.await
.context(error::BuildMetaServerSnafu)?;
@@ -277,7 +266,7 @@ mod tests {
..Default::default()
};
let options = cmd.load_options(&Default::default()).unwrap().component;
let options = cmd.load_options(&GlobalOptions::default()).unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!(vec!["127.0.0.1:2380".to_string()], options.store_addrs);
assert_eq!(SelectorType::LoadBased, options.selector);
@@ -310,7 +299,7 @@ mod tests {
..Default::default()
};
let options = cmd.load_options(&Default::default()).unwrap().component;
let options = cmd.load_options(&GlobalOptions::default()).unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!("127.0.0.1:3002".to_string(), options.server_addr);
assert_eq!(vec!["127.0.0.1:2379".to_string()], options.store_addrs);
@@ -360,8 +349,7 @@ mod tests {
#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap()
.component;
.unwrap();
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
@@ -418,7 +406,7 @@ mod tests {
..Default::default()
};
let opts = command.load_options(&Default::default()).unwrap().component;
let opts = command.load_options(&GlobalOptions::default()).unwrap();
// Should be read from env, env > default values.
assert_eq!(opts.bind_addr, "127.0.0.1:14002");

View File

@@ -13,9 +13,6 @@
// limitations under the License.
use clap::Parser;
use common_config::Configurable;
use common_runtime::global::RuntimeOptions;
use serde::{Deserialize, Serialize};
#[derive(Parser, Default, Debug, Clone)]
pub struct GlobalOptions {
@@ -32,22 +29,3 @@ pub struct GlobalOptions {
#[arg(global = true)]
pub tokio_console_addr: Option<String>,
}
// TODO(LFC): Move logging and tracing options into global options, like the runtime options.
/// All the options of GreptimeDB.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct GreptimeOptions<T> {
/// The runtime options.
pub runtime: RuntimeOptions,
/// The options of each component (like Datanode or Standalone) of GreptimeDB.
#[serde(flatten)]
pub component: T,
}
impl<T: Configurable> Configurable for GreptimeOptions<T> {
fn env_list_keys() -> Option<&'static [&'static str]> {
T::env_list_keys()
}
}
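
The `GreptimeOptions<T>` wrapper removed here relied on `#[serde(flatten)]` so a top-level `[runtime]` table could sit next to the component's own keys in a single config file, which is also why the `[runtime]` sections disappear from the example TOML files and the docs tables above. A self-contained sketch of that pattern, with the wrapper and component names simplified for illustration:

use serde::Deserialize;

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct RuntimeOptions {
    read_rt_size: usize,
    write_rt_size: usize,
    bg_rt_size: usize,
}

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct Wrapper<T> {
    // Parsed from the dedicated [runtime] table.
    runtime: RuntimeOptions,
    // All remaining top-level keys land in the component options.
    #[serde(flatten)]
    component: T,
}

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct ComponentOptions {
    mode: String,
}

fn main() {
    let text = r#"
        mode = "standalone"

        [runtime]
        read_rt_size = 8
        write_rt_size = 8
        bg_rt_size = 8
    "#;
    let opts: Wrapper<ComponentOptions> = toml::from_str(text).unwrap();
    println!("{opts:?}");
}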

View File

@@ -67,7 +67,7 @@ use crate::error::{
ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::options::GlobalOptions;
use crate::{log_versions, App};
pub const APP_NAME: &str = "greptime-standalone";
@@ -79,14 +79,11 @@ pub struct Command {
}
impl Command {
pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
pub async fn build(&self, opts: StandaloneOptions) -> Result<Instance> {
self.subcmd.build(opts).await
}
pub fn load_options(
&self,
global_options: &GlobalOptions,
) -> Result<GreptimeOptions<StandaloneOptions>> {
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<StandaloneOptions> {
self.subcmd.load_options(global_options)
}
}
@@ -97,23 +94,20 @@ enum SubCommand {
}
impl SubCommand {
async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
async fn build(&self, opts: StandaloneOptions) -> Result<Instance> {
match self {
SubCommand::Start(cmd) => cmd.build(opts).await,
}
}
fn load_options(
&self,
global_options: &GlobalOptions,
) -> Result<GreptimeOptions<StandaloneOptions>> {
fn load_options(&self, global_options: &GlobalOptions) -> Result<StandaloneOptions> {
match self {
SubCommand::Start(cmd) => cmd.load_options(global_options),
}
}
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct StandaloneOptions {
pub mode: Mode,
@@ -167,7 +161,7 @@ impl Default for StandaloneOptions {
}
}
impl Configurable for StandaloneOptions {
impl Configurable<'_> for StandaloneOptions {
fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}
@@ -297,27 +291,23 @@ pub struct StartCommand {
}
impl StartCommand {
fn load_options(
&self,
global_options: &GlobalOptions,
) -> Result<GreptimeOptions<StandaloneOptions>> {
let mut opts = GreptimeOptions::<StandaloneOptions>::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
fn load_options(&self, global_options: &GlobalOptions) -> Result<StandaloneOptions> {
self.merge_with_cli_options(
global_options,
StandaloneOptions::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?,
)
.context(LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts.component)?;
Ok(opts)
}
// The precedence order is: cli > config file > environment variables > default values.
pub fn merge_with_cli_options(
&self,
global_options: &GlobalOptions,
opts: &mut StandaloneOptions,
) -> Result<()> {
mut opts: StandaloneOptions,
) -> Result<StandaloneOptions> {
// Should always be standalone mode.
opts.mode = Mode::Standalone;
@@ -379,27 +369,20 @@ impl StartCommand {
opts.user_provider.clone_from(&self.user_provider);
Ok(())
Ok(opts)
}
#[allow(unreachable_code)]
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);
let guard = common_telemetry::init_global_logging(
APP_NAME,
&opts.component.logging,
&opts.component.tracing,
None,
);
async fn build(&self, opts: StandaloneOptions) -> Result<Instance> {
let guard =
common_telemetry::init_global_logging(APP_NAME, &opts.logging, &opts.tracing, None);
log_versions(version!(), short_version!());
info!("Standalone start command: {:#?}", self);
info!("Standalone options: {opts:#?}");
info!("Building standalone instance with {opts:#?}");
let opts = opts.component;
let mut fe_opts = opts.frontend_options();
#[allow(clippy::unnecessary_mut_passed)]
let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts) // mut ref is MUST, DO NOT change it
@@ -454,11 +437,9 @@ impl StartCommand {
);
let flownode = Arc::new(flow_builder.build().await);
let datanode = DatanodeBuilder::new(dn_opts, fe_plugins.clone())
.with_kv_backend(kv_backend.clone())
.build()
.await
.context(StartDatanodeSnafu)?;
let builder =
DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
let datanode = builder.build().await.context(StartDatanodeSnafu)?;
let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(),
@@ -683,10 +664,7 @@ mod tests {
..Default::default()
};
let options = cmd
.load_options(&GlobalOptions::default())
.unwrap()
.component;
let options = cmd.load_options(&GlobalOptions::default()).unwrap();
let fe_opts = options.frontend_options();
let dn_opts = options.datanode_options();
let logging_opts = options.logging;
@@ -747,8 +725,7 @@ mod tests {
#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap()
.component;
.unwrap();
assert_eq!("/tmp/greptimedb/test/logs", opts.logging.dir);
assert_eq!("debug", opts.logging.level.unwrap());
@@ -810,7 +787,7 @@ mod tests {
..Default::default()
};
let opts = command.load_options(&Default::default()).unwrap().component;
let opts = command.load_options(&GlobalOptions::default()).unwrap();
// Should be read from env, env > default values.
assert_eq!(opts.logging.dir, "/other/log/dir");

View File

@@ -1,218 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use cmd::options::GreptimeOptions;
use cmd::standalone::StandaloneOptions;
use common_config::Configurable;
use common_runtime::global::RuntimeOptions;
use common_telemetry::logging::LoggingOptions;
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::{DatanodeWalConfig, StandaloneWalConfig};
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
use frontend::frontend::FrontendOptions;
use frontend::service_config::datanode::DatanodeClientOptions;
use meta_client::MetaClientOptions;
use meta_srv::metasrv::MetasrvOptions;
use meta_srv::selector::SelectorType;
use mito2::config::MitoConfig;
use servers::export_metrics::ExportMetricsOption;
#[test]
fn test_load_datanode_example_config() {
let example_config = common_test_util::find_workspace_path("config/datanode.example.toml");
let options =
GreptimeOptions::<DatanodeOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<DatanodeOptions> {
runtime: RuntimeOptions {
read_rt_size: 8,
write_rt_size: 8,
bg_rt_size: 8,
},
component: DatanodeOptions {
node_id: Some(42),
rpc_hostname: Some("127.0.0.1".to_string()),
meta_client: Some(MetaClientOptions {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_secs(3),
heartbeat_timeout: Duration::from_millis(500),
ddl_timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(1),
tcp_nodelay: true,
metadata_cache_max_capacity: 100000,
metadata_cache_ttl: Duration::from_secs(600),
metadata_cache_tti: Duration::from_secs(300),
}),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
..Default::default()
}),
storage: StorageConfig {
data_home: "/tmp/greptimedb/".to_string(),
..Default::default()
},
region_engine: vec![RegionEngineConfig::Mito(MitoConfig {
num_workers: 8,
auto_flush_interval: Duration::from_secs(3600),
scan_parallelism: 0,
..Default::default()
})],
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),
..Default::default()
},
..Default::default()
},
};
assert_eq!(options, expected);
}
#[test]
fn test_load_frontend_example_config() {
let example_config = common_test_util::find_workspace_path("config/frontend.example.toml");
let options =
GreptimeOptions::<FrontendOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<FrontendOptions> {
runtime: RuntimeOptions {
read_rt_size: 8,
write_rt_size: 8,
bg_rt_size: 8,
},
component: FrontendOptions {
default_timezone: Some("UTC".to_string()),
meta_client: Some(MetaClientOptions {
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
timeout: Duration::from_secs(3),
heartbeat_timeout: Duration::from_millis(500),
ddl_timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(1),
tcp_nodelay: true,
metadata_cache_max_capacity: 100000,
metadata_cache_ttl: Duration::from_secs(600),
metadata_cache_tti: Duration::from_secs(300),
}),
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
datanode: frontend::service_config::DatanodeOptions {
client: DatanodeClientOptions {
connect_timeout: Duration::from_secs(10),
tcp_nodelay: true,
},
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),
..Default::default()
},
..Default::default()
},
};
assert_eq!(options, expected);
}
#[test]
fn test_load_metasrv_example_config() {
let example_config = common_test_util::find_workspace_path("config/metasrv.example.toml");
let options =
GreptimeOptions::<MetasrvOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<MetasrvOptions> {
runtime: RuntimeOptions {
read_rt_size: 8,
write_rt_size: 8,
bg_rt_size: 8,
},
component: MetasrvOptions {
selector: SelectorType::LeaseBased,
data_home: "/tmp/metasrv/".to_string(),
logging: LoggingOptions {
dir: "/tmp/greptimedb/logs".to_string(),
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),
..Default::default()
},
..Default::default()
},
};
assert_eq!(options, expected);
}
#[test]
fn test_load_standalone_example_config() {
let example_config = common_test_util::find_workspace_path("config/standalone.example.toml");
let options =
GreptimeOptions::<StandaloneOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<StandaloneOptions> {
runtime: RuntimeOptions {
read_rt_size: 8,
write_rt_size: 8,
bg_rt_size: 8,
},
component: StandaloneOptions {
default_timezone: Some("UTC".to_string()),
wal: StandaloneWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
..Default::default()
}),
region_engine: vec![RegionEngineConfig::Mito(MitoConfig {
num_workers: 8,
auto_flush_interval: Duration::from_secs(3600),
scan_parallelism: 0,
..Default::default()
})],
storage: StorageConfig {
data_home: "/tmp/greptimedb/".to_string(),
..Default::default()
},
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),
..Default::default()
},
..Default::default()
},
};
assert_eq!(options, expected);
}

View File

@@ -13,8 +13,7 @@
// limitations under the License.
use config::{Environment, File, FileFormat};
use serde::de::DeserializeOwned;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{LoadLayeredConfigSnafu, Result, SerdeJsonSnafu, TomlFormatSnafu};
@@ -26,7 +25,7 @@ pub const ENV_VAR_SEP: &str = "__";
pub const ENV_LIST_SEP: &str = ",";
/// Configuration trait defines the common interface for configuration that can be loaded from multiple sources and serialized to TOML.
pub trait Configurable: Serialize + DeserializeOwned + Default + Sized {
pub trait Configurable<'de>: Serialize + Deserialize<'de> + Default + Sized {
/// Load the configuration from multiple sources and merge them.
/// The precedence order is: config file > environment variables > default values.
/// `env_prefix` is the prefix of environment variables, e.g. "FRONTEND__xxx".
@@ -129,7 +128,7 @@ mod tests {
}
}
impl Configurable for TestDatanodeConfig {
impl Configurable<'_> for TestDatanodeConfig {
fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["meta_client.metasrv_addrs"])
}
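
The hunks in this file swap `DeserializeOwned` for an explicit `'de` lifetime parameter but keep the layered-loading contract described in the doc comment: defaults, then environment variables split on `__` under a component prefix such as `FRONTEND`, then an optional TOML file, with later sources winning. A rough sketch of that layering with the `config` crate pinned in the workspace (0.13); the builder calls are my reading of that crate's API, not code taken from this diff:

use config::{Config, Environment, File, FileFormat};
use serde::Deserialize;

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct MyOptions {
    http_addr: String, // hypothetical field, for illustration only
}

fn load_layered(file: Option<&str>, env_prefix: &str) -> Result<MyOptions, config::ConfigError> {
    let mut builder = Config::builder()
        // Lower precedence: environment variables under the given prefix, nested keys split on "__".
        .add_source(Environment::with_prefix(env_prefix).separator("__"));
    if let Some(path) = file {
        // Higher precedence: the TOML config file, when one is given.
        builder = builder.add_source(File::new(path, FileFormat::Toml).required(false));
    }
    // Keys missing from every source fall back to MyOptions::default().
    builder.build()?.try_deserialize()
}

fn main() {
    let opts = load_layered(None, "FRONTEND").unwrap();
    println!("{opts:?}");
}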

View File

@@ -20,7 +20,6 @@ async-compression = { version = "0.3", features = [
] }
async-trait.workspace = true
bytes.workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-recordbatch.workspace = true
@@ -34,7 +33,6 @@ object-store.workspace = true
orc-rust = { git = "https://github.com/datafusion-contrib/datafusion-orc.git", rev = "502217315726314c4008808fe169764529640599" }
parquet.workspace = true
paste = "1.0"
rand.workspace = true
regex = "1.7"
serde.workspace = true
snafu.workspace = true
@@ -44,7 +42,4 @@ tokio-util.workspace = true
url = "2.3"
[dev-dependencies]
common-telemetry.workspace = true
common-test-util.workspace = true
dotenv.workspace = true
uuid.workspace = true

View File

@@ -92,44 +92,34 @@ impl CompressionType {
macro_rules! impl_compression_type {
($(($enum_item:ident, $prefix:ident)),*) => {
paste::item! {
use bytes::{Buf, BufMut, BytesMut};
impl CompressionType {
pub async fn encode<B: Buf>(&self, mut content: B) -> io::Result<Vec<u8>> {
pub async fn encode(&self, content: impl AsRef<[u8]>) -> io::Result<Vec<u8>> {
match self {
$(
CompressionType::$enum_item => {
let mut buffer = Vec::with_capacity(content.remaining());
let mut buffer = Vec::with_capacity(content.as_ref().len());
let mut encoder = write::[<$prefix Encoder>]::new(&mut buffer);
encoder.write_all_buf(&mut content).await?;
encoder.write_all(content.as_ref()).await?;
encoder.shutdown().await?;
Ok(buffer)
}
)*
CompressionType::Uncompressed => {
let mut bs = BytesMut::with_capacity(content.remaining());
bs.put(content);
Ok(bs.to_vec())
},
CompressionType::Uncompressed => Ok(content.as_ref().to_vec()),
}
}
pub async fn decode<B: Buf>(&self, mut content: B) -> io::Result<Vec<u8>> {
pub async fn decode(&self, content: impl AsRef<[u8]>) -> io::Result<Vec<u8>> {
match self {
$(
CompressionType::$enum_item => {
let mut buffer = Vec::with_capacity(content.remaining() * 2);
let mut buffer = Vec::with_capacity(content.as_ref().len() * 2);
let mut encoder = write::[<$prefix Decoder>]::new(&mut buffer);
encoder.write_all_buf(&mut content).await?;
encoder.write_all(content.as_ref()).await?;
encoder.shutdown().await?;
Ok(buffer)
}
)*
CompressionType::Uncompressed => {
let mut bs = BytesMut::with_capacity(content.remaining());
bs.put(content);
Ok(bs.to_vec())
},
CompressionType::Uncompressed => Ok(content.as_ref().to_vec()),
}
}
@@ -161,13 +151,13 @@ macro_rules! impl_compression_type {
$(
#[tokio::test]
async fn [<test_ $enum_item:lower _compression>]() {
let string = "foo_bar".as_bytes();
let string = "foo_bar".as_bytes().to_vec();
let compress = CompressionType::$enum_item
.encode(string)
.encode(&string)
.await
.unwrap();
let decompress = CompressionType::$enum_item
.decode(compress.as_slice())
.decode(&compress)
.await
.unwrap();
assert_eq!(decompress, string);
@@ -175,13 +165,13 @@ macro_rules! impl_compression_type {
#[tokio::test]
async fn test_uncompression() {
let string = "foo_bar".as_bytes();
let string = "foo_bar".as_bytes().to_vec();
let compress = CompressionType::Uncompressed
.encode(string)
.encode(&string)
.await
.unwrap();
let decompress = CompressionType::Uncompressed
.decode(compress.as_slice())
.decode(&compress)
.await
.unwrap();
assert_eq!(decompress, string);
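
The `impl_compression_type!` macro generates one encoder/decoder pair like this per codec; hand-expanding it for a single codec shows the shape the tests above exercise. A sketch for gzip, assuming tokio plus the async-compression crate's tokio `write` adapters (the concrete codec and feature flags are my assumptions, not taken from this diff):

use async_compression::tokio::write::{GzipDecoder, GzipEncoder};
use tokio::io::AsyncWriteExt;

async fn gzip_encode(content: impl AsRef<[u8]>) -> std::io::Result<Vec<u8>> {
    let mut buffer = Vec::with_capacity(content.as_ref().len());
    let mut encoder = GzipEncoder::new(&mut buffer);
    encoder.write_all(content.as_ref()).await?;
    encoder.shutdown().await?; // flush and finish the gzip stream
    Ok(buffer)
}

async fn gzip_decode(content: impl AsRef<[u8]>) -> std::io::Result<Vec<u8>> {
    let mut buffer = Vec::with_capacity(content.as_ref().len() * 2);
    let mut decoder = GzipDecoder::new(&mut buffer);
    decoder.write_all(content.as_ref()).await?;
    decoder.shutdown().await?;
    Ok(buffer)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let compressed = gzip_encode(b"foo_bar").await?;
    assert_eq!(gzip_decode(&compressed).await?, b"foo_bar");
    Ok(())
}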

View File

@@ -36,7 +36,6 @@ use datafusion::physical_plan::SendableRecordBatchStream;
use futures::StreamExt;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncWriteCompatExt;
use self::csv::CsvFormat;
use self::json::JsonFormat;
@@ -46,7 +45,6 @@ use crate::buffered_writer::{DfRecordBatchEncoder, LazyBufferedWriter};
use crate::compression::CompressionType;
use crate::error::{self, Result};
use crate::share_buffer::SharedBuffer;
use crate::DEFAULT_WRITE_BUFFER_SIZE;
pub const FORMAT_COMPRESSION_TYPE: &str = "compression_type";
pub const FORMAT_DELIMITER: &str = "delimiter";
@@ -148,8 +146,7 @@ pub fn open_with_decoder<T: ArrowDecoder, F: Fn() -> DataFusionResult<T>>(
let reader = object_store
.reader(&path)
.await
.map_err(|e| DataFusionError::External(Box::new(e)))?
.into_bytes_stream(..);
.map_err(|e| DataFusionError::External(Box::new(e)))?;
let mut upstream = compression_type.convert_stream(reader).fuse();
@@ -205,9 +202,7 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
store
.writer_with(&path)
.concurrent(concurrency)
.chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.await
.map(|v| v.into_futures_async_write().compat_write())
.context(error::WriteObjectSnafu { path })
});

View File

@@ -29,7 +29,6 @@ use datafusion::physical_plan::SendableRecordBatchStream;
use derive_builder::Builder;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
@@ -165,16 +164,10 @@ impl FileOpener for CsvOpener {
#[async_trait]
impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
let meta = store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let reader = store
.reader(path)
.await
.context(error::ReadObjectSnafu { path })?
.into_futures_async_read(0..meta.content_length())
.compat();
.context(error::ReadObjectSnafu { path })?;
let decoded = self.compression_type.convert_async_read(reader);
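
This hunk and the json, orc, and parquet ones that follow all revert the same read-path change: under opendal 0.46 the code had to `stat` the object first, bound the reader to `0..content_length`, and bridge the futures `AsyncRead` into tokio via `compat()`, whereas the restored 0.45 code passes `store.reader(path).await?` along directly. A sketch of the 0.46-style helper being removed, assembled from the calls visible in these hunks and assuming the workspace's `object_store` re-exports (not verified against opendal's own docs):

use object_store::ObjectStore;
use tokio_util::compat::FuturesAsyncReadCompatExt;

// opendal 0.46 read path (the one this revert removes): a stat() is needed to
// learn the object length before the reader can be turned into a bounded,
// tokio-compatible AsyncRead.
async fn open_tokio_reader(
    store: &ObjectStore,
    path: &str,
) -> Result<impl tokio::io::AsyncRead, object_store::Error> {
    let meta = store.stat(path).await?;
    let reader = store
        .reader(path)
        .await?
        .into_futures_async_read(0..meta.content_length())
        .compat();
    Ok(reader)
}

With 0.45 restored, the equivalent is just `store.reader(path).await?`, which is why the `stat` calls and the `compat()` bridges disappear throughout these files.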

View File

@@ -31,7 +31,6 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::SendableRecordBatchStream;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
@@ -83,16 +82,10 @@ impl Default for JsonFormat {
#[async_trait]
impl FileFormat for JsonFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
let meta = store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let reader = store
.reader(path)
.await
.context(error::ReadObjectSnafu { path })?
.into_futures_async_read(0..meta.content_length())
.compat();
.context(error::ReadObjectSnafu { path })?;
let decoded = self.compression_type.convert_async_read(reader);

View File

@@ -16,17 +16,15 @@ use std::sync::Arc;
use arrow_schema::{ArrowError, Schema, SchemaRef};
use async_trait::async_trait;
use bytes::Bytes;
use common_recordbatch::adapter::RecordBatchStreamTypeAdapter;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::{DataFusionError, Result as DfResult};
use futures::future::BoxFuture;
use futures::{FutureExt, StreamExt, TryStreamExt};
use futures::{StreamExt, TryStreamExt};
use object_store::ObjectStore;
use orc_rust::arrow_reader::ArrowReaderBuilder;
use orc_rust::async_arrow_reader::ArrowStreamReader;
use orc_rust::reader::AsyncChunkReader;
use snafu::ResultExt;
use tokio::io::{AsyncRead, AsyncSeek};
use crate::error::{self, Result};
use crate::file_format::FileFormat;
@@ -34,49 +32,18 @@ use crate::file_format::FileFormat;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct OrcFormat;
#[derive(Clone)]
pub struct ReaderAdapter {
reader: object_store::Reader,
len: u64,
}
impl ReaderAdapter {
pub fn new(reader: object_store::Reader, len: u64) -> Self {
Self { reader, len }
}
}
impl AsyncChunkReader for ReaderAdapter {
fn len(&mut self) -> BoxFuture<'_, std::io::Result<u64>> {
async move { Ok(self.len) }.boxed()
}
fn get_bytes(
&mut self,
offset_from_start: u64,
length: u64,
) -> BoxFuture<'_, std::io::Result<Bytes>> {
async move {
let bytes = self
.reader
.read(offset_from_start..offset_from_start + length)
.await?;
Ok(bytes.to_bytes())
}
.boxed()
}
}
pub async fn new_orc_stream_reader(
reader: ReaderAdapter,
) -> Result<ArrowStreamReader<ReaderAdapter>> {
pub async fn new_orc_stream_reader<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>(
reader: R,
) -> Result<ArrowStreamReader<R>> {
let reader_build = ArrowReaderBuilder::try_new_async(reader)
.await
.context(error::OrcReaderSnafu)?;
Ok(reader_build.build_async())
}
pub async fn infer_orc_schema(reader: ReaderAdapter) -> Result<Schema> {
pub async fn infer_orc_schema<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>(
reader: R,
) -> Result<Schema> {
let reader = new_orc_stream_reader(reader).await?;
Ok(reader.schema().as_ref().clone())
}
@@ -84,15 +51,13 @@ pub async fn infer_orc_schema(reader: ReaderAdapter) -> Result<Schema> {
#[async_trait]
impl FileFormat for OrcFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
let meta = store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let reader = store
.reader(path)
.await
.context(error::ReadObjectSnafu { path })?;
let schema = infer_orc_schema(ReaderAdapter::new(reader, meta.content_length())).await?;
let schema = infer_orc_schema(reader).await?;
Ok(schema)
}
}
@@ -132,22 +97,14 @@ impl FileOpener for OrcOpener {
};
let projection = self.projection.clone();
Ok(Box::pin(async move {
let path = meta.location().to_string();
let meta = object_store
.stat(&path)
.await
.map_err(|e| DataFusionError::External(Box::new(e)))?;
let reader = object_store
.reader(&path)
.reader(meta.location().to_string().as_str())
.await
.map_err(|e| DataFusionError::External(Box::new(e)))?;
let stream_reader =
new_orc_stream_reader(ReaderAdapter::new(reader, meta.content_length()))
.await
.map_err(|e| DataFusionError::External(Box::new(e)))?;
let stream_reader = new_orc_stream_reader(reader)
.await
.map_err(|e| DataFusionError::External(Box::new(e)))?;
let stream =
RecordBatchStreamTypeAdapter::new(projected_schema, stream_reader, projection);

View File

@@ -29,17 +29,15 @@ use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion::physical_plan::SendableRecordBatchStream;
use futures::future::BoxFuture;
use futures::StreamExt;
use object_store::{FuturesAsyncReader, ObjectStore};
use object_store::{ObjectStore, Reader, Writer};
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;
use snafu::ResultExt;
use tokio_util::compat::{Compat, FuturesAsyncReadCompatExt, FuturesAsyncWriteCompatExt};
use crate::buffered_writer::{ArrowWriterCloser, DfRecordBatchEncoder, LazyBufferedWriter};
use crate::error::{self, Result};
use crate::file_format::FileFormat;
use crate::share_buffer::SharedBuffer;
use crate::DEFAULT_WRITE_BUFFER_SIZE;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ParquetFormat {}
@@ -47,16 +45,10 @@ pub struct ParquetFormat {}
#[async_trait]
impl FileFormat for ParquetFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
let meta = store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let mut reader = store
.reader(path)
.await
.context(error::ReadObjectSnafu { path })?
.into_futures_async_read(0..meta.content_length())
.compat();
.context(error::ReadObjectSnafu { path })?;
let metadata = reader
.get_metadata()
@@ -106,7 +98,7 @@ impl ParquetFileReaderFactory for DefaultParquetFileReaderFactory {
pub struct LazyParquetFileReader {
object_store: ObjectStore,
reader: Option<Compat<FuturesAsyncReader>>,
reader: Option<Reader>,
path: String,
}
@@ -122,13 +114,7 @@ impl LazyParquetFileReader {
/// Must initialize the reader, or throw an error from the future.
async fn maybe_initialize(&mut self) -> result::Result<(), object_store::Error> {
if self.reader.is_none() {
let meta = self.object_store.stat(&self.path).await?;
let reader = self
.object_store
.reader(&self.path)
.await?
.into_futures_async_read(0..meta.content_length())
.compat();
let reader = self.object_store.reader(&self.path).await?;
self.reader = Some(reader);
}
@@ -181,26 +167,23 @@ pub struct BufferedWriter {
}
type InnerBufferedWriter = LazyBufferedWriter<
Compat<object_store::FuturesAsyncWriter>,
object_store::Writer,
ArrowWriter<SharedBuffer>,
impl Fn(String) -> BoxFuture<'static, Result<Compat<object_store::FuturesAsyncWriter>>>,
impl Fn(String) -> BoxFuture<'static, Result<Writer>>,
>;
impl BufferedWriter {
fn make_write_factory(
store: ObjectStore,
concurrency: usize,
) -> impl Fn(String) -> BoxFuture<'static, Result<Compat<object_store::FuturesAsyncWriter>>>
{
) -> impl Fn(String) -> BoxFuture<'static, Result<Writer>> {
move |path| {
let store = store.clone();
Box::pin(async move {
store
.writer_with(&path)
.concurrent(concurrency)
.chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.await
.map(|v| v.into_futures_async_write().compat_write())
.context(error::WriteObjectSnafu { path })
})
}
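
The `make_write_factory` hunk above captures a store in a closure and hands back a boxed future per path, so `LazyBufferedWriter` can defer creating the writer until the first flush. A self-contained sketch of that closure shape, using an in-memory stand-in instead of `ObjectStore`, might look like this:

```rust
use std::io;

use futures::future::BoxFuture;

/// Stand-in "store" so the sketch compiles on its own; not a GreptimeDB type.
#[derive(Clone, Default)]
struct MemStore;

impl MemStore {
    async fn writer(&self, _path: &str) -> io::Result<Vec<u8>> {
        Ok(Vec::new())
    }
}

/// Returns a closure that lazily creates a writer for a path, mirroring the
/// `impl Fn(String) -> BoxFuture<'static, Result<Writer>>` bound used above.
fn make_write_factory(store: MemStore) -> impl Fn(String) -> BoxFuture<'static, io::Result<Vec<u8>>> {
    move |path: String| {
        let store = store.clone();
        let fut: BoxFuture<'static, io::Result<Vec<u8>>> =
            Box::pin(async move { store.writer(&path).await });
        fut
    }
}
```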
@@ -278,19 +261,9 @@ pub async fn stream_to_parquet(
#[cfg(test)]
mod tests {
use std::env;
use std::sync::Arc;
use common_telemetry::warn;
use common_test_util::find_workspace_path;
use datatypes::arrow::array::{ArrayRef, Int64Array, RecordBatch};
use datatypes::arrow::datatypes::{DataType, Field, Schema};
use object_store::services::S3;
use object_store::ObjectStore;
use rand::{thread_rng, Rng};
use super::*;
use crate::file_format::parquet::BufferedWriter;
use crate::test_util::{format_schema, test_store};
fn test_data_root() -> String {
@@ -308,64 +281,4 @@ mod tests {
assert_eq!(vec!["num: Int64: NULL", "str: Utf8: NULL"], formatted);
}
#[tokio::test]
async fn test_parquet_writer() {
common_telemetry::init_default_ut_logging();
let _ = dotenv::dotenv();
let Ok(bucket) = env::var("GT_MINIO_BUCKET") else {
warn!("ignoring test parquet writer");
return;
};
let mut builder = S3::default();
let _ = builder
.root(&uuid::Uuid::new_v4().to_string())
.access_key_id(&env::var("GT_MINIO_ACCESS_KEY_ID").unwrap())
.secret_access_key(&env::var("GT_MINIO_ACCESS_KEY").unwrap())
.bucket(&bucket)
.region(&env::var("GT_MINIO_REGION").unwrap())
.endpoint(&env::var("GT_MINIO_ENDPOINT_URL").unwrap());
let object_store = ObjectStore::new(builder).unwrap().finish();
let file_path = uuid::Uuid::new_v4().to_string();
let fields = vec![
Field::new("field1", DataType::Int64, true),
Field::new("field0", DataType::Int64, true),
];
let arrow_schema = Arc::new(Schema::new(fields));
let mut buffered_writer = BufferedWriter::try_new(
file_path.clone(),
object_store.clone(),
arrow_schema.clone(),
None,
// Sets a small value.
128,
8,
)
.await
.unwrap();
let rows = 200000;
let generator = || {
let columns: Vec<ArrayRef> = vec![
Arc::new(Int64Array::from(
(0..rows)
.map(|_| thread_rng().gen::<i64>())
.collect::<Vec<_>>(),
)),
Arc::new(Int64Array::from(
(0..rows)
.map(|_| thread_rng().gen::<i64>())
.collect::<Vec<_>>(),
)),
];
RecordBatch::try_new(arrow_schema.clone(), columns).unwrap()
};
let batch = generator();
// Writes about 30 MiB in total
for _ in 0..10 {
buffered_writer.write(&batch).await.unwrap();
}
buffered_writer.close().await.unwrap();
}
}

View File

@@ -27,8 +27,3 @@ pub mod test_util;
#[cfg(test)]
pub mod tests;
pub mod util;
use common_base::readable_size::ReadableSize;
/// Default write buffer size; it should be greater than the default minimum upload part size of S3 (5 MB).
pub const DEFAULT_WRITE_BUFFER_SIZE: ReadableSize = ReadableSize::mb(8);

View File

@@ -120,7 +120,7 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi
let written = tmp_store.read(&output_path).await.unwrap();
let origin = store.read(origin_path).await.unwrap();
assert_eq_lines(written.to_vec(), origin.to_vec());
assert_eq_lines(written, origin);
}
pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usize) -> usize) {
@@ -158,7 +158,7 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz
let written = tmp_store.read(&output_path).await.unwrap();
let origin = store.read(origin_path).await.unwrap();
assert_eq_lines(written.to_vec(), origin.to_vec());
assert_eq_lines(written, origin);
}
// Ignore the CRLF difference across operating systems.

View File

@@ -10,4 +10,3 @@ workspace = true
[dependencies]
snafu.workspace = true
strum.workspace = true
tonic.workspace = true

View File

@@ -15,7 +15,6 @@
use std::fmt;
use strum::{AsRefStr, EnumIter, EnumString, FromRepr};
use tonic::Code;
/// Common status code for public API.
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr, EnumIter, FromRepr)]
@@ -203,75 +202,6 @@ impl fmt::Display for StatusCode {
}
}
#[macro_export]
macro_rules! define_into_tonic_status {
($Error: ty) => {
impl From<$Error> for tonic::Status {
fn from(err: $Error) -> Self {
use tonic::codegen::http::{HeaderMap, HeaderValue};
use tonic::metadata::MetadataMap;
use $crate::GREPTIME_DB_HEADER_ERROR_CODE;
let mut headers = HeaderMap::<HeaderValue>::with_capacity(2);
// If either the status code or the error message cannot be converted to a valid HTTP header value
// (a very rare case), just ignore it. The client will fall back to the Tonic status code and message.
let status_code = err.status_code();
headers.insert(
GREPTIME_DB_HEADER_ERROR_CODE,
HeaderValue::from(status_code as u32),
);
let root_error = err.output_msg();
let metadata = MetadataMap::from_headers(headers);
tonic::Status::with_metadata(
$crate::status_code::status_to_tonic_code(status_code),
root_error,
metadata,
)
}
}
};
}
/// Returns the tonic [Code] of a [StatusCode].
pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
match status_code {
StatusCode::Success => Code::Ok,
StatusCode::Unknown => Code::Unknown,
StatusCode::Unsupported => Code::Unimplemented,
StatusCode::Unexpected
| StatusCode::Internal
| StatusCode::PlanQuery
| StatusCode::EngineExecuteQuery => Code::Internal,
StatusCode::InvalidArguments | StatusCode::InvalidSyntax | StatusCode::RequestOutdated => {
Code::InvalidArgument
}
StatusCode::Cancelled => Code::Cancelled,
StatusCode::TableAlreadyExists
| StatusCode::TableColumnExists
| StatusCode::RegionAlreadyExists
| StatusCode::FlowAlreadyExists => Code::AlreadyExists,
StatusCode::TableNotFound
| StatusCode::RegionNotFound
| StatusCode::TableColumnNotFound
| StatusCode::DatabaseNotFound
| StatusCode::UserNotFound
| StatusCode::FlowNotFound => Code::NotFound,
StatusCode::StorageUnavailable | StatusCode::RegionNotReady => Code::Unavailable,
StatusCode::RuntimeResourcesExhausted
| StatusCode::RateLimited
| StatusCode::RegionBusy => Code::ResourceExhausted,
StatusCode::UnsupportedPasswordType
| StatusCode::UserPasswordMismatch
| StatusCode::AuthHeaderNotFound
| StatusCode::InvalidAuthHeader => Code::Unauthenticated,
StatusCode::AccessDenied | StatusCode::PermissionDenied | StatusCode::RegionReadonly => {
Code::PermissionDenied
}
}
}
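
The macro and mapping above turn a crate-level error into a `tonic::Status` that carries the database status code as gRPC metadata, so clients can recover the original code even though gRPC only has a coarse set of codes. A standalone sketch of that conversion step follows; the header name is a placeholder for the real `GREPTIME_DB_HEADER_ERROR_CODE` constant, and `Code::Internal` is just an example mapping.

```rust
use tonic::codegen::http::{HeaderMap, HeaderValue};
use tonic::metadata::MetadataMap;
use tonic::{Code, Status};

/// Attach an application-level error code as gRPC metadata, the same pattern
/// the macro above expands to (simplified; not project code).
fn to_status(app_code: u32, message: &str) -> Status {
    let mut headers = HeaderMap::<HeaderValue>::with_capacity(1);
    // Placeholder header name; the real constant is GREPTIME_DB_HEADER_ERROR_CODE.
    headers.insert("x-greptime-err-code", HeaderValue::from(app_code));
    Status::with_metadata(Code::Internal, message, MetadataMap::from_headers(headers))
}
```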
#[cfg(test)]
mod tests {
use strum::IntoEnumIterator;

View File

@@ -179,7 +179,7 @@ impl StateStore for ObjectStateStore {
))
})
.context(ListStateSnafu { path: key })?;
yield (key.into(), value.to_vec());
yield (key.into(), value);
}
}
});

View File

@@ -22,7 +22,7 @@ use std::sync::Arc;
use datafusion::arrow::datatypes::Field;
use datafusion_common::Result;
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::function::AccumulatorArgs;
use datafusion_expr::{
Accumulator, AccumulatorFactoryFunction, AggregateUDF as DfAggregateUdf, AggregateUDFImpl,
};
@@ -129,13 +129,13 @@ impl AggregateUDFImpl for DfUdafAdapter {
(self.accumulator)(acc_args)
}
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
fn state_fields(&self, name: &str, _: ArrowDataType, _: Vec<Field>) -> Result<Vec<Field>> {
let state_types = self.creator.state_types()?;
let fields = state_types
.into_iter()
.enumerate()
.map(|(i, t)| {
let name = format!("{}_{i}", args.name);
let name = format!("{name}_{i}");
Field::new(name, t.as_arrow_type(), true)
})
.collect::<Vec<_>>();

View File

@@ -108,10 +108,6 @@ impl ScalarUDFImpl for DfUdfAdapter {
fn invoke(&self, args: &[DfColumnarValue]) -> datafusion_common::Result<DfColumnarValue> {
(self.fun)(args)
}
fn invoke_no_args(&self, number_rows: usize) -> datafusion_common::Result<DfColumnarValue> {
Ok((self.fun)(&[])?.into_array(number_rows)?.into())
}
}
impl From<ScalarUdf> for DfScalarUDF {

View File

@@ -27,6 +27,10 @@ pub enum TypeSignature {
/// arbitrary number of arguments of a common type out of a list of valid types
// A function such as `concat` is `Variadic(vec![ConcreteDataType::String, ConcreteDataType::String])`
Variadic(Vec<ConcreteDataType>),
/// arbitrary number of arguments of an arbitrary but equal type
// A function such as `array` is `VariadicEqual`
// The first argument decides the type used for coercion
VariadicEqual,
/// One or more arguments with arbitrary types
VariadicAny,
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types
@@ -63,7 +67,6 @@ impl Signature {
volatility,
}
}
/// variadic - Creates a variadic signature that represents an arbitrary number of arguments all from a type in common_types.
pub fn variadic(common_types: Vec<ConcreteDataType>, volatility: Volatility) -> Self {
Self {
@@ -71,6 +74,13 @@ impl Signature {
volatility,
}
}
/// variadic_equal - Creates a variadic signature that represents an arbitrary number of arguments of the same type.
pub fn variadic_equal(volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::VariadicEqual,
volatility,
}
}
/// variadic_any - Creates a variadic signature that represents an arbitrary number of arguments of any type.
pub fn variadic_any(volatility: Volatility) -> Self {
@@ -121,6 +131,7 @@ impl From<TypeSignature> for DfTypeSignature {
TypeSignature::Variadic(types) => {
DfTypeSignature::Variadic(concrete_types_to_arrow_types(types))
}
TypeSignature::VariadicEqual => DfTypeSignature::VariadicEqual,
TypeSignature::Uniform(n, types) => {
DfTypeSignature::Uniform(n, concrete_types_to_arrow_types(types))
}
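
As a hedged usage sketch of the restored variant (assuming it sits in the same crate, so `Signature`, `Volatility`, and `ConcreteDataType` are already in scope; no import paths are implied):

```rust
/// Declaring signatures for two UDF styles; a sketch, not project code.
fn example_signatures() -> Vec<Signature> {
    vec![
        // An `array`-like function: any number of arguments, all coerced to one common type.
        Signature::variadic_equal(Volatility::Immutable),
        // A `concat`-like function: any number of string arguments.
        Signature::variadic(vec![ConcreteDataType::string_datatype()], Volatility::Immutable),
    ]
}
```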

View File

@@ -292,7 +292,7 @@ impl ExecutionPlanVisitor for MetricCollector {
// skip if no metric available
let Some(metric) = plan.metrics() else {
self.record_batch_metrics.plan_metrics.push(PlanMetrics {
plan: std::any::type_name::<Self>().to_string(),
plan: plan.name().to_string(),
level: self.current_level,
metrics: vec![],
});

View File

@@ -13,15 +13,13 @@ common-error.workspace = true
common-macro.workspace = true
common-telemetry.workspace = true
lazy_static.workspace = true
num_cpus.workspace = true
once_cell.workspace = true
paste.workspace = true
prometheus.workspace = true
serde.workspace = true
snafu.workspace = true
tokio.workspace = true
tokio-metrics = "0.3"
tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" }
tokio-metrics-collector = "0.2"
tokio-util.workspace = true
[dev-dependencies]

View File

@@ -19,7 +19,6 @@ use std::sync::{Mutex, Once};
use common_telemetry::info;
use once_cell::sync::Lazy;
use paste::paste;
use serde::{Deserialize, Serialize};
use crate::{Builder, JoinHandle, Runtime};
@@ -27,28 +26,6 @@ const READ_WORKERS: usize = 8;
const WRITE_WORKERS: usize = 8;
const BG_WORKERS: usize = 8;
/// The options for the global runtimes.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct RuntimeOptions {
/// The number of threads to execute the runtime for global read operations.
pub read_rt_size: usize,
/// The number of threads to execute the runtime for global write operations.
pub write_rt_size: usize,
/// The number of threads to execute the runtime for global background operations.
pub bg_rt_size: usize,
}
impl Default for RuntimeOptions {
fn default() -> Self {
let cpus = num_cpus::get();
Self {
read_rt_size: cpus,
write_rt_size: cpus,
bg_rt_size: cpus,
}
}
}
pub fn create_runtime(runtime_name: &str, thread_name: &str, worker_threads: usize) -> Runtime {
info!("Creating runtime with runtime_name: {runtime_name}, thread_name: {thread_name}, work_threads: {worker_threads}.");
Builder::default()
@@ -135,26 +112,18 @@ static CONFIG_RUNTIMES: Lazy<Mutex<ConfigRuntimes>> =
/// # Panics
/// Panics when the global runtimes are already initialized.
/// You should call this function before using any runtime functions.
pub fn init_global_runtimes(options: &RuntimeOptions) {
pub fn init_global_runtimes(
read: Option<Runtime>,
write: Option<Runtime>,
background: Option<Runtime>,
) {
static START: Once = Once::new();
START.call_once(move || {
let mut c = CONFIG_RUNTIMES.lock().unwrap();
assert!(!c.already_init, "Global runtimes already initialized");
c.read_runtime = Some(create_runtime(
"global-read",
"global-read-worker",
options.read_rt_size,
));
c.write_runtime = Some(create_runtime(
"global-write",
"global-write-worker",
options.write_rt_size,
));
c.bg_runtime = Some(create_runtime(
"global-bg",
"global-bg-worker",
options.bg_rt_size,
));
c.read_runtime = read;
c.write_runtime = write;
c.bg_runtime = background;
});
}
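
A minimal sketch of wiring the helpers above together, assuming the `Option<Runtime>`-based form of `init_global_runtimes` shown in this hunk; the thread counts are illustrative, not project defaults:

```rust
/// Build dedicated read/write/background runtimes and install them once at startup.
fn init_runtimes_for_standalone() {
    let read = create_runtime("global-read", "global-read-worker", 8);
    let write = create_runtime("global-write", "global-write-worker", 8);
    let bg = create_runtime("global-bg", "global-bg-worker", 8);
    init_global_runtimes(Some(read), Some(write), Some(bg));
}
```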

View File

@@ -13,7 +13,7 @@
// limitations under the License.
pub mod error;
pub mod global;
mod global;
mod metrics;
mod repeated_task;
pub mod runtime;

View File

@@ -8,7 +8,7 @@ license.workspace = true
workspace = true
[dependencies]
client = { workspace = true, features = ["testing"] }
client.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
once_cell.workspace = true

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use client::Database;
use common_query::OutputData;
use common_recordbatch::util;
@@ -30,25 +29,3 @@ pub async fn check_output_stream(output: OutputData, expected: &str) {
let pretty_print = recordbatches.pretty_print().unwrap();
assert_eq!(pretty_print, expected, "actual: \n{}", pretty_print);
}
pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {
let output = db.sql(sql).await.unwrap();
let output = output.data;
match (&output, expected) {
(OutputData::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
assert_eq!(
*x, y,
r#"
expected: {y}
actual: {x}
"#
)
}
(OutputData::RecordBatches(_), ExpectedOutput::QueryResult(x))
| (OutputData::Stream(_), ExpectedOutput::QueryResult(x)) => {
check_output_stream(output, x).await
}
_ => panic!(),
}
}

View File

@@ -15,7 +15,7 @@
//! Datanode configurations
use common_base::readable_size::ReadableSize;
use common_base::secrets::{ExposeSecret, SecretString};
use common_base::secrets::SecretString;
use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
@@ -38,7 +38,7 @@ pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256);
const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb";
/// Object storage config
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ObjectStoreConfig {
File(FileConfig),
@@ -61,7 +61,7 @@ impl ObjectStoreConfig {
}
/// Storage engine config
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct StorageConfig {
/// The working directory of database
@@ -85,7 +85,7 @@ impl Default for StorageConfig {
#[serde(default)]
pub struct FileConfig {}
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct ObjectStorageCacheConfig {
/// The local file cache directory
@@ -109,18 +109,6 @@ pub struct S3Config {
pub cache: ObjectStorageCacheConfig,
}
impl PartialEq for S3Config {
fn eq(&self, other: &Self) -> bool {
self.bucket == other.bucket
&& self.root == other.root
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
&& self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
&& self.endpoint == other.endpoint
&& self.region == other.region
&& self.cache == other.cache
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct OssConfig {
@@ -135,17 +123,6 @@ pub struct OssConfig {
pub cache: ObjectStorageCacheConfig,
}
impl PartialEq for OssConfig {
fn eq(&self, other: &Self) -> bool {
self.bucket == other.bucket
&& self.root == other.root
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
&& self.access_key_secret.expose_secret() == other.access_key_secret.expose_secret()
&& self.endpoint == other.endpoint
&& self.cache == other.cache
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct AzblobConfig {
@@ -161,18 +138,6 @@ pub struct AzblobConfig {
pub cache: ObjectStorageCacheConfig,
}
impl PartialEq for AzblobConfig {
fn eq(&self, other: &Self) -> bool {
self.container == other.container
&& self.root == other.root
&& self.account_name.expose_secret() == other.account_name.expose_secret()
&& self.account_key.expose_secret() == other.account_key.expose_secret()
&& self.endpoint == other.endpoint
&& self.sas_token == other.sas_token
&& self.cache == other.cache
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct GcsConfig {
@@ -186,17 +151,6 @@ pub struct GcsConfig {
pub cache: ObjectStorageCacheConfig,
}
impl PartialEq for GcsConfig {
fn eq(&self, other: &Self) -> bool {
self.root == other.root
&& self.bucket == other.bucket
&& self.scope == other.scope
&& self.credential_path.expose_secret() == other.credential_path.expose_secret()
&& self.endpoint == other.endpoint
&& self.cache == other.cache
}
}
impl Default for S3Config {
fn default() -> Self {
Self {
@@ -257,7 +211,7 @@ impl Default for ObjectStoreConfig {
}
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct DatanodeOptions {
pub mode: Mode,
@@ -313,7 +267,7 @@ impl Default for DatanodeOptions {
}
}
impl Configurable for DatanodeOptions {
impl Configurable<'_> for DatanodeOptions {
fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["meta_client.metasrv_addrs", "wal.broker_endpoints"])
}

View File

@@ -15,10 +15,10 @@
use std::any::Any;
use std::sync::Arc;
use common_error::define_into_tonic_status;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use servers::define_into_tonic_status;
use snafu::{Location, Snafu};
use store_api::storage::RegionId;
use table::error::Error as TableError;

View File

@@ -20,6 +20,7 @@ mod gcs;
mod oss;
mod s3;
use std::sync::Arc;
use std::time::Duration;
use std::{env, path};
@@ -28,7 +29,7 @@ use common_telemetry::info;
use object_store::layers::{LruCacheLayer, RetryLayer};
use object_store::services::Fs;
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
use object_store::{HttpClient, ObjectStore};
use object_store::{HttpClient, ObjectStore, ObjectStoreBuilder};
use snafu::prelude::*;
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
@@ -106,13 +107,13 @@ async fn create_object_store_with_cache(
if let Some(path) = cache_path {
let atomic_temp_dir = join_dir(path, ".tmp/");
clean_temp_dir(&atomic_temp_dir)?;
let mut builder = Fs::default();
builder.root(path).atomic_write_dir(&atomic_temp_dir);
let cache_store = ObjectStore::new(builder)
.context(error::InitBackendSnafu)?
.finish();
let cache_store = Fs::default()
.root(path)
.atomic_write_dir(&atomic_temp_dir)
.build()
.context(error::InitBackendSnafu)?;
let cache_layer = LruCacheLayer::new(cache_store, cache_capacity.0 as usize)
let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize)
.await
.context(error::InitBackendSnafu)?;

View File

@@ -71,8 +71,7 @@ impl FileRegionManifest {
let bs = object_store
.read(path)
.await
.context(LoadRegionManifestSnafu { region_id })?
.to_vec();
.context(LoadRegionManifestSnafu { region_id })?;
Self::decode(bs.as_slice())
}

View File

@@ -64,6 +64,8 @@ mod table_source;
use error::Error;
pub const PER_REQ_MAX_ROW_CNT: usize = 8192;
// TODO: replace this with `GREPTIME_TIMESTAMP` before v0.9
pub const AUTO_CREATED_PLACEHOLDER_TS_COL: &str = "__ts_placeholder";

View File

@@ -16,12 +16,12 @@
use std::any::Any;
use common_error::define_into_tonic_status;
use common_error::ext::BoxedError;
use common_macro::stack_trace_debug;
use common_telemetry::common_error::ext::ErrorExt;
use common_telemetry::common_error::status_code::StatusCode;
use datatypes::value::Value;
use servers::define_into_tonic_status;
use snafu::{Location, Snafu};
use crate::adapter::FlowId;

View File

@@ -113,21 +113,9 @@ fn mfp_subgraph(
scheduler: &Scheduler,
send: &PortCtx<SEND, Toff>,
) {
// all updates that should be sent immediately
let mut output_now = vec![];
let run_mfp = || {
let mut all_updates = eval_mfp_core(input, mfp_plan, now, err_collector);
all_updates.retain(|(kv, ts, d)| {
if *ts > now {
true
} else {
output_now.push((kv.clone(), *ts, *d));
false
}
});
let future_updates = all_updates;
arrange.write().apply_updates(now, future_updates)?;
let all_updates = eval_mfp_core(input, mfp_plan, now, err_collector);
arrange.write().apply_updates(now, all_updates)?;
Ok(())
};
err_collector.run(run_mfp);
@@ -142,19 +130,13 @@ fn mfp_subgraph(
std::ops::Bound::Excluded(from),
std::ops::Bound::Included(now),
);
// find all updates that need to be sent from the arrangement
let output_kv = arrange.read().get_updates_in_range(range);
// the output is expected to be key -> empty val
let output = output_kv
.into_iter()
.chain(output_now) // chain the updates marked above for immediate output
.map(|((key, _v), ts, diff)| (key, ts, diff))
.collect_vec();
// send output
send.give(output);
let run_compaction = || {
arrange.write().compact_to(now)?;
Ok(())
@@ -323,42 +305,4 @@ mod test {
]);
run_and_check(&mut state, &mut df, 1..5, expected, output);
}
/// test if the mfp operator can run multiple times within the same tick
#[test]
fn test_render_mfp_multiple_times() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
let (sender, recv) = tokio::sync::broadcast::channel(1000);
let collection = ctx.render_source(recv).unwrap();
ctx.insert_global(GlobalId::User(1), collection);
let input_plan = Plan::Get {
id: expr::Id::Global(GlobalId::User(1)),
};
let typ = RelationType::new(vec![ColumnType::new_nullable(
ConcreteDataType::int64_datatype(),
)]);
// filter: col(0)>1
let mfp = MapFilterProject::new(1)
.filter(vec![ScalarExpr::Column(0).call_binary(
ScalarExpr::literal(1.into(), ConcreteDataType::int32_datatype()),
BinaryFunc::Gt,
)])
.unwrap();
let bundle = ctx
.render_mfp(Box::new(input_plan.with_types(typ)), mfp)
.unwrap();
let output = get_output_handle(&mut ctx, bundle);
drop(ctx);
sender.send((Row::new(vec![2.into()]), 0, 1)).unwrap();
state.run_available_with_schedule(&mut df);
assert_eq!(output.borrow().len(), 1);
output.borrow_mut().clear();
sender.send((Row::new(vec![3.into()]), 0, 1)).unwrap();
state.run_available_with_schedule(&mut df);
assert_eq!(output.borrow().len(), 1);
}
}
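
The mfp hunk earlier in this file splits evaluated updates into those at or before `now` (emitted immediately) and strictly future ones (written into the arrangement for a later tick). A standalone sketch of that split, using simplified stand-in types:

```rust
/// Partition `(data, timestamp, diff)` updates into "emit now" and "buffer for later".
fn split_updates<T: Clone>(
    updates: Vec<(T, i64, i64)>,
    now: i64,
) -> (Vec<(T, i64, i64)>, Vec<(T, i64, i64)>) {
    let mut output_now = Vec::new();
    let mut future_updates = updates;
    future_updates.retain(|(data, ts, diff)| {
        if *ts > now {
            true // keep: belongs to a later tick, stays in the arrangement
        } else {
            output_now.push((data.clone(), *ts, *diff));
            false // emit immediately
        }
    });
    (output_now, future_updates)
}
```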

View File

@@ -53,8 +53,7 @@ pub type KeyValDiffRow = ((Row, Row), Timestamp, Diff);
/// Broadcast channel capacity. This can matter for memory consumption, since it influences how many
/// updates can be buffered in memory across the entire dataflow
/// TODO(discord9): add config for this, so cpu&mem usage can be balanced and configured by this
pub const BROADCAST_CAP: usize = 65535;
pub const BROADCAST_CAP: usize = 8192;
/// Convert a value that is or can be converted to Datetime to internal timestamp
///

View File

@@ -15,10 +15,10 @@
use std::any::Any;
use common_datasource::file_format::Format;
use common_error::define_into_tonic_status;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use servers::define_into_tonic_status;
use snafu::{Location, Snafu};
use store_api::storage::RegionNumber;

View File

@@ -74,7 +74,7 @@ impl Default for FrontendOptions {
}
}
impl Configurable for FrontendOptions {
impl Configurable<'_> for FrontendOptions {
fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["meta_client.metasrv_addrs"])
}

View File

@@ -188,7 +188,7 @@ impl Instance {
pub fn build_servers(
&mut self,
opts: impl Into<FrontendOptions> + Configurable,
opts: impl Into<FrontendOptions> + for<'de> Configurable<'de>,
servers: ServerHandlers,
) -> Result<()> {
let opts: FrontendOptions = opts.into();
@@ -543,9 +543,7 @@ pub fn check_permission(
validate_param(&stmt.source_name, query_ctx)?;
}
Statement::DropTable(drop_stmt) => {
for table_name in drop_stmt.table_names() {
validate_param(table_name, query_ctx)?;
}
validate_param(drop_stmt.table_name(), query_ctx)?;
}
Statement::ShowTables(stmt) => {
validate_db_permission!(stmt, query_ctx);

View File

@@ -218,7 +218,7 @@ impl PromStoreProtocolHandler for Instance {
let plan = output.meta.plan.clone();
query_results.push(to_query_result(&table_name, output).await?);
if let Some(ref plan) = plan {
collect_plan_metrics(plan, &mut [&mut map]);
collect_plan_metrics(plan.clone(), &mut [&mut map]);
}
}

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use auth::UserProviderRef;
use common_base::Plugins;
use common_config::{Configurable, Mode};
use common_config::Configurable;
use common_runtime::Builder as RuntimeBuilder;
use servers::grpc::builder::GrpcServerBuilder;
use servers::grpc::greptime_handler::GreptimeRequestHandler;
@@ -39,7 +39,7 @@ use crate::service_config::GrpcOptions;
pub struct Services<T, U>
where
T: Into<FrontendOptions> + Configurable + Clone,
T: Into<FrontendOptions> + for<'de> Configurable<'de> + Clone,
U: FrontendInstance,
{
opts: T,
@@ -51,7 +51,7 @@ where
impl<T, U> Services<T, U>
where
T: Into<FrontendOptions> + Configurable + Clone,
T: Into<FrontendOptions> + for<'de> Configurable<'de> + Clone,
U: FrontendInstance,
{
pub fn new(opts: T, instance: Arc<U>, plugins: Plugins) -> Self {
@@ -140,15 +140,11 @@ where
};
let user_provider = self.plugins.get::<UserProviderRef>();
let runtime = match opts.mode {
Mode::Standalone => Some(builder.runtime().clone()),
_ => None,
};
let greptime_request_handler = GreptimeRequestHandler::new(
ServerGrpcQueryHandlerAdapter::arc(self.instance.clone()),
user_provider.clone(),
runtime,
builder.runtime().clone(),
);
let grpc_server = builder

View File

@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DatanodeOptions {
pub client: DatanodeClientOptions,
client: DatanodeClientOptions,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]

View File

@@ -13,13 +13,10 @@
// limitations under the License.
use std::path::Path;
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_wal::config::kafka::DatanodeKafkaConfig;
use common_wal::config::raft_engine::RaftEngineConfig;
use crate::kafka::log_store::KafkaLogStore;
use crate::raft_engine::log_store::RaftEngineLogStore;
/// Create a write log for the provided path, used for tests.
@@ -31,14 +28,3 @@ pub async fn create_tmp_local_file_log_store<P: AsRef<Path>>(path: P) -> RaftEng
};
RaftEngineLogStore::try_new(path, cfg).await.unwrap()
}
/// Create a [KafkaLogStore].
pub async fn create_kafka_log_store(broker_endpoints: Vec<String>) -> KafkaLogStore {
KafkaLogStore::try_new(&DatanodeKafkaConfig {
broker_endpoints,
linger: Duration::from_millis(1),
..Default::default()
})
.await
.unwrap()
}

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::define_into_tonic_status;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
@@ -20,6 +19,7 @@ use common_meta::peer::Peer;
use common_meta::DatanodeId;
use common_runtime::JoinError;
use rand::distributions::WeightedError;
use servers::define_into_tonic_status;
use snafu::{Location, Snafu};
use store_api::storage::RegionId;
use table::metadata::TableId;

View File

@@ -148,7 +148,7 @@ impl Default for MetasrvOptions {
}
}
impl Configurable for MetasrvOptions {
impl Configurable<'_> for MetasrvOptions {
fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}

View File

@@ -145,7 +145,19 @@ impl RegionEngine for MetricEngine {
.alter_region(region_id, alter, &mut extension_return_value)
.await
}
RegionRequest::Flush(_) | RegionRequest::Compact(_) => {
RegionRequest::Flush(_) => {
if self.inner.is_physical_region(region_id) {
self.inner
.mito
.handle_request(region_id, request)
.await
.context(error::MitoFlushOperationSnafu)
.map(|response| response.affected_rows)
} else {
UnsupportedRegionRequestSnafu { request }.fail()
}
}
RegionRequest::Compact(_) => {
if self.inner.is_physical_region(region_id) {
self.inner
.mito

View File

@@ -38,7 +38,9 @@ use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::RegionId;
use crate::engine::options::set_data_region_options;
use crate::engine::options::{
set_index_options_for_data_region, set_memtable_options_for_data_region,
};
use crate::engine::MetricEngineInner;
use crate::error::{
AddingFieldColumnSnafu, ColumnNotFoundSnafu, ColumnTypeMismatchSnafu,
@@ -169,9 +171,10 @@ impl MetricEngineInner {
// check if the logical region already exist
if self
.metadata_region
.is_logical_region_exists(metadata_region_id, logical_region_id)
.await?
.state
.read()
.unwrap()
.is_logical_region_exists(logical_region_id)
{
info!("Create a existing logical region {logical_region_id}. Skipped");
return Ok(data_region_id);
@@ -476,8 +479,11 @@ impl MetricEngineInner {
data_region_request.column_metadatas.push(tsid_col);
data_region_request.primary_key = primary_key;
// set data region options
set_data_region_options(&mut data_region_request.options);
// set index options
set_index_options_for_data_region(&mut data_region_request.options);
// Set memtable options.
set_memtable_options_for_data_region(&mut data_region_request.options);
data_region_request
}

View File

@@ -26,7 +26,7 @@ use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
use store_api::storage::RegionId;
use super::MetricEngineInner;
use crate::engine::options::set_data_region_options;
use crate::engine::options::set_index_options_for_data_region;
use crate::error::{OpenMitoRegionSnafu, Result};
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_REGION_COUNT};
use crate::utils;
@@ -80,7 +80,7 @@ impl MetricEngineInner {
};
let mut data_region_options = request.options;
set_data_region_options(&mut data_region_options);
set_index_options_for_data_region(&mut data_region_options);
let open_data_region_request = RegionOpenRequest {
region_dir: data_region_dir,
options: data_region_options,

View File

@@ -30,17 +30,20 @@ const IGNORE_COLUMN_IDS_FOR_DATA_REGION: [ColumnId; 1] = [ReservedColumnId::tsid
/// value and appropriately increasing the size of the index, it results in an improved indexing effect.
const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256;
/// Sets data region specific options.
pub fn set_data_region_options(options: &mut HashMap<String, String>) {
// Set the index options for the data region.
/// Set the index options for the data region.
pub fn set_index_options_for_data_region(options: &mut HashMap<String, String>) {
options.insert(
"index.inverted_index.ignore_column_ids".to_string(),
IGNORE_COLUMN_IDS_FOR_DATA_REGION.iter().join(","),
);
options.insert(
"index.inverted_index.segment_row_count".to_string(),
SEG_ROW_COUNT_FOR_DATA_REGION.to_string(),
);
// Set memtable options for the data region.
}
/// Set memtable options for the data region.
pub fn set_memtable_options_for_data_region(options: &mut HashMap<String, String>) {
options.insert("memtable.type".to_string(), "partition_tree".to_string());
}
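
Taken together, the two helpers above leave the data region with an options map along these lines; the tsid column id is not reproduced here and is shown as a placeholder:

```rust
use std::collections::HashMap;

/// Sketch of the resulting data-region options (key names and the 256/partition_tree
/// values come from the hunk above; the ignored column id is a placeholder).
fn example_data_region_options() -> HashMap<String, String> {
    let mut options = HashMap::new();
    options.insert(
        "index.inverted_index.ignore_column_ids".to_string(),
        "<tsid column id>".to_string(),
    );
    options.insert(
        "index.inverted_index.segment_row_count".to_string(),
        "256".to_string(),
    );
    options.insert("memtable.type".to_string(), "partition_tree".to_string());
    options
}
```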

View File

@@ -17,7 +17,7 @@ use std::hash::Hash;
use api::v1::value::ValueData;
use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType};
use common_telemetry::{error, info};
use snafu::{ensure, OptionExt};
use snafu::OptionExt;
use store_api::metric_engine_consts::{
DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
};
@@ -26,8 +26,7 @@ use store_api::storage::{RegionId, TableId};
use crate::engine::MetricEngineInner;
use crate::error::{
ColumnNotFoundSnafu, ForbiddenPhysicalAlterSnafu, LogicalRegionNotFoundSnafu,
PhysicalRegionNotFoundSnafu, Result,
ColumnNotFoundSnafu, ForbiddenPhysicalAlterSnafu, LogicalRegionNotFoundSnafu, Result,
};
use crate::metrics::{FORBIDDEN_OPERATION_COUNT, MITO_OPERATION_ELAPSED};
use crate::utils::to_data_region_id;
@@ -102,10 +101,10 @@ impl MetricEngineInner {
physical_region_id: RegionId,
request: &RegionPutRequest,
) -> Result<()> {
// Check if the region exists
// check if the region exists
let data_region_id = to_data_region_id(physical_region_id);
let state = self.state.read().unwrap();
if !state.is_logical_region_exist(logical_region_id) {
if !state.is_logical_region_exists(logical_region_id) {
error!("Trying to write to an nonexistent region {logical_region_id}");
return LogicalRegionNotFoundSnafu {
region_id: logical_region_id,
@@ -113,22 +112,15 @@ impl MetricEngineInner {
.fail();
}
// Check if a physical column exists
let physical_columns =
state
.physical_columns()
.get(&data_region_id)
.context(PhysicalRegionNotFoundSnafu {
region_id: data_region_id,
})?;
// check if the columns exist
for col in &request.rows.schema {
ensure!(
physical_columns.contains(&col.column_name),
ColumnNotFoundSnafu {
if !state.is_physical_column_exist(data_region_id, &col.column_name)? {
return ColumnNotFoundSnafu {
name: col.column_name.clone(),
region_id: logical_region_id,
}
);
.fail();
}
}
Ok(())

View File

@@ -132,7 +132,24 @@ impl MetricEngineState {
Ok(())
}
pub fn is_logical_region_exist(&self, logical_region_id: RegionId) -> bool {
/// Check if a physical column exists.
pub fn is_physical_column_exist(
&self,
physical_region_id: RegionId,
column_name: &str,
) -> Result<bool> {
let data_region_id = to_data_region_id(physical_region_id);
let exist = self
.physical_columns()
.get(&data_region_id)
.context(PhysicalRegionNotFoundSnafu {
region_id: data_region_id,
})?
.contains(column_name);
Ok(exist)
}
pub fn is_logical_region_exists(&self, logical_region_id: RegionId) -> bool {
self.logical_regions().contains_key(&logical_region_id)
}
}

View File

@@ -139,17 +139,6 @@ impl MetadataRegion {
Ok(())
}
/// Check if the given logical region exists.
pub async fn is_logical_region_exists(
&self,
physical_region_id: RegionId,
logical_region_id: RegionId,
) -> Result<bool> {
let region_id = utils::to_metadata_region_id(physical_region_id);
let region_key = Self::concat_region_key(logical_region_id);
self.exists(region_id, &region_key).await
}
/// Check if the given column exists. Return the semantic type if it exists.
pub async fn column_semantic_type(
&self,
@@ -669,10 +658,6 @@ mod test {
.add_logical_region(physical_region_id, logical_region_id)
.await
.unwrap();
assert!(metadata_region
.is_logical_region_exists(physical_region_id, logical_region_id)
.await
.unwrap());
// add it again
assert!(metadata_region

View File

@@ -6,7 +6,7 @@ license.workspace = true
[features]
default = []
test = ["common-test-util", "log-store", "rstest", "rstest_reuse", "rskafka"]
test = ["common-test-util", "log-store"]
[lints]
workspace = true
@@ -37,7 +37,6 @@ datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes.workspace = true
dotenv.workspace = true
futures.workspace = true
humantime-serde.workspace = true
index.workspace = true
@@ -55,9 +54,6 @@ prost.workspace = true
puffin.workspace = true
rand.workspace = true
regex = "1.5"
rskafka = { workspace = true, optional = true }
rstest = { workspace = true, optional = true }
rstest_reuse = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
@@ -75,12 +71,8 @@ uuid.workspace = true
common-procedure-test.workspace = true
common-test-util.workspace = true
criterion = "0.4"
dotenv.workspace = true
log-store.workspace = true
object-store = { workspace = true, features = ["services-memory"] }
rskafka.workspace = true
rstest.workspace = true
rstest_reuse.workspace = true
toml.workspace = true
[[bench]]

View File

@@ -112,10 +112,6 @@ impl FileCache {
self.memory_index.insert(key, value).await;
}
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
self.memory_index.get(&key).await
}
/// Reads a file from the cache.
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
// We must use `get()` to update the estimator of the cache.
@@ -376,6 +372,7 @@ fn parse_index_key(name: &str) -> Option<IndexKey> {
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use futures::AsyncReadExt;
use object_store::services::Fs;
use super::*;
@@ -454,9 +451,10 @@ mod tests {
.await;
// Read file content.
let reader = cache.reader(key).await.unwrap();
let buf = reader.read(..).await.unwrap().to_vec();
assert_eq!("hello", String::from_utf8(buf).unwrap());
let mut reader = cache.reader(key).await.unwrap();
let mut buf = String::new();
reader.read_to_string(&mut buf).await.unwrap();
assert_eq!("hello", buf);
// Get weighted size.
cache.memory_index.run_pending_tasks().await;
@@ -551,9 +549,10 @@ mod tests {
for (i, file_id) in file_ids.iter().enumerate() {
let key = IndexKey::new(region_id, *file_id, file_type);
let reader = cache.reader(key).await.unwrap();
let buf = reader.read(..).await.unwrap().to_vec();
assert_eq!(i.to_string(), String::from_utf8(buf).unwrap());
let mut reader = cache.reader(key).await.unwrap();
let mut buf = String::new();
reader.read_to_string(&mut buf).await.unwrap();
assert_eq!(i.to_string(), buf);
}
}

View File

@@ -19,7 +19,6 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_telemetry::{debug, info};
use futures::AsyncWriteExt;
use object_store::manager::ObjectStoreManagerRef;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -176,27 +175,19 @@ impl WriteCache {
}])
.start_timer();
let cached_value = self
.file_cache
.local_store()
.stat(&cache_path)
.await
.context(error::OpenDalSnafu)?;
let reader = self
.file_cache
.local_store()
.reader(&cache_path)
.await
.context(error::OpenDalSnafu)?
.into_futures_async_read(0..cached_value.content_length());
.context(error::OpenDalSnafu)?;
let mut writer = remote_store
.writer_with(upload_path)
.chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.buffer(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.concurrent(DEFAULT_WRITE_CONCURRENCY)
.await
.context(error::OpenDalSnafu)?
.into_futures_async_write();
.context(error::OpenDalSnafu)?;
let bytes_written =
futures::io::copy(reader, &mut writer)
@@ -208,11 +199,7 @@ impl WriteCache {
})?;
// Must close to upload all data.
writer.close().await.context(error::UploadSnafu {
region_id,
file_id,
file_type,
})?;
writer.close().await.context(error::OpenDalSnafu)?;
UPLOAD_BYTES_TOTAL.inc_by(bytes_written);
@@ -328,7 +315,7 @@ mod tests {
.read(&write_cache.file_cache.cache_file_path(key))
.await
.unwrap();
assert_eq!(remote_data.to_vec(), cache_data.to_vec());
assert_eq!(remote_data, cache_data);
// Check write cache contains the index key
let index_key = IndexKey::new(region_id, file_id, FileType::Puffin);
@@ -339,7 +326,7 @@ mod tests {
.read(&write_cache.file_cache.cache_file_path(index_key))
.await
.unwrap();
assert_eq!(remote_index_data.to_vec(), cache_index_data.to_vec());
assert_eq!(remote_index_data, cache_index_data);
}
#[tokio::test]

View File

@@ -21,8 +21,6 @@ mod append_mode_test;
#[cfg(test)]
mod basic_test;
#[cfg(test)]
mod batch_open_test;
#[cfg(test)]
mod catchup_test;
#[cfg(test)]
mod close_test;
@@ -52,7 +50,6 @@ mod set_readonly_test;
mod truncate_test;
use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
@@ -61,33 +58,22 @@ use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::tracing;
use common_wal::options::{WalOptions, WAL_OPTIONS_KEY};
use futures::future::{join_all, try_join_all};
use object_store::manager::ObjectStoreManagerRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::logstore::provider::Provider;
use store_api::logstore::LogStore;
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::{
BatchResponses, RegionEngine, RegionRole, RegionScannerRef, SetReadonlyResponse,
};
use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
use store_api::region_engine::{RegionEngine, RegionRole, RegionScannerRef, SetReadonlyResponse};
use store_api::region_request::{AffectedRows, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
use tokio::sync::{oneshot, Semaphore};
use tokio::sync::oneshot;
use crate::config::MitoConfig;
use crate::error::{
InvalidRequestSnafu, JoinSnafu, RecvSnafu, RegionNotFoundSnafu, Result, SerdeJsonSnafu,
};
use crate::error::{InvalidRequestSnafu, RecvSnafu, RegionNotFoundSnafu, Result};
use crate::manifest::action::RegionEdit;
use crate::metrics::HANDLE_REQUEST_ELAPSED;
use crate::read::scan_region::{ScanParallism, ScanRegion, Scanner};
use crate::region::RegionUsage;
use crate::request::WorkerRequest;
use crate::wal::entry_distributor::{
build_wal_entry_distributor_and_receivers, DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
};
use crate::wal::raw_entry_reader::{LogStoreRawEntryReader, RawEntryReader};
use crate::worker::WorkerGroup;
pub const MITO_ENGINE_NAME: &str = "mito";
@@ -225,41 +211,6 @@ struct EngineInner {
workers: WorkerGroup,
/// Config of the engine.
config: Arc<MitoConfig>,
/// The Wal raw entry reader.
wal_raw_entry_reader: Arc<dyn RawEntryReader>,
}
type TopicGroupedRegionOpenRequests = HashMap<String, Vec<(RegionId, RegionOpenRequest)>>;
/// Returns the requests ([TopicGroupedRegionOpenRequests]) grouped by topic, plus the remaining requests.
fn prepare_batch_open_requests(
requests: Vec<(RegionId, RegionOpenRequest)>,
) -> Result<(
TopicGroupedRegionOpenRequests,
Vec<(RegionId, RegionOpenRequest)>,
)> {
let mut topic_to_regions: HashMap<String, Vec<(RegionId, RegionOpenRequest)>> = HashMap::new();
let mut remaining_regions: Vec<(RegionId, RegionOpenRequest)> = Vec::new();
for (region_id, request) in requests {
let options = if let Some(options) = request.options.get(WAL_OPTIONS_KEY) {
serde_json::from_str(options).context(SerdeJsonSnafu)?
} else {
WalOptions::RaftEngine
};
match options {
WalOptions::Kafka(options) => {
topic_to_regions
.entry(options.topic)
.or_default()
.push((region_id, request));
}
WalOptions::RaftEngine => {
remaining_regions.push((region_id, request));
}
}
}
Ok((topic_to_regions, remaining_regions))
}
impl EngineInner {
@@ -270,11 +221,9 @@ impl EngineInner {
object_store_manager: ObjectStoreManagerRef,
) -> Result<EngineInner> {
let config = Arc::new(config);
let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));
Ok(EngineInner {
workers: WorkerGroup::start(config.clone(), log_store, object_store_manager).await?,
config,
wal_raw_entry_reader,
})
}
@@ -295,93 +244,6 @@ impl EngineInner {
Ok(region.metadata())
}
async fn open_topic_regions(
&self,
topic: String,
region_requests: Vec<(RegionId, RegionOpenRequest)>,
) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
let region_ids = region_requests
.iter()
.map(|(region_id, _)| *region_id)
.collect::<Vec<_>>();
let provider = Provider::kafka_provider(topic);
let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
provider,
self.wal_raw_entry_reader.clone(),
&region_ids,
DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
);
let mut responses = Vec::with_capacity(region_requests.len());
for ((region_id, request), entry_receiver) in
region_requests.into_iter().zip(entry_receivers)
{
let (request, receiver) =
WorkerRequest::new_open_region_request(region_id, request, Some(entry_receiver));
self.workers.submit_to_worker(region_id, request).await?;
responses.push(async move { receiver.await.context(RecvSnafu)? });
}
// Waits for entries distribution.
let distribution =
common_runtime::spawn_read(async move { distributor.distribute().await });
// Waits for worker returns.
let responses = join_all(responses).await;
distribution.await.context(JoinSnafu)??;
Ok(region_ids.into_iter().zip(responses).collect())
}
async fn handle_batch_open_requests(
&self,
parallelism: usize,
requests: Vec<(RegionId, RegionOpenRequest)>,
) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
let semaphore = Arc::new(Semaphore::new(parallelism));
let (topic_to_region_requests, remaining_region_requests) =
prepare_batch_open_requests(requests)?;
let mut responses =
Vec::with_capacity(topic_to_region_requests.len() + remaining_region_requests.len());
if !topic_to_region_requests.is_empty() {
let mut tasks = Vec::with_capacity(topic_to_region_requests.len());
for (topic, region_requests) in topic_to_region_requests {
let semaphore_moved = semaphore.clone();
tasks.push(async move {
// Safety: semaphore must exist
let _permit = semaphore_moved.acquire().await.unwrap();
self.open_topic_regions(topic, region_requests).await
})
}
let r = try_join_all(tasks).await?;
responses.extend(r.into_iter().flatten());
}
if !remaining_region_requests.is_empty() {
let mut tasks = Vec::with_capacity(remaining_region_requests.len());
let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
for (region_id, request) in remaining_region_requests {
let semaphore_moved = semaphore.clone();
region_ids.push(region_id);
tasks.push(async move {
// Safety: semaphore must exist
let _permit = semaphore_moved.acquire().await.unwrap();
let (request, receiver) =
WorkerRequest::new_open_region_request(region_id, request, None);
self.workers.submit_to_worker(region_id, request).await?;
receiver.await.context(RecvSnafu)?
})
}
let results = join_all(tasks).await;
responses.extend(region_ids.into_iter().zip(results));
}
Ok(responses)
}
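
The batch-open path above bounds concurrency with a semaphore: each topic group (and each remaining region) acquires a permit before being submitted, so at most `parallelism` units of work are in flight at once. A standalone sketch of that pattern:

```rust
use std::sync::Arc;

use tokio::sync::Semaphore;

/// Process groups with at most `parallelism` of them running concurrently.
async fn process_groups_bounded(groups: Vec<Vec<u64>>, parallelism: usize) -> Vec<u64> {
    let semaphore = Arc::new(Semaphore::new(parallelism));
    let tasks = groups.into_iter().map(|group| {
        let semaphore = semaphore.clone();
        async move {
            // The semaphore is never closed here, so acquire() cannot fail.
            let _permit = semaphore.acquire().await.unwrap();
            group.iter().sum::<u64>()
        }
    });
    futures::future::join_all(tasks).await
}
```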
/// Handles [RegionRequest] and return its executed result.
async fn handle_request(
&self,
@@ -461,30 +323,6 @@ impl RegionEngine for MitoEngine {
MITO_ENGINE_NAME
}
#[tracing::instrument(skip_all)]
async fn handle_batch_open_requests(
&self,
parallelism: usize,
requests: Vec<(RegionId, RegionOpenRequest)>,
) -> Result<BatchResponses, BoxedError> {
// TODO(weny): add metrics.
self.inner
.handle_batch_open_requests(parallelism, requests)
.await
.map(|responses| {
responses
.into_iter()
.map(|(region_id, response)| {
(
region_id,
response.map(RegionResponse::new).map_err(BoxedError::new),
)
})
.collect::<Vec<_>>()
})
.map_err(BoxedError::new)
}
#[tracing::instrument(skip_all)]
async fn handle_request(
&self,
@@ -583,7 +421,6 @@ impl MitoEngine {
config.sanitize(data_home)?;
let config = Arc::new(config);
let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));
Ok(MitoEngine {
inner: Arc::new(EngineInner {
workers: WorkerGroup::start_for_test(
@@ -596,7 +433,6 @@ impl MitoEngine {
)
.await?,
config,
wal_raw_entry_reader,
}),
})
}

View File

@@ -22,11 +22,8 @@ use common_base::readable_size::ReadableSize;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use common_wal::options::WAL_OPTIONS_KEY;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use rstest::rstest;
use rstest_reuse::{self, apply};
use store_api::metadata::ColumnMetadata;
use store_api::region_request::{RegionCreateRequest, RegionOpenRequest, RegionPutRequest};
use store_api::storage::RegionId;
@@ -35,9 +32,7 @@ use super::*;
use crate::region::version::VersionControlData;
use crate::test_util::{
build_delete_rows_for_key, build_rows, build_rows_for_key, delete_rows, delete_rows_schema,
flush_region, kafka_log_store_factory, multiple_log_store_factories,
prepare_test_for_kafka_log_store, put_rows, raft_engine_log_store_factory, reopen_region,
rows_schema, CreateRequestBuilder, LogStoreFactory, TestEnv,
flush_region, put_rows, reopen_region, rows_schema, CreateRequestBuilder, TestEnv,
};
#[tokio::test]
@@ -88,24 +83,14 @@ async fn test_write_to_region() {
put_rows(&engine, region_id, rows).await;
}
#[apply(multiple_log_store_factories)]
async fn test_region_replay(factory: Option<LogStoreFactory>) {
use common_wal::options::{KafkaWalOptions, WalOptions};
#[tokio::test]
async fn test_region_replay() {
common_telemetry::init_default_ut_logging();
let Some(factory) = factory else {
return;
};
let mut env = TestEnv::with_prefix("region-replay").with_log_store_factory(factory.clone());
let mut env = TestEnv::with_prefix("region-replay");
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let topic = prepare_test_for_kafka_log_store(&factory).await;
let request = CreateRequestBuilder::new()
.kafka_topic(topic.clone())
.build();
let request = CreateRequestBuilder::new().build();
let region_dir = request.region_dir.clone();
let column_schemas = rows_schema(&request);
@@ -128,24 +113,13 @@ async fn test_region_replay(factory: Option<LogStoreFactory>) {
let engine = env.reopen_engine(engine, MitoConfig::default()).await;
let mut options = HashMap::new();
if let Some(topic) = &topic {
options.insert(
WAL_OPTIONS_KEY.to_string(),
serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
topic: topic.to_string(),
}))
.unwrap(),
);
};
let result = engine
.handle_request(
region_id,
RegionRequest::Open(RegionOpenRequest {
engine: String::new(),
region_dir,
options,
options: HashMap::default(),
skip_wal_replay: false,
}),
)

View File

@@ -1,203 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use api::v1::Rows;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
use rstest::rstest;
use rstest_reuse::apply;
use store_api::region_engine::RegionEngine;
use store_api::region_request::{RegionOpenRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
use super::MitoEngine;
use crate::config::MitoConfig;
use crate::test_util::{
build_rows, kafka_log_store_factory, multiple_log_store_factories,
prepare_test_for_kafka_log_store, put_rows, raft_engine_log_store_factory, rows_schema,
CreateRequestBuilder, LogStoreFactory, TestEnv,
};
#[apply(multiple_log_store_factories)]
async fn test_batch_open(factory: Option<LogStoreFactory>) {
common_telemetry::init_default_ut_logging();
let Some(factory) = factory else {
return;
};
let mut env =
TestEnv::with_prefix("open-batch-regions").with_log_store_factory(factory.clone());
let engine = env.create_engine(MitoConfig::default()).await;
let topic = prepare_test_for_kafka_log_store(&factory).await;
let num_regions = 3u32;
let region_dir = |region_id| format!("test/{region_id}");
let mut region_schema = HashMap::new();
for id in 1..=num_regions {
let engine = engine.clone();
let topic = topic.clone();
let region_id = RegionId::new(1, id);
let request = CreateRequestBuilder::new()
.region_dir(&region_dir(region_id))
.kafka_topic(topic.clone())
.build();
let column_schemas = rows_schema(&request);
region_schema.insert(region_id, column_schemas);
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
}
for i in 0..10 {
for region_number in 1..=num_regions {
let region_id = RegionId::new(1, region_number);
let rows = Rows {
schema: region_schema[&region_id].clone(),
rows: build_rows(
(region_number as usize) * 120 + i as usize,
(region_number as usize) * 120 + i as usize + 1,
),
};
put_rows(&engine, region_id, rows).await;
}
}
let assert_result = |engine: MitoEngine| async move {
for i in 1..num_regions {
let region_id = RegionId::new(1, i);
let request = ScanRequest::default();
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let mut expected = String::new();
expected.push_str(
"+-------+---------+---------------------+\n| tag_0 | field_0 | ts |\n+-------+---------+---------------------+\n",
);
for row in 0..10 {
expected.push_str(&format!(
"| {} | {}.0 | 1970-01-01T00:{:02}:{:02} |\n",
i * 120 + row,
i * 120 + row,
2 * i,
row
));
}
expected.push_str("+-------+---------+---------------------+");
assert_eq!(expected, batches.pretty_print().unwrap());
}
};
assert_result(engine.clone()).await;
let mut options = HashMap::new();
if let Some(topic) = &topic {
options.insert(
WAL_OPTIONS_KEY.to_string(),
serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
topic: topic.to_string(),
}))
.unwrap(),
);
};
let mut requests = (1..=num_regions)
.map(|id| {
let region_id = RegionId::new(1, id);
(
region_id,
RegionOpenRequest {
engine: String::new(),
region_dir: region_dir(region_id),
options: options.clone(),
skip_wal_replay: false,
},
)
})
.collect::<Vec<_>>();
requests.push((
RegionId::new(1, 4),
RegionOpenRequest {
engine: String::new(),
region_dir: "no-exists".to_string(),
options: options.clone(),
skip_wal_replay: false,
},
));
// Reopen engine.
let engine = env.reopen_engine(engine, MitoConfig::default()).await;
let mut results = engine
.handle_batch_open_requests(4, requests)
.await
.unwrap();
let (_, result) = results.pop().unwrap();
assert_eq!(
result.unwrap_err().status_code(),
StatusCode::RegionNotFound
);
for (_, result) in results {
assert!(result.is_ok());
}
assert_result(engine.clone()).await;
}
#[apply(multiple_log_store_factories)]
async fn test_batch_open_err(factory: Option<LogStoreFactory>) {
common_telemetry::init_default_ut_logging();
let Some(factory) = factory else {
return;
};
let mut env =
TestEnv::with_prefix("open-batch-regions-err").with_log_store_factory(factory.clone());
let engine = env.create_engine(MitoConfig::default()).await;
let topic = prepare_test_for_kafka_log_store(&factory).await;
let mut options = HashMap::new();
if let Some(topic) = &topic {
options.insert(
WAL_OPTIONS_KEY.to_string(),
serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
topic: topic.to_string(),
}))
.unwrap(),
);
};
let num_regions = 3u32;
let region_dir = "test".to_string();
let requests = (1..=num_regions)
.map(|id| {
(
RegionId::new(1, id),
RegionOpenRequest {
engine: String::new(),
region_dir: region_dir.to_string(),
options: options.clone(),
skip_wal_replay: false,
},
)
})
.collect::<Vec<_>>();
let results = engine
.handle_batch_open_requests(3, requests)
.await
.unwrap();
for (_, result) in results {
assert_eq!(
result.unwrap_err().status_code(),
StatusCode::RegionNotFound
);
}
}

View File

@@ -21,9 +21,6 @@ use std::time::Duration;
use api::v1::Rows;
use common_recordbatch::RecordBatches;
use common_time::util::current_time_millis;
use common_wal::options::WAL_OPTIONS_KEY;
use rstest::rstest;
use rstest_reuse::{self, apply};
use store_api::region_engine::RegionEngine;
use store_api::region_request::RegionRequest;
use store_api::storage::{RegionId, ScanRequest};
@@ -31,10 +28,8 @@ use store_api::storage::{RegionId, ScanRequest};
use crate::config::MitoConfig;
use crate::engine::listener::{FlushListener, StallListener};
use crate::test_util::{
build_rows, build_rows_for_key, flush_region, kafka_log_store_factory,
multiple_log_store_factories, prepare_test_for_kafka_log_store, put_rows,
raft_engine_log_store_factory, reopen_region, rows_schema, CreateRequestBuilder,
LogStoreFactory, MockWriteBufferManager, TestEnv,
build_rows, build_rows_for_key, flush_region, put_rows, reopen_region, rows_schema,
CreateRequestBuilder, MockWriteBufferManager, TestEnv,
};
use crate::time_provider::TimeProvider;
use crate::worker::MAX_INITIAL_CHECK_DELAY_SECS;
@@ -236,25 +231,13 @@ async fn test_flush_empty() {
assert_eq!(expected, batches.pretty_print().unwrap());
}
#[apply(multiple_log_store_factories)]
async fn test_flush_reopen_region(factory: Option<LogStoreFactory>) {
use std::collections::HashMap;
use common_wal::options::{KafkaWalOptions, WalOptions};
common_telemetry::init_default_ut_logging();
let Some(factory) = factory else {
return;
};
let mut env = TestEnv::new().with_log_store_factory(factory.clone());
#[tokio::test]
async fn test_flush_reopen_region() {
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
let topic = prepare_test_for_kafka_log_store(&factory).await;
let request = CreateRequestBuilder::new()
.kafka_topic(topic.clone())
.build();
let request = CreateRequestBuilder::new().build();
let region_dir = request.region_dir.clone();
let column_schemas = rows_schema(&request);
@@ -280,17 +263,7 @@ async fn test_flush_reopen_region(factory: Option<LogStoreFactory>) {
};
check_region();
let mut options = HashMap::new();
if let Some(topic) = &topic {
options.insert(
WAL_OPTIONS_KEY.to_string(),
serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
topic: topic.to_string(),
}))
.unwrap(),
);
};
reopen_region(&engine, region_id, region_dir, true, options).await;
reopen_region(&engine, region_id, region_dir, true, Default::default()).await;
check_region();
// Puts again.

View File

@@ -497,7 +497,7 @@ impl ManifestObjectStore {
}
};
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data.to_vec())?;
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data)?;
debug!(
"Load checkpoint in path: {}, metadata: {:?}",
@@ -509,11 +509,7 @@ impl ManifestObjectStore {
#[cfg(test)]
pub async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
self.object_store
.read(path)
.await
.context(OpenDalSnafu)
.map(|v| v.to_vec())
self.object_store.read(path).await.context(OpenDalSnafu)
}
#[cfg(test)]
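The hunk above is the heart of the opendal revert: on 0.46, Operator::read returns a Buffer that has to be converted, while on 0.45 it already yields Vec<u8>. A hedged sketch of the two call shapes, using the Operator/Result re-exports of this crate:
async fn read_all(
    store: &object_store::ObjectStore,
    path: &str,
) -> object_store::Result<Vec<u8>> {
    // opendal 0.45 (after this revert): read() already returns Vec<u8>.
    store.read(path).await
    // opendal 0.46 (before the revert) needed an explicit conversion:
    // store.read(path).await.map(|buffer| buffer.to_vec())
}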

View File

@@ -20,7 +20,6 @@ use std::sync::Arc;
use common_telemetry::{debug, error, info, warn};
use common_wal::options::WalOptions;
use futures::future::BoxFuture;
use futures::StreamExt;
use object_store::manager::ObjectStoreManagerRef;
use object_store::util::{join_dir, normalize_dir};
@@ -49,7 +48,6 @@ use crate::schedule::scheduler::SchedulerRef;
use crate::sst::file_purger::LocalFilePurger;
use crate::sst::index::intermediate::IntermediateManager;
use crate::time_provider::{StdTimeProvider, TimeProviderRef};
use crate::wal::entry_reader::WalEntryReader;
use crate::wal::{EntryId, Wal};
/// Builder to create a new [MitoRegion] or open an existing one.
@@ -66,7 +64,6 @@ pub(crate) struct RegionOpener {
intermediate_manager: IntermediateManager,
time_provider: Option<TimeProviderRef>,
stats: ManifestStats,
wal_entry_reader: Option<Box<dyn WalEntryReader>>,
}
impl RegionOpener {
@@ -92,7 +89,6 @@ impl RegionOpener {
intermediate_manager,
time_provider: None,
stats: Default::default(),
wal_entry_reader: None,
}
}
@@ -108,16 +104,6 @@ impl RegionOpener {
Ok(self)
}
/// If a [WalEntryReader] is set, the [RegionOpener] will use it instead of
/// constructing a new one from scratch.
pub(crate) fn wal_entry_reader(
mut self,
wal_entry_reader: Option<Box<dyn WalEntryReader>>,
) -> Self {
self.wal_entry_reader = wal_entry_reader;
self
}
/// Sets options for the region.
pub(crate) fn options(mut self, options: RegionOptions) -> Self {
self.options = Some(options);
@@ -179,8 +165,8 @@ impl RegionOpener {
}
}
let options = self.options.take().unwrap();
let object_store = self.object_store(&options.storage)?.clone();
let provider = self.provider(&options.wal_options);
let object_store = self.object_store(&options.storage)?.clone();
// Create a manifest manager for this region and writes regions to the manifest file.
let region_manifest_options = self.manifest_options(config, &options)?;
@@ -245,7 +231,7 @@ impl RegionOpener {
///
/// Returns an error if the region doesn't exist.
pub(crate) async fn open<S: LogStore>(
mut self,
self,
config: &MitoConfig,
wal: &Wal<S>,
) -> Result<MitoRegion> {
@@ -281,7 +267,7 @@ impl RegionOpener {
/// Tries to open the region and returns `None` if the region directory is empty.
async fn maybe_open<S: LogStore>(
&mut self,
&self,
config: &MitoConfig,
wal: &Wal<S>,
) -> Result<Option<MitoRegion>> {
@@ -302,11 +288,6 @@ impl RegionOpener {
let region_id = self.region_id;
let provider = self.provider(&region_options.wal_options);
let wal_entry_reader = self
.wal_entry_reader
.take()
.unwrap_or_else(|| wal.wal_entry_reader(&provider, region_id));
let on_region_opened = wal.on_region_opened();
let object_store = self.object_store(&region_options.storage)?.clone();
debug!("Open region {} with options: {:?}", region_id, self.options);
@@ -350,13 +331,12 @@ impl RegionOpener {
region_id
);
replay_memtable(
wal,
&provider,
wal_entry_reader,
region_id,
flushed_entry_id,
&version_control,
config.allow_stale_entries,
on_region_opened,
)
.await?;
} else {
@@ -377,7 +357,7 @@ impl RegionOpener {
RegionState::ReadOnly,
)),
file_purger,
provider: provider.clone(),
provider,
last_flush_millis: AtomicI64::new(time_provider.current_time_millis()),
time_provider,
memtable_builder,
@@ -468,25 +448,21 @@ pub(crate) fn check_recovered_region(
}
/// Replays the mutations from the WAL and inserts them into the memtable of the given region.
pub(crate) async fn replay_memtable<F>(
pub(crate) async fn replay_memtable<S: LogStore>(
wal: &Wal<S>,
provider: &Provider,
mut wal_entry_reader: Box<dyn WalEntryReader>,
region_id: RegionId,
flushed_entry_id: EntryId,
version_control: &VersionControlRef,
allow_stale_entries: bool,
on_region_opened: F,
) -> Result<EntryId>
where
F: FnOnce(RegionId, EntryId, &Provider) -> BoxFuture<Result<()>> + Send,
{
) -> Result<EntryId> {
let mut rows_replayed = 0;
// Last entry id should start from flushed entry id since there might be no
// data in the WAL.
let mut last_entry_id = flushed_entry_id;
let replay_from_entry_id = flushed_entry_id + 1;
let mut wal_stream = wal_entry_reader.read(provider, replay_from_entry_id)?;
let mut wal_stream = wal.scan(region_id, replay_from_entry_id, provider)?;
while let Some(res) = wal_stream.next().await {
let (entry_id, entry) = res?;
if entry_id <= flushed_entry_id {
@@ -520,7 +496,7 @@ where
// TODO(weny): We need to update `flushed_entry_id` in the region manifest
// to avoid reading potentially incomplete entries in the future.
(on_region_opened)(region_id, flushed_entry_id, provider).await?;
wal.obsolete(region_id, flushed_entry_id, provider).await?;
info!(
"Replay WAL for region: {}, rows recovered: {}, last entry id: {}",

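The replay loop above starts one entry past the flushed entry id and silently skips any stale entry at or below it that the log store has not purged yet. A tiny illustration of that window check; the helper is not part of the diff:
// Entries already covered by the flushed id are skipped instead of being
// re-applied to the memtable.
fn should_replay(entry_id: u64, flushed_entry_id: u64) -> bool {
    entry_id > flushed_entry_id
}
#[test]
fn replay_window() {
    assert!(!should_replay(10, 10)); // already flushed, ignored
    assert!(should_replay(11, 10)); // first entry to replay
}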
View File

@@ -44,7 +44,6 @@ use crate::error::{
};
use crate::manifest::action::RegionEdit;
use crate::metrics::COMPACTION_ELAPSED_TOTAL;
use crate::wal::entry_distributor::WalEntryReceiver;
use crate::wal::EntryId;
/// Request to write a region.
@@ -498,22 +497,6 @@ pub(crate) enum WorkerRequest {
}
impl WorkerRequest {
pub(crate) fn new_open_region_request(
region_id: RegionId,
request: RegionOpenRequest,
entry_receiver: Option<WalEntryReceiver>,
) -> (WorkerRequest, Receiver<Result<AffectedRows>>) {
let (sender, receiver) = oneshot::channel();
let worker_request = WorkerRequest::Ddl(SenderDdlRequest {
region_id,
sender: sender.into(),
request: DdlRequest::Open((request, entry_receiver)),
});
(worker_request, receiver)
}
/// Converts request from a [RegionRequest].
pub(crate) fn try_from_region_request(
region_id: RegionId,
@@ -548,7 +531,7 @@ impl WorkerRequest {
RegionRequest::Open(v) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
sender: sender.into(),
request: DdlRequest::Open((v, None)),
request: DdlRequest::Open(v),
}),
RegionRequest::Close(v) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
@@ -602,7 +585,7 @@ impl WorkerRequest {
pub(crate) enum DdlRequest {
Create(RegionCreateRequest),
Drop(RegionDropRequest),
Open((RegionOpenRequest, Option<WalEntryReceiver>)),
Open(RegionOpenRequest),
Close(RegionCloseRequest),
Alter(RegionAlterRequest),
Flush(RegionFlushRequest),

View File

@@ -121,17 +121,9 @@ impl SstIndexApplier {
return Ok(None);
};
let Some(indexed_value) = file_cache
.get(IndexKey::new(self.region_id, file_id, FileType::Puffin))
.await
else {
return Ok(None);
};
Ok(file_cache
.reader(IndexKey::new(self.region_id, file_id, FileType::Puffin))
.await
.map(|v| v.into_futures_async_read(0..indexed_value.file_size as u64))
.map(PuffinFileReader::new))
}
@@ -198,13 +190,7 @@ mod tests {
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let mut puffin_writer = PuffinFileWriter::new(
object_store
.writer(&path)
.await
.unwrap()
.into_futures_async_write(),
);
let mut puffin_writer = PuffinFileWriter::new(object_store.writer(&path).await.unwrap());
puffin_writer
.add_blob(Blob {
blob_type: INDEX_BLOB_TYPE.to_string(),
@@ -250,13 +236,7 @@ mod tests {
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let mut puffin_writer = PuffinFileWriter::new(
object_store
.writer(&path)
.await
.unwrap()
.into_futures_async_write(),
);
let mut puffin_writer = PuffinFileWriter::new(object_store.writer(&path).await.unwrap());
puffin_writer
.add_blob(Blob {
blob_type: "invalid_blob_type".to_string(),

View File

@@ -26,8 +26,6 @@ use crate::error::{OpenDalSnafu, Result};
/// A wrapper around [`ObjectStore`] that adds instrumentation for monitoring
/// metrics such as bytes read, bytes written, and the number of seek operations.
///
/// TODO: Consider refactoring InstrumentedStore to use async in trait instead of AsyncRead.
#[derive(Clone)]
pub(crate) struct InstrumentedStore {
/// The underlying object store.
@@ -60,14 +58,8 @@ impl InstrumentedStore {
read_byte_count: &'a IntCounter,
read_count: &'a IntCounter,
seek_count: &'a IntCounter,
) -> Result<InstrumentedAsyncRead<'a, object_store::FuturesAsyncReader>> {
let meta = self.object_store.stat(path).await.context(OpenDalSnafu)?;
let reader = self
.object_store
.reader(path)
.await
.context(OpenDalSnafu)?
.into_futures_async_read(0..meta.content_length());
) -> Result<InstrumentedAsyncRead<'a, object_store::Reader>> {
let reader = self.object_store.reader(path).await.context(OpenDalSnafu)?;
Ok(InstrumentedAsyncRead::new(
reader,
read_byte_count,
@@ -85,21 +77,15 @@ impl InstrumentedStore {
write_byte_count: &'a IntCounter,
write_count: &'a IntCounter,
flush_count: &'a IntCounter,
) -> Result<InstrumentedAsyncWrite<'a, object_store::FuturesAsyncWriter>> {
) -> Result<InstrumentedAsyncWrite<'a, object_store::Writer>> {
let writer = match self.write_buffer_size {
Some(size) => self
.object_store
.writer_with(path)
.chunk(size)
.buffer(size)
.await
.context(OpenDalSnafu)?
.into_futures_async_write(),
None => self
.object_store
.writer(path)
.await
.context(OpenDalSnafu)?
.into_futures_async_write(),
.context(OpenDalSnafu)?,
None => self.object_store.writer(path).await.context(OpenDalSnafu)?,
};
Ok(InstrumentedAsyncWrite::new(
writer,

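The reader/writer changes in this hunk follow the same opendal API difference: a 0.46 Reader/Writer must be adapted via into_futures_async_read/into_futures_async_write before it behaves like AsyncRead/AsyncWrite, whereas the 0.45 types implement those traits directly. A minimal sketch of the 0.45-style construction this revert restores, with the 0.46 form kept as a comment:
use object_store::ObjectStore;

async fn open_reader(
    store: &ObjectStore,
    path: &str,
) -> object_store::Result<object_store::Reader> {
    // opendal 0.45: the Reader already implements AsyncRead + AsyncSeek.
    store.reader(path).await
    // opendal 0.46 equivalent (requires the byte range up front):
    // let meta = store.stat(path).await?;
    // Ok(store
    //     .reader(path)
    //     .await?
    //     .into_futures_async_read(0..meta.content_length()))
}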
View File

@@ -121,7 +121,7 @@ async fn fetch_ranges_seq(
.read_with(&file_path)
.range(range.start..range.end)
.call()?;
Ok::<_, object_store::Error>(data.to_bytes())
Ok::<_, object_store::Error>(Bytes::from(data))
})
.collect::<object_store::Result<Vec<_>>>()
};
@@ -141,7 +141,7 @@ async fn fetch_ranges_concurrent(
let future_read = object_store.read_with(file_path);
handles.push(async move {
let data = future_read.range(range.start..range.end).await?;
Ok::<_, object_store::Error>(data.to_bytes())
Ok::<_, object_store::Error>(Bytes::from(data))
});
}
let results = futures::future::try_join_all(handles).await?;
@@ -164,7 +164,7 @@ where
}
}
// https://github.com/apache/opendal/blob/v0.46.0/core/src/raw/tokio_util.rs#L21-L24
// https://github.com/apache/incubator-opendal/blob/7144ab1ca2409dff0c324bfed062ce985997f8ce/core/src/raw/tokio_util.rs#L21-L23
/// Parse tokio error into opendal::Error.
fn new_task_join_error(e: tokio::task::JoinError) -> object_store::Error {
object_store::Error::new(ErrorKind::Unexpected, "tokio task join failed").set_source(e)

View File

@@ -85,8 +85,7 @@ impl<'a> MetadataLoader<'a> {
.read_with(path)
.range(buffer_start..file_size)
.await
.context(error::OpenDalSnafu)?
.to_vec();
.context(error::OpenDalSnafu)?;
let buffer_len = buffer.len();
let mut footer = [0; 8];
@@ -130,8 +129,7 @@ impl<'a> MetadataLoader<'a> {
.read_with(path)
.range(metadata_start..(file_size - FOOTER_SIZE as u64))
.await
.context(error::OpenDalSnafu)?
.to_vec();
.context(error::OpenDalSnafu)?;
let metadata = decode_metadata(&data).map_err(|e| {
error::InvalidParquetSnafu {

View File

@@ -33,23 +33,16 @@ use api::v1::value::ValueData;
use api::v1::{OpType, Row, Rows, SemanticType};
use common_base::readable_size::ReadableSize;
use common_datasource::compression::CompressionType;
use common_telemetry::warn;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
use datatypes::arrow::array::{TimestampMillisecondArray, UInt64Array, UInt8Array};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use log_store::test_util::log_store_util;
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
use object_store::services::Fs;
use object_store::util::join_dir;
use object_store::ObjectStore;
use rskafka::client::partition::{Compression, UnknownTopicHandling};
use rskafka::client::{Client, ClientBuilder};
use rskafka::record::Record;
use rstest_reuse::template;
use store_api::metadata::{ColumnMetadata, RegionMetadataRef};
use store_api::region_engine::RegionEngine;
use store_api::region_request::{
@@ -82,110 +75,11 @@ pub(crate) fn new_noop_file_purger() -> FilePurgerRef {
Arc::new(NoopFilePurger {})
}
pub(crate) fn raft_engine_log_store_factory() -> Option<LogStoreFactory> {
Some(LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory))
}
pub(crate) fn kafka_log_store_factory() -> Option<LogStoreFactory> {
let _ = dotenv::dotenv();
let Ok(broker_endpoints) = std::env::var("GT_KAFKA_ENDPOINTS") else {
warn!("env GT_KAFKA_ENDPOINTS not found");
return None;
};
let broker_endpoints = broker_endpoints
.split(',')
.map(|s| s.trim().to_string())
.collect::<Vec<_>>();
Some(LogStoreFactory::Kafka(KafkaLogStoreFactory {
broker_endpoints,
}))
}
#[template]
#[rstest]
#[case::with_raft_engine(raft_engine_log_store_factory())]
#[case::with_kafka(kafka_log_store_factory())]
#[tokio::test]
pub(crate) fn multiple_log_store_factories(#[case] factory: Option<LogStoreFactory>) {}
#[derive(Clone)]
pub(crate) struct RaftEngineLogStoreFactory;
impl RaftEngineLogStoreFactory {
async fn create_log_store<P: AsRef<Path>>(&self, wal_path: P) -> RaftEngineLogStore {
log_store_util::create_tmp_local_file_log_store(wal_path).await
}
}
pub(crate) async fn prepare_test_for_kafka_log_store(factory: &LogStoreFactory) -> Option<String> {
if let LogStoreFactory::Kafka(factory) = factory {
let topic = uuid::Uuid::new_v4().to_string();
let client = factory.client().await;
append_noop_record(&client, &topic).await;
Some(topic)
} else {
None
}
}
pub(crate) async fn append_noop_record(client: &Client, topic: &str) {
let partition_client = client
.partition_client(topic, 0, UnknownTopicHandling::Retry)
.await
.unwrap();
partition_client
.produce(
vec![Record {
key: None,
value: None,
timestamp: rskafka::chrono::Utc::now(),
headers: Default::default(),
}],
Compression::NoCompression,
)
.await
.unwrap();
}
#[derive(Clone)]
pub(crate) struct KafkaLogStoreFactory {
broker_endpoints: Vec<String>,
}
impl KafkaLogStoreFactory {
async fn create_log_store(&self) -> KafkaLogStore {
log_store_util::create_kafka_log_store(self.broker_endpoints.clone()).await
}
pub(crate) async fn client(&self) -> Client {
ClientBuilder::new(self.broker_endpoints.clone())
.build()
.await
.unwrap()
}
}
#[derive(Clone)]
pub(crate) enum LogStoreFactory {
RaftEngine(RaftEngineLogStoreFactory),
Kafka(KafkaLogStoreFactory),
}
#[derive(Clone)]
pub(crate) enum LogStoreImpl {
RaftEngine(Arc<RaftEngineLogStore>),
Kafka(Arc<KafkaLogStore>),
}
/// Env to test mito engine.
pub struct TestEnv {
/// Path to store data.
data_home: TempDir,
log_store: Option<LogStoreImpl>,
log_store_factory: LogStoreFactory,
logstore: Option<Arc<RaftEngineLogStore>>,
object_store_manager: Option<ObjectStoreManagerRef>,
}
@@ -200,8 +94,7 @@ impl TestEnv {
pub fn new() -> TestEnv {
TestEnv {
data_home: create_temp_dir(""),
log_store: None,
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
logstore: None,
object_store_manager: None,
}
}
@@ -210,8 +103,7 @@ impl TestEnv {
pub fn with_prefix(prefix: &str) -> TestEnv {
TestEnv {
data_home: create_temp_dir(prefix),
log_store: None,
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
logstore: None,
object_store_manager: None,
}
}
@@ -220,16 +112,13 @@ impl TestEnv {
pub fn with_data_home(data_home: TempDir) -> TestEnv {
TestEnv {
data_home,
log_store: None,
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
logstore: None,
object_store_manager: None,
}
}
/// Overwrites the original `log_store_factory`.
pub(crate) fn with_log_store_factory(mut self, log_store_factory: LogStoreFactory) -> TestEnv {
self.log_store_factory = log_store_factory;
self
pub fn get_logstore(&self) -> Option<Arc<RaftEngineLogStore>> {
self.logstore.clone()
}
pub fn get_object_store(&self) -> Option<ObjectStore> {
@@ -250,41 +139,24 @@ impl TestEnv {
pub async fn create_engine(&mut self, config: MitoConfig) -> MitoEngine {
let (log_store, object_store_manager) = self.create_log_and_object_store_manager().await;
let logstore = Arc::new(log_store);
let object_store_manager = Arc::new(object_store_manager);
self.log_store = Some(log_store.clone());
self.logstore = Some(logstore.clone());
self.object_store_manager = Some(object_store_manager.clone());
let data_home = self.data_home().display().to_string();
match log_store {
LogStoreImpl::RaftEngine(log_store) => {
MitoEngine::new(&data_home, config, log_store, object_store_manager)
.await
.unwrap()
}
LogStoreImpl::Kafka(log_store) => {
MitoEngine::new(&data_home, config, log_store, object_store_manager)
.await
.unwrap()
}
}
MitoEngine::new(&data_home, config, logstore, object_store_manager)
.await
.unwrap()
}
/// Creates a new engine with specific config and existing logstore and object store manager.
pub async fn create_follower_engine(&mut self, config: MitoConfig) -> MitoEngine {
let logstore = self.logstore.as_ref().unwrap().clone();
let object_store_manager = self.object_store_manager.as_ref().unwrap().clone();
let data_home = self.data_home().display().to_string();
match self.log_store.as_ref().unwrap().clone() {
LogStoreImpl::RaftEngine(log_store) => {
MitoEngine::new(&data_home, config, log_store, object_store_manager)
.await
.unwrap()
}
LogStoreImpl::Kafka(log_store) => {
MitoEngine::new(&data_home, config, log_store, object_store_manager)
.await
.unwrap()
}
}
MitoEngine::new(&data_home, config, logstore, object_store_manager)
.await
.unwrap()
}
/// Creates a new engine with specific config and manager/listener/purge_scheduler under this env.
@@ -296,36 +168,24 @@ impl TestEnv {
) -> MitoEngine {
let (log_store, object_store_manager) = self.create_log_and_object_store_manager().await;
let logstore = Arc::new(log_store);
let object_store_manager = Arc::new(object_store_manager);
self.log_store = Some(log_store.clone());
self.logstore = Some(logstore.clone());
self.object_store_manager = Some(object_store_manager.clone());
let data_home = self.data_home().display().to_string();
match log_store {
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap(),
LogStoreImpl::Kafka(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap(),
}
MitoEngine::new_for_test(
&data_home,
config,
logstore,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap()
}
pub async fn create_engine_with_multiple_object_stores(
@@ -335,8 +195,7 @@ impl TestEnv {
listener: Option<EventListenerRef>,
custom_storage_names: &[&str],
) -> MitoEngine {
let (log_store, mut object_store_manager) =
self.create_log_and_object_store_manager().await;
let (logstore, mut object_store_manager) = self.create_log_and_object_store_manager().await;
for storage_name in custom_storage_names {
let data_path = self
.data_home
@@ -351,35 +210,23 @@ impl TestEnv {
let object_store = ObjectStore::new(builder).unwrap().finish();
object_store_manager.add(storage_name, object_store);
}
let logstore = Arc::new(logstore);
let object_store_manager = Arc::new(object_store_manager);
self.log_store = Some(log_store.clone());
self.logstore = Some(logstore.clone());
self.object_store_manager = Some(object_store_manager.clone());
let data_home = self.data_home().display().to_string();
match log_store {
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap(),
LogStoreImpl::Kafka(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap(),
}
MitoEngine::new_for_test(
&data_home,
config,
logstore,
object_store_manager,
manager,
listener,
Arc::new(StdTimeProvider),
)
.await
.unwrap()
}
/// Creates a new engine with specific config and manager/listener/time provider under this env.
@@ -392,82 +239,50 @@ impl TestEnv {
) -> MitoEngine {
let (log_store, object_store_manager) = self.create_log_and_object_store_manager().await;
let logstore = Arc::new(log_store);
let object_store_manager = Arc::new(object_store_manager);
self.log_store = Some(log_store.clone());
self.logstore = Some(logstore.clone());
self.object_store_manager = Some(object_store_manager.clone());
let data_home = self.data_home().display().to_string();
match log_store {
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
time_provider.clone(),
)
.await
.unwrap(),
LogStoreImpl::Kafka(log_store) => MitoEngine::new_for_test(
&data_home,
config,
log_store,
object_store_manager,
manager,
listener,
time_provider.clone(),
)
.await
.unwrap(),
}
MitoEngine::new_for_test(
&data_home,
config,
logstore,
object_store_manager,
manager,
listener,
time_provider.clone(),
)
.await
.unwrap()
}
/// Reopen the engine.
pub async fn reopen_engine(&mut self, engine: MitoEngine, config: MitoConfig) -> MitoEngine {
engine.stop().await.unwrap();
match self.log_store.as_ref().unwrap().clone() {
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new(
&self.data_home().display().to_string(),
config,
log_store,
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap(),
LogStoreImpl::Kafka(log_store) => MitoEngine::new(
&self.data_home().display().to_string(),
config,
log_store,
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap(),
}
MitoEngine::new(
&self.data_home().display().to_string(),
config,
self.logstore.clone().unwrap(),
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap()
}
/// Open the engine.
pub async fn open_engine(&mut self, config: MitoConfig) -> MitoEngine {
match self.log_store.as_ref().unwrap().clone() {
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new(
&self.data_home().display().to_string(),
config,
log_store,
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap(),
LogStoreImpl::Kafka(log_store) => MitoEngine::new(
&self.data_home().display().to_string(),
config,
log_store,
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap(),
}
MitoEngine::new(
&self.data_home().display().to_string(),
config,
self.logstore.clone().unwrap(),
self.object_store_manager.clone().unwrap(),
)
.await
.unwrap()
}
/// Only initializes the object store manager, returns the default object store.
@@ -482,44 +297,25 @@ impl TestEnv {
let data_home = self.data_home().display().to_string();
config.sanitize(&data_home).unwrap();
match log_store {
LogStoreImpl::RaftEngine(log_store) => {
WorkerGroup::start(Arc::new(config), log_store, Arc::new(object_store_manager))
.await
.unwrap()
}
LogStoreImpl::Kafka(log_store) => {
WorkerGroup::start(Arc::new(config), log_store, Arc::new(object_store_manager))
.await
.unwrap()
}
}
WorkerGroup::start(
Arc::new(config),
Arc::new(log_store),
Arc::new(object_store_manager),
)
.await
.unwrap()
}
/// Returns the log store and object store manager.
async fn create_log_and_object_store_manager(&self) -> (LogStoreImpl, ObjectStoreManager) {
async fn create_log_and_object_store_manager(
&self,
) -> (RaftEngineLogStore, ObjectStoreManager) {
let data_home = self.data_home.path();
let wal_path = data_home.join("wal");
let log_store = log_store_util::create_tmp_local_file_log_store(&wal_path).await;
let object_store_manager = self.create_object_store_manager();
match &self.log_store_factory {
LogStoreFactory::RaftEngine(factory) => {
let log_store = factory.create_log_store(wal_path).await;
(
LogStoreImpl::RaftEngine(Arc::new(log_store)),
object_store_manager,
)
}
LogStoreFactory::Kafka(factory) => {
let log_store = factory.create_log_store().await;
(
LogStoreImpl::Kafka(Arc::new(log_store)),
object_store_manager,
)
}
}
(log_store, object_store_manager)
}
fn create_object_store_manager(&self) -> ObjectStoreManager {
@@ -603,8 +399,6 @@ pub struct CreateRequestBuilder {
all_not_null: bool,
engine: String,
ts_type: ConcreteDataType,
/// Kafka topic name.
kafka_topic: Option<String>,
}
impl Default for CreateRequestBuilder {
@@ -618,7 +412,6 @@ impl Default for CreateRequestBuilder {
all_not_null: false,
engine: MITO_ENGINE_NAME.to_string(),
ts_type: ConcreteDataType::timestamp_millisecond_datatype(),
kafka_topic: None,
}
}
}
@@ -671,12 +464,6 @@ impl CreateRequestBuilder {
self
}
#[must_use]
pub fn kafka_topic(mut self, topic: Option<String>) -> Self {
self.kafka_topic = topic;
self
}
pub fn build(&self) -> RegionCreateRequest {
let mut column_id = 0;
let mut column_metadatas = Vec::with_capacity(self.tag_num + self.field_num + 1);
@@ -717,21 +504,12 @@ impl CreateRequestBuilder {
semantic_type: SemanticType::Timestamp,
column_id,
});
let mut options = self.options.clone();
if let Some(topic) = &self.kafka_topic {
let wal_options = WalOptions::Kafka(KafkaWalOptions {
topic: topic.to_string(),
});
options.insert(
WAL_OPTIONS_KEY.to_string(),
serde_json::to_string(&wal_options).unwrap(),
);
}
RegionCreateRequest {
engine: self.engine.to_string(),
column_metadatas,
primary_key: self.primary_key.clone().unwrap_or(primary_key),
options,
options: self.options.clone(),
region_dir: self.region_dir.clone(),
}
}
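For reference, the helpers removed above are driven through an rstest template: each test receives an Option<LogStoreFactory> case and bails out when Kafka is not configured. A sketch of that pattern, mirroring test_batch_open_err earlier in this compare; it relies on the crate-internal items shown in this module (TestEnv, LogStoreFactory, prepare_test_for_kafka_log_store), so it is only meaningful inside these tests:
#[apply(multiple_log_store_factories)]
async fn test_with_any_wal(factory: Option<LogStoreFactory>) {
    // Kafka cases yield None when GT_KAFKA_ENDPOINTS is not set.
    let Some(factory) = factory else { return };
    let mut env = TestEnv::new().with_log_store_factory(factory.clone());
    let _engine = env.create_engine(MitoConfig::default()).await;
    // Kafka-backed regions additionally need a prepared topic.
    let _topic = prepare_test_for_kafka_log_store(&factory).await;
}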

View File

@@ -30,7 +30,6 @@ use std::sync::Arc;
use api::v1::WalEntry;
use common_error::ext::BoxedError;
use futures::future::BoxFuture;
use futures::stream::BoxStream;
use prost::Message;
use snafu::ResultExt;
@@ -87,39 +86,6 @@ impl<S: LogStore> Wal<S> {
}
}
/// Returns an [OnRegionOpened] function.
pub(crate) fn on_region_opened(
&self,
) -> impl FnOnce(RegionId, EntryId, &Provider) -> BoxFuture<Result<()>> {
let store = self.store.clone();
move |region_id, last_entry_id, provider| -> BoxFuture<'_, Result<()>> {
Box::pin(async move {
store
.obsolete(provider, last_entry_id)
.await
.map_err(BoxedError::new)
.context(DeleteWalSnafu { region_id })
})
}
}
/// Returns a [WalEntryReader]
pub(crate) fn wal_entry_reader(
&self,
provider: &Provider,
region_id: RegionId,
) -> Box<dyn WalEntryReader> {
match provider {
Provider::RaftEngine(_) => Box::new(LogStoreEntryReader::new(
LogStoreRawEntryReader::new(self.store.clone()),
)),
Provider::Kafka(_) => Box::new(LogStoreEntryReader::new(RegionRawEntryReader::new(
LogStoreRawEntryReader::new(self.store.clone()),
region_id,
))),
}
}
/// Scan entries of a specific region starting from `start_id` (inclusive).
pub fn scan<'a>(
&'a self,

View File

@@ -20,7 +20,7 @@ use api::v1::WalEntry;
use async_stream::stream;
use common_telemetry::{debug, error};
use futures::future::join_all;
use snafu::{ensure, OptionExt};
use snafu::ensure;
use store_api::logstore::entry::Entry;
use store_api::logstore::provider::Provider;
use store_api::storage::RegionId;
@@ -101,9 +101,9 @@ impl WalEntryDistributor {
pub(crate) struct WalEntryReceiver {
region_id: RegionId,
/// Receives the [Entry] from the [WalEntryDistributor].
entry_receiver: Option<Receiver<Entry>>,
entry_receiver: Receiver<Entry>,
/// Sends the `start_id` to the [WalEntryDistributor].
arg_sender: Option<oneshot::Sender<EntryId>>,
arg_sender: oneshot::Sender<EntryId>,
}
impl WalEntryReceiver {
@@ -114,22 +114,19 @@ impl WalEntryReceiver {
) -> Self {
Self {
region_id,
entry_receiver: Some(entry_receiver),
arg_sender: Some(arg_sender),
entry_receiver,
arg_sender,
}
}
}
impl WalEntryReader for WalEntryReceiver {
fn read(&mut self, provider: &Provider, start_id: EntryId) -> Result<WalEntryStream<'static>> {
let mut arg_sender =
self.arg_sender
.take()
.with_context(|| error::InvalidWalReadRequestSnafu {
reason: format!("Call WalEntryReceiver multiple time, start_id: {start_id}"),
})?;
// Safety: check via arg_sender
let mut entry_receiver = self.entry_receiver.take().unwrap();
fn read(self, provider: &Provider, start_id: EntryId) -> Result<WalEntryStream<'static>> {
let WalEntryReceiver {
region_id: expected_region_id,
mut entry_receiver,
arg_sender,
} = self;
if arg_sender.send(start_id).is_err() {
return error::InvalidWalReadRequestSnafu {
@@ -170,9 +167,6 @@ struct EntryReceiver {
sender: Sender<Entry>,
}
/// The default buffer size of the [Entry] receiver.
pub const DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE: usize = 2048;
/// Returns a [WalEntryDistributor] and a batch of [WalEntryReceiver]s.
///
/// ### Note:
@@ -192,14 +186,14 @@ pub const DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE: usize = 2048;
pub fn build_wal_entry_distributor_and_receivers(
provider: Provider,
raw_wal_reader: Arc<dyn RawEntryReader>,
region_ids: &[RegionId],
region_ids: Vec<RegionId>,
buffer_size: usize,
) -> (WalEntryDistributor, Vec<WalEntryReceiver>) {
let mut senders = HashMap::with_capacity(region_ids.len());
let mut readers = Vec::with_capacity(region_ids.len());
let mut arg_receivers = Vec::with_capacity(region_ids.len());
for &region_id in region_ids {
for region_id in region_ids {
let (entry_sender, entry_receiver) = mpsc::channel(buffer_size);
let (arg_sender, arg_receiver) = oneshot::channel();
@@ -263,7 +257,7 @@ mod tests {
let (distributor, receivers) = build_wal_entry_distributor_and_receivers(
provider,
reader,
&[RegionId::new(1024, 1), RegionId::new(1025, 1)],
vec![RegionId::new(1024, 1), RegionId::new(1025, 1)],
128,
);
@@ -323,7 +317,7 @@ mod tests {
let (distributor, mut receivers) = build_wal_entry_distributor_and_receivers(
provider.clone(),
reader,
&[
vec![
RegionId::new(1024, 1),
RegionId::new(1024, 2),
RegionId::new(1024, 3),
@@ -337,7 +331,7 @@ mod tests {
drop(last);
let mut streams = receivers
.iter_mut()
.into_iter()
.map(|receiver| receiver.read(&provider, 0).unwrap())
.collect::<Vec<_>>();
distributor.distribute().await.unwrap();
@@ -433,12 +427,12 @@ mod tests {
let (distributor, mut receivers) = build_wal_entry_distributor_and_receivers(
provider.clone(),
Arc::new(corrupted_stream),
&[region1, region2, region3],
vec![region1, region2, region3],
128,
);
assert_eq!(receivers.len(), 3);
let mut streams = receivers
.iter_mut()
.into_iter()
.map(|receiver| receiver.read(&provider, 0).unwrap())
.collect::<Vec<_>>();
distributor.distribute().await.unwrap();
@@ -516,12 +510,12 @@ mod tests {
let (distributor, mut receivers) = build_wal_entry_distributor_and_receivers(
provider.clone(),
Arc::new(corrupted_stream),
&[region1, region2],
vec![region1, region2],
128,
);
assert_eq!(receivers.len(), 2);
let mut streams = receivers
.iter_mut()
.into_iter()
.map(|receiver| receiver.read(&provider, 0).unwrap())
.collect::<Vec<_>>();
distributor.distribute().await.unwrap();
@@ -608,12 +602,12 @@ mod tests {
let (distributor, mut receivers) = build_wal_entry_distributor_and_receivers(
provider.clone(),
reader,
&[RegionId::new(1024, 1), RegionId::new(1024, 2)],
vec![RegionId::new(1024, 1), RegionId::new(1024, 2)],
128,
);
assert_eq!(receivers.len(), 2);
let mut streams = receivers
.iter_mut()
.into_iter()
.map(|receiver| receiver.read(&provider, 4).unwrap())
.collect::<Vec<_>>();
distributor.distribute().await.unwrap();

View File

@@ -38,10 +38,8 @@ pub(crate) fn decode_raw_entry(raw_entry: Entry) -> Result<(EntryId, WalEntry)>
}
/// [WalEntryReader] provides the ability to read and decode entries from the underlying store.
///
/// Note: it consumes the inner stream, so `read` can only be invoked once.
pub(crate) trait WalEntryReader: Send + Sync {
fn read(&mut self, ns: &'_ Provider, start_id: EntryId) -> Result<WalEntryStream<'static>>;
fn read(self, ns: &'_ Provider, start_id: EntryId) -> Result<WalEntryStream<'static>>;
}
/// A Reader reads the [RawEntry] from [RawEntryReader] and decodes [RawEntry] into [WalEntry].
@@ -56,7 +54,7 @@ impl<R> LogStoreEntryReader<R> {
}
impl<R: RawEntryReader> WalEntryReader for LogStoreEntryReader<R> {
fn read(&mut self, ns: &'_ Provider, start_id: EntryId) -> Result<WalEntryStream<'static>> {
fn read(self, ns: &'_ Provider, start_id: EntryId) -> Result<WalEntryStream<'static>> {
let LogStoreEntryReader { reader } = self;
let mut stream = reader.read(ns, start_id)?;
@@ -138,7 +136,7 @@ mod tests {
],
};
let mut reader = LogStoreEntryReader::new(raw_entry_stream);
let reader = LogStoreEntryReader::new(raw_entry_stream);
let entries = reader
.read(&provider, 0)
.unwrap()
@@ -174,7 +172,7 @@ mod tests {
],
};
let mut reader = LogStoreEntryReader::new(raw_entry_stream);
let reader = LogStoreEntryReader::new(raw_entry_stream);
let err = reader
.read(&provider, 0)
.unwrap()

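Behind decode_raw_entry above, the entry payload is decoded into the protobuf WalEntry type; given the prost::Message import in this module, that step presumably boils down to a plain prost decode. A hedged sketch with simplified error handling (prost's own error instead of the crate Result):
use api::v1::WalEntry;
use prost::Message;

fn decode_payload(
    entry_id: u64,
    payload: &[u8],
) -> Result<(u64, WalEntry), prost::DecodeError> {
    // WalEntry::decode comes from prost::Message; &[u8] implements bytes::Buf.
    let wal_entry = WalEntry::decode(payload)?;
    Ok((entry_id, wal_entry))
}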
View File

@@ -721,8 +721,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
let res = match ddl.request {
DdlRequest::Create(req) => self.handle_create_request(ddl.region_id, req).await,
DdlRequest::Drop(_) => self.handle_drop_request(ddl.region_id).await,
DdlRequest::Open((req, wal_entry_receiver)) => {
self.handle_open_request(ddl.region_id, req, wal_entry_receiver, ddl.sender)
DdlRequest::Open(req) => {
self.handle_open_request(ddl.region_id, req, ddl.sender)
.await;
continue;
}

View File

@@ -73,16 +73,13 @@ impl<S: LogStore> RegionWorkerLoop<S> {
let flushed_entry_id = region.version_control.current().last_entry_id;
info!("Trying to replay memtable for region: {region_id}, flushed entry id: {flushed_entry_id}");
let timer = Instant::now();
let wal_entry_reader = self.wal.wal_entry_reader(&region.provider, region_id);
let on_region_opened = self.wal.on_region_opened();
let last_entry_id = replay_memtable(
&self.wal,
&region.provider,
wal_entry_reader,
region_id,
flushed_entry_id,
&region.version_control,
self.config.allow_stale_entries,
on_region_opened,
)
.await?;
info!(

View File

@@ -16,7 +16,6 @@
use std::time::Duration;
use bytes::Bytes;
use common_telemetry::{error, info, warn};
use futures::TryStreamExt;
use object_store::util::join_path;
@@ -51,7 +50,7 @@ impl<S> RegionWorkerLoop<S> {
region
.access_layer
.object_store()
.write(&marker_path, Bytes::new())
.write(&marker_path, vec![])
.await
.context(OpenDalSnafu)
.inspect_err(|e| {

View File

@@ -29,7 +29,6 @@ use crate::error::{
use crate::metrics::REGION_COUNT;
use crate::region::opener::RegionOpener;
use crate::request::OptionOutputTx;
use crate::wal::entry_distributor::WalEntryReceiver;
use crate::worker::handle_drop::remove_region_dir_once;
use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE};
@@ -67,7 +66,6 @@ impl<S: LogStore> RegionWorkerLoop<S> {
&mut self,
region_id: RegionId,
request: RegionOpenRequest,
wal_entry_receiver: Option<WalEntryReceiver>,
sender: OptionOutputTx,
) {
if self.regions.is_region_exists(region_id) {
@@ -97,7 +95,6 @@ impl<S: LogStore> RegionWorkerLoop<S> {
)
.skip_wal_replay(request.skip_wal_replay)
.cache(Some(self.cache_manager.clone()))
.wal_entry_reader(wal_entry_receiver.map(|receiver| Box::new(receiver) as _))
.parse_options(request.options)
{
Ok(opener) => opener,

View File

@@ -11,21 +11,23 @@ workspace = true
services-memory = ["opendal/services-memory"]
[dependencies]
async-trait = "0.1"
bytes.workspace = true
common-telemetry.workspace = true
futures.workspace = true
lazy_static.workspace = true
md5 = "0.7"
moka = { workspace = true, features = ["future"] }
opendal = { version = "0.46", features = [
opendal = { version = "0.45", features = [
"layers-tracing",
"rustls",
"services-azblob",
"services-fs",
"services-gcs",
"services-http",
"services-oss",
"services-s3",
] }
], default-features = false }
prometheus.workspace = true
uuid.workspace = true
@@ -33,4 +35,5 @@ uuid.workspace = true
anyhow = "1.0"
common-telemetry.workspace = true
common-test-util.workspace = true
opendal = { version = "0.45", features = ["services-memory"] }
tokio.workspace = true

View File

@@ -14,26 +14,27 @@
use std::sync::Arc;
use opendal::raw::oio::ReadDyn;
use async_trait::async_trait;
use opendal::raw::oio::Read;
use opendal::raw::{
Access, Layer, LayeredAccess, OpDelete, OpList, OpRead, OpWrite, RpDelete, RpList, RpRead,
Accessor, Layer, LayeredAccessor, OpDelete, OpList, OpRead, OpWrite, RpDelete, RpList, RpRead,
RpWrite,
};
use opendal::{Operator, Result};
use opendal::Result;
mod read_cache;
use common_telemetry::info;
use read_cache::ReadCache;
/// An opendal layer with local LRU file cache support.
#[derive(Clone)]
pub struct LruCacheLayer {
pub struct LruCacheLayer<C: Clone> {
// The read cache
read_cache: ReadCache,
read_cache: ReadCache<C>,
}
impl LruCacheLayer {
impl<C: Accessor + Clone> LruCacheLayer<C> {
/// Create a [`LruCacheLayer`] with local file cache and capacity in bytes.
pub async fn new(file_cache: Operator, capacity: usize) -> Result<Self> {
pub async fn new(file_cache: Arc<C>, capacity: usize) -> Result<Self> {
let read_cache = ReadCache::new(file_cache, capacity);
let (entries, bytes) = read_cache.recover_cache().await?;
@@ -56,11 +57,11 @@ impl LruCacheLayer {
}
}
impl<I: Access> Layer<I> for LruCacheLayer {
type LayeredAccess = LruCacheAccess<I>;
impl<I: Accessor, C: Accessor + Clone> Layer<I> for LruCacheLayer<C> {
type LayeredAccessor = LruCacheAccessor<I, C>;
fn layer(&self, inner: I) -> Self::LayeredAccess {
LruCacheAccess {
fn layer(&self, inner: I) -> Self::LayeredAccessor {
LruCacheAccessor {
inner,
read_cache: self.read_cache.clone(),
}
@@ -68,14 +69,15 @@ impl<I: Access> Layer<I> for LruCacheLayer {
}
#[derive(Debug)]
pub struct LruCacheAccess<I> {
pub struct LruCacheAccessor<I, C: Clone> {
inner: I,
read_cache: ReadCache,
read_cache: ReadCache<C>,
}
impl<I: Access> LayeredAccess for LruCacheAccess<I> {
#[async_trait]
impl<I: Accessor, C: Accessor + Clone> LayeredAccessor for LruCacheAccessor<I, C> {
type Inner = I;
type Reader = Arc<dyn ReadDyn>;
type Reader = Box<dyn Read>;
type BlockingReader = I::BlockingReader;
type Writer = I::Writer;
type BlockingWriter = I::BlockingWriter;

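As a usage reference, the integration tests later in this compare wire the layer up roughly as in the following sketch; the builder calls and the capacity are illustrative, and the Accessor-generic form is the 0.45-era one restored here:
use std::sync::Arc;

use object_store::layers::LruCacheLayer;
use object_store::services::Fs;
use object_store::{ObjectStore, ObjectStoreBuilder};

async fn with_read_cache(store: ObjectStore, cache_dir: &str) -> ObjectStore {
    let mut builder = Fs::default();
    let _ = builder.root(cache_dir).atomic_write_dir(cache_dir);
    // 0.45 form: the layer takes an Arc'd Accessor as its file-cache backend.
    let file_cache = Arc::new(builder.build().unwrap());
    let cache_layer = LruCacheLayer::new(file_cache, 32 * 1024 * 1024)
        .await
        .unwrap();
    store.layer(cache_layer)
}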
View File

@@ -15,12 +15,12 @@
use std::sync::Arc;
use common_telemetry::debug;
use futures::{FutureExt, StreamExt};
use futures::FutureExt;
use moka::future::Cache;
use moka::notification::ListenerFuture;
use opendal::raw::oio::{Read, ReadDyn, Reader};
use opendal::raw::{Access, BytesRange, OpRead, OpStat, RpRead};
use opendal::{Buffer, Error as OpendalError, ErrorKind, Operator, Result};
use opendal::raw::oio::{ListExt, Read, ReadExt, Reader, WriteExt};
use opendal::raw::{Accessor, OpDelete, OpList, OpRead, OpStat, OpWrite, RpRead};
use opendal::{Error as OpendalError, ErrorKind, Result};
use crate::metrics::{
OBJECT_STORE_LRU_CACHE_BYTES, OBJECT_STORE_LRU_CACHE_ENTRIES, OBJECT_STORE_LRU_CACHE_HIT,
@@ -52,22 +52,26 @@ fn can_cache(path: &str) -> bool {
}
/// Generate a unique cache key for the read path and range.
fn read_cache_key(path: &str, range: BytesRange) -> String {
format!("{:x}.cache-{}", md5::compute(path), range.to_header())
fn read_cache_key(path: &str, args: &OpRead) -> String {
format!(
"{:x}.cache-{}",
md5::compute(path),
args.range().to_header()
)
}
/// Local read cache for files in object storage
#[derive(Clone, Debug)]
pub(crate) struct ReadCache {
pub(crate) struct ReadCache<C: Clone> {
/// Local file cache backend
file_cache: Operator,
file_cache: Arc<C>,
/// Local memory cache to track local cache files
mem_cache: Cache<String, ReadResult>,
}
impl ReadCache {
impl<C: Accessor + Clone> ReadCache<C> {
/// Create a [`ReadCache`] with capacity in bytes.
pub(crate) fn new(file_cache: Operator, capacity: usize) -> Self {
pub(crate) fn new(file_cache: Arc<C>, capacity: usize) -> Self {
let file_cache_cloned = file_cache.clone();
let eviction_listener =
move |read_key: Arc<String>, read_result: ReadResult, cause| -> ListenerFuture {
@@ -79,7 +83,7 @@ impl ReadCache {
if let ReadResult::Success(size) = read_result {
OBJECT_STORE_LRU_CACHE_BYTES.sub(size as i64);
let result = file_cache_cloned.delete(&read_key).await;
let result = file_cache_cloned.delete(&read_key, OpDelete::new()).await;
debug!(
"Deleted local cache file `{}`, result: {:?}, cause: {:?}.",
read_key, result, cause
@@ -129,17 +133,17 @@ impl ReadCache {
/// Recover existing cache items from `file_cache` to `mem_cache`.
/// Return entry count and total approximate entry size in bytes.
pub(crate) async fn recover_cache(&self) -> Result<(u64, u64)> {
let mut pager = self.file_cache.lister("/").await?;
let (_, mut pager) = self.file_cache.list("/", OpList::default()).await?;
while let Some(entry) = pager.next().await.transpose()? {
while let Some(entry) = pager.next().await? {
let read_key = entry.path();
// We can't retrieve the metadata from [`opendal::raw::oio::Entry`] directly,
// because it's a private field.
let size = {
let stat = self.file_cache.stat(read_key).await?;
let stat = self.file_cache.stat(read_key, OpStat::default()).await?;
stat.content_length()
stat.into_metadata().content_length()
};
OBJECT_STORE_LRU_CACHE_ENTRIES.inc();
@@ -155,7 +159,8 @@ impl ReadCache {
/// Returns true when the read cache contains the specific file.
pub(crate) async fn contains_file(&self, path: &str) -> bool {
self.mem_cache.run_pending_tasks().await;
self.mem_cache.contains_key(path) && self.file_cache.stat(path).await.is_ok()
self.mem_cache.contains_key(path)
&& self.file_cache.stat(path, OpStat::default()).await.is_ok()
}
/// Read from a specific path using the OpRead operation.
@@ -168,54 +173,86 @@ impl ReadCache {
inner: &I,
path: &str,
args: OpRead,
) -> Result<(RpRead, Arc<dyn ReadDyn>)>
) -> Result<(RpRead, Box<dyn Read>)>
where
I: Access,
I: Accessor,
{
if !can_cache(path) {
return inner.read(path, args).await.map(to_output_reader);
}
// FIXME: remove this block after opendal v0.47 is released.
let meta = inner.stat(path, OpStat::new()).await?;
let (rp, reader) = inner.read(path, args).await?;
let reader: ReadCacheReader<I> = ReadCacheReader {
path: Arc::new(path.to_string()),
inner_reader: reader,
size: meta.into_metadata().content_length(),
file_cache: self.file_cache.clone(),
mem_cache: self.mem_cache.clone(),
};
Ok((rp, Arc::new(reader)))
let read_key = read_cache_key(path, &args);
let read_result = self
.mem_cache
.try_get_with(
read_key.clone(),
self.read_remote(inner, &read_key, path, args.clone()),
)
.await
.map_err(|e| OpendalError::new(e.kind(), &e.to_string()))?;
match read_result {
ReadResult::Success(_) => {
// There is a concurrency issue here: the local cache may be purged
// while reading, so we have to fall back to a remote read
match self.file_cache.read(&read_key, OpRead::default()).await {
Ok(ret) => {
OBJECT_STORE_LRU_CACHE_HIT
.with_label_values(&["success"])
.inc();
Ok(to_output_reader(ret))
}
Err(_) => {
OBJECT_STORE_LRU_CACHE_MISS.inc();
inner.read(path, args).await.map(to_output_reader)
}
}
}
ReadResult::NotFound => {
OBJECT_STORE_LRU_CACHE_HIT
.with_label_values(&["not_found"])
.inc();
Err(OpendalError::new(
ErrorKind::NotFound,
&format!("File not found: {path}"),
))
}
}
}
}
pub struct ReadCacheReader<I: Access> {
/// Path of the file
path: Arc<String>,
/// Remote file reader.
inner_reader: I::Reader,
/// FIXME: remove this field after opendal v0.47 is released.
///
/// OpenDAL's read_at takes `offset, limit` which means the underlying storage
/// services could return less data than limit. We store size here as a workaround.
///
/// This API has been refactored into `offset, size` instead. After opendal v0.47 is
/// released, we won't need this anymore.
size: u64,
/// Local file cache backend
file_cache: Operator,
/// Local memory cache to track local cache files
mem_cache: Cache<String, ReadResult>,
}
async fn try_write_cache<I>(&self, mut reader: I::Reader, read_key: &str) -> Result<usize>
where
I: Accessor,
{
let (_, mut writer) = self.file_cache.write(read_key, OpWrite::new()).await?;
let mut total = 0;
while let Some(bytes) = reader.next().await {
let bytes = &bytes?;
total += bytes.len();
writer.write(bytes).await?;
}
// Call `close` to ensure data is written.
writer.close().await?;
Ok(total)
}
impl<I: Access> ReadCacheReader<I> {
/// TODO: we can return the Buffer directly to avoid another read from cache.
async fn read_remote(&self, offset: u64, limit: usize) -> Result<ReadResult> {
/// Read the file from remote storage. On success, write the content into the local cache.
async fn read_remote<I>(
&self,
inner: &I,
read_key: &str,
path: &str,
args: OpRead,
) -> Result<ReadResult>
where
I: Accessor,
{
OBJECT_STORE_LRU_CACHE_MISS.inc();
let buf = self.inner_reader.read_at(offset, limit).await?;
let result = self.try_write_cache(buf, offset).await;
let (_, reader) = inner.read(path, args).await?;
let result = self.try_write_cache::<I>(reader, read_key).await;
match result {
Ok(read_bytes) => {
@@ -242,59 +279,10 @@ impl<I: Access> ReadCacheReader<I> {
}
}
}
async fn try_write_cache(&self, buf: Buffer, offset: u64) -> Result<usize> {
let size = buf.len();
let read_key = read_cache_key(&self.path, BytesRange::new(offset, Some(size as _)));
self.file_cache.write(&read_key, buf).await?;
Ok(size)
}
}
impl<I: Access> Read for ReadCacheReader<I> {
async fn read_at(&self, offset: u64, limit: usize) -> Result<Buffer> {
let size = self.size.min(offset + limit as u64) - offset;
let read_key = read_cache_key(&self.path, BytesRange::new(offset, Some(size as _)));
let read_result = self
.mem_cache
.try_get_with(read_key.clone(), self.read_remote(offset, limit))
.await
.map_err(|e| OpendalError::new(e.kind(), &e.to_string()))?;
match read_result {
ReadResult::Success(_) => {
// There is a concurrency issue here: the local cache may be purged
// while reading, so we have to fall back to a remote read
match self.file_cache.read(&read_key).await {
Ok(ret) => {
OBJECT_STORE_LRU_CACHE_HIT
.with_label_values(&["success"])
.inc();
Ok(ret)
}
Err(_) => {
OBJECT_STORE_LRU_CACHE_MISS.inc();
self.inner_reader.read_at(offset, limit).await
}
}
}
ReadResult::NotFound => {
OBJECT_STORE_LRU_CACHE_HIT
.with_label_values(&["not_found"])
.inc();
Err(OpendalError::new(
ErrorKind::NotFound,
&format!("File not found: {}", self.path),
))
}
}
}
}
fn to_output_reader<R: Read + 'static>(input: (RpRead, R)) -> (RpRead, Reader) {
(input.0, Arc::new(input.1))
(input.0, Box::new(input.1))
}
#[cfg(test)]
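A worked example of the cache-key scheme above: the key is the md5 hex digest of the remote path joined with the textual byte range of the request, so the same file read with different ranges yields distinct cache entries. The sketch takes the range already rendered to its header form (e.g. bytes=0-1023) to stay independent of the BytesRange constructor:
fn example_cache_key(path: &str, range_header: &str) -> String {
    // ("data/file.parquet", "bytes=0-1023") -> "<md5-hex>.cache-bytes=0-1023"
    format!("{:x}.cache-{}", md5::compute(path), range_header)
}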

View File

@@ -15,11 +15,16 @@
//! Code originally from <https://github.com/apache/incubator-opendal/blob/main/core/src/layers/prometheus.rs>, with a tiny change to avoid crashes in a multi-threaded environment
use std::fmt::{Debug, Formatter};
use std::io;
use std::task::{Context, Poll};
use async_trait::async_trait;
use bytes::Bytes;
use common_telemetry::debug;
use futures::FutureExt;
use lazy_static::lazy_static;
use opendal::raw::*;
use opendal::{Buffer, ErrorKind};
use opendal::ErrorKind;
use prometheus::{
exponential_buckets, histogram_opts, register_histogram_vec, register_int_counter_vec,
Histogram, HistogramTimer, HistogramVec, IntCounterVec,
@@ -84,14 +89,14 @@ fn increment_errors_total(op: Operation, kind: ErrorKind) {
#[derive(Default, Debug, Clone)]
pub struct PrometheusMetricsLayer;
impl<A: Access> Layer<A> for PrometheusMetricsLayer {
type LayeredAccess = PrometheusAccess<A>;
impl<A: Accessor> Layer<A> for PrometheusMetricsLayer {
type LayeredAccessor = PrometheusAccessor<A>;
fn layer(&self, inner: A) -> Self::LayeredAccess {
fn layer(&self, inner: A) -> Self::LayeredAccessor {
let meta = inner.info();
let scheme = meta.scheme();
PrometheusAccess {
PrometheusAccessor {
inner,
scheme: scheme.to_string(),
}
@@ -99,12 +104,12 @@ impl<A: Access> Layer<A> for PrometheusMetricsLayer {
}
#[derive(Clone)]
pub struct PrometheusAccess<A: Access> {
pub struct PrometheusAccessor<A: Accessor> {
inner: A,
scheme: String,
}
impl<A: Access> Debug for PrometheusAccess<A> {
impl<A: Accessor> Debug for PrometheusAccessor<A> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PrometheusAccessor")
.field("inner", &self.inner)
@@ -112,7 +117,8 @@ impl<A: Access> Debug for PrometheusAccess<A> {
}
}
impl<A: Access> LayeredAccess for PrometheusAccess<A> {
#[async_trait]
impl<A: Accessor> LayeredAccessor for PrometheusAccessor<A> {
type Inner = A;
type Reader = PrometheusMetricWrapper<A::Reader>;
type BlockingReader = PrometheusMetricWrapper<A::BlockingReader>;
@@ -151,20 +157,27 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Read.into_static()])
.start_timer();
let (rp, r) = self.inner.read(path, args).await.map_err(|e| {
increment_errors_total(Operation::Read, e.kind());
e
})?;
Ok((
rp,
PrometheusMetricWrapper::new(
r,
Operation::Read,
BYTES_TOTAL.with_label_values(&[&self.scheme, Operation::Read.into_static()]),
timer,
),
))
self.inner
.read(path, args)
.map(|v| {
v.map(|(rp, r)| {
(
rp,
PrometheusMetricWrapper::new(
r,
Operation::Read,
BYTES_TOTAL
.with_label_values(&[&self.scheme, Operation::Read.into_static()]),
timer,
),
)
})
})
.await
.map_err(|e| {
increment_errors_total(Operation::Read, e.kind());
e
})
}
async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> {
@@ -176,20 +189,27 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Write.into_static()])
.start_timer();
let (rp, r) = self.inner.write(path, args).await.map_err(|e| {
increment_errors_total(Operation::Write, e.kind());
e
})?;
Ok((
rp,
PrometheusMetricWrapper::new(
r,
Operation::Write,
BYTES_TOTAL.with_label_values(&[&self.scheme, Operation::Write.into_static()]),
timer,
),
))
self.inner
.write(path, args)
.map(|v| {
v.map(|(rp, r)| {
(
rp,
PrometheusMetricWrapper::new(
r,
Operation::Write,
BYTES_TOTAL
.with_label_values(&[&self.scheme, Operation::Write.into_static()]),
timer,
),
)
})
})
.await
.map_err(|e| {
increment_errors_total(Operation::Write, e.kind());
e
})
}
async fn stat(&self, path: &str, args: OpStat) -> Result<RpStat> {
@@ -441,46 +461,103 @@ impl<R> PrometheusMetricWrapper<R> {
}
impl<R: oio::Read> oio::Read for PrometheusMetricWrapper<R> {
async fn read_at(&self, offset: u64, limit: usize) -> Result<Buffer> {
self.inner.read_at(offset, limit).await.map_err(|err| {
increment_errors_total(self.op, err.kind());
err
fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll<Result<usize>> {
self.inner.poll_read(cx, buf).map(|res| match res {
Ok(bytes) => {
self.bytes += bytes as u64;
Ok(bytes)
}
Err(e) => {
increment_errors_total(self.op, e.kind());
Err(e)
}
})
}
fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll<Result<u64>> {
self.inner.poll_seek(cx, pos).map(|res| match res {
Ok(n) => Ok(n),
Err(e) => {
increment_errors_total(self.op, e.kind());
Err(e)
}
})
}
fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll<Option<Result<Bytes>>> {
self.inner.poll_next(cx).map(|res| match res {
Some(Ok(bytes)) => {
self.bytes += bytes.len() as u64;
Some(Ok(bytes))
}
Some(Err(e)) => {
increment_errors_total(self.op, e.kind());
Some(Err(e))
}
None => None,
})
}
}
impl<R: oio::BlockingRead> oio::BlockingRead for PrometheusMetricWrapper<R> {
fn read_at(&self, offset: u64, limit: usize) -> opendal::Result<Buffer> {
self.inner.read_at(offset, limit).map_err(|err| {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
self.inner
.read(buf)
.map(|n| {
self.bytes += n as u64;
n
})
.map_err(|e| {
increment_errors_total(self.op, e.kind());
e
})
}
fn seek(&mut self, pos: io::SeekFrom) -> Result<u64> {
self.inner.seek(pos).map_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
fn next(&mut self) -> Option<Result<Bytes>> {
self.inner.next().map(|res| match res {
Ok(bytes) => {
self.bytes += bytes.len() as u64;
Ok(bytes)
}
Err(e) => {
increment_errors_total(self.op, e.kind());
Err(e)
}
})
}
}
#[async_trait]
impl<R: oio::Write> oio::Write for PrometheusMetricWrapper<R> {
async fn write(&mut self, bs: Buffer) -> Result<usize> {
match self.inner.write(bs).await {
Ok(n) => {
fn poll_write(&mut self, cx: &mut Context<'_>, bs: &dyn oio::WriteBuf) -> Poll<Result<usize>> {
self.inner
.poll_write(cx, bs)
.map_ok(|n| {
self.bytes += n as u64;
Ok(n)
}
Err(err) => {
n
})
.map_err(|err| {
increment_errors_total(self.op, err.kind());
Err(err)
}
}
err
})
}
async fn close(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
fn poll_abort(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> {
self.inner.poll_abort(cx).map_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
async fn abort(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> {
self.inner.poll_close(cx).map_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
@@ -488,7 +565,7 @@ impl<R: oio::Write> oio::Write for PrometheusMetricWrapper<R> {
}
impl<R: oio::BlockingWrite> oio::BlockingWrite for PrometheusMetricWrapper<R> {
fn write(&mut self, bs: Buffer) -> Result<usize> {
fn write(&mut self, bs: &dyn oio::WriteBuf) -> Result<usize> {
self.inner
.write(bs)
.map(|n| {

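Attaching the layer is zero-configuration. One plausible wiring, mirroring how other layers are applied with store.layer(...) elsewhere in this compare; the re-export path object_store::layers::PrometheusMetricsLayer is an assumption:
use object_store::layers::PrometheusMetricsLayer; // assumed re-export path
use object_store::ObjectStore;

fn with_metrics(store: ObjectStore) -> ObjectStore {
    // PrometheusMetricsLayer is a zero-config unit struct (Default/Debug/Clone).
    store.layer(PrometheusMetricsLayer)
}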
View File

@@ -14,9 +14,8 @@
pub use opendal::raw::{normalize_path as raw_normalize_path, HttpClient};
pub use opendal::{
services, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind,
FuturesAsyncReader, FuturesAsyncWriter, Lister, Metakey, Operator as ObjectStore, Reader,
Result, Writer,
services, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind, Lister, Metakey,
Operator as ObjectStore, Reader, Result, Writer,
};
pub mod layers;
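A small usage sketch of these re-exports (the 0.45 set), matching how the tests elsewhere in this compare build an Fs-backed store; the root path handling is illustrative:
use object_store::services::Fs;
use object_store::{ObjectStore, Result};

fn fs_store(root: &str) -> Result<ObjectStore> {
    let mut builder = Fs::default();
    let _ = builder.root(root);
    Ok(ObjectStore::new(builder)?.finish())
}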

View File

@@ -22,6 +22,7 @@ use object_store::layers::LruCacheLayer;
use object_store::services::{Fs, S3};
use object_store::test_util::TempFolder;
use object_store::{ObjectStore, ObjectStoreBuilder};
use opendal::raw::Accessor;
use opendal::services::{Azblob, Gcs, Oss};
use opendal::{EntryMode, Operator, OperatorBuilder};
@@ -35,11 +36,11 @@ async fn test_object_crud(store: &ObjectStore) -> Result<()> {
// Read data from object;
let bs = store.read(file_name).await?;
assert_eq!("Hello, World!", String::from_utf8(bs.to_vec())?);
assert_eq!("Hello, World!", String::from_utf8(bs)?);
// Read range from object;
let bs = store.read_with(file_name).range(1..=11).await?;
assert_eq!("ello, World", String::from_utf8(bs.to_vec())?);
assert_eq!("ello, World", String::from_utf8(bs)?);
// Get object's Metadata
let meta = store.stat(file_name).await?;
@@ -76,7 +77,7 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> {
assert_eq!(p2, entries.first().unwrap().path());
let content = store.read(p2).await?;
assert_eq!("Hello, object2!", String::from_utf8(content.to_vec())?);
assert_eq!("Hello, object2!", String::from_utf8(content)?);
store.delete(p2).await?;
let entries = store.list("/").await?;
@@ -235,9 +236,11 @@ async fn test_file_backend_with_lru_cache() -> Result<()> {
let _ = builder
.root(&cache_dir.path().to_string_lossy())
.atomic_write_dir(&cache_dir.path().to_string_lossy());
let file_cache = Operator::new(builder).unwrap().finish();
let file_cache = Arc::new(builder.build().unwrap());
LruCacheLayer::new(file_cache, 32).await.unwrap()
LruCacheLayer::new(Arc::new(file_cache.clone()), 32)
.await
.unwrap()
};
let store = store.layer(cache_layer.clone());
@@ -250,7 +253,10 @@ async fn test_file_backend_with_lru_cache() -> Result<()> {
Ok(())
}
async fn assert_lru_cache(cache_layer: &LruCacheLayer, file_names: &[&str]) {
async fn assert_lru_cache<C: Accessor + Clone>(
cache_layer: &LruCacheLayer<C>,
file_names: &[&str],
) {
for file_name in file_names {
assert!(cache_layer.contains_file(file_name).await);
}
@@ -272,7 +278,7 @@ async fn assert_cache_files(
let bs = store.read(o.path()).await.unwrap();
assert_eq!(
file_contents[position],
String::from_utf8(bs.to_vec())?,
String::from_utf8(bs.clone())?,
"file content not match: {}",
o.name()
);
@@ -306,7 +312,9 @@ async fn test_object_store_cache_policy() -> Result<()> {
let cache_store = OperatorBuilder::new(file_cache.clone()).finish();
// create operator for cache dir to verify cache file
let cache_layer = LruCacheLayer::new(cache_store.clone(), 38).await.unwrap();
let cache_layer = LruCacheLayer::new(Arc::new(file_cache.clone()), 38)
.await
.unwrap();
let store = store.layer(cache_layer.clone());
// create several object handler.
@@ -378,7 +386,7 @@ async fn test_object_store_cache_policy() -> Result<()> {
// instead of returning `NotFound` during the reader creation.
// The entry count is 4, because we have the p2 `NotFound` cache.
assert!(store.read_with(p2).range(0..4).await.is_err());
assert_eq!(cache_layer.read_cache_stat().await, (3, 35));
assert_eq!(cache_layer.read_cache_stat().await, (4, 35));
assert_cache_files(
&cache_store,
@@ -406,7 +414,7 @@ async fn test_object_store_cache_policy() -> Result<()> {
assert!(store.read(p2).await.is_err());
// Read p1 with range `1..` , the existing p1 with range `0..` must be evicted.
let _ = store.read_with(p1).range(1..15).await.unwrap();
assert_eq!(cache_layer.read_cache_stat().await, (3, 34));
assert_eq!(cache_layer.read_cache_stat().await, (4, 34));
assert_cache_files(
&cache_store,
&[
@@ -434,7 +442,7 @@ async fn test_object_store_cache_policy() -> Result<()> {
drop(cache_layer);
// Test recover
let cache_layer = LruCacheLayer::new(cache_store, 38).await.unwrap();
let cache_layer = LruCacheLayer::new(Arc::new(file_cache), 38).await.unwrap();
// The p2 `NotFound` cache will not be recovered
assert_eq!(cache_layer.read_cache_stat().await, (3, 34));
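
The assertions above treat the cache state as an `(entry_count, total_bytes)` pair kept under a fixed byte budget, with least-recently-used entries evicted first. The following toy `ByteLru` is a hypothetical stand-in (not the real `LruCacheLayer`) that shows only that accounting.

```rust
use std::collections::HashMap;

/// Tiny LRU with a byte budget, tracking the (entry_count, total_bytes)
/// pair that `read_cache_stat()` asserts on above.
struct ByteLru {
    capacity: u64,
    total: u64,
    /// Most recently used at the back.
    order: Vec<String>,
    sizes: HashMap<String, u64>,
}

impl ByteLru {
    fn new(capacity: u64) -> Self {
        Self { capacity, total: 0, order: Vec::new(), sizes: HashMap::new() }
    }

    fn stat(&self) -> (usize, u64) {
        (self.sizes.len(), self.total)
    }

    fn touch(&mut self, key: &str) {
        if let Some(pos) = self.order.iter().position(|k| k == key) {
            let k = self.order.remove(pos);
            self.order.push(k);
        }
    }

    fn insert(&mut self, key: &str, size: u64) {
        if let Some(old) = self.sizes.insert(key.to_string(), size) {
            self.total -= old;
        } else {
            self.order.push(key.to_string());
        }
        self.total += size;
        self.touch(key);
        // Evict least recently used entries until we fit the byte budget.
        while self.total > self.capacity && self.order.len() > 1 {
            let victim = self.order.remove(0);
            if let Some(sz) = self.sizes.remove(&victim) {
                self.total -= sz;
            }
        }
    }
}

fn main() {
    let mut cache = ByteLru::new(38);
    cache.insert("p1-0..14", 14); // stat = (1, 14)
    cache.insert("p2-0..15", 15); // stat = (2, 29)
    // 29 + 13 = 42 > 38, so the least recently used entry is evicted.
    cache.insert("p1-1..14", 13);
    assert_eq!(cache.stat(), (2, 28));
}
```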

View File

@@ -47,6 +47,7 @@ prometheus.workspace = true
query.workspace = true
regex.workspace = true
serde_json.workspace = true
servers.workspace = true
session.workspace = true
snafu.workspace = true
sql.workspace = true
@@ -55,7 +56,6 @@ store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
tokio-util.workspace = true
tonic.workspace = true
[dev-dependencies]

View File

@@ -15,14 +15,13 @@
use std::any::Any;
use common_datasource::file_format::Format;
use common_error::define_into_tonic_status;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use datafusion::parquet;
use datatypes::arrow::error::ArrowError;
use servers::define_into_tonic_status;
use snafu::{Location, Snafu};
use table::metadata::TableType;
#[derive(Snafu)]
#[snafu(visibility(pub))]
@@ -642,6 +641,20 @@ pub enum Error {
location: Location,
},
#[snafu(display("Do not support {} in multiple catalogs", ddl_name))]
DdlWithMultiCatalogs {
ddl_name: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Do not support {} in multiple schemas", ddl_name))]
DdlWithMultiSchemas {
ddl_name: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Empty {} expr", name))]
EmptyDdlExpr {
name: String,
@@ -684,18 +697,6 @@ pub enum Error {
location: Location,
source: substrait::error::Error,
},
#[snafu(display(
"Show create table only for base table. {} is {}",
table_name,
table_type
))]
ShowCreateTableBaseOnly {
table_name: String,
table_type: TableType,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -733,9 +734,7 @@ impl ErrorExt for Error {
StatusCode::TableAlreadyExists
}
Error::NotSupported { .. } | Error::ShowCreateTableBaseOnly { .. } => {
StatusCode::Unsupported
}
Error::NotSupported { .. } => StatusCode::Unsupported,
Error::TableMetadataManager { source, .. } => source.status_code(),
@@ -821,7 +820,9 @@ impl ErrorExt for Error {
Error::ColumnDefaultValue { source, .. } => source.status_code(),
Error::EmptyDdlExpr { .. }
Error::DdlWithMultiCatalogs { .. }
| Error::DdlWithMultiSchemas { .. }
| Error::EmptyDdlExpr { .. }
| Error::InvalidPartitionRule { .. }
| Error::ParseSqlValue { .. }
| Error::InvalidTimestampRange { .. } => StatusCode::InvalidArguments,
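
The new `DdlWithMultiCatalogs`/`DdlWithMultiSchemas` variants are ordinary snafu variants mapped to `StatusCode::InvalidArguments`. Here is a minimal sketch of declaring and raising such a variant with `ensure!`; it assumes a recent snafu release with the default `<Variant>Snafu` context-selector naming and omits the `location` field the real enum carries.

```rust
// Assumed dependency: snafu = "0.8" (API may differ slightly across versions).
use snafu::{ensure, Snafu};

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Do not support {} in multiple catalogs", ddl_name))]
    DdlWithMultiCatalogs { ddl_name: String },

    #[snafu(display("Do not support {} in multiple schemas", ddl_name))]
    DdlWithMultiSchemas { ddl_name: String },
}

pub type Result<T> = std::result::Result<T, Error>;

fn check_single_catalog(catalogs: &[&str]) -> Result<()> {
    // All expressions in one batched DDL must target the same catalog.
    ensure!(
        catalogs.windows(2).all(|w| w[0] == w[1]),
        DdlWithMultiCatalogsSnafu { ddl_name: "create tables" }
    );
    Ok(())
}

fn main() {
    assert!(check_single_catalog(&["greptime", "greptime"]).is_ok());
    assert!(check_single_catalog(&["greptime", "other"]).is_err());
}
```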

View File

@@ -322,8 +322,11 @@ fn find_primary_keys(
let constraints_pk = constraints
.iter()
.filter_map(|constraint| match constraint {
TableConstraint::PrimaryKey {
name: _, columns, ..
TableConstraint::Unique {
name: _,
columns,
is_primary: true,
..
} => Some(columns.iter().map(|ident| ident.value.clone())),
_ => None,
})
@@ -350,6 +353,7 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result<String> {
TableConstraint::Unique {
name: Some(name),
columns,
is_primary: false,
..
} => {
if name.value == TIME_INDEX {
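
This hunk moves back from the newer sqlparser's dedicated `TableConstraint::PrimaryKey` variant to the older `TableConstraint::Unique { is_primary, .. }` shape, so primary keys and the time-index constraint are told apart by the `is_primary` flag and the constraint name. A simplified sketch with a stand-in enum (not the real sqlparser AST; names and values below are illustrative):

```rust
#[allow(dead_code)]
enum TableConstraint {
    Unique {
        name: Option<String>,
        columns: Vec<String>,
        is_primary: bool,
    },
}

/// Collect primary-key column names, mirroring the `filter_map` in
/// `find_primary_keys`: only unique constraints flagged `is_primary` count.
fn find_primary_keys(constraints: &[TableConstraint]) -> Vec<String> {
    constraints
        .iter()
        .filter_map(|constraint| match constraint {
            TableConstraint::Unique {
                columns,
                is_primary: true,
                ..
            } => Some(columns.iter().cloned()),
            _ => None,
        })
        .flatten()
        .collect()
}

fn main() {
    let constraints = vec![
        TableConstraint::Unique {
            name: Some("__time_index".to_string()),
            columns: vec!["ts".to_string()],
            is_primary: false,
        },
        TableConstraint::Unique {
            name: None,
            columns: vec!["host".to_string(), "idc".to_string()],
            is_primary: true,
        },
    ];
    assert_eq!(find_primary_keys(&constraints), vec!["host", "idc"]);
}
```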

View File

@@ -394,10 +394,11 @@ impl Inserter {
Some(table) => {
let table_info = table.table_info();
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
// TODO(jeremy): alter in batch? (from `handle_metric_row_inserts`)
validate_request_with_table(req, &table)?;
if let Some(alter_expr) =
self.get_alter_table_expr_on_demand(req, table, ctx)?
{
let alter_expr = self.get_alter_table_expr_on_demand(req, table, ctx)?;
if let Some(alter_expr) = alter_expr {
alter_tables.push(alter_expr);
}
}
@@ -591,12 +592,15 @@ impl Inserter {
physical_table: &str,
statement_executor: &StatementExecutor,
) -> Result<Vec<TableRef>> {
let catalog_name = ctx.current_catalog();
let schema_name = ctx.current_schema();
let create_table_exprs = create_tables
.iter()
.map(|req| {
let table_ref = TableReference::full(catalog_name, schema_name, &req.table_name);
let table_ref = TableReference::full(
ctx.current_catalog(),
ctx.current_schema(),
&req.table_name,
);
let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
let mut create_table_expr = build_create_table_expr(&table_ref, request_schema)?;
@@ -611,7 +615,7 @@ impl Inserter {
.collect::<Result<Vec<_>>>()?;
let res = statement_executor
.create_logical_tables(catalog_name, schema_name, &create_table_exprs, ctx.clone())
.create_logical_tables(&create_table_exprs, ctx.clone())
.await;
match res {
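
After the revert, `create_logical_tables` no longer receives catalog/schema arguments; each request is qualified with the session's current catalog and schema when its `TableReference` is built. A small sketch of that qualification step, using hypothetical stand-ins for `QueryContext` and `TableReference` rather than the real `session`/`table` types:

```rust
// Hypothetical, simplified stand-ins; not the real GreptimeDB types.
struct QueryContext {
    catalog: String,
    schema: String,
}

impl QueryContext {
    fn current_catalog(&self) -> &str {
        &self.catalog
    }
    fn current_schema(&self) -> &str {
        &self.schema
    }
}

#[derive(Debug, PartialEq)]
struct TableReference {
    catalog: String,
    schema: String,
    table: String,
}

impl TableReference {
    fn full(catalog: &str, schema: &str, table: &str) -> Self {
        Self {
            catalog: catalog.to_string(),
            schema: schema.to_string(),
            table: table.to_string(),
        }
    }
}

/// Qualify every requested table with the session's current catalog/schema,
/// the way the loop above builds `TableReference::full` per insert request.
fn qualify<'a>(
    ctx: &QueryContext,
    table_names: impl IntoIterator<Item = &'a str>,
) -> Vec<TableReference> {
    table_names
        .into_iter()
        .map(|name| TableReference::full(ctx.current_catalog(), ctx.current_schema(), name))
        .collect()
}

fn main() {
    let ctx = QueryContext { catalog: "greptime".into(), schema: "public".into() };
    let refs = qualify(&ctx, ["cpu_user", "cpu_sys"]);
    assert_eq!(refs[0], TableReference::full("greptime", "public", "cpu_user"));
    assert_eq!(refs.len(), 2);
}
```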

View File

@@ -185,15 +185,12 @@ impl StatementExecutor {
}
Statement::Alter(alter_table) => self.alter_table(alter_table, query_ctx).await,
Statement::DropTable(stmt) => {
let mut table_names = Vec::with_capacity(stmt.table_names().len());
for table_name_stmt in stmt.table_names() {
let (catalog, schema, table) =
table_idents_to_full_name(table_name_stmt, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
table_names.push(TableName::new(catalog, schema, table));
}
self.drop_tables(&table_names[..], stmt.drop_if_exists(), query_ctx.clone())
let (catalog, schema, table) =
table_idents_to_full_name(stmt.table_name(), &query_ctx)
.map_err(BoxedError::new)
.context(error::ExternalSnafu)?;
let table_name = TableName::new(catalog, schema, table);
self.drop_table(table_name, stmt.drop_if_exists(), query_ctx)
.await
}
Statement::DropDatabase(stmt) => {
@@ -209,7 +206,7 @@ impl StatementExecutor {
let (catalog, schema, table) =
table_idents_to_full_name(stmt.table_name(), &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
.context(error::ExternalSnafu)?;
let table_name = TableName::new(catalog, schema, table);
self.truncate_table(table_name, query_ctx).await
}
@@ -226,14 +223,14 @@ impl StatementExecutor {
let (catalog, schema, table) =
table_idents_to_full_name(&show.table_name, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
.context(error::ExternalSnafu)?;
let table_ref = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name: &table })?;
.context(error::CatalogSnafu)?
.context(error::TableNotFoundSnafu { table_name: &table })?;
let table_name = TableName::new(catalog, schema, table);
self.show_create_table(table_name, table_ref, query_ctx)
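
Both branches above first resolve the statement's table identifier into a fully qualified (catalog, schema, table) triple, falling back to the session's defaults for the missing parts. The sketch below shows roughly what `table_idents_to_full_name` does, over plain string slices; the helper and its error type are hypothetical.

```rust
/// Fill in missing catalog/schema parts from the session defaults.
/// The real function works on sqlparser identifiers and richer errors.
fn idents_to_full_name(
    idents: &[&str],
    default_catalog: &str,
    default_schema: &str,
) -> Result<(String, String, String), String> {
    match idents {
        [table] => Ok((default_catalog.into(), default_schema.into(), (*table).into())),
        [schema, table] => Ok((default_catalog.into(), (*schema).into(), (*table).into())),
        [catalog, schema, table] => Ok(((*catalog).into(), (*schema).into(), (*table).into())),
        _ => Err(format!("invalid table name: {}", idents.join("."))),
    }
}

fn main() {
    assert_eq!(
        idents_to_full_name(&["monitor"], "greptime", "public").unwrap(),
        ("greptime".into(), "public".into(), "monitor".into())
    );
    assert_eq!(
        idents_to_full_name(&["db1", "monitor"], "greptime", "public").unwrap(),
        ("greptime".into(), "db1".into(), "monitor".into())
    );
}
```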

View File

@@ -20,7 +20,7 @@ use client::{Output, OutputData, OutputMeta};
use common_base::readable_size::ReadableSize;
use common_datasource::file_format::csv::{CsvConfigBuilder, CsvFormat, CsvOpener};
use common_datasource::file_format::json::{JsonFormat, JsonOpener};
use common_datasource::file_format::orc::{infer_orc_schema, new_orc_stream_reader, ReaderAdapter};
use common_datasource::file_format::orc::{infer_orc_schema, new_orc_stream_reader};
use common_datasource::file_format::{FileFormat, Format};
use common_datasource::lister::{Lister, Source};
use common_datasource::object_store::{build_backend, parse_url};
@@ -46,7 +46,6 @@ use session::context::QueryContextRef;
use snafu::ResultExt;
use table::requests::{CopyTableRequest, InsertRequest};
use table::table_reference::TableReference;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use crate::error::{self, IntoVectorsSnafu, Result};
use crate::statement::StatementExecutor;
@@ -147,16 +146,10 @@ impl StatementExecutor {
path,
}),
Format::Parquet(_) => {
let meta = object_store
.stat(&path)
.await
.context(error::ReadObjectSnafu { path: &path })?;
let mut reader = object_store
.reader(&path)
.await
.context(error::ReadObjectSnafu { path: &path })?
.into_futures_async_read(0..meta.content_length())
.compat();
.context(error::ReadObjectSnafu { path: &path })?;
let metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default())
.await
.context(error::ReadParquetMetadataSnafu)?;
@@ -168,17 +161,12 @@ impl StatementExecutor {
})
}
Format::Orc(_) => {
let meta = object_store
.stat(&path)
.await
.context(error::ReadObjectSnafu { path: &path })?;
let reader = object_store
.reader(&path)
.await
.context(error::ReadObjectSnafu { path: &path })?;
let schema = infer_orc_schema(ReaderAdapter::new(reader, meta.content_length()))
let schema = infer_orc_schema(reader)
.await
.context(error::ReadOrcSnafu)?;
@@ -291,17 +279,11 @@ impl StatementExecutor {
)))
}
FileMetadata::Parquet { metadata, path, .. } => {
let meta = object_store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let reader = object_store
.reader_with(path)
.chunk(DEFAULT_READ_BUFFER)
.buffer(DEFAULT_READ_BUFFER)
.await
.context(error::ReadObjectSnafu { path })?
.into_futures_async_read(0..meta.content_length())
.compat();
.context(error::ReadObjectSnafu { path })?;
let builder =
ParquetRecordBatchStreamBuilder::new_with_metadata(reader, metadata.clone());
let stream = builder
@@ -320,20 +302,14 @@ impl StatementExecutor {
)))
}
FileMetadata::Orc { path, .. } => {
let meta = object_store
.stat(path)
.await
.context(error::ReadObjectSnafu { path })?;
let reader = object_store
.reader_with(path)
.chunk(DEFAULT_READ_BUFFER)
.buffer(DEFAULT_READ_BUFFER)
.await
.context(error::ReadObjectSnafu { path })?;
let stream =
new_orc_stream_reader(ReaderAdapter::new(reader, meta.content_length()))
.await
.context(error::ReadOrcSnafu)?;
let stream = new_orc_stream_reader(reader)
.await
.context(error::ReadOrcSnafu)?;
let projected_schema = Arc::new(
compat_schema
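
The removed lines exist because opendal 0.46 readers are futures-based: the code had to `stat` the object for its length, call `into_futures_async_read(0..len)`, and then `.compat()` the result into a tokio `AsyncRead` for the parquet/ORC readers, while the reverted code hands the opendal `Reader` straight through. Below is a minimal sketch of that futures-to-tokio bridging using an in-memory cursor instead of opendal; the dependency versions in the comment are assumptions.

```rust
// Assumed dependencies:
// futures = "0.3"
// tokio = { version = "1", features = ["macros", "rt", "io-util"] }
// tokio-util = { version = "0.7", features = ["compat"] }
use tokio::io::AsyncReadExt;
use tokio_util::compat::FuturesAsyncReadCompatExt;

#[tokio::main(flavor = "current_thread")]
async fn main() -> std::io::Result<()> {
    // Any `futures::io::AsyncRead` works here; opendal 0.46 readers expose one
    // via `into_futures_async_read(range)`.
    let futures_reader = futures::io::Cursor::new(b"PAR1...".to_vec());

    // `.compat()` adapts it to `tokio::io::AsyncRead`, which is the shape the
    // async parquet/ORC readers expect.
    let mut tokio_reader = futures_reader.compat();

    let mut buf = Vec::new();
    tokio_reader.read_to_end(&mut buf).await?;
    assert_eq!(buf, b"PAR1...".to_vec());
    Ok(())
}
```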

View File

@@ -67,12 +67,13 @@ use table::TableRef;
use super::StatementExecutor;
use crate::error::{
self, AlterExprToRequestSnafu, CatalogSnafu, ColumnDataTypeSnafu, ColumnNotFoundSnafu,
CreateLogicalTablesSnafu, CreateTableInfoSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu,
ExtractTableNamesSnafu, FlowNotFoundSnafu, InvalidPartitionColumnsSnafu,
InvalidPartitionRuleSnafu, InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu,
ParseSqlValueSnafu, Result, SchemaInUseSnafu, SchemaNotFoundSnafu, SubstraitCodecSnafu,
TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu,
UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu,
CreateLogicalTablesSnafu, CreateTableInfoSnafu, DdlWithMultiCatalogsSnafu,
DdlWithMultiSchemasSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu, ExtractTableNamesSnafu,
FlowNotFoundSnafu, InvalidPartitionColumnsSnafu, InvalidPartitionRuleSnafu,
InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result,
SchemaInUseSnafu, SchemaNotFoundSnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu,
TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu,
ViewAlreadyExistsSnafu,
};
use crate::expr_factory;
use crate::statement::show::create_partitions_stmt;
@@ -156,15 +157,8 @@ impl StatementExecutor {
.table_options
.contains_key(LOGICAL_TABLE_METADATA_KEY)
{
let catalog_name = &create_table.catalog_name;
let schema_name = &create_table.schema_name;
return self
.create_logical_tables(
catalog_name,
schema_name,
&[create_table.clone()],
query_ctx,
)
.create_logical_tables(&[create_table.clone()], query_ctx)
.await?
.into_iter()
.next()
@@ -266,8 +260,6 @@ impl StatementExecutor {
#[tracing::instrument(skip_all)]
pub async fn create_logical_tables(
&self,
catalog_name: &str,
schema_name: &str,
create_table_exprs: &[CreateTableExpr],
query_context: QueryContextRef,
) -> Result<Vec<TableRef>> {
@@ -275,16 +267,35 @@ impl StatementExecutor {
ensure!(
!create_table_exprs.is_empty(),
EmptyDdlExprSnafu {
name: "create logic tables"
name: "create table"
}
);
ensure!(
create_table_exprs
.windows(2)
.all(|expr| expr[0].catalog_name == expr[1].catalog_name),
DdlWithMultiCatalogsSnafu {
ddl_name: "create tables"
}
);
let catalog_name = create_table_exprs[0].catalog_name.to_string();
ensure!(
create_table_exprs
.windows(2)
.all(|expr| expr[0].schema_name == expr[1].schema_name),
DdlWithMultiSchemasSnafu {
ddl_name: "create tables"
}
);
let schema_name = create_table_exprs[0].schema_name.to_string();
// Check table names
for create_table in create_table_exprs {
ensure!(
NAME_PATTERN_REG.is_match(&create_table.table_name),
InvalidTableNameSnafu {
table_name: &create_table.table_name,
table_name: create_table.table_name.clone(),
}
);
}
@@ -292,11 +303,11 @@ impl StatementExecutor {
let schema = self
.table_metadata_manager
.schema_manager()
.get(SchemaNameKey::new(catalog_name, schema_name))
.get(SchemaNameKey::new(&catalog_name, &schema_name))
.await
.context(TableMetadataManagerSnafu)?
.context(SchemaNotFoundSnafu {
schema_info: schema_name,
schema_info: &schema_name,
})?;
let mut raw_tables_info = create_table_exprs
@@ -615,7 +626,7 @@ impl StatementExecutor {
ensure!(
!alter_table_exprs.is_empty(),
EmptyDdlExprSnafu {
name: "alter logical tables"
name: "alter table"
}
);
@@ -632,44 +643,18 @@ impl StatementExecutor {
drop_if_exists: bool,
query_context: QueryContextRef,
) -> Result<Output> {
// Reserved for grpc call
self.drop_tables(&[table_name], drop_if_exists, query_context)
if let Some(table) = self
.catalog_manager
.table(
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
)
.await
}
#[tracing::instrument(skip_all)]
pub async fn drop_tables(
&self,
table_names: &[TableName],
drop_if_exists: bool,
query_context: QueryContextRef,
) -> Result<Output> {
let mut tables = Vec::with_capacity(table_names.len());
for table_name in table_names {
if let Some(table) = self
.catalog_manager
.table(
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
)
.await
.context(CatalogSnafu)?
{
tables.push(table.table_info().table_id());
} else if drop_if_exists {
// DROP TABLE IF EXISTS meets table not found - ignored
continue;
} else {
return TableNotFoundSnafu {
table_name: table_name.to_string(),
}
.fail();
}
}
for (table_name, table_id) in table_names.iter().zip(tables.into_iter()) {
self.drop_table_procedure(table_name, table_id, drop_if_exists, query_context.clone())
.context(CatalogSnafu)?
{
let table_id = table.table_info().table_id();
self.drop_table_procedure(&table_name, table_id, drop_if_exists, query_context)
.await?;
// Invalidates local cache ASAP.
@@ -683,8 +668,17 @@ impl StatementExecutor {
)
.await
.context(error::InvalidateTableCacheSnafu)?;
Ok(Output::new_with_affected_rows(0))
} else if drop_if_exists {
// DROP TABLE IF EXISTS meets table not found - ignored
Ok(Output::new_with_affected_rows(0))
} else {
TableNotFoundSnafu {
table_name: table_name.to_string(),
}
.fail()
}
Ok(Output::new_with_affected_rows(0))
}
#[tracing::instrument(skip_all)]
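
The restored `drop_table` flow is: look the table up, run the drop procedure and invalidate the cache when it exists, treat a missing table as a no-op only under `DROP TABLE IF EXISTS`, and fail otherwise. A minimal sketch of that control flow against an in-memory catalog stand-in (the `Catalog` type is hypothetical):

```rust
use std::collections::HashMap;

/// In-memory stand-in for the catalog manager; maps table name -> table id.
struct Catalog {
    tables: HashMap<String, u32>,
}

/// Mirrors the control flow restored above: drop when present, ignore a
/// missing table only when `drop_if_exists` is set, otherwise report an error.
fn drop_table(catalog: &mut Catalog, name: &str, drop_if_exists: bool) -> Result<(), String> {
    if let Some(table_id) = catalog.tables.remove(name) {
        // The real executor submits a drop-table procedure here and then
        // invalidates the local table cache for this table id.
        println!("dropped {name} (table_id={table_id})");
        Ok(())
    } else if drop_if_exists {
        // DROP TABLE IF EXISTS on a missing table is a no-op.
        Ok(())
    } else {
        Err(format!("Table not found: {name}"))
    }
}

fn main() {
    let mut catalog = Catalog {
        tables: HashMap::from([("monitor".to_string(), 1024)]),
    };
    assert!(drop_table(&mut catalog, "monitor", false).is_ok());
    assert!(drop_table(&mut catalog, "monitor", true).is_ok());
    assert!(drop_table(&mut catalog, "monitor", false).is_err());
}
```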

Some files were not shown because too many files have changed in this diff.