Compare commits

...

21 Commits

Author SHA1 Message Date
LFC
48c2841e4d feat: execute python script in distributed mode (#1264)
* feat: execute python script in distributed mode

* fix: rebase develop
2023-04-02 20:36:48 +08:00
Lei, HUANG
d2542552d3 fix: unit test fails when trying to copy table to s3 and copy back (#1302)
fix: unit test fails when trying to copy table to s3 and copy back to greptimedb
2023-04-02 16:43:44 +08:00
Ruihang Xia
c0132e6cc0 feat: impl quantile_over_time function (#1287)
* fix qualifier alias

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix in another way

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl quantile_over_time

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-04-02 16:20:32 +08:00
dennis zhuang
aea932b891 fix: checkpoint fails when deleting old logs fails (#1300) 2023-04-02 11:06:36 +08:00
Lei, HUANG
0253136333 feat: buffered parquet writer (#1263)
* wip: use

* rebase develop

* chore: fix typos

* feat: replace export parquet writer with buffered writer

* fix: some cr comments

* feat: add sst_write_buffer_size config item to configure how many bytes to buffer before flushing to the underlying storage

* chore: rebase onto develop
2023-04-01 17:21:19 +08:00
Eugene Tolbakov
6a05f617a4 feat(stddev_over_time): add initial implementation (#1289)
* feat(stddev_over_time): add initial implementation

* feat(stddev_over_time): address code review remarks, add compensated summation

* feat(stddev_over_time): fix fmt issues

* feat(stddev_over_time): add docs, minor renamings
2023-04-01 17:16:51 +08:00
localhost
a2b262ebc0 chore: add http metrics server in datanode when greptime starts in distributed mode (#1256)
* chore: add http metrics server in datanode when greptime starts in distributed mode

* chore: add some docs and license

* chore: change metrics_addr to resolve 'address already in use' error

* chore: add metrics for meta service

* chore: switch metrics exporter http server from hyper to axum

* chore: format

* fix: datanode mode branching error

* fix: sqlness test 'address already in use' and start metrics in default config

* chore: change metrics location

* chore: use builder pattern to build httpserver

* chore: remove useless debug_assert macro in httpserver builder

* chore: resolve conflicting build error

* chore: format code
2023-03-31 18:37:52 +08:00
dennis zhuang
972f64c3d7 chore: improve opendal layers (#1295)
* chore: improve opendal layers

* chore: log level
2023-03-31 09:48:11 +00:00
LFC
eb77f9aafd feat: start LocalManager in Metasrv (#1279)
* feat: procedure store in Metasrv, backed by Etcd; start `LocalManager` in Metasrv leader

* fix: resolve PR comments

* fix: resolve PR comments
2023-03-31 15:32:59 +08:00
Yingwen
dee20144d7 feat: Implement procedure to alter a table for mito engine (#1259)
* feat: wip

* fix: Fix CreateMitoTable::table_schema not initialized from json

* feat: Implement AlterMitoTable procedure

* test: Add test for alter procedure

* feat: Register alter procedure

* fix: Recover procedures after catalog manager is started

* feat: Simplify usage of table schema in create table procedure

* test: Add rename test

* test: Add drop columns test
2023-03-31 14:40:54 +08:00
dennis zhuang
563adbabe9 feat!: improve region manifest service (#1268)
* feat: try to use batch delete in ManifestLogStorage

* feat: clean temp dir at startup with file backend

* refactor: export region manifest checkpoint actions margin and refactor storage options

* feat: purge unused manifest and checkpoint files with a repeated gc task

* chore: debug deleted logs

* feat: adds RepeatedTask and refactor all gc tasks

* chore: clean code

* feat: export gc_duration to manifest config

* test: assert gc works

* fix: typo

* Update src/common/runtime/src/error.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* Update src/common/runtime/src/repeated_task.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* Update src/common/runtime/src/repeated_task.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* fix: format

* Update src/common/runtime/src/repeated_task.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: by CR comments

* chore: by CR comments

* fix: serde default for StorageConfig

* chore: remove compaction config in StandaloneOptions

---------

Co-authored-by: LFC <bayinamine@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-03-31 10:42:00 +08:00
Ruihang Xia
b71bb4e5fa feat: implement restart argument for sqlness-runner (#1262)
* refactor standalone mode and distributed mode start process

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement restart arg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update tests/runner/src/env.rs

Co-authored-by: LFC <bayinamine@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: LFC <bayinamine@gmail.com>
2023-03-31 10:02:19 +08:00
LFC
fae293310c feat: unify describe table execution (#1285) 2023-03-31 09:59:19 +08:00
LFC
3e51640442 ci: release binary with embedded dashboard enabled (#1283) 2023-03-30 21:35:47 +08:00
discord9
b40193d7da test: align RsPy PyO3 Behavior (#1280)
* feat: allow PyList Return in PyO3 Backend

* feat: mixed list

* feat: align&test

* chore: PR advices
2023-03-30 17:45:21 +08:00
Ruihang Xia
b5e5f8e555 chore(deps): bump arrow and parquet to 36.0.0, and datafusion to the latest (#1282)
* chore: update arrow, parquet to 36.0 and datafusion

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update deps

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: LFC <bayinamine@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: LFC <bayinamine@gmail.com>
2023-03-30 16:24:10 +08:00
zyy17
192fa0caa5 ci: only builds binaries for manually trigger workflow (#1284) 2023-03-30 15:58:28 +08:00
Weny Xu
30eb676d6a feat: implement create external table parser (#1252)
* refactor: move parse_option_string to util

* feat: implement create external table parser
2023-03-30 13:37:53 +08:00
Ruihang Xia
d7cadf6e6d fix: nyc-taxi bench tools and limit max parallel compaction task number (#1275)
* limit max parallel compaction subtasks

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* correct type map

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-29 09:16:53 +00:00
Lei, HUANG
d7a1435517 fix: remove backtrace from ratelimit error (#1273) 2023-03-29 15:58:01 +08:00
xiaomin tang
0943079de2 feat: Create SECURITY.md (#1270)
Create SECURITY.md
2023-03-28 19:14:29 +08:00
151 changed files with 4582 additions and 1485 deletions

View File

@@ -5,6 +5,7 @@ on:
schedule:
# At 00:00 on Monday.
- cron: '0 0 * * 1'
# Manual trigger only builds binaries.
workflow_dispatch:
name: Release
@@ -32,38 +33,42 @@ jobs:
os: ubuntu-2004-16-cores
file: greptime-linux-amd64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-darwin-arm64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-darwin-amd64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: x86_64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-amd64-pyo3
continue-on-error: false
opts: "-F pyo3_backend"
opts: "-F pyo3_backend,servers/dashboard"
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64-pyo3
continue-on-error: false
opts: "-F pyo3_backend"
opts: "-F pyo3_backend,servers/dashboard"
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-darwin-arm64-pyo3
continue-on-error: false
opts: "-F pyo3_backend"
opts: "-F pyo3_backend,servers/dashboard"
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-darwin-amd64-pyo3
continue-on-error: false
opts: "-F pyo3_backend"
opts: "-F pyo3_backend,servers/dashboard"
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.continue-on-error }}
if: github.repository == 'GreptimeTeam/greptimedb'
@@ -164,7 +169,7 @@ jobs:
export LD_LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LIBRARY_PATH
export PATH=$PYTHON_INSTALL_PATH_AMD64/bin:$PATH
echo "implementation=CPython" >> pyo3.config
echo "version=3.10" >> pyo3.config
echo "implementation=CPython" >> pyo3.config
@@ -212,7 +217,7 @@ jobs:
name: Build docker image
needs: [build]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb'
if: github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'workflow_dispatch'
steps:
- name: Checkout sources
uses: actions/checkout@v3
@@ -298,7 +303,7 @@ jobs:
# Release artifacts only when all the artifacts are built successfully.
needs: [build,docker]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb'
if: github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'workflow_dispatch'
steps:
- name: Checkout sources
uses: actions/checkout@v3
@@ -341,7 +346,7 @@ jobs:
name: Push docker image to UCloud Container Registry
needs: [docker]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb'
if: github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'workflow_dispatch'
# Push to uhub may fail (500 error), but we don't want to block the release process. The failed job will be retried manually.
continue-on-error: true
steps:

607
Cargo.lock generated

File diff suppressed because it is too large.

View File

@@ -51,22 +51,22 @@ edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
arrow = { version = "34.0" }
arrow-array = "34.0"
arrow-flight = "34.0"
arrow-schema = { version = "34.0", features = ["serde"] }
arrow = { version = "36.0" }
arrow-array = "36.0"
arrow-flight = "36.0"
arrow-schema = { version = "36.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "8e125d2ecf242b4f4b81f06839900dbb2037cc2a" }
futures = "0.3"
futures-util = "0.3"
parquet = "34.0"
parquet = "36.0"
paste = "1.0"
prost = "0.11"
rand = "0.8"

19
SECURITY.md Normal file
View File

@@ -0,0 +1,19 @@
# Security Policy
## Supported Versions
| Version | Supported |
| ------- | ------------------ |
| >= v0.1.0 | :white_check_mark: |
| < v0.1.0 | :x: |
## Reporting a Vulnerability
We place great importance on the security of GreptimeDB code, software,
and cloud platform. If you come across a security vulnerability in GreptimeDB,
we kindly request that you inform us immediately. We will thoroughly investigate
all valid reports and make every effort to resolve the issue promptly.
To report any issues or vulnerabilities, please email us at info@greptime.com, rather than
posting publicly on GitHub. Be sure to provide us with the version identifier as well as details
on how the vulnerability can be exploited.

View File

@@ -126,12 +126,13 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
let (values, datatype) = build_values(array);
let column = Column {
column_name: field.name().to_owned(),
values: Some(values),
null_mask: array
.data()
.null_bitmap()
.nulls()
.map(|bitmap| bitmap.buffer().as_slice().to_vec())
.unwrap_or_default(),
datatype: datatype.into(),
@@ -182,10 +183,10 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
let values = array.values();
(
Values {
i64_values: values.to_vec(),
ts_microsecond_values: values.to_vec(),
..Default::default()
},
ColumnDataType::Int64,
ColumnDataType::TimestampMicrosecond,
)
}
DataType::Utf8 => {
@@ -252,13 +253,13 @@ fn create_table_expr() -> CreateTableExpr {
},
ColumnDef {
name: "tpep_pickup_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
datatype: ColumnDataType::TimestampMicrosecond as i32,
is_nullable: true,
default_constraint: vec![],
},
ColumnDef {
name: "tpep_dropoff_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
datatype: ColumnDataType::TimestampMicrosecond as i32,
is_nullable: true,
default_constraint: vec![],
},

View File

@@ -37,11 +37,19 @@ type = "File"
data_dir = "/tmp/greptimedb/data/"
# Compaction options, see `standalone.example.toml`.
[compaction]
[storage.compaction]
max_inflight_tasks = 4
max_files_in_level0 = 8
max_purge_tasks = 32
# Storage manifest options
[storage.manifest]
# Region checkpoint actions margin.
# Create a checkpoint every <checkpoint_margin> actions.
checkpoint_margin = 10
# Region manifest logs and checkpoints gc execution duration
gc_duration = '30s'
# Procedure storage options, see `standalone.example.toml`.
# [procedure.store]
# type = "File"

View File

@@ -99,7 +99,7 @@ type = "File"
data_dir = "/tmp/greptimedb/data/"
# Compaction options.
[compaction]
[storage.compaction]
# Max task number that can concurrently run.
max_inflight_tasks = 4
# Max files in level 0 to trigger compaction.
@@ -107,6 +107,15 @@ max_files_in_level0 = 8
# Max task number for SST purge task after compaction.
max_purge_tasks = 32
# Storage manifest options
[storage.manifest]
# Region checkpoint actions margin.
# Create a checkpoint every <checkpoint_margin> actions.
checkpoint_margin = 10
# Region manifest logs and checkpoints gc execution duration
gc_duration = '30s'
# Procedure storage options.
# Uncomment to enable.
# [procedure.store]

View File

@@ -219,6 +219,12 @@ pub enum Error {
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display("Invalid system table definition: {err_msg}"))]
InvalidSystemTableDef {
err_msg: String,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -231,7 +237,8 @@ impl ErrorExt for Error {
| Error::TableNotFound { .. }
| Error::IllegalManagerState { .. }
| Error::CatalogNotFound { .. }
| Error::InvalidEntryType { .. } => StatusCode::Unexpected,
| Error::InvalidEntryType { .. }
| Error::InvalidSystemTableDef { .. } => StatusCode::Unexpected,
Error::SystemCatalog { .. }
| Error::EmptyValue { .. }

View File

@@ -16,7 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::format_full_table_name;
use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
use datafusion::common::{ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::TableSource;
use session::context::QueryContext;
@@ -87,9 +87,8 @@ impl DfTableSourceProvider {
pub async fn resolve_table(
&mut self,
table_ref: OwnedTableReference,
table_ref: TableReference<'_>,
) -> Result<Arc<dyn TableSource>> {
let table_ref = table_ref.as_table_reference();
let table_ref = self.resolve_table_ref(table_ref)?;
let resolved_name = table_ref.to_string();

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{
@@ -86,6 +88,10 @@ struct StartCommand {
wal_dir: Option<String>,
#[clap(long)]
procedure_dir: Option<String>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
http_timeout: Option<u64>,
}
impl StartCommand {
@@ -146,7 +152,7 @@ impl TryFrom<StartCommand> for DatanodeOptions {
}
if let Some(data_dir) = cmd.data_dir {
opts.storage = ObjectStoreConfig::File(FileConfig { data_dir });
opts.storage.store = ObjectStoreConfig::File(FileConfig { data_dir });
}
if let Some(wal_dir) = cmd.wal_dir {
@@ -155,6 +161,12 @@ impl TryFrom<StartCommand> for DatanodeOptions {
if let Some(procedure_dir) = cmd.procedure_dir {
opts.procedure = Some(ProcedureConfig::from_file_path(procedure_dir));
}
if let Some(http_addr) = cmd.http_addr {
opts.http_opts.addr = http_addr
}
if let Some(http_timeout) = cmd.http_timeout {
opts.http_opts.timeout = Duration::from_secs(http_timeout)
}
Ok(opts)
}
@@ -166,8 +178,9 @@ mod tests {
use std::io::Write;
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_test_util::temp_dir::create_named_temp_file;
use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
use datanode::datanode::{CompactionConfig, ObjectStoreConfig, RegionManifestConfig};
use servers::Mode;
use super::*;
@@ -203,10 +216,14 @@ mod tests {
type = "File"
data_dir = "/tmp/greptimedb/data/"
[compaction]
max_inflight_tasks = 4
max_files_in_level0 = 8
[storage.compaction]
max_inflight_tasks = 3
max_files_in_level0 = 7
max_purge_tasks = 32
[storage.manifest]
checkpoint_margin = 9
gc_duration = '7s'
"#;
write!(file, "{}", toml_str).unwrap();
@@ -237,9 +254,9 @@ mod tests {
assert_eq!(3000, timeout_millis);
assert!(tcp_nodelay);
match options.storage {
ObjectStoreConfig::File(FileConfig { data_dir }) => {
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
match &options.storage.store {
ObjectStoreConfig::File(FileConfig { data_dir, .. }) => {
assert_eq!("/tmp/greptimedb/data/", data_dir)
}
ObjectStoreConfig::S3 { .. } => unreachable!(),
ObjectStoreConfig::Oss { .. } => unreachable!(),
@@ -247,11 +264,19 @@ mod tests {
assert_eq!(
CompactionConfig {
max_inflight_tasks: 4,
max_files_in_level0: 8,
max_inflight_tasks: 3,
max_files_in_level0: 7,
max_purge_tasks: 32,
sst_write_buffer_size: ReadableSize::mb(8),
},
options.compaction
options.storage.compaction,
);
assert_eq!(
RegionManifestConfig {
checkpoint_margin: Some(9),
gc_duration: Some(Duration::from_secs(7)),
},
options.storage.manifest,
);
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use clap::Parser;
use common_telemetry::{info, logging, warn};
use meta_srv::bootstrap::MetaSrvInstance;
@@ -80,6 +82,10 @@ struct StartCommand {
selector: Option<String>,
#[clap(long)]
use_memory_store: bool,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
http_timeout: Option<u64>,
}
impl StartCommand {
@@ -128,6 +134,13 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
opts.use_memory_store = true;
}
if let Some(http_addr) = cmd.http_addr {
opts.http_opts.addr = http_addr;
}
if let Some(http_timeout) = cmd.http_timeout {
opts.http_opts.timeout = Duration::from_secs(http_timeout);
}
Ok(opts)
}
}
@@ -150,6 +163,8 @@ mod tests {
config_file: None,
selector: Some("LoadBased".to_string()),
use_memory_store: false,
http_addr: None,
http_timeout: None,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -178,6 +193,8 @@ mod tests {
selector: None,
config_file: Some(file.path().to_str().unwrap().to_string()),
use_memory_store: false,
http_addr: None,
http_timeout: None,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);

View File

@@ -17,9 +17,7 @@ use std::sync::Arc;
use clap::Parser;
use common_base::Plugins;
use common_telemetry::info;
use datanode::datanode::{
CompactionConfig, Datanode, DatanodeOptions, ObjectStoreConfig, ProcedureConfig, WalConfig,
};
use datanode::datanode::{Datanode, DatanodeOptions, ProcedureConfig, StorageConfig, WalConfig};
use datanode::instance::InstanceRef;
use frontend::frontend::FrontendOptions;
use frontend::grpc::GrpcOptions;
@@ -82,8 +80,7 @@ pub struct StandaloneOptions {
pub prometheus_options: Option<PrometheusOptions>,
pub prom_options: Option<PromOptions>,
pub wal: WalConfig,
pub storage: ObjectStoreConfig,
pub compaction: CompactionConfig,
pub storage: StorageConfig,
pub procedure: Option<ProcedureConfig>,
}
@@ -101,8 +98,7 @@ impl Default for StandaloneOptions {
prometheus_options: Some(PrometheusOptions::default()),
prom_options: Some(PromOptions::default()),
wal: WalConfig::default(),
storage: ObjectStoreConfig::default(),
compaction: CompactionConfig::default(),
storage: StorageConfig::default(),
procedure: None,
}
}
@@ -129,7 +125,6 @@ impl StandaloneOptions {
enable_memory_catalog: self.enable_memory_catalog,
wal: self.wal,
storage: self.storage,
compaction: self.compaction,
procedure: self.procedure,
..Default::default()
}
@@ -241,8 +236,9 @@ async fn build_frontend(
plugins: Arc<Plugins>,
datanode_instance: InstanceRef,
) -> Result<FeInstance> {
let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
frontend_instance.set_script_handler(datanode_instance);
let mut frontend_instance = FeInstance::try_new_standalone(datanode_instance.clone())
.await
.context(StartFrontendSnafu)?;
frontend_instance.set_plugins(plugins.clone());
Ok(frontend_instance)
}

View File

@@ -53,6 +53,10 @@ impl ReadableSize {
pub const fn as_mb(self) -> u64 {
self.0 / MIB
}
pub const fn as_bytes(self) -> u64 {
self.0
}
}
impl Div<u64> for ReadableSize {
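For orientation, a minimal usage sketch of the new as_bytes accessor added above; the helper function below is hypothetical and only ReadableSize, as_mb and as_bytes come from this crate.

use common_base::readable_size::ReadableSize;

// Hypothetical helper: turn a human-readable size into a raw byte capacity.
fn capacity(size: ReadableSize) -> usize {
    // as_bytes() exposes the underlying byte count, complementing as_mb().
    size.as_bytes() as usize
}

// e.g. capacity(ReadableSize::mb(8)) == 8 * 1024 * 1024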

View File

@@ -36,6 +36,8 @@ macro_rules! ok {
}
pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
let mut result = TokenStream::new();
// extract arg map
let arg_pairs = parse_macro_input!(args as AttributeArgs);
let arg_span = arg_pairs[0].span();
@@ -59,12 +61,17 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
let arg_types = ok!(extract_input_types(inputs));
// build the struct and its impl block
let struct_code = build_struct(
attrs,
vis,
ok!(get_ident(&arg_map, "name", arg_span)),
ok!(get_ident(&arg_map, "display_name", arg_span)),
);
// only do this when `display_name` is specified
if let Ok(display_name) = get_ident(&arg_map, "display_name", arg_span) {
let struct_code = build_struct(
attrs,
vis,
ok!(get_ident(&arg_map, "name", arg_span)),
display_name,
);
result.extend(struct_code);
}
let calc_fn_code = build_calc_fn(
ok!(get_ident(&arg_map, "name", arg_span)),
arg_types,
@@ -77,8 +84,6 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
}
.into();
let mut result = TokenStream::new();
result.extend(struct_code);
result.extend(calc_fn_code);
result.extend(input_fn_code);
result

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::string::FromUtf8Error;
use std::sync::Arc;
use common_error::prelude::*;
@@ -47,10 +48,11 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Failed to put {}, source: {}", key, source))]
#[snafu(display("Failed to put state, key: '{key}', source: {source}"))]
PutState {
key: String,
source: object_store::Error,
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to delete {}, source: {}", key, source))]
@@ -59,10 +61,18 @@ pub enum Error {
source: object_store::Error,
},
#[snafu(display("Failed to list {}, source: {}", path, source))]
#[snafu(display("Failed to delete keys: '{keys}', source: {source}"))]
DeleteStates {
keys: String,
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to list state, path: '{path}', source: {source}"))]
ListState {
path: String,
source: object_store::Error,
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to read {}, source: {}", key, source))]
@@ -107,6 +117,9 @@ pub enum Error {
source: Arc<Error>,
procedure_id: ProcedureId,
},
#[snafu(display("Corrupted data, error: {source}"))]
CorruptedData { source: FromUtf8Error },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -114,11 +127,13 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::External { source } => source.status_code(),
Error::External { source }
| Error::PutState { source, .. }
| Error::DeleteStates { source, .. }
| Error::ListState { source, .. } => source.status_code(),
Error::ToJson { .. }
| Error::PutState { .. }
| Error::DeleteState { .. }
| Error::ListState { .. }
| Error::ReadState { .. }
| Error::FromJson { .. }
| Error::RetryTimesExceeded { .. }
@@ -127,7 +142,7 @@ impl ErrorExt for Error {
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
StatusCode::InvalidArguments
}
Error::ProcedurePanic { .. } => StatusCode::Unexpected,
Error::ProcedurePanic { .. } | Error::CorruptedData { .. } => StatusCode::Unexpected,
Error::ProcedureExec { source, .. } => source.status_code(),
}
}

View File

@@ -17,7 +17,7 @@
pub mod error;
pub mod local;
mod procedure;
mod store;
pub mod store;
pub mod watcher;
pub use crate::error::{Error, Result};

View File

@@ -22,7 +22,6 @@ use std::time::Duration;
use async_trait::async_trait;
use backon::ExponentialBuilder;
use common_telemetry::logging;
use object_store::ObjectStore;
use snafu::ensure;
use tokio::sync::watch::{self, Receiver, Sender};
use tokio::sync::Notify;
@@ -31,7 +30,7 @@ use crate::error::{DuplicateProcedureSnafu, LoaderConflictSnafu, Result};
use crate::local::lock::LockMap;
use crate::local::runner::Runner;
use crate::procedure::BoxedProcedureLoader;
use crate::store::{ObjectStateStore, ProcedureMessage, ProcedureStore, StateStoreRef};
use crate::store::{ProcedureMessage, ProcedureStore, StateStoreRef};
use crate::{
BoxedProcedure, ContextProvider, LockKey, ProcedureId, ProcedureManager, ProcedureState,
ProcedureWithId, Watcher,
@@ -291,12 +290,19 @@ impl ManagerContext {
/// Config for [LocalManager].
#[derive(Debug)]
pub struct ManagerConfig {
/// Object store
pub object_store: ObjectStore,
pub max_retry_times: usize,
pub retry_delay: Duration,
}
impl Default for ManagerConfig {
fn default() -> Self {
Self {
max_retry_times: 3,
retry_delay: Duration::from_millis(500),
}
}
}
/// A [ProcedureManager] that maintains procedure states locally.
pub struct LocalManager {
manager_ctx: Arc<ManagerContext>,
@@ -307,10 +313,10 @@ pub struct LocalManager {
impl LocalManager {
/// Create a new [LocalManager] with specific `config`.
pub fn new(config: ManagerConfig) -> LocalManager {
pub fn new(config: ManagerConfig, state_store: StateStoreRef) -> LocalManager {
LocalManager {
manager_ctx: Arc::new(ManagerContext::new()),
state_store: Arc::new(ObjectStateStore::new(config.object_store)),
state_store,
max_retry_times: config.max_retry_times,
retry_delay: config.retry_delay,
}
@@ -423,6 +429,7 @@ impl ProcedureManager for LocalManager {
mod test_util {
use common_test_util::temp_dir::TempDir;
use object_store::services::Fs as Builder;
use object_store::ObjectStore;
use super::*;
@@ -446,6 +453,7 @@ mod tests {
use super::*;
use crate::error::Error;
use crate::store::ObjectStateStore;
use crate::{Context, Procedure, Status};
#[test]
@@ -554,11 +562,11 @@ mod tests {
fn test_register_loader() {
let dir = create_temp_dir("register");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
max_retry_times: 3,
retry_delay: Duration::from_millis(500),
};
let manager = LocalManager::new(config);
let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
let manager = LocalManager::new(config, state_store);
manager
.register_loader("ProcedureToLoad", ProcedureToLoad::loader())
@@ -575,11 +583,11 @@ mod tests {
let dir = create_temp_dir("recover");
let object_store = test_util::new_object_store(&dir);
let config = ManagerConfig {
object_store: object_store.clone(),
max_retry_times: 3,
retry_delay: Duration::from_millis(500),
};
let manager = LocalManager::new(config);
let state_store = Arc::new(ObjectStateStore::new(object_store.clone()));
let manager = LocalManager::new(config, state_store);
manager
.register_loader("ProcedureToLoad", ProcedureToLoad::loader())
@@ -621,11 +629,11 @@ mod tests {
async fn test_submit_procedure() {
let dir = create_temp_dir("submit");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
max_retry_times: 3,
retry_delay: Duration::from_millis(500),
};
let manager = LocalManager::new(config);
let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
let manager = LocalManager::new(config, state_store);
let procedure_id = ProcedureId::random();
assert!(manager
@@ -669,11 +677,11 @@ mod tests {
async fn test_state_changed_on_err() {
let dir = create_temp_dir("on_err");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
max_retry_times: 3,
retry_delay: Duration::from_millis(500),
};
let manager = LocalManager::new(config);
let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
let manager = LocalManager::new(config, state_store);
#[derive(Debug)]
struct MockProcedure {

View File

@@ -26,7 +26,7 @@ use crate::error::{Result, ToJsonSnafu};
pub(crate) use crate::store::state_store::{ObjectStateStore, StateStoreRef};
use crate::{BoxedProcedure, ProcedureId};
mod state_store;
pub mod state_store;
/// Serialized data of a procedure.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]

View File

@@ -17,21 +17,23 @@ use std::sync::Arc;
use async_stream::try_stream;
use async_trait::async_trait;
use common_error::ext::PlainError;
use common_error::prelude::{BoxedError, StatusCode};
use futures::{Stream, StreamExt};
use object_store::{EntryMode, Metakey, ObjectStore};
use snafu::ResultExt;
use crate::error::{DeleteStateSnafu, Error, ListStateSnafu, PutStateSnafu, Result};
use crate::error::{DeleteStateSnafu, ListStateSnafu, PutStateSnafu, Result};
/// Key value from state store.
type KeyValue = (String, Vec<u8>);
pub type KeyValue = (String, Vec<u8>);
/// Stream that yields [KeyValue].
type KeyValueStream = Pin<Box<dyn Stream<Item = Result<KeyValue>> + Send>>;
pub type KeyValueStream = Pin<Box<dyn Stream<Item = Result<KeyValue>> + Send>>;
/// Storage layer for persisting procedure's state.
#[async_trait]
pub(crate) trait StateStore: Send + Sync {
pub trait StateStore: Send + Sync {
/// Puts `key` and `value` into the store.
async fn put(&self, key: &str, value: Vec<u8>) -> Result<()>;
@@ -51,13 +53,13 @@ pub(crate) type StateStoreRef = Arc<dyn StateStore>;
/// [StateStore] based on [ObjectStore].
#[derive(Debug)]
pub(crate) struct ObjectStateStore {
pub struct ObjectStateStore {
store: ObjectStore,
}
impl ObjectStateStore {
/// Returns a new [ObjectStateStore] with specific `store`.
pub(crate) fn new(store: ObjectStore) -> ObjectStateStore {
pub fn new(store: ObjectStore) -> ObjectStateStore {
ObjectStateStore { store }
}
}
@@ -68,31 +70,65 @@ impl StateStore for ObjectStateStore {
self.store
.write(key, value)
.await
.map_err(|e| {
BoxedError::new(PlainError::new(
e.to_string(),
StatusCode::StorageUnavailable,
))
})
.context(PutStateSnafu { key })
}
async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
let path_string = path.to_string();
let mut lister = self.store.scan(path).await.map_err(|e| Error::ListState {
path: path_string.clone(),
source: e,
})?;
let mut lister = self
.store
.scan(path)
.await
.map_err(|e| {
BoxedError::new(PlainError::new(
e.to_string(),
StatusCode::StorageUnavailable,
))
})
.with_context(|_| ListStateSnafu {
path: path_string.clone(),
})?;
let store = self.store.clone();
let stream = try_stream!({
while let Some(res) = lister.next().await {
let entry = res.context(ListStateSnafu { path: &path_string })?;
let entry = res
.map_err(|e| {
BoxedError::new(PlainError::new(
e.to_string(),
StatusCode::StorageUnavailable,
))
})
.context(ListStateSnafu { path: &path_string })?;
let key = entry.path();
let metadata = store
.metadata(&entry, Metakey::Mode)
.await
.map_err(|e| {
BoxedError::new(PlainError::new(
e.to_string(),
StatusCode::StorageUnavailable,
))
})
.context(ListStateSnafu { path: key })?;
if let EntryMode::FILE = metadata.mode() {
let value = store
.read(key)
.await
.map_err(|e| {
BoxedError::new(PlainError::new(
e.to_string(),
StatusCode::StorageUnavailable,
))
})
.context(ListStateSnafu { path: key })?;
yield (key.to_string(), value);
}

View File

@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-trait.workspace = true
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
metrics = "0.20"
@@ -12,6 +13,7 @@ once_cell = "1.12"
paste.workspace = true
snafu.workspace = true
tokio.workspace = true
tokio-util.workspace = true
[dev-dependencies]
tokio-test = "0.4"

View File

@@ -15,6 +15,7 @@
use std::any::Any;
use common_error::prelude::*;
use tokio::task::JoinError;
pub type Result<T> = std::result::Result<T, Error>;
@@ -26,6 +27,19 @@ pub enum Error {
source: std::io::Error,
backtrace: Backtrace,
},
#[snafu(display("Repeated task {} not started yet", name))]
IllegalState { name: String, backtrace: Backtrace },
#[snafu(display(
"Failed to wait for repeated task {} to stop, source: {}",
name,
source
))]
WaitGcTaskStop {
name: String,
source: JoinError,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {

View File

@@ -14,7 +14,8 @@
pub mod error;
mod global;
pub mod metric;
mod metrics;
mod repeated_task;
pub mod runtime;
pub use global::{
@@ -23,4 +24,5 @@ pub use global::{
spawn_read, spawn_write, write_runtime,
};
pub use crate::repeated_task::{RepeatedTask, TaskFunction, TaskFunctionRef};
pub use crate::runtime::{Builder, JoinError, JoinHandle, Runtime};

View File

@@ -0,0 +1,174 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use common_error::prelude::ErrorExt;
use common_telemetry::logging;
use snafu::{ensure, OptionExt, ResultExt};
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use crate::error::{IllegalStateSnafu, Result, WaitGcTaskStopSnafu};
use crate::Runtime;
#[async_trait::async_trait]
pub trait TaskFunction<E: ErrorExt> {
async fn call(&self) -> std::result::Result<(), E>;
fn name(&self) -> &str;
}
pub type TaskFunctionRef<E> = Arc<dyn TaskFunction<E> + Send + Sync>;
pub struct RepeatedTask<E> {
cancel_token: Mutex<Option<CancellationToken>>,
task_handle: Mutex<Option<JoinHandle<()>>>,
started: AtomicBool,
interval: Duration,
task_fn: TaskFunctionRef<E>,
}
impl<E: ErrorExt> std::fmt::Display for RepeatedTask<E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "RepeatedTask({})", self.task_fn.name())
}
}
impl<E: ErrorExt> std::fmt::Debug for RepeatedTask<E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("RepeatedTask")
.field(&self.task_fn.name())
.finish()
}
}
impl<E: ErrorExt + 'static> RepeatedTask<E> {
pub fn new(interval: Duration, task_fn: TaskFunctionRef<E>) -> Self {
Self {
cancel_token: Mutex::new(None),
task_handle: Mutex::new(None),
started: AtomicBool::new(false),
interval,
task_fn,
}
}
pub fn started(&self) -> bool {
self.started.load(Ordering::Relaxed)
}
pub async fn start(&self, runtime: Runtime) -> Result<()> {
let token = CancellationToken::new();
let interval = self.interval;
let child = token.child_token();
let task_fn = self.task_fn.clone();
// TODO(hl): Maybe spawn to a blocking runtime.
let handle = runtime.spawn(async move {
loop {
tokio::select! {
_ = tokio::time::sleep(interval) => {}
_ = child.cancelled() => {
return;
}
}
if let Err(e) = task_fn.call().await {
logging::error!(e; "Failed to run repeated task: {}", task_fn.name());
}
}
});
*self.cancel_token.lock().await = Some(token);
*self.task_handle.lock().await = Some(handle);
self.started.store(true, Ordering::Relaxed);
logging::debug!(
"Repeated task {} started with interval: {:?}",
self.task_fn.name(),
self.interval
);
Ok(())
}
pub async fn stop(&self) -> Result<()> {
let name = self.task_fn.name();
ensure!(
self.started
.compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed)
.is_ok(),
IllegalStateSnafu { name }
);
let token = self
.cancel_token
.lock()
.await
.take()
.context(IllegalStateSnafu { name })?;
let handle = self
.task_handle
.lock()
.await
.take()
.context(IllegalStateSnafu { name })?;
token.cancel();
handle.await.context(WaitGcTaskStopSnafu { name })?;
logging::debug!("Repeated task {} stopped", name);
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::atomic::AtomicI32;
use super::*;
struct TickTask {
n: AtomicI32,
}
#[async_trait::async_trait]
impl TaskFunction<crate::error::Error> for TickTask {
fn name(&self) -> &str {
"test"
}
async fn call(&self) -> Result<()> {
self.n.fetch_add(1, Ordering::Relaxed);
Ok(())
}
}
#[tokio::test]
async fn test_repeated_task() {
common_telemetry::init_default_ut_logging();
let task_fn = Arc::new(TickTask {
n: AtomicI32::new(0),
});
let task = RepeatedTask::new(Duration::from_millis(100), task_fn.clone());
task.start(crate::bg_runtime()).await.unwrap();
tokio::time::sleep(Duration::from_millis(550)).await;
task.stop().await.unwrap();
assert_eq!(task_fn.n.load(Ordering::Relaxed), 5);
}
}
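For orientation, a sketch of how a caller might wire a periodic job into this API; GcTask and run_gc below are hypothetical, while RepeatedTask, TaskFunction, Runtime and the error types are the items defined or re-exported in this changeset.

use std::sync::Arc;
use std::time::Duration;

use common_runtime::{RepeatedTask, Runtime, TaskFunction};

// Hypothetical periodic job; only the trait wiring mirrors the file above.
struct GcTask;

#[async_trait::async_trait]
impl TaskFunction<common_runtime::error::Error> for GcTask {
    fn name(&self) -> &str {
        "gc"
    }

    async fn call(&self) -> common_runtime::error::Result<()> {
        // periodic work goes here (omitted)
        Ok(())
    }
}

async fn run_gc(runtime: Runtime) -> common_runtime::error::Result<()> {
    // The spawned loop sleeps `interval`, invokes the task, and exits once cancelled.
    let task = RepeatedTask::new(Duration::from_secs(30), Arc::new(GcTask));
    task.start(runtime).await?;
    // ... later, typically during shutdown ...
    task.stop().await
}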

View File

@@ -24,7 +24,7 @@ use tokio::sync::oneshot;
pub use tokio::task::{JoinError, JoinHandle};
use crate::error::*;
use crate::metric::*;
use crate::metrics::*;
/// A runtime to run future tasks
#[derive(Clone, Debug)]

View File

@@ -581,7 +581,7 @@ pub fn expression_from_df_expr(
| Expr::ScalarSubquery(..)
| Expr::Placeholder { .. }
| Expr::QualifiedWildcard { .. } => todo!(),
Expr::GroupingSet(_) => UnsupportedExprSnafu {
Expr::GroupingSet(_) | Expr::OuterReferenceColumn(_, _) => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,

View File

@@ -22,9 +22,10 @@ use catalog::CatalogManagerRef;
use common_catalog::format_full_table_name;
use common_telemetry::debug;
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::common::{DFField, DFSchema, OwnedTableReference};
use datafusion::common::{DFField, DFSchema};
use datafusion::datasource::DefaultTableSource;
use datafusion::physical_plan::project_schema;
use datafusion::sql::TableReference;
use datafusion_expr::{Filter, LogicalPlan, TableScan};
use prost::Message;
use session::context::QueryContext;
@@ -240,13 +241,13 @@ impl DFLogicalSubstraitConvertor {
.projection
.map(|mask_expr| self.convert_mask_expression(mask_expr));
let table_ref = OwnedTableReference::Full {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table: table_name.clone(),
};
let table_ref = TableReference::full(
catalog_name.clone(),
schema_name.clone(),
table_name.clone(),
);
let adapter = table_provider
.resolve_table(table_ref)
.resolve_table(table_ref.clone())
.await
.with_context(|_| ResolveTableSnafu {
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
@@ -272,14 +273,13 @@ impl DFLogicalSubstraitConvertor {
};
// Calculate the projected schema
let qualified = &format_full_table_name(&catalog_name, &schema_name, &table_name);
let projected_schema = Arc::new(
project_schema(&stored_schema, projection.as_ref())
.and_then(|x| {
DFSchema::new_with_metadata(
x.fields()
.iter()
.map(|f| DFField::from_qualified(qualified, f.clone()))
.map(|f| DFField::from_qualified(table_ref.clone(), f.clone()))
.collect(),
x.metadata().clone(),
)
@@ -291,7 +291,7 @@ impl DFLogicalSubstraitConvertor {
// TODO(ruihang): Support limit(fetch)
Ok(LogicalPlan::TableScan(TableScan {
table_name: qualified.to_string(),
table_name: table_ref,
source: adapter,
projection,
projected_schema,
@@ -620,10 +620,13 @@ mod test {
let projected_schema =
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
let table_name = TableReference::full(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
DEFAULT_TABLE_NAME,
);
let table_scan_plan = LogicalPlan::TableScan(TableScan {
table_name: format!(
"{DEFAULT_CATALOG_NAME}.{DEFAULT_SCHEMA_NAME}.{DEFAULT_TABLE_NAME}",
),
table_name,
source: adapter,
projection: Some(projection),
projected_schema,

View File

@@ -12,7 +12,7 @@ deadlock_detection = ["parking_lot"]
backtrace = "0.3"
common-error = { path = "../error" }
console-subscriber = { version = "0.1", optional = true }
metrics = "0.20"
metrics = "0.20.1"
metrics-exporter-prometheus = { version = "0.11", default-features = false }
once_cell = "1.10"
opentelemetry = { version = "0.17", default-features = false, features = [

View File

@@ -4,10 +4,6 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[features]
default = ["python"]
python = ["dep:script"]
[dependencies]
async-compat = "0.2"
async-stream.workspace = true
@@ -38,6 +34,7 @@ futures = "0.3"
futures-util.workspace = true
hyper = { version = "0.14", features = ["full"] }
humantime-serde = "1.1"
log = "0.4"
log-store = { path = "../log-store" }
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv", features = ["mock"] }
@@ -48,7 +45,6 @@ pin-project = "1.0"
prost.workspace = true
query = { path = "../query" }
regex = "1.6"
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
servers = { path = "../servers" }
@@ -66,7 +62,6 @@ tonic.workspace = true
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
url = "2.3.1"
uuid.workspace = true
[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }

View File

@@ -19,6 +19,7 @@ use common_base::readable_size::ReadableSize;
use common_telemetry::info;
use meta_client::MetaClientOptions;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::Mode;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::scheduler::SchedulerConfig;
@@ -29,6 +30,7 @@ use crate::server::Services;
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize(1024);
/// Object storage config
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ObjectStoreConfig {
@@ -37,6 +39,16 @@ pub enum ObjectStoreConfig {
Oss(OssConfig),
}
/// Storage engine config
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct StorageConfig {
#[serde(flatten)]
pub store: ObjectStoreConfig,
pub compaction: CompactionConfig,
pub manifest: RegionManifestConfig,
}
#[derive(Debug, Clone, Serialize, Default, Deserialize)]
#[serde(default)]
pub struct FileConfig {
@@ -107,6 +119,27 @@ impl Default for WalConfig {
}
}
/// Options for region manifest
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
#[serde(default)]
pub struct RegionManifestConfig {
/// Region manifest checkpoint actions margin.
/// Manifest service creates a checkpoint every [checkpoint_margin] actions.
pub checkpoint_margin: Option<u16>,
/// Region manifest logs and checkpoints gc task execution duration.
#[serde(with = "humantime_serde")]
pub gc_duration: Option<Duration>,
}
impl Default for RegionManifestConfig {
fn default() -> Self {
Self {
checkpoint_margin: Some(10u16),
gc_duration: Some(Duration::from_secs(30)),
}
}
}
/// Options for table compaction
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
#[serde(default)]
@@ -117,6 +150,8 @@ pub struct CompactionConfig {
pub max_files_in_level0: usize,
/// Max task number for SST purge task after compaction.
pub max_purge_tasks: usize,
/// Buffer threshold while writing SST files
pub sst_write_buffer_size: ReadableSize,
}
impl Default for CompactionConfig {
@@ -125,6 +160,7 @@ impl Default for CompactionConfig {
max_inflight_tasks: 4,
max_files_in_level0: 8,
max_purge_tasks: 32,
sst_write_buffer_size: ReadableSize::mb(8),
}
}
}
@@ -132,7 +168,7 @@ impl Default for CompactionConfig {
impl From<&DatanodeOptions> for SchedulerConfig {
fn from(value: &DatanodeOptions) -> Self {
Self {
max_inflight_tasks: value.compaction.max_inflight_tasks,
max_inflight_tasks: value.storage.compaction.max_inflight_tasks,
}
}
}
@@ -140,8 +176,11 @@ impl From<&DatanodeOptions> for SchedulerConfig {
impl From<&DatanodeOptions> for StorageEngineConfig {
fn from(value: &DatanodeOptions) -> Self {
Self {
max_files_in_l0: value.compaction.max_files_in_level0,
max_purge_tasks: value.compaction.max_purge_tasks,
manifest_checkpoint_margin: value.storage.manifest.checkpoint_margin,
manifest_gc_duration: value.storage.manifest.gc_duration,
max_files_in_l0: value.storage.compaction.max_files_in_level0,
max_purge_tasks: value.storage.compaction.max_purge_tasks,
sst_write_buffer_size: value.storage.compaction.sst_write_buffer_size,
}
}
}
@@ -190,10 +229,10 @@ pub struct DatanodeOptions {
pub rpc_runtime_size: usize,
pub mysql_addr: String,
pub mysql_runtime_size: usize,
pub http_opts: HttpOptions,
pub meta_client_options: Option<MetaClientOptions>,
pub wal: WalConfig,
pub storage: ObjectStoreConfig,
pub compaction: CompactionConfig,
pub storage: StorageConfig,
pub procedure: Option<ProcedureConfig>,
}
@@ -208,10 +247,10 @@ impl Default for DatanodeOptions {
rpc_runtime_size: 8,
mysql_addr: "127.0.0.1:4406".to_string(),
mysql_runtime_size: 2,
http_opts: HttpOptions::default(),
meta_client_options: None,
wal: WalConfig::default(),
storage: ObjectStoreConfig::default(),
compaction: CompactionConfig::default(),
storage: StorageConfig::default(),
procedure: None,
}
}
@@ -220,14 +259,17 @@ impl Default for DatanodeOptions {
/// Datanode service.
pub struct Datanode {
opts: DatanodeOptions,
services: Services,
services: Option<Services>,
instance: InstanceRef,
}
impl Datanode {
pub async fn new(opts: DatanodeOptions) -> Result<Datanode> {
let instance = Arc::new(Instance::new(&opts).await?);
let services = Services::try_new(instance.clone(), &opts).await?;
let services = match opts.mode {
Mode::Distributed => Some(Services::try_new(instance.clone(), &opts).await?),
Mode::Standalone => None,
};
Ok(Self {
opts,
services,
@@ -248,7 +290,11 @@ impl Datanode {
/// Start services of datanode. This method call will block until services are shutdown.
pub async fn start_services(&mut self) -> Result<()> {
self.services.start(&self.opts).await
if let Some(service) = self.services.as_mut() {
service.start(&self.opts).await
} else {
Ok(())
}
}
pub fn get_instance(&self) -> InstanceRef {
@@ -260,7 +306,11 @@ impl Datanode {
}
async fn shutdown_services(&self) -> Result<()> {
self.services.shutdown().await
if let Some(service) = self.services.as_ref() {
service.shutdown().await
} else {
Ok(())
}
}
pub async fn shutdown(&self) -> Result<()> {
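As a reading aid, a small sketch of how the relocated options are reached after this refactor; the function is hypothetical, but the field paths (storage.compaction, storage.manifest) are the ones introduced in the diff above.

use datanode::datanode::DatanodeOptions;

// Hypothetical accessor: compaction and manifest settings now hang off `storage`
// instead of being top-level DatanodeOptions fields.
fn storage_summary(opts: &DatanodeOptions) -> (usize, u64, Option<u16>) {
    (
        opts.storage.compaction.max_inflight_tasks,
        opts.storage.compaction.sst_write_buffer_size.as_bytes(),
        opts.storage.manifest.checkpoint_margin,
    )
}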

View File

@@ -181,6 +181,9 @@ pub enum Error {
#[snafu(display("Failed to create directory {}, source: {}", dir, source))]
CreateDir { dir: String, source: std::io::Error },
#[snafu(display("Failed to remove directory {}, source: {}", dir, source))]
RemoveDir { dir: String, source: std::io::Error },
#[snafu(display("Failed to open log store, source: {}", source))]
OpenLogStore {
#[snafu(backtrace)]
@@ -311,12 +314,6 @@ pub enum Error {
source: sql::error::Error,
},
#[snafu(display("Failed to start script manager, source: {}", source))]
StartScriptManager {
#[snafu(backtrace)]
source: script::error::Error,
},
#[snafu(display(
"Failed to parse string to timestamp, string: {}, source: {}",
raw,
@@ -436,12 +433,6 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Failed to write parquet file, source: {}", source))]
WriteParquet {
source: parquet::errors::ParquetError,
backtrace: Backtrace,
},
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datafusion_common::DataFusionError,
@@ -511,6 +502,12 @@ pub enum Error {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to copy table to parquet file, source: {}", source))]
WriteParquet {
#[snafu(backtrace)]
source: storage::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -576,6 +573,7 @@ impl ErrorExt for Error {
| TcpBind { .. }
| StartGrpc { .. }
| CreateDir { .. }
| RemoveDir { .. }
| InsertSystemCatalog { .. }
| RenameTable { .. }
| Catalog { .. }
@@ -597,7 +595,6 @@ impl ErrorExt for Error {
| WriteObject { .. }
| ListObjects { .. } => StatusCode::StorageUnavailable,
OpenLogStore { source } => source.status_code(),
StartScriptManager { source } => source.status_code(),
OpenStorageEngine { source } => source.status_code(),
RuntimeResource { .. } => StatusCode::RuntimeResourcesExhausted,
MetaClientInit { source, .. } => source.status_code(),

View File

@@ -23,6 +23,7 @@ use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER
use common_error::prelude::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::store::state_store::ObjectStateStore;
use common_procedure::ProcedureManagerRef;
use common_telemetry::logging::info;
use log_store::raft_engine::log_store::RaftEngineLogStore;
@@ -57,11 +58,9 @@ use crate::error::{
NewCatalogSnafu, OpenLogStoreSnafu, RecoverProcedureSnafu, Result, ShutdownInstanceSnafu,
};
use crate::heartbeat::HeartbeatTask;
use crate::script::ScriptExecutor;
use crate::sql::{SqlHandler, SqlRequest};
mod grpc;
mod script;
pub mod sql;
pub(crate) type DefaultEngine = MitoEngine<EngineImpl<RaftEngineLogStore>>;
@@ -71,9 +70,9 @@ pub struct Instance {
pub(crate) query_engine: QueryEngineRef,
pub(crate) sql_handler: SqlHandler,
pub(crate) catalog_manager: CatalogManagerRef,
pub(crate) script_executor: ScriptExecutor,
pub(crate) table_id_provider: Option<TableIdProviderRef>,
pub(crate) heartbeat_task: Option<HeartbeatTask>,
procedure_manager: Option<ProcedureManagerRef>,
}
pub type InstanceRef = Arc<Instance>;
@@ -104,7 +103,7 @@ impl Instance {
meta_client: Option<Arc<MetaClient>>,
compaction_scheduler: CompactionSchedulerRef<RaftEngineLogStore>,
) -> Result<Self> {
let object_store = new_object_store(&opts.storage).await?;
let object_store = new_object_store(&opts.storage.store).await?;
let log_store = Arc::new(create_log_store(&opts.wal).await?);
let table_engine = Arc::new(DefaultEngine::new(
@@ -168,8 +167,6 @@ impl Instance {
let factory = QueryEngineFactory::new(catalog_manager.clone());
let query_engine = factory.query_engine();
let script_executor =
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
let heartbeat_task = match opts.mode {
Mode::Standalone => None,
@@ -183,6 +180,7 @@ impl Instance {
};
let procedure_manager = create_procedure_manager(&opts.procedure).await?;
// Register all procedures.
if let Some(procedure_manager) = &procedure_manager {
table_engine.register_procedure_loaders(&**procedure_manager);
table_procedure::register_procedure_loaders(
@@ -191,12 +189,6 @@ impl Instance {
table_engine.clone(),
&**procedure_manager,
);
// Recover procedures.
procedure_manager
.recover()
.await
.context(RecoverProcedureSnafu)?;
}
Ok(Self {
@@ -205,12 +197,12 @@ impl Instance {
table_engine.clone(),
catalog_manager.clone(),
table_engine,
procedure_manager,
procedure_manager.clone(),
),
catalog_manager,
script_executor,
heartbeat_task,
table_id_provider,
procedure_manager,
})
}
@@ -222,6 +214,15 @@ impl Instance {
if let Some(task) = &self.heartbeat_task {
task.start().await?;
}
// Recover procedures after the catalog manager is started, so we can
// ensure we can access all tables from the catalog manager.
if let Some(procedure_manager) = &self.procedure_manager {
procedure_manager
.recover()
.await
.context(RecoverProcedureSnafu)?;
}
Ok(())
}
@@ -308,11 +309,22 @@ pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result
ObjectStoreConfig::Oss { .. } => new_oss_object_store(store_config).await,
};
// Don't enable retry layer when using local file backend.
let object_store = if !matches!(store_config, ObjectStoreConfig::File(..)) {
object_store.map(|object_store| object_store.layer(RetryLayer::new().with_jitter()))
} else {
object_store
};
object_store.map(|object_store| {
object_store
.layer(RetryLayer::new().with_jitter())
.layer(MetricsLayer)
.layer(LoggingLayer::default())
.layer(
LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some(log::Level::Debug)),
)
.layer(TracingLayer)
})
}
@@ -430,15 +442,27 @@ pub(crate) async fn new_fs_object_store(store_config: &ObjectStoreConfig) -> Res
info!("The file storage directory is: {}", &data_dir);
let atomic_write_dir = format!("{data_dir}/.tmp/");
if path::Path::new(&atomic_write_dir).exists() {
info!(
"Begin to clean temp storage directory: {}",
&atomic_write_dir
);
fs::remove_dir_all(&atomic_write_dir).context(error::RemoveDirSnafu {
dir: &atomic_write_dir,
})?;
info!("Cleaned temp storage directory: {}", &atomic_write_dir);
}
let mut builder = FsBuilder::default();
builder.root(&data_dir).atomic_write_dir(&atomic_write_dir);
Ok(ObjectStore::new(builder)
let object_store = ObjectStore::new(builder)
.context(error::InitBackendSnafu {
config: store_config.clone(),
})?
.finish())
.finish();
Ok(object_store)
}
/// Create metasrv client instance and spawn heartbeat loop.
@@ -504,11 +528,15 @@ pub(crate) async fn create_procedure_manager(
);
let object_store = new_object_store(&procedure_config.store).await?;
let state_store = Arc::new(ObjectStateStore::new(object_store));
let manager_config = ManagerConfig {
object_store,
max_retry_times: procedure_config.max_retry_times,
retry_delay: procedure_config.retry_delay,
};
Ok(Some(Arc::new(LocalManager::new(manager_config))))
Ok(Some(Arc::new(LocalManager::new(
manager_config,
state_store,
))))
}

View File

@@ -37,7 +37,7 @@ use crate::error::{
TableIdProviderNotFoundSnafu,
};
use crate::instance::Instance;
use crate::metric;
use crate::metrics;
use crate::sql::{SqlHandler, SqlRequest};
impl Instance {
@@ -90,6 +90,9 @@ impl Instance {
.execute(SqlRequest::CreateTable(request), query_ctx)
.await
}
QueryStatement::Sql(Statement::CreateExternalTable(_create_external_table)) => {
unimplemented!()
}
QueryStatement::Sql(Statement::Alter(alter_table)) => {
let name = alter_table.table_name().clone();
let (catalog, schema, table) = table_idents_to_full_name(&name, query_ctx.clone())?;
@@ -121,11 +124,6 @@ impl Instance {
.execute(SqlRequest::ShowTables(show_tables), query_ctx)
.await
}
QueryStatement::Sql(Statement::DescribeTable(describe_table)) => {
self.sql_handler
.execute(SqlRequest::DescribeTable(describe_table), query_ctx)
.await
}
QueryStatement::Sql(Statement::ShowCreateTable(_show_create_table)) => {
unimplemented!("SHOW CREATE TABLE is unimplemented yet");
}
@@ -182,6 +180,7 @@ impl Instance {
| QueryStatement::Sql(Statement::Use(_))
| QueryStatement::Sql(Statement::Tql(_))
| QueryStatement::Sql(Statement::Delete(_))
| QueryStatement::Sql(Statement::DescribeTable(_))
| QueryStatement::Promql(_) => unreachable!(),
}
}
@@ -191,7 +190,7 @@ impl Instance {
promql: &PromQuery,
query_ctx: QueryContextRef,
) -> Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);
let _timer = timer!(metrics::METRIC_HANDLE_PROMQL_ELAPSED);
let stmt = QueryLanguageParser::parse_promql(promql).context(ExecuteSqlSnafu)?;
@@ -295,7 +294,7 @@ impl StatementHandler for Instance {
#[async_trait]
impl PromHandler for Instance {
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);
let _timer = timer!(metrics::METRIC_HANDLE_PROMQL_ELAPSED);
self.execute_promql(query, QueryContext::arc())
.await

View File

@@ -19,9 +19,8 @@ pub mod datanode;
pub mod error;
mod heartbeat;
pub mod instance;
pub mod metric;
pub mod metrics;
mod mock;
mod script;
pub mod server;
pub mod sql;
#[cfg(test)]

View File

@@ -15,6 +15,4 @@
//! datanode metrics
pub const METRIC_HANDLE_SQL_ELAPSED: &str = "datanode.handle_sql_elapsed";
pub const METRIC_HANDLE_SCRIPTS_ELAPSED: &str = "datanode.handle_scripts_elapsed";
pub const METRIC_RUN_SCRIPT_ELAPSED: &str = "datanode.run_script_elapsed";
pub const METRIC_HANDLE_PROMQL_ELAPSED: &str = "datanode.handle_promql_elapsed";

View File

@@ -18,9 +18,12 @@ use std::sync::Arc;
use common_runtime::Builder as RuntimeBuilder;
use servers::grpc::GrpcServer;
use servers::http::{HttpServer, HttpServerBuilder};
use servers::metrics_handler::MetricsHandler;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::server::Server;
use snafu::ResultExt;
use tokio::select;
use crate::datanode::DatanodeOptions;
use crate::error::{
@@ -33,6 +36,7 @@ pub mod grpc;
/// All rpc services.
pub struct Services {
grpc_server: GrpcServer,
http_server: HttpServer,
}
impl Services {
@@ -51,6 +55,9 @@ impl Services {
None,
grpc_runtime,
),
http_server: HttpServerBuilder::new(opts.http_opts.clone())
.with_metrics_handler(MetricsHandler)
.build(),
})
}
@@ -58,10 +65,15 @@ impl Services {
let grpc_addr: SocketAddr = opts.rpc_addr.parse().context(ParseAddrSnafu {
addr: &opts.rpc_addr,
})?;
self.grpc_server
.start(grpc_addr)
.await
.context(StartServerSnafu)?;
let http_addr = opts.http_opts.addr.parse().context(ParseAddrSnafu {
addr: &opts.http_opts.addr,
})?;
let grpc = self.grpc_server.start(grpc_addr);
let http = self.http_server.start(http_addr);
select!(
v = grpc => v.context(StartServerSnafu)?,
v = http => v.context(StartServerSnafu)?,
);
Ok(())
}
@@ -69,6 +81,11 @@ impl Services {
self.grpc_server
.shutdown()
.await
.context(ShutdownServerSnafu)
.context(ShutdownServerSnafu)?;
self.http_server
.shutdown()
.await
.context(ShutdownServerSnafu)?;
Ok(())
}
}
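The start-up change above awaits both servers with tokio's select! so a failure in either surfaces immediately. The same shape in isolation, with toy futures standing in for the gRPC and HTTP servers (every name here is illustrative, not a GreptimeDB API):

use std::time::Duration;

use tokio::select;

// A "server" that fails shortly after starting.
async fn run_grpc() -> Result<(), String> {
    tokio::time::sleep(Duration::from_millis(50)).await;
    Err("grpc listener failed".to_string())
}

// A "server" that would otherwise run forever.
async fn run_http() -> Result<(), String> {
    std::future::pending::<()>().await;
    Ok(())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // select! completes with whichever branch finishes first, so the failing
    // gRPC server aborts startup instead of being awaited in sequence.
    select! {
        v = run_grpc() => v?,
        v = run_http() => v?,
    }
    Ok(())
}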

View File

@@ -17,17 +17,16 @@ use common_error::prelude::BoxedError;
use common_procedure::ProcedureManagerRef;
use common_query::Output;
use common_telemetry::error;
use query::sql::{describe_table, show_databases, show_tables};
use query::sql::{show_databases, show_tables};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use sql::statements::describe::DescribeTable;
use sql::statements::show::{ShowDatabases, ShowTables};
use table::engine::{EngineContext, TableEngineProcedureRef, TableEngineRef, TableReference};
use table::requests::*;
use table::TableRef;
use crate::error::{
self, CloseTableEngineSnafu, ExecuteSqlSnafu, GetTableSnafu, Result, TableNotFoundSnafu,
CloseTableEngineSnafu, ExecuteSqlSnafu, GetTableSnafu, Result, TableNotFoundSnafu,
};
use crate::instance::sql::table_idents_to_full_name;
@@ -48,7 +47,6 @@ pub enum SqlRequest {
FlushTable(FlushTableRequest),
ShowDatabases(ShowDatabases),
ShowTables(ShowTables),
DescribeTable(DescribeTable),
CopyTable(CopyTableRequest),
}
@@ -97,19 +95,6 @@ impl SqlHandler {
show_tables(req, self.catalog_manager.clone(), query_ctx.clone())
.context(ExecuteSqlSnafu)
}
SqlRequest::DescribeTable(req) => {
let (catalog, schema, table) =
table_idents_to_full_name(req.name(), query_ctx.clone())?;
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(error::CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: req.name().to_string(),
})?;
describe_table(table).context(ExecuteSqlSnafu)
}
SqlRequest::FlushTable(req) => self.flush_table(req).await,
};
if let Err(e) = &result {

View File

@@ -12,24 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::pin::Pin;
use common_datasource;
use common_datasource::object_store::{build_backend, parse_url};
use common_query::physical_plan::SessionContext;
use common_query::Output;
use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
use datafusion::parquet::arrow::ArrowWriter;
use datafusion::parquet::basic::{Compression, Encoding};
use datafusion::parquet::file::properties::WriterProperties;
use datafusion::physical_plan::RecordBatchStream;
use futures::TryStreamExt;
use object_store::ObjectStore;
use snafu::ResultExt;
use storage::sst::SstInfo;
use storage::{ParquetWriter, Source};
use table::engine::TableReference;
use table::requests::CopyTableRequest;
use crate::error::{self, Result};
use crate::error::{self, Result, WriteParquetSnafu};
use crate::sql::SqlHandler;
impl SqlHandler {
@@ -51,99 +44,20 @@ impl SqlHandler {
let stream = stream
.execute(0, SessionContext::default().task_ctx())
.context(error::TableScanExecSnafu)?;
let stream = Box::pin(DfRecordBatchStreamAdapter::new(stream));
let (_schema, _host, path) = parse_url(&req.location).context(error::ParseUrlSnafu)?;
let object_store =
build_backend(&req.location, req.connection).context(error::BuildBackendSnafu)?;
let mut parquet_writer = ParquetWriter::new(path.to_string(), stream, object_store);
// TODO(jiachun):
// For now, COPY is implemented synchronously.
// When copying a large table, it will be blocked for a long time.
// Maybe we should make "copy" run in the background?
// Like PG: https://www.postgresql.org/docs/current/sql-copy.html
let rows = parquet_writer.flush().await?;
let writer = ParquetWriter::new(&path, Source::Stream(stream), object_store);
Ok(Output::AffectedRows(rows))
}
}
type DfRecordBatchStream = Pin<Box<DfRecordBatchStreamAdapter>>;
struct ParquetWriter {
file_name: String,
stream: DfRecordBatchStream,
object_store: ObjectStore,
max_row_group_size: usize,
max_rows_in_segment: usize,
}
impl ParquetWriter {
pub fn new(file_name: String, stream: DfRecordBatchStream, object_store: ObjectStore) -> Self {
Self {
file_name,
stream,
object_store,
// TODO(jiachun): make these configurable: WITH (max_row_group_size=xxx, max_rows_in_segment=xxx)
max_row_group_size: 4096,
max_rows_in_segment: 5000000, // default 5M rows per segment
}
}
pub async fn flush(&mut self) -> Result<usize> {
let schema = self.stream.as_ref().schema();
let writer_props = WriterProperties::builder()
.set_compression(Compression::ZSTD)
.set_encoding(Encoding::PLAIN)
.set_max_row_group_size(self.max_row_group_size)
.build();
let mut total_rows = 0;
loop {
let mut buf = vec![];
let mut arrow_writer =
ArrowWriter::try_new(&mut buf, schema.clone(), Some(writer_props.clone()))
.context(error::WriteParquetSnafu)?;
let mut rows = 0;
let mut end_loop = true;
// TODO(hl & jiachun): Since OpenDAL's writer is async and ArrowWriter requires a `std::io::Write`,
// here we use a Vec<u8> to buffer all parquet bytes in memory and write them to the object store
// in one call. Maybe we should find a better way to bridge ArrowWriter and OpenDAL's object.
while let Some(batch) = self
.stream
.try_next()
.await
.context(error::PollStreamSnafu)?
{
arrow_writer
.write(&batch)
.context(error::WriteParquetSnafu)?;
rows += batch.num_rows();
if rows >= self.max_rows_in_segment {
end_loop = false;
break;
}
}
let start_row_num = total_rows + 1;
total_rows += rows;
arrow_writer.close().context(error::WriteParquetSnafu)?;
// if rows == 0, we just end up with an empty file.
//
// file_name like:
// "file_name_1_1000000" (row num: 1 ~ 1000000),
// "file_name_1000001_xxx" (row num: 1000001 ~ xxx)
let file_name = format!("{}_{}_{}", self.file_name, start_row_num, total_rows);
self.object_store
.write(&file_name, buf)
.await
.context(error::WriteObjectSnafu { path: file_name })?;
if end_loop {
return Ok(total_rows);
}
}
let rows_copied = writer
.write_sst(&storage::sst::WriteOptions::default())
.await
.context(WriteParquetSnafu)?
.map(|SstInfo { num_rows, .. }| num_rows)
.unwrap_or(0);
Ok(Output::AffectedRows(rows_copied))
}
}
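The TODO kept above describes the bridge the removed writer used: encode each segment into an in-memory Vec<u8> with ArrowWriter, then hand the finished bytes to the async object store in a single write. Stripped of the segmenting and row counting, that encoding step looks roughly like this (the parquet/arrow calls mirror the removed code; the ZSTD compression and row-group settings it used are omitted here):

use std::sync::Arc;

use datafusion::arrow::array::{ArrayRef, Int64Array};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::parquet::arrow::ArrowWriter;
use datafusion::parquet::file::properties::WriterProperties;

// Encode one RecordBatch to parquet entirely in memory; the caller can then
// write the returned bytes to OpenDAL (or any async sink) in a single call.
fn encode_batch(batch: &RecordBatch) -> Vec<u8> {
    // The removed code also set ZSTD compression and a max row-group size here.
    let props = WriterProperties::builder().build();
    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), Some(props)).unwrap();
    writer.write(batch).unwrap();
    writer.close().unwrap();
    buf
}

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("n", DataType::Int64, false)]));
    let column: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_new(schema, vec![column]).unwrap();
    println!("encoded {} parquet bytes", encode_batch(&batch).len());
}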

View File

@@ -313,13 +313,15 @@ mod tests {
use common_base::readable_size::ReadableSize;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::Schema;
use query::parser::QueryLanguageParser;
use session::context::QueryContext;
use sql::dialect::GenericDialect;
use sql::parser::ParserContext;
use sql::statements::statement::Statement;
use super::*;
use crate::error::Error;
use crate::tests::test_util::create_mock_sql_handler;
use crate::tests::test_util::{create_mock_sql_handler, MockInstance};
fn sql_to_statement(sql: &str) -> CreateTable {
let mut res = ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
@@ -522,4 +524,42 @@ mod tests {
schema.column_schema_by_name("memory").unwrap().data_type
);
}
#[tokio::test(flavor = "multi_thread")]
async fn create_table_by_procedure() {
let instance = MockInstance::with_procedure_enabled("create_table_by_procedure").await;
let sql = r#"create table test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#;
let stmt = QueryLanguageParser::parse_sql(sql).unwrap();
let output = instance
.inner()
.execute_stmt(stmt, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
// create if not exists
let sql = r#"create table if not exists test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#;
let stmt = QueryLanguageParser::parse_sql(sql).unwrap();
let output = instance
.inner()
.execute_stmt(stmt, QueryContext::arc())
.await
.unwrap();
assert!(matches!(output, Output::AffectedRows(0)));
}
}

View File

@@ -13,6 +13,5 @@
// limitations under the License.
// TODO(LFC): These tests should be moved to frontend crate. They are actually standalone instance tests.
mod instance_test;
mod promql_test;
pub(crate) mod test_util;

View File

@@ -32,7 +32,9 @@ use sql::statements::tql::Tql;
use table::engine::{EngineContext, TableEngineRef};
use table::requests::{CreateTableRequest, TableOptions};
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, WalConfig};
use crate::datanode::{
DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, StorageConfig, WalConfig,
};
use crate::error::{CreateTableSnafu, Result};
use crate::instance::Instance;
use crate::sql::SqlHandler;
@@ -130,9 +132,12 @@ fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard)
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
storage: StorageConfig {
store: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
..Default::default()
},
mode: Mode::Standalone,
..Default::default()
};
@@ -207,16 +212,6 @@ pub(crate) async fn setup_test_instance(test_name: &str) -> MockInstance {
MockInstance::new(test_name).await
}
pub async fn check_output_stream(output: Output, expected: String) {
let recordbatches = match output {
Output::Stream(stream) => util::collect_batches(stream).await.unwrap(),
Output::RecordBatches(recordbatches) => recordbatches,
_ => unreachable!(),
};
let pretty_print = recordbatches.pretty_print().unwrap();
assert_eq!(pretty_print, expected, "{}", pretty_print);
}
pub async fn check_unordered_output_stream(output: Output, expected: String) {
let sort_table = |table: String| -> String {
let replaced = table.replace("\\n", "\n");

View File

@@ -13,13 +13,13 @@
// limitations under the License.
use arrow::array::ArrayData;
use arrow::bitmap::Bitmap;
use arrow::buffer::NullBuffer;
#[derive(Debug, PartialEq)]
enum ValidityKind<'a> {
/// Whether the array slot is valid or not (null).
Slots {
bitmap: &'a Bitmap,
bitmap: &'a NullBuffer,
len: usize,
null_count: usize,
},
@@ -38,7 +38,7 @@ pub struct Validity<'a> {
impl<'a> Validity<'a> {
/// Creates a `Validity` from [`ArrayData`].
pub fn from_array_data(data: &'a ArrayData) -> Validity<'a> {
match data.null_bitmap() {
match data.nulls() {
Some(bitmap) => Validity {
kind: ValidityKind::Slots {
bitmap,
@@ -67,7 +67,7 @@ impl<'a> Validity<'a> {
/// Returns whether `i-th` bit is set.
pub fn is_set(&self, i: usize) -> bool {
match self.kind {
ValidityKind::Slots { bitmap, .. } => bitmap.is_set(i),
ValidityKind::Slots { bitmap, .. } => bitmap.is_valid(i),
ValidityKind::AllValid { len } => i < len,
ValidityKind::AllNull { .. } => false,
}
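The Validity change above follows arrow-rs replacing the old Bitmap accessors: ArrayData::nulls() now returns an Option<&NullBuffer> and per-slot checks go through is_valid. A tiny sketch of that API on a throwaway array (assuming a recent arrow-rs; only nulls() and is_valid() are taken from the hunk above):

use arrow::array::{Array, Int32Array};

fn main() {
    let array = Int32Array::from(vec![Some(1), None, Some(3)]);
    let data = array.into_data();

    // nulls() is None when every slot is valid; otherwise it yields a NullBuffer.
    if let Some(nulls) = data.nulls() {
        println!("null_count = {}", nulls.null_count());
        for i in 0..data.len() {
            // is_valid replaces the old Bitmap::is_set check.
            println!("slot {i} valid: {}", nulls.is_valid(i));
        }
    }
}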

View File

@@ -4,6 +4,10 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[features]
default = ["python"]
python = ["dep:script"]
[dependencies]
api = { path = "../api" }
async-stream.workspace = true
@@ -30,12 +34,14 @@ futures = "0.3"
futures-util.workspace = true
itertools = "0.10"
meta-client = { path = "../meta-client" }
mito = { path = "../mito", features = ["test"] }
moka = { version = "0.9", features = ["future"] }
openmetrics-parser = "0.4"
partition = { path = "../partition" }
prost.workspace = true
query = { path = "../query" }
rustls = "0.20"
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
servers = { path = "../servers" }
@@ -56,3 +62,4 @@ meta-srv = { path = "../meta-srv", features = ["mock"] }
strfmt = "0.2"
toml = "0.5"
tower = "0.4"
uuid.workspace = true

View File

@@ -16,8 +16,12 @@ use std::any::Any;
use std::collections::HashSet;
use std::sync::Arc;
use api::v1::CreateTableExpr;
use async_trait::async_trait;
use catalog::error::{self as catalog_err, InvalidCatalogValueSnafu, Result as CatalogResult};
use catalog::error::{
self as catalog_err, InternalSnafu, InvalidCatalogValueSnafu, InvalidSystemTableDefSnafu,
Result as CatalogResult, UnimplementedSnafu,
};
use catalog::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, SchemaKey,
TableGlobalKey, TableGlobalValue,
@@ -28,6 +32,7 @@ use catalog::{
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
SchemaProvider, SchemaProviderRef,
};
use common_error::prelude::BoxedError;
use common_telemetry::error;
use futures::StreamExt;
use meta_client::rpc::TableName;
@@ -36,6 +41,8 @@ use snafu::prelude::*;
use table::TableRef;
use crate::datanode::DatanodeClients;
use crate::expr_factory;
use crate::instance::distributed::DistInstance;
use crate::table::DistTable;
#[derive(Clone)]
@@ -43,6 +50,12 @@ pub struct FrontendCatalogManager {
backend: KvBackendRef,
partition_manager: PartitionRuleManagerRef,
datanode_clients: Arc<DatanodeClients>,
// TODO(LFC): Remove this field.
// DistInstance in FrontendCatalogManager is currently only used for creating the distributed script table.
// Once we have a standalone distributed table creator (like a create-distributed-table procedure),
// we should use that instead.
dist_instance: Option<Arc<DistInstance>>,
}
impl FrontendCatalogManager {
@@ -55,9 +68,14 @@ impl FrontendCatalogManager {
backend,
partition_manager,
datanode_clients,
dist_instance: None,
}
}
pub(crate) fn set_dist_instance(&mut self, dist_instance: Arc<DistInstance>) {
self.dist_instance = Some(dist_instance)
}
pub(crate) fn backend(&self) -> KvBackendRef {
self.backend.clone()
}
@@ -106,9 +124,93 @@ impl CatalogManager for FrontendCatalogManager {
async fn register_system_table(
&self,
_request: RegisterSystemTableRequest,
request: RegisterSystemTableRequest,
) -> catalog::error::Result<()> {
unimplemented!()
if let Some(dist_instance) = &self.dist_instance {
let open_hook = request.open_hook;
let request = request.create_table_request;
if let Some(table) = self
.table(
&request.catalog_name,
&request.schema_name,
&request.table_name,
)
.await?
{
if let Some(hook) = open_hook {
(hook)(table)?;
}
return Ok(());
}
let time_index = request
.schema
.column_schemas
.iter()
.find_map(|x| {
if x.is_time_index() {
Some(x.name.clone())
} else {
None
}
})
.context(InvalidSystemTableDefSnafu {
err_msg: "Time index is not defined.",
})?;
let primary_keys = request
.schema
.column_schemas
.iter()
.enumerate()
.filter_map(|(i, x)| {
if request.primary_key_indices.contains(&i) {
Some(x.name.clone())
} else {
None
}
})
.collect();
let column_defs = expr_factory::column_schemas_to_defs(request.schema.column_schemas)
.map_err(|e| {
InvalidSystemTableDefSnafu {
err_msg: e.to_string(),
}
.build()
})?;
let mut create_table = CreateTableExpr {
catalog_name: request.catalog_name,
schema_name: request.schema_name,
table_name: request.table_name,
desc: request.desc.unwrap_or("".to_string()),
column_defs,
time_index,
primary_keys,
create_if_not_exists: request.create_if_not_exists,
table_options: (&request.table_options).into(),
table_id: None, // Should and will be assigned by Meta.
region_ids: vec![0],
};
let table = dist_instance
.create_table(&mut create_table, None)
.await
.map_err(BoxedError::new)
.context(InternalSnafu)?;
if let Some(hook) = open_hook {
(hook)(table)?;
}
Ok(())
} else {
UnimplementedSnafu {
operation: "register system table",
}
.fail()
}
}
fn schema(
@@ -330,3 +432,70 @@ impl SchemaProvider for FrontendSchemaProvider {
Ok(self.table_names()?.contains(&name.to_string()))
}
}
#[cfg(test)]
mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use script::table::{build_scripts_schema, SCRIPTS_TABLE_NAME};
use table::requests::{CreateTableRequest, TableOptions};
use super::*;
#[tokio::test(flavor = "multi_thread")]
async fn test_register_system_table() {
let instance =
crate::tests::create_distributed_instance("test_register_system_table").await;
let catalog_name = DEFAULT_CATALOG_NAME;
let schema_name = DEFAULT_SCHEMA_NAME;
let table_name = SCRIPTS_TABLE_NAME;
let request = CreateTableRequest {
id: 1,
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
desc: Some("Scripts table".to_string()),
schema: build_scripts_schema(),
region_numbers: vec![0],
primary_key_indices: vec![0, 1],
create_if_not_exists: true,
table_options: TableOptions::default(),
};
let result = instance
.catalog_manager
.register_system_table(RegisterSystemTableRequest {
create_table_request: request,
open_hook: None,
})
.await;
assert!(result.is_ok());
assert!(
instance
.catalog_manager
.table(catalog_name, schema_name, table_name)
.await
.unwrap()
.is_some(),
"the registered system table cannot be found in catalog"
);
let mut actually_created_table_in_datanode = 0;
for datanode in instance.datanodes.values() {
if datanode
.catalog_manager()
.table(catalog_name, schema_name, table_name)
.await
.unwrap()
.is_some()
{
actually_created_table_in_datanode += 1;
}
}
assert_eq!(
actually_created_table_in_datanode, 1,
"system table should be actually created at one and only one datanode"
)
}
}

View File

@@ -378,6 +378,12 @@ pub enum Error {
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display("Failed to start script manager, source: {}", source))]
StartScriptManager {
#[snafu(backtrace)]
source: script::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -462,6 +468,8 @@ impl ErrorExt for Error {
source.status_code()
}
Error::UnrecognizedTableOption { .. } => StatusCode::InvalidArguments,
Error::StartScriptManager { source } => source.status_code(),
}
}

View File

@@ -187,7 +187,12 @@ fn columns_to_expr(
.iter()
.map(|c| column_def_to_schema(c, c.name.to_string() == time_index).context(ParseSqlSnafu))
.collect::<Result<Vec<ColumnSchema>>>()?;
column_schemas_to_defs(column_schemas)
}
pub(crate) fn column_schemas_to_defs(
column_schemas: Vec<ColumnSchema>,
) -> Result<Vec<api::v1::ColumnDef>> {
let column_datatypes = column_schemas
.iter()
.map(|c| {

View File

@@ -17,6 +17,7 @@ mod grpc;
mod influxdb;
mod opentsdb;
mod prometheus;
mod script;
mod standalone;
use std::collections::HashMap;
@@ -40,7 +41,6 @@ use common_telemetry::timer;
use datafusion::sql::sqlparser::ast::ObjectName;
use datanode::instance::sql::table_idents_to_full_name;
use datanode::instance::InstanceRef as DnInstanceRef;
use datanode::metric;
use datatypes::schema::Schema;
use distributed::DistInstance;
use meta_client::client::{MetaClient, MetaClientBuilder};
@@ -58,26 +58,29 @@ use servers::query_handler::grpc::{GrpcQueryHandler, GrpcQueryHandlerRef};
use servers::query_handler::sql::SqlQueryHandler;
use servers::query_handler::{
InfluxdbLineProtocolHandler, OpentsdbProtocolHandler, PrometheusProtocolHandler, ScriptHandler,
ScriptHandlerRef,
};
use session::context::QueryContextRef;
use snafu::prelude::*;
use sql::dialect::GenericDialect;
use sql::parser::ParserContext;
use sql::statements::copy::CopyTable;
use sql::statements::describe::DescribeTable;
use sql::statements::statement::Statement;
use sql::statements::tql::Tql;
use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
use crate::error::{
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExecuteStatementSnafu, ExternalSnafu,
InvalidInsertRequestSnafu, MissingMetasrvOptsSnafu, NotSupportedSnafu, ParseQuerySnafu,
ParseSqlSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
self, CatalogSnafu, DescribeStatementSnafu, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu,
ExecuteStatementSnafu, ExternalSnafu, InvalidInsertRequestSnafu, MissingMetasrvOptsSnafu,
NotSupportedSnafu, ParseQuerySnafu, ParseSqlSnafu, PlanStatementSnafu, Result,
SqlExecInterceptedSnafu, TableNotFoundSnafu,
};
use crate::expr_factory::{CreateExprFactoryRef, DefaultCreateExprFactory};
use crate::frontend::FrontendOptions;
use crate::instance::standalone::StandaloneGrpcQueryHandler;
use crate::metric;
use crate::script::ScriptExecutor;
use crate::server::{start_server, ServerHandlers, Services};
#[async_trait]
@@ -101,9 +104,7 @@ pub type FrontendInstanceRef = Arc<dyn FrontendInstance>;
#[derive(Clone)]
pub struct Instance {
catalog_manager: CatalogManagerRef,
/// Script handler is None in distributed mode, only works on standalone mode.
script_handler: Option<ScriptHandlerRef>,
script_executor: Arc<ScriptExecutor>,
statement_handler: StatementHandlerRef,
query_engine: QueryEngineRef,
grpc_query_handler: GrpcQueryHandlerRef<Error>,
@@ -132,23 +133,29 @@ impl Instance {
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
let datanode_clients = Arc::new(DatanodeClients::default());
let catalog_manager = Arc::new(FrontendCatalogManager::new(
meta_backend,
partition_manager,
datanode_clients.clone(),
));
let mut catalog_manager =
FrontendCatalogManager::new(meta_backend, partition_manager, datanode_clients.clone());
let dist_instance =
DistInstance::new(meta_client, catalog_manager.clone(), datanode_clients);
let dist_instance = DistInstance::new(
meta_client,
Arc::new(catalog_manager.clone()),
datanode_clients,
);
let dist_instance = Arc::new(dist_instance);
catalog_manager.set_dist_instance(dist_instance.clone());
let catalog_manager = Arc::new(catalog_manager);
let query_engine =
QueryEngineFactory::new_with_plugins(catalog_manager.clone(), plugins.clone())
.query_engine();
let script_executor =
Arc::new(ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?);
Ok(Instance {
catalog_manager,
script_handler: None,
script_executor,
create_expr_factory: Arc::new(DefaultCreateExprFactory),
statement_handler: dist_instance.clone(),
query_engine,
@@ -189,18 +196,22 @@ impl Instance {
Ok(Arc::new(meta_client))
}
pub fn new_standalone(dn_instance: DnInstanceRef) -> Self {
Instance {
catalog_manager: dn_instance.catalog_manager().clone(),
script_handler: None,
pub async fn try_new_standalone(dn_instance: DnInstanceRef) -> Result<Self> {
let catalog_manager = dn_instance.catalog_manager();
let query_engine = dn_instance.query_engine();
let script_executor =
Arc::new(ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?);
Ok(Instance {
catalog_manager: catalog_manager.clone(),
script_executor,
create_expr_factory: Arc::new(DefaultCreateExprFactory),
statement_handler: dn_instance.clone(),
query_engine: dn_instance.query_engine(),
query_engine,
grpc_query_handler: StandaloneGrpcQueryHandler::arc(dn_instance.clone()),
promql_handler: Some(dn_instance.clone()),
plugins: Default::default(),
servers: Arc::new(HashMap::new()),
}
})
}
pub async fn build_servers(
@@ -215,12 +226,19 @@ impl Instance {
}
#[cfg(test)]
pub(crate) fn new_distributed(dist_instance: Arc<DistInstance>) -> Self {
let catalog_manager = dist_instance.catalog_manager();
pub(crate) async fn new_distributed(
catalog_manager: CatalogManagerRef,
dist_instance: Arc<DistInstance>,
) -> Self {
let query_engine = QueryEngineFactory::new(catalog_manager.clone()).query_engine();
let script_executor = Arc::new(
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
.await
.unwrap(),
);
Instance {
catalog_manager,
script_handler: None,
script_executor,
statement_handler: dist_instance.clone(),
query_engine,
create_expr_factory: Arc::new(DefaultCreateExprFactory),
@@ -235,14 +253,6 @@ impl Instance {
&self.catalog_manager
}
pub fn set_script_handler(&mut self, handler: ScriptHandlerRef) {
debug_assert!(
self.script_handler.is_none(),
"Script handler can be set only once!"
);
self.script_handler = Some(handler);
}
/// Handle batch inserts
pub async fn handle_inserts(
&self,
@@ -464,6 +474,27 @@ impl Instance {
.context(ExecLogicalPlanSnafu)
}
async fn describe_table(
&self,
stmt: DescribeTable,
query_ctx: QueryContextRef,
) -> Result<Output> {
let (catalog, schema, table) = table_idents_to_full_name(stmt.name(), query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: stmt.name().to_string(),
})?;
query::sql::describe_table(table).context(DescribeStatementSnafu)
}
async fn query_statement(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Output> {
check_permission(self.plugins.clone(), &stmt, &query_ctx)?;
@@ -479,11 +510,14 @@ impl Instance {
}
Statement::Tql(tql) => self.execute_tql(tql, query_ctx).await,
Statement::DescribeTable(stmt) => self.describe_table(stmt, query_ctx).await,
Statement::CreateDatabase(_)
| Statement::CreateExternalTable(_)
| Statement::ShowDatabases(_)
| Statement::CreateTable(_)
| Statement::ShowTables(_)
| Statement::DescribeTable(_)
| Statement::Insert(_)
| Statement::Alter(_)
| Statement::DropTable(_)
@@ -594,41 +628,6 @@ impl SqlQueryHandler for Instance {
}
}
#[async_trait]
impl ScriptHandler for Instance {
async fn insert_script(
&self,
schema: &str,
name: &str,
script: &str,
) -> server_error::Result<()> {
if let Some(handler) = &self.script_handler {
handler.insert_script(schema, name, script).await
} else {
server_error::NotSupportedSnafu {
feat: "Script execution in Frontend",
}
.fail()
}
}
async fn execute_script(
&self,
schema: &str,
script: &str,
params: HashMap<String, String>,
) -> server_error::Result<Output> {
if let Some(handler) = &self.script_handler {
handler.execute_script(schema, script, params).await
} else {
server_error::NotSupportedSnafu {
feat: "Script execution in Frontend",
}
.fail()
}
}
}
#[async_trait]
impl PromHandler for Instance {
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
@@ -663,7 +662,8 @@ pub fn check_permission(
// database ops won't be checked
Statement::CreateDatabase(_) | Statement::ShowDatabases(_) | Statement::Use(_) => {}
// show create table and alter are not supported yet
Statement::ShowCreateTable(_) | Statement::Alter(_) => {}
Statement::ShowCreateTable(_) | Statement::CreateExternalTable(_) | Statement::Alter(_) => {
}
Statement::Insert(insert) => {
validate_param(insert.table_name(), query_ctx)?;

View File

@@ -46,7 +46,7 @@ use partition::partition::{PartitionBound, PartitionDef};
use query::error::QueryExecutionSnafu;
use query::parser::QueryStatement;
use query::query_engine::StatementHandler;
use query::sql::{describe_table, show_databases, show_tables};
use query::sql::{show_databases, show_tables};
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::Value as SqlValue;
@@ -56,6 +56,7 @@ use sql::statements::statement::Statement;
use table::metadata::{RawTableInfo, RawTableMeta, TableIdent, TableType};
use table::requests::TableOptions;
use table::table::AlterContext;
use table::TableRef;
use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
@@ -93,14 +94,14 @@ impl DistInstance {
&self,
create_table: &mut CreateTableExpr,
partitions: Option<Partitions>,
) -> Result<Output> {
) -> Result<TableRef> {
let table_name = TableName::new(
&create_table.catalog_name,
&create_table.schema_name,
&create_table.table_name,
);
if self
if let Some(table) = self
.catalog_manager
.table(
&table_name.catalog_name,
@@ -109,10 +110,9 @@ impl DistInstance {
)
.await
.context(CatalogSnafu)?
.is_some()
{
return if create_table.create_if_not_exists {
Ok(Output::AffectedRows(0))
Ok(table)
} else {
TableAlreadyExistSnafu {
table: table_name.to_string(),
@@ -153,20 +153,20 @@ impl DistInstance {
create_table.table_id = Some(TableId { id: table_id });
let table = DistTable::new(
let table = Arc::new(DistTable::new(
table_name.clone(),
table_info,
self.catalog_manager.partition_manager(),
self.catalog_manager.datanode_clients(),
self.catalog_manager.backend(),
);
));
let request = RegisterTableRequest {
catalog: table_name.catalog_name.clone(),
schema: table_name.schema_name.clone(),
table_name: table_name.table_name.clone(),
table_id,
table: Arc::new(table),
table: table.clone(),
};
ensure!(
self.catalog_manager
@@ -196,9 +196,7 @@ impl DistInstance {
.await
.context(RequestDatanodeSnafu)?;
}
// Checked in real MySQL, it truly returns "0 rows affected".
Ok(Output::AffectedRows(0))
Ok(table)
}
async fn drop_table(&self, table_name: TableName) -> Result<Output> {
@@ -329,7 +327,8 @@ impl DistInstance {
}
Statement::CreateTable(stmt) => {
let create_expr = &mut expr_factory::create_to_expr(&stmt, query_ctx)?;
Ok(self.create_table(create_expr, stmt.partitions).await?)
let _ = self.create_table(create_expr, stmt.partitions).await?;
Ok(Output::AffectedRows(0))
}
Statement::Alter(alter_table) => {
let expr = grpc::to_alter_expr(alter_table, query_ctx)?;
@@ -347,20 +346,6 @@ impl DistInstance {
Statement::ShowTables(stmt) => {
show_tables(stmt, self.catalog_manager.clone(), query_ctx)
}
Statement::DescribeTable(stmt) => {
let (catalog, schema, table) = table_idents_to_full_name(stmt.name(), query_ctx)
.map_err(BoxedError::new)
.context(error::ExternalSnafu)?;
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: stmt.name().to_string(),
})?;
describe_table(table)
}
Statement::Insert(insert) => {
let (catalog, schema, table) =
table_idents_to_full_name(insert.table_name(), query_ctx.clone())

View File

@@ -49,7 +49,8 @@ impl GrpcQueryHandler for DistInstance {
DdlExpr::CreateTable(mut expr) => {
// TODO(LFC): Support creating distributed table through GRPC interface.
// Currently only SQL supports it; how to design the fields in CreateTableExpr?
self.create_table(&mut expr, None).await
let _ = self.create_table(&mut expr, None).await;
Ok(Output::AffectedRows(0))
}
DdlExpr::Alter(expr) => self.handle_alter_table(expr).await,
DdlExpr::DropTable(expr) => {

View File

@@ -383,8 +383,6 @@ CREATE TABLE {table_name} (
// Wait for previous task finished
flush_table(frontend, "greptime", "public", table_name, None).await;
let table_id = 1024;
let table = instance
.frontend
.catalog_manager()
@@ -394,7 +392,7 @@ CREATE TABLE {table_name} (
.unwrap();
let table = table.as_any().downcast_ref::<DistTable>().unwrap();
let TableGlobalValue { regions_id_map, .. } = table
let tgv = table
.table_global_value(&TableGlobalKey {
catalog_name: "greptime".to_string(),
schema_name: "public".to_string(),
@@ -403,7 +401,10 @@ CREATE TABLE {table_name} (
.await
.unwrap()
.unwrap();
let region_to_dn_map = regions_id_map
let table_id = tgv.table_id();
let region_to_dn_map = tgv
.regions_id_map
.iter()
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(assert_matches)]
#![feature(trait_upcasting)]
pub mod catalog;
pub mod datanode;
@@ -22,11 +23,13 @@ pub mod frontend;
pub mod grpc;
pub mod influxdb;
pub mod instance;
pub(crate) mod metric;
pub mod mysql;
pub mod opentsdb;
pub mod postgres;
pub mod prom;
pub mod prometheus;
mod script;
mod server;
mod table;
#[cfg(test)]

View File

@@ -0,0 +1,17 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) const METRIC_HANDLE_SQL_ELAPSED: &str = "frontend.handle_sql_elapsed";
pub(crate) const METRIC_HANDLE_SCRIPTS_ELAPSED: &str = "frontend.handle_scripts_elapsed";
pub(crate) const METRIC_RUN_SCRIPT_ELAPSED: &str = "frontend.run_script_elapsed";

View File

@@ -22,7 +22,7 @@ use common_telemetry::info;
use servers::auth::UserProviderRef;
use servers::error::Error::InternalIo;
use servers::grpc::GrpcServer;
use servers::http::HttpServer;
use servers::http::HttpServerBuilder;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
use servers::opentsdb::OpentsdbServer;
use servers::postgres::PostgresServer;
@@ -150,33 +150,33 @@ impl Services {
if let Some(http_options) = &opts.http_options {
let http_addr = parse_addr(&http_options.addr)?;
let mut http_server = HttpServer::new(
ServerSqlQueryHandlerAdaptor::arc(instance.clone()),
ServerGrpcQueryHandlerAdaptor::arc(instance.clone()),
http_options.clone(),
);
let mut http_server_builder = HttpServerBuilder::new(http_options.clone());
http_server_builder
.with_sql_handler(ServerSqlQueryHandlerAdaptor::arc(instance.clone()))
.with_grpc_handler(ServerGrpcQueryHandlerAdaptor::arc(instance.clone()));
if let Some(user_provider) = user_provider.clone() {
http_server.set_user_provider(user_provider);
http_server_builder.with_user_provider(user_provider);
}
if set_opentsdb_handler {
http_server.set_opentsdb_handler(instance.clone());
http_server_builder.with_opentsdb_handler(instance.clone());
}
if matches!(
opts.influxdb_options,
Some(InfluxdbOptions { enable: true })
) {
http_server.set_influxdb_handler(instance.clone());
http_server_builder.with_influxdb_handler(instance.clone());
}
if matches!(
opts.prometheus_options,
Some(PrometheusOptions { enable: true })
) {
http_server.set_prom_handler(instance.clone());
http_server_builder.with_prom_handler(instance.clone());
}
http_server.set_script_handler(instance.clone());
http_server_builder.with_script_handler(instance.clone());
let http_server = http_server_builder.build();
result.push((Box::new(http_server), http_addr));
}

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod instance_test;
mod test_util;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
@@ -21,7 +24,9 @@ use client::Client;
use common_grpc::channel_manager::ChannelManager;
use common_runtime::Builder as RuntimeBuilder;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datanode::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
use datanode::datanode::{
DatanodeOptions, FileConfig, ObjectStoreConfig, StorageConfig, WalConfig,
};
use datanode::instance::Instance as DatanodeInstance;
use meta_client::client::MetaClientBuilder;
use meta_client::rpc::Peer;
@@ -54,6 +59,7 @@ pub(crate) struct MockDistributedInstance {
pub(crate) frontend: Arc<Instance>,
pub(crate) dist_instance: Arc<DistInstance>,
pub(crate) datanodes: HashMap<u64, Arc<DatanodeInstance>>,
pub(crate) catalog_manager: Arc<FrontendCatalogManager>,
_guards: Vec<TestGuard>,
}
@@ -76,11 +82,11 @@ impl MockStandaloneInstance {
pub(crate) async fn create_standalone_instance(test_name: &str) -> MockStandaloneInstance {
let (opts, guard) = create_tmp_dir_and_datanode_opts(test_name);
let datanode_instance = DatanodeInstance::new(&opts).await.unwrap();
datanode_instance.start().await.unwrap();
let frontend_instance = Instance::new_standalone(Arc::new(datanode_instance));
let dn_instance = Arc::new(DatanodeInstance::new(&opts).await.unwrap());
let frontend_instance = Instance::try_new_standalone(dn_instance.clone())
.await
.unwrap();
dn_instance.start().await.unwrap();
MockStandaloneInstance {
instance: Arc::new(frontend_instance),
_guard: guard,
@@ -95,9 +101,12 @@ fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard)
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
storage: StorageConfig {
store: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
..Default::default()
},
mode: Mode::Standalone,
..Default::default()
};
@@ -182,9 +191,12 @@ async fn create_distributed_datanode(
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
..Default::default()
},
storage: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
storage: StorageConfig {
store: ObjectStoreConfig::File(FileConfig {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
}),
..Default::default()
},
mode: Mode::Distributed,
..Default::default()
};
@@ -259,26 +271,28 @@ pub(crate) async fn create_distributed_instance(test_name: &str) -> MockDistribu
let partition_manager = Arc::new(PartitionRuleManager::new(Arc::new(TableRoutes::new(
meta_client.clone(),
))));
let catalog_manager = Arc::new(FrontendCatalogManager::new(
meta_backend,
partition_manager,
datanode_clients.clone(),
));
let mut catalog_manager =
FrontendCatalogManager::new(meta_backend, partition_manager, datanode_clients.clone());
wait_datanodes_alive(kv_store).await;
let dist_instance = DistInstance::new(
meta_client.clone(),
catalog_manager,
Arc::new(catalog_manager.clone()),
datanode_clients.clone(),
);
let dist_instance = Arc::new(dist_instance);
let frontend = Instance::new_distributed(dist_instance.clone());
catalog_manager.set_dist_instance(dist_instance.clone());
let catalog_manager = Arc::new(catalog_manager);
let frontend = Instance::new_distributed(catalog_manager.clone(), dist_instance.clone()).await;
MockDistributedInstance {
frontend: Arc::new(frontend),
dist_instance,
datanodes: datanode_instances,
catalog_manager,
_guards: test_guards,
}
}

View File

@@ -19,19 +19,17 @@ use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use common_recordbatch::util;
use common_telemetry::logging;
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{Int64Vector, StringVector, UInt64Vector, VectorRef};
use query::parser::{QueryLanguageParser, QueryStatement};
use session::context::{QueryContext, QueryContextRef};
use snafu::ResultExt;
use sql::statements::statement::Statement;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::QueryContext;
use crate::error::{Error, ExecuteLogicalPlanSnafu, PlanStatementSnafu};
use crate::tests::test_util::{self, check_output_stream, setup_test_instance, MockInstance};
use crate::error::Error;
use crate::tests::test_util::check_output_stream;
use crate::tests::{create_standalone_instance, MockStandaloneInstance};
#[tokio::test(flavor = "multi_thread")]
async fn test_create_database_and_insert_query() {
let instance = MockInstance::new("create_database_and_insert_query").await;
let instance = create_standalone_instance("create_database_and_insert_query").await;
let output = execute_sql(&instance, "create database test").await;
assert!(matches!(output, Output::AffectedRows(1)));
@@ -78,7 +76,8 @@ async fn test_create_database_and_insert_query() {
#[tokio::test(flavor = "multi_thread")]
async fn test_issue477_same_table_name_in_different_databases() {
let instance = MockInstance::new("test_issue477_same_table_name_in_different_databases").await;
let instance =
create_standalone_instance("test_issue477_same_table_name_in_different_databases").await;
// Create database a and b
let output = execute_sql(&instance, "create database a").await;
@@ -145,7 +144,7 @@ async fn test_issue477_same_table_name_in_different_databases() {
.await;
}
async fn assert_query_result(instance: &MockInstance, sql: &str, ts: i64, host: &str) {
async fn assert_query_result(instance: &MockStandaloneInstance, sql: &str, ts: i64, host: &str) {
let query_output = execute_sql(instance, sql).await;
match query_output {
Output::Stream(s) => {
@@ -167,7 +166,7 @@ async fn assert_query_result(instance: &MockInstance, sql: &str, ts: i64, host:
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert() {
let instance = setup_test_instance("test_execute_insert").await;
let instance = create_standalone_instance("test_execute_insert").await;
// create table
execute_sql(
@@ -189,7 +188,7 @@ async fn test_execute_insert() {
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert_by_select() {
let instance = setup_test_instance("test_execute_insert_by_select").await;
let instance = create_standalone_instance("test_execute_insert_by_select").await;
// create table
execute_sql(
@@ -250,11 +249,12 @@ async fn test_execute_insert_by_select() {
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert_query_with_i64_timestamp() {
let instance = MockInstance::new("insert_query_i64_timestamp").await;
let instance = create_standalone_instance("insert_query_i64_timestamp").await;
test_util::create_test_table(instance.inner(), ConcreteDataType::int64_datatype())
.await
.unwrap();
execute_sql(
&instance,
"create table demo(host string, cpu double, memory double, ts bigint time index, primary key (host));",
).await;
let output = execute_sql(
&instance,
@@ -301,7 +301,7 @@ async fn test_execute_insert_query_with_i64_timestamp() {
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_query() {
let instance = MockInstance::new("execute_query").await;
let instance = create_standalone_instance("execute_query").await;
let output = execute_sql(&instance, "select sum(number) from numbers limit 20").await;
match output {
@@ -321,7 +321,7 @@ async fn test_execute_query() {
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_show_databases_tables() {
let instance = MockInstance::new("execute_show_databases_tables").await;
let instance = create_standalone_instance("execute_show_databases_tables").await;
let output = execute_sql(&instance, "show databases").await;
match output {
@@ -363,13 +363,10 @@ async fn test_execute_show_databases_tables() {
_ => unreachable!(),
}
// create a table
test_util::create_test_table(
instance.inner(),
ConcreteDataType::timestamp_millisecond_datatype(),
)
.await
.unwrap();
execute_sql(
&instance,
"create table demo(host string, cpu double, memory double, ts timestamp time index, primary key (host));",
).await;
let output = execute_sql(&instance, "show tables").await;
match output {
@@ -400,7 +397,7 @@ async fn test_execute_show_databases_tables() {
#[tokio::test(flavor = "multi_thread")]
pub async fn test_execute_create() {
let instance = MockInstance::new("execute_create").await;
let instance = create_standalone_instance("execute_create").await;
let output = execute_sql(
&instance,
@@ -419,7 +416,7 @@ pub async fn test_execute_create() {
#[tokio::test]
async fn test_rename_table() {
let instance = MockInstance::new("test_rename_table_local").await;
let instance = create_standalone_instance("test_rename_table_local").await;
let output = execute_sql(&instance, "create database db").await;
assert!(matches!(output, Output::AffectedRows(1)));
@@ -475,7 +472,7 @@ async fn test_rename_table() {
#[tokio::test]
async fn test_create_table_after_rename_table() {
let instance = MockInstance::new("test_rename_table_local").await;
let instance = create_standalone_instance("test_rename_table_local").await;
let output = execute_sql(&instance, "create database db").await;
assert!(matches!(output, Output::AffectedRows(1)));
@@ -525,7 +522,7 @@ async fn test_create_table_after_rename_table() {
#[tokio::test(flavor = "multi_thread")]
async fn test_alter_table() {
let instance = setup_test_instance("test_alter_table").await;
let instance = create_standalone_instance("test_alter_table").await;
// create table
execute_sql(
@@ -612,7 +609,7 @@ async fn test_alter_table() {
}
async fn test_insert_with_default_value_for_type(type_name: &str) {
let instance = MockInstance::new("execute_create").await;
let instance = create_standalone_instance("execute_create").await;
let create_sql = format!(
r#"create table test_table(
@@ -663,7 +660,7 @@ async fn test_insert_with_default_value() {
#[tokio::test(flavor = "multi_thread")]
async fn test_use_database() {
let instance = MockInstance::new("test_use_database").await;
let instance = create_standalone_instance("test_use_database").await;
let output = execute_sql(&instance, "create database db1").await;
assert!(matches!(output, Output::AffectedRows(1)));
@@ -722,7 +719,7 @@ async fn test_use_database() {
#[tokio::test(flavor = "multi_thread")]
async fn test_delete() {
let instance = MockInstance::new("test_delete").await;
let instance = create_standalone_instance("test_delete").await;
let output = execute_sql(
&instance,
@@ -774,7 +771,7 @@ async fn test_execute_copy_to_s3() {
logging::init_default_ut_logging();
if let Ok(bucket) = env::var("GT_S3_BUCKET") {
if !bucket.is_empty() {
let instance = setup_test_instance("test_execute_copy_to_s3").await;
let instance = create_standalone_instance("test_execute_copy_to_s3").await;
// setups
execute_sql(
@@ -813,7 +810,7 @@ async fn test_execute_copy_from_s3() {
logging::init_default_ut_logging();
if let Ok(bucket) = env::var("GT_S3_BUCKET") {
if !bucket.is_empty() {
let instance = setup_test_instance("test_execute_copy_from_s3").await;
let instance = create_standalone_instance("test_execute_copy_from_s3").await;
// setups
execute_sql(
@@ -852,7 +849,7 @@ async fn test_execute_copy_from_s3() {
let tests = [
Test {
sql: &format!(
"Copy with_filename FROM 's3://{}/{}/export/demo.parquet_1_2'",
"Copy with_filename FROM 's3://{}/{}/export/demo.parquet'",
bucket, root
),
table_name: "with_filename",
@@ -908,89 +905,26 @@ async fn test_execute_copy_from_s3() {
}
}
#[tokio::test(flavor = "multi_thread")]
async fn test_create_by_procedure() {
common_telemetry::init_default_ut_logging();
let instance = MockInstance::with_procedure_enabled("create_by_procedure").await;
let output = execute_sql(
&instance,
r#"create table test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(0)));
// Create if not exists
let output = execute_sql(
&instance,
r#"create table if not exists test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(0)));
}
async fn execute_sql(instance: &MockInstance, sql: &str) -> Output {
async fn execute_sql(instance: &MockStandaloneInstance, sql: &str) -> Output {
execute_sql_in_db(instance, sql, DEFAULT_SCHEMA_NAME).await
}
async fn try_execute_sql(
instance: &MockInstance,
instance: &MockStandaloneInstance,
sql: &str,
) -> Result<Output, crate::error::Error> {
try_execute_sql_in_db(instance, sql, DEFAULT_SCHEMA_NAME).await
}
async fn try_execute_sql_in_db(
instance: &MockInstance,
instance: &MockStandaloneInstance,
sql: &str,
db: &str,
) -> Result<Output, crate::error::Error> {
let query_ctx = Arc::new(QueryContext::with(DEFAULT_CATALOG_NAME, db));
async fn plan_exec(
instance: &MockInstance,
stmt: QueryStatement,
query_ctx: QueryContextRef,
) -> Result<Output, Error> {
let engine = instance.inner().query_engine();
let plan = engine
.planner()
.plan(stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
engine
.execute(plan, query_ctx)
.await
.context(ExecuteLogicalPlanSnafu)
}
let stmt = QueryLanguageParser::parse_sql(sql).unwrap();
match stmt {
QueryStatement::Sql(Statement::Query(_)) | QueryStatement::Sql(Statement::Delete(_)) => {
plan_exec(instance, stmt, query_ctx).await
}
QueryStatement::Sql(Statement::Insert(ref insert)) if insert.is_insert_select() => {
plan_exec(instance, stmt, query_ctx).await
}
_ => instance.inner().execute_stmt(stmt, query_ctx).await,
}
instance.instance.do_query(sql, query_ctx).await.remove(0)
}
async fn execute_sql_in_db(instance: &MockInstance, sql: &str, db: &str) -> Output {
async fn execute_sql_in_db(instance: &MockStandaloneInstance, sql: &str, db: &str) -> Output {
try_execute_sql_in_db(instance, sql, db).await.unwrap()
}

View File

@@ -0,0 +1,26 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::Output;
use common_recordbatch::util;
pub(crate) async fn check_output_stream(output: Output, expected: String) {
let recordbatches = match output {
Output::Stream(stream) => util::collect_batches(stream).await.unwrap(),
Output::RecordBatches(recordbatches) => recordbatches,
_ => unreachable!(),
};
let pretty_print = recordbatches.pretty_print().unwrap();
assert_eq!(pretty_print, expected, "{}", pretty_print);
}

View File

@@ -15,16 +15,22 @@
use std::any::Any;
use common_error::prelude::{ErrorExt, Snafu};
use common_runtime::error::Error as RuntimeError;
use snafu::{Backtrace, ErrorCompat};
use tokio::task::JoinError;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Failed to wait for gc task to stop, source: {}", source))]
WaitGcTaskStop {
source: JoinError,
backtrace: Backtrace,
#[snafu(display("Failed to start log store gc task, source: {}", source))]
StartGcTask {
#[snafu(backtrace)]
source: RuntimeError,
},
#[snafu(display("Failed to stop log store gc task, source: {}", source))]
StopGcTask {
#[snafu(backtrace)]
source: RuntimeError,
},
#[snafu(display("Failed to add entry to LogBatch, source: {}", source))]

View File

@@ -13,25 +13,22 @@
// limitations under the License.
use std::fmt::{Debug, Formatter};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use async_stream::stream;
use common_runtime::{RepeatedTask, TaskFunction};
use common_telemetry::{error, info};
use raft_engine::{Config, Engine, LogBatch, MessageExt, ReadableSize, RecoveryMode};
use snafu::{ensure, OptionExt, ResultExt};
use snafu::{ensure, ResultExt};
use store_api::logstore::entry::Id;
use store_api::logstore::entry_stream::SendableEntryStream;
use store_api::logstore::namespace::Namespace as NamespaceTrait;
use store_api::logstore::{AppendResponse, LogStore};
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use crate::config::LogConfig;
use crate::error::{
AddEntryLogBatchSnafu, Error, FetchEntrySnafu, IllegalNamespaceSnafu, IllegalStateSnafu,
RaftEngineSnafu, WaitGcTaskStopSnafu,
RaftEngineSnafu, StartGcTaskSnafu, StopGcTaskSnafu,
};
use crate::raft_engine::protos::logstore::{EntryImpl as Entry, NamespaceImpl as Namespace};
@@ -41,9 +38,36 @@ const SYSTEM_NAMESPACE: u64 = 0;
pub struct RaftEngineLogStore {
config: LogConfig,
engine: Arc<Engine>,
cancel_token: Mutex<Option<CancellationToken>>,
gc_task_handle: Mutex<Option<JoinHandle<()>>>,
started: AtomicBool,
gc_task: RepeatedTask<Error>,
}
pub struct PurgeExpiredFilesFunction {
engine: Arc<Engine>,
}
#[async_trait::async_trait]
impl TaskFunction<Error> for PurgeExpiredFilesFunction {
fn name(&self) -> &str {
"RaftEngineLogStore-gc-task"
}
async fn call(&self) -> Result<(), Error> {
match self.engine.purge_expired_files().context(RaftEngineSnafu) {
Ok(res) => {
// TODO(hl): the return value of purge_expired_files indicates the namespaces that need to be compacted,
// which is useful for monitoring regions that failed to flush their memtables to SSTs.
info!(
"Successfully purged logstore files, namespaces need compaction: {:?}",
res
);
}
Err(e) => {
error!(e; "Failed to purge files in logstore");
}
}
Ok(())
}
}
impl RaftEngineLogStore {
@@ -58,56 +82,31 @@ impl RaftEngineLogStore {
..Default::default()
};
let engine = Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?);
let gc_task = RepeatedTask::new(
config.purge_interval,
Arc::new(PurgeExpiredFilesFunction {
engine: engine.clone(),
}),
);
let log_store = Self {
config,
engine,
cancel_token: Mutex::new(None),
gc_task_handle: Mutex::new(None),
started: AtomicBool::new(false),
gc_task,
};
log_store.start().await?;
Ok(log_store)
}
pub fn started(&self) -> bool {
self.started.load(Ordering::Relaxed)
self.gc_task.started()
}
async fn start(&self) -> Result<(), Error> {
let engine_clone = self.engine.clone();
let interval = self.config.purge_interval;
let token = CancellationToken::new();
let child = token.child_token();
// TODO(hl): Maybe spawn to a blocking runtime.
let handle = common_runtime::spawn_bg(async move {
loop {
tokio::select! {
_ = tokio::time::sleep(interval) => {}
_ = child.cancelled() => {
info!("LogStore gc task has been cancelled");
return;
}
}
match engine_clone.purge_expired_files().context(RaftEngineSnafu) {
Ok(res) => {
// TODO(hl): the return value of purge_expired_files indicates the namespaces that need to be compacted,
// which is useful for monitoring regions that failed to flush their memtables to SSTs.
info!(
"Successfully purged logstore files, namespaces need compaction: {:?}",
res
);
}
Err(e) => {
error!(e; "Failed to purge files in logstore");
}
}
}
});
*self.cancel_token.lock().await = Some(token);
*self.gc_task_handle.lock().await = Some(handle);
self.started.store(true, Ordering::Relaxed);
info!("RaftEngineLogStore started with config: {:?}", self.config);
Ok(())
self.gc_task
.start(common_runtime::bg_runtime())
.await
.context(StartGcTaskSnafu)
}
}
@@ -115,7 +114,7 @@ impl Debug for RaftEngineLogStore {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RaftEngineLogsStore")
.field("config", &self.config)
.field("started", &self.started.load(Ordering::Relaxed))
.field("started", &self.gc_task.started())
.finish()
}
}
@@ -127,28 +126,7 @@ impl LogStore for RaftEngineLogStore {
type Entry = Entry;
async fn stop(&self) -> Result<(), Self::Error> {
ensure!(
self.started
.compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed)
.is_ok(),
IllegalStateSnafu
);
let handle = self
.gc_task_handle
.lock()
.await
.take()
.context(IllegalStateSnafu)?;
let token = self
.cancel_token
.lock()
.await
.take()
.context(IllegalStateSnafu)?;
token.cancel();
handle.await.context(WaitGcTaskStopSnafu)?;
info!("RaftEngineLogStore stopped");
Ok(())
self.gc_task.stop().await.context(StopGcTaskSnafu)
}
/// Appends an entry to the logstore. Currently the existence of the entry's namespace is not checked.
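The refactor above replaces the hand-rolled CancellationToken/JoinHandle loop with common_runtime's RepeatedTask driving a TaskFunction. Reduced to a skeleton with a hypothetical no-op task (the constructor, start, stop, and started calls are the ones visible in this hunk; everything else is illustrative):

use std::sync::Arc;
use std::time::Duration;

use common_runtime::{RepeatedTask, TaskFunction};
use common_telemetry::info;
use snafu::ResultExt;

use crate::error::{Error, StartGcTaskSnafu, StopGcTaskSnafu};

// Hypothetical periodic job; the real one calls Engine::purge_expired_files.
struct Heartbeat;

#[async_trait::async_trait]
impl TaskFunction<Error> for Heartbeat {
    fn name(&self) -> &str {
        "example-heartbeat-task"
    }

    async fn call(&self) -> Result<(), Error> {
        info!("tick");
        Ok(())
    }
}

async fn run_heartbeat() -> Result<(), Error> {
    let task = RepeatedTask::new(Duration::from_secs(10), Arc::new(Heartbeat));
    // Spawn onto the shared background runtime; started() reports true from here on.
    task.start(common_runtime::bg_runtime())
        .await
        .context(StartGcTaskSnafu)?;
    assert!(task.started());
    // ... later, on shutdown:
    task.stop().await.context(StopGcTaskSnafu)
}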

View File

@@ -10,12 +10,14 @@ mock = []
[dependencies]
anymap = "1.0.0-beta.2"
api = { path = "../api" }
async-stream.workspace = true
async-trait = "0.1"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-procedure = { path = "../common/procedure" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
@@ -39,6 +41,7 @@ tokio-stream = { version = "0.1", features = ["net"] }
tonic.workspace = true
tower = "0.4"
url = "2.3"
servers = { path = "../servers" }
[dev-dependencies]
tracing = "0.1"

View File

@@ -20,8 +20,12 @@ use api::v1::meta::lock_server::LockServer;
use api::v1::meta::router_server::RouterServer;
use api::v1::meta::store_server::StoreServer;
use etcd_client::Client;
use servers::http::{HttpServer, HttpServerBuilder};
use servers::metrics_handler::MetricsHandler;
use servers::server::Server;
use snafu::ResultExt;
use tokio::net::TcpListener;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use tokio_stream::wrappers::TcpListenerStream;
use tonic::transport::server::Router;
@@ -44,6 +48,8 @@ use crate::{error, Result};
pub struct MetaSrvInstance {
meta_srv: MetaSrv,
http_srv: Arc<HttpServer>,
opts: MetaSrvOptions,
signal_sender: Option<Sender<()>>,
@@ -52,26 +58,44 @@ pub struct MetaSrvInstance {
impl MetaSrvInstance {
pub async fn new(opts: MetaSrvOptions) -> Result<MetaSrvInstance> {
let meta_srv = build_meta_srv(&opts).await?;
let http_srv = Arc::new(
HttpServerBuilder::new(opts.http_opts.clone())
.with_metrics_handler(MetricsHandler)
.build(),
);
Ok(MetaSrvInstance {
meta_srv,
http_srv,
opts,
signal_sender: None,
})
}
pub async fn start(&mut self) -> Result<()> {
self.meta_srv.start().await;
self.meta_srv.try_start().await?;
let (tx, mut rx) = mpsc::channel::<()>(1);
self.signal_sender = Some(tx);
bootstrap_meta_srv_with_router(
let meta_srv = bootstrap_meta_srv_with_router(
&self.opts.bind_addr,
router(self.meta_srv.clone()),
&mut rx,
)
.await?;
);
let addr = self
.opts
.http_opts
.addr
.parse()
.context(error::ParseAddrSnafu {
addr: &self.opts.http_opts.addr,
})?;
let http_srv = self.http_srv.start(addr);
select! {
v = meta_srv => v?,
v = http_srv => v.map(|_| ()).context(error::StartMetricsExportSnafu)?,
}
Ok(())
}
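For context, a minimal standalone sketch of the pattern start() uses above: the gRPC router future and the metrics HTTP server future are raced with select!, and whichever finishes first (normally on error or shutdown) ends the instance while the other future is dropped. The sleeping futures and the String error type below are placeholders, not the real servers.

use tokio::select;
use tokio::time::{sleep, Duration};

#[tokio::main]
async fn main() -> Result<(), String> {
    // Stand-ins for the gRPC router future and the metrics HTTP server future.
    let grpc = async {
        sleep(Duration::from_millis(100)).await;
        Ok::<(), String>(())
    };
    let http = async {
        sleep(Duration::from_millis(200)).await;
        Ok::<(), String>(())
    };
    // Whichever branch completes first wins; the losing future is dropped and thus cancelled.
    select! {
        v = grpc => v?,
        v = http => v?,
    }
    Ok(())
}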
@@ -85,7 +109,12 @@ impl MetaSrvInstance {
}
self.meta_srv.shutdown();
self.http_srv
.shutdown()
.await
.context(error::ShutdownServerSnafu {
server: self.http_srv.name(),
})?;
Ok(())
}
}
@@ -131,10 +160,7 @@ pub async fn build_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
.context(error::ConnectEtcdSnafu)?;
(
EtcdStore::with_etcd_client(etcd_client.clone())?,
Some(EtcdElection::with_etcd_client(
&opts.server_addr,
etcd_client.clone(),
)?),
Some(EtcdElection::with_etcd_client(&opts.server_addr, etcd_client.clone()).await?),
Some(EtcdLock::with_etcd_client(etcd_client)?),
)
};
@@ -172,7 +198,7 @@ pub async fn build_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
pub async fn make_meta_srv(opts: &MetaSrvOptions) -> Result<MetaSrv> {
let meta_srv = build_meta_srv(opts).await?;
meta_srv.start().await;
meta_srv.try_start().await?;
Ok(meta_srv)
}


@@ -14,12 +14,23 @@
pub mod etcd;
use std::sync::Arc;
use etcd_client::LeaderKey;
use tokio::sync::broadcast::Receiver;
use crate::error::Result;
pub const LEASE_SECS: i64 = 3;
pub const KEEP_ALIVE_PERIOD_SECS: u64 = LEASE_SECS as u64 * 2 / 3;
pub const ELECTION_KEY: &str = "__meta_srv_election";
#[derive(Clone)]
pub enum LeaderChangeMessage {
Elected(Arc<LeaderKey>),
StepDown(Arc<LeaderKey>),
}
#[async_trait::async_trait]
pub trait Election: Send + Sync {
type Leader;
@@ -46,4 +57,6 @@ pub trait Election: Send + Sync {
/// Releases election leadership so other campaigners may
/// acquire leadership on the election.
async fn resign(&self) -> Result<()>;
fn subscribe_leader_change(&self) -> Receiver<LeaderChangeMessage>;
}
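As a side note, subscribe_leader_change hands out a tokio broadcast Receiver, so every subscriber has to handle Lagged (it fell behind and missed messages) and Closed (all senders are gone), as the consumers added elsewhere in this change do. A minimal standalone sketch of those semantics, using a plain string payload instead of LeaderChangeMessage:

use tokio::sync::broadcast;
use tokio::sync::broadcast::error::RecvError;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = broadcast::channel::<&'static str>(16);
    tx.send("elected").unwrap();
    tx.send("step_down").unwrap();
    drop(tx); // Dropping every sender makes recv() return Closed once the backlog is drained.
    loop {
        match rx.recv().await {
            Ok(msg) => println!("leader change: {msg}"),
            // A receiver that fell more than the channel capacity behind sees Lagged and can resync.
            Err(RecvError::Lagged(skipped)) => println!("missed {skipped} messages"),
            Err(RecvError::Closed) => break,
        }
    }
}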


@@ -16,11 +16,16 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use common_telemetry::{info, warn};
use common_telemetry::{error, info, warn};
use etcd_client::Client;
use snafu::{OptionExt, ResultExt};
use tokio::sync::broadcast;
use tokio::sync::broadcast::error::RecvError;
use tokio::sync::broadcast::Receiver;
use crate::election::{Election, ELECTION_KEY, KEEP_ALIVE_PERIOD_SECS, LEASE_SECS};
use crate::election::{
Election, LeaderChangeMessage, ELECTION_KEY, KEEP_ALIVE_PERIOD_SECS, LEASE_SECS,
};
use crate::error;
use crate::error::Result;
use crate::metasrv::{ElectionRef, LeaderValue};
@@ -30,6 +35,7 @@ pub struct EtcdElection {
client: Client,
is_leader: AtomicBool,
infancy: AtomicBool,
leader_watcher: broadcast::Sender<LeaderChangeMessage>,
}
impl EtcdElection {
@@ -42,20 +48,50 @@ impl EtcdElection {
.await
.context(error::ConnectEtcdSnafu)?;
Self::with_etcd_client(leader_value, client)
Self::with_etcd_client(leader_value, client).await
}
pub fn with_etcd_client<E>(leader_value: E, client: Client) -> Result<ElectionRef>
pub async fn with_etcd_client<E>(leader_value: E, client: Client) -> Result<ElectionRef>
where
E: AsRef<str>,
{
let leader_value = leader_value.as_ref().into();
let leader_value: String = leader_value.as_ref().into();
let leader_ident = leader_value.clone();
let (tx, mut rx) = broadcast::channel(100);
common_runtime::spawn_bg(async move {
loop {
match rx.recv().await {
Ok(msg) => match msg {
LeaderChangeMessage::Elected(key) => {
info!(
"[{leader_ident}] is elected as leader: {:?}, lease: {}",
key.name_str(),
key.lease()
);
}
LeaderChangeMessage::StepDown(key) => {
warn!(
"[{leader_ident}] is stepping down: {:?}, lease: {}",
key.name_str(),
key.lease()
);
}
},
Err(RecvError::Lagged(_)) => {
warn!("Log printing is too slow or leader changed too fast!");
}
Err(RecvError::Closed) => break,
}
}
});
Ok(Arc::new(Self {
leader_value,
client,
is_leader: AtomicBool::new(false),
infancy: AtomicBool::new(false),
leader_watcher: tx,
}))
}
}
@@ -120,18 +156,21 @@ impl Election for EtcdElection {
.is_ok()
{
self.infancy.store(true, Ordering::Relaxed);
info!(
"[{}] becoming leader: {:?}, lease: {}",
&self.leader_value,
leader.name_str(),
leader.lease()
);
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::Elected(Arc::new(leader.clone())))
{
error!("Failed to send leader change message, error: {e}");
}
}
} else {
warn!(
"Failed to keep-alive, lease: {}, will re-initiate election",
leader.lease()
);
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader.clone())))
{
error!("Failed to send leader change message, error: {e}");
}
break;
}
}
@@ -162,4 +201,8 @@ impl Election for EtcdElection {
async fn resign(&self) -> Result<()> {
todo!()
}
fn subscribe_leader_change(&self) -> Receiver<LeaderChangeMessage> {
self.leader_watcher.subscribe()
}
}


@@ -25,6 +25,13 @@ pub enum Error {
#[snafu(display("Failed to send shutdown signal"))]
SendShutdownSignal { source: SendError<()> },
#[snafu(display("Failed to shutdown {} server, source: {}", server, source))]
ShutdownServer {
#[snafu(backtrace)]
source: servers::error::Error,
server: String,
},
#[snafu(display("Error stream request next is None"))]
StreamNone { backtrace: Backtrace },
@@ -55,7 +62,16 @@ pub enum Error {
source: tonic::transport::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to start gRPC server, source: {}", source))]
StartMetricsExport {
#[snafu(backtrace)]
source: servers::error::Error,
},
#[snafu(display("Failed to parse address {}, source: {}", addr, source))]
ParseAddr {
addr: String,
source: std::net::AddrParseError,
},
#[snafu(display("Empty table name"))]
EmptyTableName { backtrace: Backtrace },
@@ -274,6 +290,12 @@ pub enum Error {
#[snafu(display("Missing required parameter, param: {:?}", param))]
MissingRequiredParameter { param: String },
#[snafu(display("Failed to recover procedure, source: {source}"))]
RecoverProcedure {
#[snafu(backtrace)]
source: common_procedure::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -317,6 +339,7 @@ impl ErrorExt for Error {
| Error::LockNotConfig { .. }
| Error::ExceededRetryLimit { .. }
| Error::SendShutdownSignal { .. }
| Error::ParseAddr { .. }
| Error::StartGrpc { .. } => StatusCode::Internal,
Error::EmptyKey { .. }
| Error::MissingRequiredParameter { .. }
@@ -341,6 +364,10 @@ impl ErrorExt for Error {
Error::TableNotFound { .. } => StatusCode::TableNotFound,
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::MetaInternal { source } => source.status_code(),
Error::RecoverProcedure { source } => source.status_code(),
Error::ShutdownServer { source, .. } | Error::StartMetricsExport { source } => {
source.status_code()
}
}
}
}


@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(async_closure)]
#![feature(btree_drain_filter)]
pub mod bootstrap;
pub mod cluster;
pub mod election;
@@ -25,6 +27,7 @@ pub mod lock;
pub mod metasrv;
#[cfg(feature = "mock")]
pub mod mocks;
mod procedure;
pub mod selector;
mod sequence;
pub mod service;


@@ -18,11 +18,16 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use api::v1::meta::Peer;
use common_telemetry::{info, warn};
use common_procedure::ProcedureManagerRef;
use common_telemetry::{error, info, warn};
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use snafu::ResultExt;
use tokio::sync::broadcast::error::RecvError;
use crate::cluster::MetaPeerClient;
use crate::election::Election;
use crate::election::{Election, LeaderChangeMessage};
use crate::error::{RecoverProcedureSnafu, Result};
use crate::handler::HeartbeatHandlerGroup;
use crate::lock::DistLockRef;
use crate::selector::{Selector, SelectorType};
@@ -40,6 +45,7 @@ pub struct MetaSrvOptions {
pub datanode_lease_secs: i64,
pub selector: SelectorType,
pub use_memory_store: bool,
pub http_opts: HttpOptions,
}
impl Default for MetaSrvOptions {
@@ -51,6 +57,7 @@ impl Default for MetaSrvOptions {
datanode_lease_secs: 15,
selector: SelectorType::default(),
use_memory_store: false,
http_opts: HttpOptions::default(),
}
}
}
@@ -102,20 +109,51 @@ pub struct MetaSrv {
election: Option<ElectionRef>,
meta_peer_client: Option<MetaPeerClient>,
lock: Option<DistLockRef>,
procedure_manager: ProcedureManagerRef,
}
impl MetaSrv {
pub async fn start(&self) {
pub async fn try_start(&self) -> Result<()> {
if self
.started
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.is_err()
{
warn!("MetaSrv already started");
return;
return Ok(());
}
if let Some(election) = self.election() {
let procedure_manager = self.procedure_manager.clone();
let mut rx = election.subscribe_leader_change();
common_runtime::spawn_bg(async move {
loop {
match rx.recv().await {
Ok(msg) => {
match msg {
LeaderChangeMessage::Elected(_) => {
if let Err(e) = procedure_manager.recover().await {
error!("Failed to recover procedures, error: {e}");
}
}
LeaderChangeMessage::StepDown(_) => {
// TODO(LFC): TBC
unimplemented!()
}
}
}
Err(RecvError::Closed) => {
error!("Not expected, is leader election loop still running?");
break;
}
Err(RecvError::Lagged(_)) => {
// TODO(LFC): TBC
break;
}
}
}
});
let election = election.clone();
let started = self.started.clone();
common_runtime::spawn_bg(async move {
@@ -128,9 +166,15 @@ impl MetaSrv {
}
info!("MetaSrv stopped");
});
} else {
self.procedure_manager
.recover()
.await
.context(RecoverProcedureSnafu)?;
}
info!("MetaSrv started");
Ok(())
}
pub fn shutdown(&self) {


@@ -15,6 +15,8 @@
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use common_procedure::local::{LocalManager, ManagerConfig};
use crate::cluster::MetaPeerClient;
use crate::handler::{
CheckLeaderHandler, CollectStatsHandler, HeartbeatHandlerGroup, KeepLeaseHandler,
@@ -22,6 +24,7 @@ use crate::handler::{
};
use crate::lock::DistLockRef;
use crate::metasrv::{ElectionRef, MetaSrv, MetaSrvOptions, SelectorRef, TABLE_ID_SEQ};
use crate::procedure::state_store::MetaStateStore;
use crate::selector::lease_based::LeaseBasedSelector;
use crate::sequence::Sequence;
use crate::service::store::kv::{KvStoreRef, ResettableKvStoreRef};
@@ -139,6 +142,10 @@ impl MetaSrvBuilder {
let table_id_sequence = Arc::new(Sequence::new(TABLE_ID_SEQ, 1024, 10, kv_store.clone()));
let config = ManagerConfig::default();
let state_store = Arc::new(MetaStateStore::new(kv_store.clone()));
let procedure_manager = Arc::new(LocalManager::new(config, state_store));
MetaSrv {
started,
options,
@@ -150,6 +157,7 @@ impl MetaSrvBuilder {
election,
meta_peer_client,
lock,
procedure_manager,
}
}
}


@@ -0,0 +1,15 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod state_store;


@@ -0,0 +1,194 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::meta::{BatchDeleteRequest, PutRequest, RangeRequest};
use async_stream::try_stream;
use async_trait::async_trait;
use common_error::prelude::BoxedError;
use common_procedure::error::{
CorruptedDataSnafu, DeleteStatesSnafu, ListStateSnafu, PutStateSnafu,
};
use common_procedure::store::state_store::{KeyValueStream, StateStore};
use common_procedure::Result;
use snafu::ResultExt;
use crate::service::store::kv::KvStoreRef;
use crate::util;
const PROCEDURE_PREFIX: &str = "/__procedure__/";
fn with_prefix(key: &str) -> String {
format!("{PROCEDURE_PREFIX}{key}")
}
fn strip_prefix(key: &str) -> String {
key.trim_start_matches(PROCEDURE_PREFIX).to_string()
}
pub(crate) struct MetaStateStore {
kv_store: KvStoreRef,
max_size_per_range: i64,
}
impl MetaStateStore {
pub(crate) fn new(kv_store: KvStoreRef) -> Self {
Self {
kv_store,
max_size_per_range: -1,
}
}
}
#[async_trait]
impl StateStore for MetaStateStore {
async fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
let _ = self
.kv_store
.put(PutRequest {
key: with_prefix(key).into_bytes(),
value,
..Default::default()
})
.await
.map_err(BoxedError::new)
.context(PutStateSnafu { key })?;
Ok(())
}
async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
// extend their lifetimes to be used in the stream
let path = path.to_string();
let kv_store = self.kv_store.clone();
let limit = self.max_size_per_range;
let stream = try_stream! {
let mut key = with_prefix(path.trim_start_matches('/')).into_bytes();
let range_end = util::get_prefix_end_key(&key);
loop {
let req = RangeRequest {
key: key.clone(),
range_end: range_end.clone(),
limit,
..Default::default()
};
let resp = kv_store.range(req).await.map_err(BoxedError::new).with_context(|_|
ListStateSnafu { path: path.clone() }
)?;
let mut no_more_data = true;
if resp.more {
if let Some(last) = resp.kvs.last() {
key = util::get_prefix_end_key(&last.key);
no_more_data = false;
}
}
for kv in resp.kvs {
let key = String::from_utf8(kv.key).context(CorruptedDataSnafu)?;
let key = strip_prefix(&key);
let value = kv.value;
yield (key, value)
}
if no_more_data {
break;
}
}
};
Ok(Box::pin(stream))
}
async fn delete(&self, keys: &[String]) -> Result<()> {
let _ = self
.kv_store
.batch_delete(BatchDeleteRequest {
keys: keys
.iter()
.map(|x| with_prefix(x).into_bytes())
.collect::<Vec<_>>(),
..Default::default()
})
.await
.map_err(BoxedError::new)
.with_context(|_| DeleteStatesSnafu {
keys: format!("{:?}", keys.to_vec()),
})?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_procedure::store::state_store::KeyValue;
use futures::TryStreamExt;
use super::*;
use crate::service::store::memory::MemStore;
#[tokio::test]
async fn test_meta_state_store() {
let store = &MetaStateStore {
kv_store: Arc::new(MemStore::new()),
max_size_per_range: 1, // for testing "more" in range
};
let walk_top_down = async move |path: &str| -> Vec<KeyValue> {
let mut data = store
.walk_top_down(path)
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap();
data.sort_unstable_by(|a, b| a.0.cmp(&b.0));
data
};
let data = walk_top_down("/").await;
assert!(data.is_empty());
store.put("a/1", b"v1".to_vec()).await.unwrap();
store.put("a/2", b"v2".to_vec()).await.unwrap();
store.put("b/1", b"v3".to_vec()).await.unwrap();
let data = walk_top_down("/").await;
assert_eq!(
vec![
("a/1".to_string(), b"v1".to_vec()),
("a/2".to_string(), b"v2".to_vec()),
("b/1".to_string(), b"v3".to_vec())
],
data
);
let data = walk_top_down("a/").await;
assert_eq!(
vec![
("a/1".to_string(), b"v1".to_vec()),
("a/2".to_string(), b"v2".to_vec()),
],
data
);
store
.delete(&["a/2".to_string(), "b/1".to_string()])
.await
.unwrap();
let data = walk_top_down("a/").await;
assert_eq!(vec![("a/1".to_string(), b"v1".to_vec()),], data);
}
}
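As an aside, the loop in walk_top_down above pages through the key space: it fetches a bounded number of keys per request and, while the response reports more, advances the start key past the last key it received. A minimal standalone sketch of the same shape over an in-memory map; scan_prefix and its page size are illustrative assumptions, not the store's API:

use std::collections::BTreeMap;
use std::ops::Bound;

// Illustrative only: page through all keys under `prefix`, `page` keys at a time,
// advancing the start key strictly past the last key seen, until a page comes back empty.
fn scan_prefix(map: &BTreeMap<String, Vec<u8>>, prefix: &str, page: usize) -> Vec<String> {
    let mut out = Vec::new();
    let mut start = prefix.to_string();
    loop {
        let batch: Vec<String> = map
            .range::<String, _>((Bound::Included(&start), Bound::Unbounded))
            .take_while(|(k, _)| k.starts_with(prefix))
            .take(page)
            .map(|(k, _)| k.clone())
            .collect();
        let Some(last) = batch.last().cloned() else { break };
        out.extend(batch);
        // '\0' is the smallest suffix, so the next page starts right after `last`.
        start = format!("{last}\0");
    }
    out
}

fn main() {
    let map: BTreeMap<String, Vec<u8>> = [("a/1", vec![]), ("a/2", vec![]), ("b/1", vec![])]
        .into_iter()
        .map(|(k, v)| (k.to_string(), v))
        .collect();
    assert_eq!(scan_prefix(&map, "a/", 1), vec!["a/1", "a/2"]);
}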


@@ -47,7 +47,7 @@ use table::{error as table_error, Result as TableResult, Table};
use tokio::sync::Mutex;
use crate::config::EngineConfig;
use crate::engine::procedure::CreateMitoTable;
use crate::engine::procedure::{AlterMitoTable, CreateMitoTable};
use crate::error::{
self, BuildColumnDescriptorSnafu, BuildColumnFamilyDescriptorSnafu, BuildRegionDescriptorSnafu,
BuildRowKeyDescriptorSnafu, InvalidPrimaryKeySnafu, InvalidRawSchemaSnafu,
@@ -166,7 +166,24 @@ impl<S: StorageEngine> TableEngineProcedure for MitoEngine<S> {
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?;
let procedure = Box::new(CreateMitoTable::new(request, self.inner.clone()));
let procedure = Box::new(
CreateMitoTable::new(request, self.inner.clone())
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?,
);
Ok(procedure)
}
fn alter_table_procedure(
&self,
_ctx: &EngineContext,
request: AlterTableRequest,
) -> TableResult<BoxedProcedure> {
let procedure = Box::new(
AlterMitoTable::new(request, self.inner.clone())
.map_err(BoxedError::new)
.context(table_error::TableOperationSnafu)?,
);
Ok(procedure)
}
}
@@ -175,7 +192,7 @@ pub(crate) struct MitoEngineInner<S: StorageEngine> {
/// All tables opened by the engine. The map key is the formatted [TableReference].
///
/// Writers of `tables` should also hold the `table_mutex`.
tables: RwLock<HashMap<String, TableRef>>,
tables: RwLock<HashMap<String, Arc<MitoTable<S::Region>>>>,
object_store: ObjectStore,
storage_engine: S,
/// Table mutex is used to protect the operations such as creating/opening/closing
@@ -546,6 +563,16 @@ impl<S: StorageEngine> MitoEngineInner<S> {
}
fn get_table(&self, table_ref: &TableReference) -> Option<TableRef> {
self.tables
.read()
.unwrap()
.get(&table_ref.to_string())
.cloned()
.map(|table| table as _)
}
/// Returns the [MitoTable].
fn get_mito_table(&self, table_ref: &TableReference) -> Option<Arc<MitoTable<S::Region>>> {
self.tables
.read()
.unwrap()
@@ -579,7 +606,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
table: table_name,
};
let table = self
.get_table(&table_ref)
.get_mito_table(&table_ref)
.context(error::TableNotFoundSnafu { table_name })?;
logging::info!("start altering table {} with request {:?}", table_name, req);


@@ -12,10 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod alter;
mod create;
use std::sync::Arc;
pub(crate) use alter::AlterMitoTable;
use common_procedure::ProcedureManager;
pub(crate) use create::CreateMitoTable;
use store_api::storage::StorageEngine;
@@ -31,7 +33,8 @@ pub(crate) fn register_procedure_loaders<S: StorageEngine>(
procedure_manager: &dyn ProcedureManager,
) {
// The procedure names are expected to be unique, so we just panic on error.
CreateMitoTable::register_loader(engine_inner, procedure_manager);
CreateMitoTable::register_loader(engine_inner.clone(), procedure_manager);
AlterMitoTable::register_loader(engine_inner, procedure_manager);
}
#[cfg(test)]


@@ -0,0 +1,540 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use async_trait::async_trait;
use common_procedure::error::{Error, FromJsonSnafu, ToJsonSnafu};
use common_procedure::{Context, LockKey, Procedure, ProcedureManager, Result, Status};
use common_telemetry::logging;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::manifest::Manifest;
use store_api::storage::{AlterRequest, Region, RegionMeta, StorageEngine};
use table::engine::TableReference;
use table::metadata::{RawTableInfo, TableInfo, TableVersion};
use table::requests::{AlterKind, AlterTableRequest};
use table::Table;
use crate::engine::MitoEngineInner;
use crate::error::{
BuildTableMetaSnafu, TableNotFoundSnafu, UpdateTableManifestSnafu, VersionChangedSnafu,
};
use crate::manifest::action::{TableChange, TableMetaAction, TableMetaActionList};
use crate::table::{create_alter_operation, MitoTable};
/// Procedure to alter a [MitoTable].
pub(crate) struct AlterMitoTable<S: StorageEngine> {
data: AlterTableData,
engine_inner: Arc<MitoEngineInner<S>>,
table: Arc<MitoTable<S::Region>>,
/// The table info after alteration.
new_info: Option<TableInfo>,
}
#[async_trait]
impl<S: StorageEngine> Procedure for AlterMitoTable<S> {
fn type_name(&self) -> &str {
Self::TYPE_NAME
}
async fn execute(&mut self, _ctx: &Context) -> Result<Status> {
match self.data.state {
AlterTableState::Prepare => self.on_prepare(),
AlterTableState::AlterRegions => self.on_alter_regions().await,
AlterTableState::UpdateTableManifest => self.on_update_table_manifest().await,
}
}
fn dump(&self) -> Result<String> {
let json = serde_json::to_string(&self.data).context(ToJsonSnafu)?;
Ok(json)
}
fn lock_key(&self) -> LockKey {
let table_ref = self.data.table_ref();
let info = self.table.table_info();
let mut keys: Vec<_> = info
.meta
.region_numbers
.iter()
.map(|number| format!("{table_ref}/region-{number}"))
.collect();
// If the alter kind is rename, we also need to lock the first region under the new table name.
if let AlterKind::RenameTable { new_table_name } = &self.data.request.alter_kind {
let new_table_ref = TableReference {
catalog: &self.data.request.catalog_name,
schema: &self.data.request.schema_name,
table: new_table_name,
};
// We only acquire the first region.
keys.push(format!("{new_table_ref}/region-0"));
}
LockKey::new(keys)
}
}
impl<S: StorageEngine> AlterMitoTable<S> {
const TYPE_NAME: &str = "mito::AlterMitoTable";
/// Returns a new [AlterMitoTable].
pub(crate) fn new(
request: AlterTableRequest,
engine_inner: Arc<MitoEngineInner<S>>,
) -> Result<Self> {
let mut data = AlterTableData {
state: AlterTableState::Prepare,
request,
// We set table version later.
table_version: 0,
};
let table_ref = data.table_ref();
let table =
engine_inner
.get_mito_table(&table_ref)
.with_context(|| TableNotFoundSnafu {
table_name: table_ref.to_string(),
})?;
let info = table.table_info();
data.table_version = info.ident.version;
Ok(AlterMitoTable {
data,
engine_inner,
table,
new_info: None,
})
}
/// Register the loader of this procedure to the `procedure_manager`.
///
/// # Panics
/// Panics on error.
pub(crate) fn register_loader(
engine_inner: Arc<MitoEngineInner<S>>,
procedure_manager: &dyn ProcedureManager,
) {
procedure_manager
.register_loader(
Self::TYPE_NAME,
Box::new(move |data| {
Self::from_json(data, engine_inner.clone()).map(|p| Box::new(p) as _)
}),
)
.unwrap()
}
/// Recover the procedure from json.
fn from_json(json: &str, engine_inner: Arc<MitoEngineInner<S>>) -> Result<Self> {
let data: AlterTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
let table_ref = data.table_ref();
let table =
engine_inner
.get_mito_table(&table_ref)
.with_context(|| TableNotFoundSnafu {
table_name: table_ref.to_string(),
})?;
Ok(AlterMitoTable {
data,
engine_inner,
table,
new_info: None,
})
}
/// Prepare table info.
fn on_prepare(&mut self) -> Result<Status> {
let current_info = self.table.table_info();
ensure!(
current_info.ident.version == self.data.table_version,
VersionChangedSnafu {
expect: self.data.table_version,
actual: current_info.ident.version,
}
);
self.init_new_info(&current_info)?;
self.data.state = AlterTableState::AlterRegions;
Ok(Status::executing(true))
}
/// Alter regions.
async fn on_alter_regions(&mut self) -> Result<Status> {
let current_info = self.table.table_info();
ensure!(
current_info.ident.version == self.data.table_version,
VersionChangedSnafu {
expect: self.data.table_version,
actual: current_info.ident.version,
}
);
self.init_new_info(&current_info)?;
let new_info = self.new_info.as_mut().unwrap();
let table_name = &self.data.request.table_name;
let Some(alter_op) = create_alter_operation(table_name, &self.data.request.alter_kind, &mut new_info.meta)
.map_err(Error::from_error_ext)? else {
// Don't need to alter the region.
self.data.state = AlterTableState::UpdateTableManifest;
return Ok(Status::executing(true));
};
let regions = self.table.regions();
// For each region, alter it if its version is not updated.
for region in regions.values() {
let region_meta = region.in_memory_metadata();
if u64::from(region_meta.version()) > self.data.table_version {
// Region is already altered.
continue;
}
let alter_req = AlterRequest {
operation: alter_op.clone(),
version: region_meta.version(),
};
// Alter the region.
logging::debug!(
"start altering region {} of table {}, with request {:?}",
region.name(),
table_name,
alter_req,
);
region
.alter(alter_req)
.await
.map_err(Error::from_error_ext)?;
}
self.data.state = AlterTableState::UpdateTableManifest;
Ok(Status::executing(true))
}
/// Persist the alteration to the manifest and update table info.
async fn on_update_table_manifest(&mut self) -> Result<Status> {
// Get current table info.
let current_info = self.table.table_info();
if current_info.ident.version > self.data.table_version {
logging::info!(
"table {} version is already updated, current: {}, old_version: {}",
self.data.request.table_name,
current_info.ident.version,
self.data.table_version,
);
return Ok(Status::Done);
}
self.init_new_info(&current_info)?;
let new_info = self.new_info.as_ref().unwrap();
let table_name = &self.data.request.table_name;
logging::debug!(
"start updating the manifest of table {} with new table info {:?}",
table_name,
new_info
);
self.table
.manifest()
.update(TableMetaActionList::with_action(TableMetaAction::Change(
Box::new(TableChange {
table_info: RawTableInfo::from(new_info.clone()),
}),
)))
.await
.context(UpdateTableManifestSnafu { table_name })?;
// Update the in-memory metadata of the table.
self.table.set_table_info(new_info.clone());
// Rename key in tables map.
if let AlterKind::RenameTable { new_table_name } = &self.data.request.alter_kind {
let mut table_ref = self.data.table_ref();
let mut tables = self.engine_inner.tables.write().unwrap();
tables.remove(&table_ref.to_string());
table_ref.table = new_table_name.as_str();
tables.insert(table_ref.to_string(), self.table.clone());
}
Ok(Status::Done)
}
fn init_new_info(&mut self, current_info: &TableInfo) -> Result<()> {
if self.new_info.is_some() {
return Ok(());
}
let table_name = &current_info.name;
let mut new_info = TableInfo::clone(current_info);
// Set up the new table info.
match &self.data.request.alter_kind {
AlterKind::RenameTable { new_table_name } => {
new_info.name = new_table_name.clone();
}
AlterKind::AddColumns { .. } | AlterKind::DropColumns { .. } => {
let table_meta = &current_info.meta;
let new_meta = table_meta
.builder_with_alter_kind(table_name, &self.data.request.alter_kind)
.map_err(Error::from_error_ext)?
.build()
.context(BuildTableMetaSnafu { table_name })?;
new_info.meta = new_meta;
}
}
// Increase version of the table.
new_info.ident.version = current_info.ident.version + 1;
self.new_info = Some(new_info);
Ok(())
}
}
/// Represents each step while altering table in the mito engine.
#[derive(Debug, Serialize, Deserialize)]
enum AlterTableState {
/// Prepare to alter table.
Prepare,
/// Alter regions.
AlterRegions,
/// Update table manifest.
UpdateTableManifest,
}
/// Serializable data of [AlterMitoTable].
#[derive(Debug, Serialize, Deserialize)]
struct AlterTableData {
state: AlterTableState,
request: AlterTableRequest,
/// Table version before alteration.
table_version: TableVersion,
}
impl AlterTableData {
fn table_ref(&self) -> TableReference {
TableReference {
catalog: &self.request.catalog_name,
schema: &self.request.schema_name,
table: &self.request.table_name,
}
}
}
#[cfg(test)]
mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use table::engine::{EngineContext, TableEngine, TableEngineProcedure};
use table::requests::AddColumnRequest;
use super::*;
use crate::engine::procedure::procedure_test_util::{self, TestEnv};
use crate::table::test_util;
fn new_add_columns_req() -> AlterTableRequest {
let new_tag = ColumnSchema::new("my_tag", ConcreteDataType::string_datatype(), true);
let new_field = ColumnSchema::new("my_field", ConcreteDataType::string_datatype(), true);
let alter_kind = AlterKind::AddColumns {
columns: vec![
AddColumnRequest {
column_schema: new_tag,
is_key: true,
},
AddColumnRequest {
column_schema: new_field,
is_key: false,
},
],
};
test_util::new_alter_request(alter_kind)
}
#[tokio::test]
async fn test_procedure_add_column() {
common_telemetry::init_default_ut_logging();
let TestEnv {
table_engine,
dir: _dir,
} = procedure_test_util::setup_test_engine("create_procedure").await;
let schema = Arc::new(test_util::schema_for_test());
let request = test_util::new_create_request(schema.clone());
let engine_ctx = EngineContext::default();
// Create table first.
let mut procedure = table_engine
.create_table_procedure(&engine_ctx, request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Get metadata of the created table.
let table_ref = TableReference {
catalog: &request.catalog_name,
schema: &request.schema_name,
table: &request.table_name,
};
let table = table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.unwrap();
let old_info = table.table_info();
let old_meta = &old_info.meta;
// Alter the table.
let request = new_add_columns_req();
let mut procedure = table_engine
.alter_table_procedure(&engine_ctx, request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Validate.
let table = table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.unwrap();
let new_info = table.table_info();
let new_meta = &new_info.meta;
let new_schema = &new_meta.schema;
assert_eq!(&[0, 4], &new_meta.primary_key_indices[..]);
assert_eq!(&[1, 2, 3, 5], &new_meta.value_indices[..]);
assert!(new_schema.column_schema_by_name("my_tag").is_some());
assert!(new_schema.column_schema_by_name("my_field").is_some());
assert_eq!(new_schema.version(), schema.version() + 1);
assert_eq!(new_meta.next_column_id, old_meta.next_column_id + 2);
}
#[tokio::test]
async fn test_procedure_drop_column() {
common_telemetry::init_default_ut_logging();
let TestEnv {
table_engine,
dir: _dir,
} = procedure_test_util::setup_test_engine("create_procedure").await;
let schema = Arc::new(test_util::schema_for_test());
let request = test_util::new_create_request(schema.clone());
let engine_ctx = EngineContext::default();
// Create table first.
let mut procedure = table_engine
.create_table_procedure(&engine_ctx, request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Add columns.
let request = new_add_columns_req();
let mut procedure = table_engine
.alter_table_procedure(&engine_ctx, request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Get metadata.
let table_ref = TableReference {
catalog: &request.catalog_name,
schema: &request.schema_name,
table: &request.table_name,
};
let table = table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.unwrap();
let old_info = table.table_info();
let old_meta = &old_info.meta;
// Then remove memory and my_field from the table.
let alter_kind = AlterKind::DropColumns {
names: vec![String::from("memory"), String::from("my_field")],
};
let request = test_util::new_alter_request(alter_kind);
let mut procedure = table_engine
.alter_table_procedure(&engine_ctx, request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Validate.
let new_info = table.table_info();
let new_meta = &new_info.meta;
let new_schema = &new_meta.schema;
let remaining_names: Vec<String> = new_schema
.column_schemas()
.iter()
.map(|column_schema| column_schema.name.clone())
.collect();
assert_eq!(&["host", "cpu", "ts", "my_tag"], &remaining_names[..]);
assert_eq!(&[0, 3], &new_meta.primary_key_indices[..]);
assert_eq!(&[1, 2], &new_meta.value_indices[..]);
assert_eq!(new_schema.version(), old_meta.schema.version() + 1);
assert_eq!(new_meta.region_numbers, old_meta.region_numbers);
}
#[tokio::test]
async fn test_procedure_rename_table() {
common_telemetry::init_default_ut_logging();
let TestEnv {
table_engine,
dir: _dir,
} = procedure_test_util::setup_test_engine("create_procedure").await;
let schema = Arc::new(test_util::schema_for_test());
let create_request = test_util::new_create_request(schema.clone());
let engine_ctx = EngineContext::default();
// Create table first.
let mut procedure = table_engine
.create_table_procedure(&engine_ctx, create_request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Get metadata of the created table.
let mut table_ref = TableReference {
catalog: &create_request.catalog_name,
schema: &create_request.schema_name,
table: &create_request.table_name,
};
let table = table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.unwrap();
// Rename the table.
let new_name = "another_table".to_string();
let alter_kind = AlterKind::RenameTable {
new_table_name: new_name.clone(),
};
let alter_request = test_util::new_alter_request(alter_kind);
let mut procedure = table_engine
.alter_table_procedure(&engine_ctx, alter_request.clone())
.unwrap();
procedure_test_util::execute_procedure_until_done(&mut procedure).await;
// Validate.
let info = table.table_info();
assert_eq!(new_name, info.name);
assert!(table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.is_none());
table_ref.table = &new_name;
assert!(table_engine
.get_table(&engine_ctx, &table_ref)
.unwrap()
.is_some());
}
}


@@ -43,7 +43,7 @@ pub(crate) struct CreateMitoTable<S: StorageEngine> {
/// Created regions of the table.
regions: HashMap<RegionNumber, S::Region>,
/// Schema of the table.
table_schema: Option<SchemaRef>,
table_schema: SchemaRef,
}
#[async_trait]
@@ -81,8 +81,14 @@ impl<S: StorageEngine> CreateMitoTable<S> {
const TYPE_NAME: &str = "mito::CreateMitoTable";
/// Returns a new [CreateMitoTable].
pub(crate) fn new(request: CreateTableRequest, engine_inner: Arc<MitoEngineInner<S>>) -> Self {
CreateMitoTable {
pub(crate) fn new(
request: CreateTableRequest,
engine_inner: Arc<MitoEngineInner<S>>,
) -> Result<Self> {
let table_schema =
Schema::try_from(request.schema.clone()).context(InvalidRawSchemaSnafu)?;
Ok(CreateMitoTable {
data: CreateTableData {
state: CreateTableState::Prepare,
request,
@@ -90,8 +96,8 @@ impl<S: StorageEngine> CreateMitoTable<S> {
},
engine_inner,
regions: HashMap::new(),
table_schema: None,
}
table_schema: Arc::new(table_schema),
})
}
/// Register the loader of this procedure to the `procedure_manager`.
@@ -115,12 +121,14 @@ impl<S: StorageEngine> CreateMitoTable<S> {
/// Recover the procedure from json.
fn from_json(json: &str, engine_inner: Arc<MitoEngineInner<S>>) -> Result<Self> {
let data: CreateTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
let table_schema =
Schema::try_from(data.request.schema.clone()).context(InvalidRawSchemaSnafu)?;
Ok(CreateMitoTable {
data,
engine_inner,
regions: HashMap::new(),
table_schema: None,
table_schema: Arc::new(table_schema),
})
}
@@ -166,19 +174,17 @@ impl<S: StorageEngine> CreateMitoTable<S> {
ttl,
};
let table_schema =
Schema::try_from(self.data.request.schema.clone()).context(InvalidRawSchemaSnafu)?;
let primary_key_indices = &self.data.request.primary_key_indices;
let (next_column_id, default_cf) = engine::build_column_family(
engine::INIT_COLUMN_ID,
&self.data.request.table_name,
&table_schema,
&self.table_schema,
primary_key_indices,
)?;
let (next_column_id, row_key) = engine::build_row_key_desc(
next_column_id,
&self.data.request.table_name,
&table_schema,
&self.table_schema,
primary_key_indices,
)?;
self.data.next_column_id = Some(next_column_id);
@@ -228,7 +234,6 @@ impl<S: StorageEngine> CreateMitoTable<S> {
// All regions are created, moves to the next step.
self.data.state = CreateTableState::WriteTableManifest;
self.table_schema = Some(Arc::new(table_schema));
Ok(Status::executing(true))
}
@@ -276,10 +281,9 @@ impl<S: StorageEngine> CreateMitoTable<S> {
) -> Result<MitoTable<S::Region>> {
// Safety: We are in `WriteTableManifest` state.
let next_column_id = self.data.next_column_id.unwrap();
let table_schema = self.table_schema.clone().unwrap();
let table_meta = TableMetaBuilder::default()
.schema(table_schema)
.schema(self.table_schema.clone())
.engine(engine::MITO_ENGINE)
.next_column_id(next_column_id)
.primary_key_indices(self.data.request.primary_key_indices.clone())


@@ -17,7 +17,7 @@ use std::any::Any;
use common_error::ext::BoxedError;
use common_error::prelude::*;
use store_api::storage::RegionNumber;
use table::metadata::{TableInfoBuilderError, TableMetaBuilderError};
use table::metadata::{TableInfoBuilderError, TableMetaBuilderError, TableVersion};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
@@ -187,6 +187,12 @@ pub enum Error {
#[snafu(display("Invalid schema, source: {}", source))]
InvalidRawSchema { source: datatypes::error::Error },
#[snafu(display("Table version changed, expect: {}, actual: {}", expect, actual))]
VersionChanged {
expect: TableVersion,
actual: TableVersion,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -211,7 +217,8 @@ impl ErrorExt for Error {
| InvalidPrimaryKey { .. }
| MissingTimestampIndex { .. }
| TableNotFound { .. }
| InvalidRawSchema { .. } => StatusCode::InvalidArguments,
| InvalidRawSchema { .. }
| VersionChanged { .. } => StatusCode::InvalidArguments,
TableInfoNotFound { .. } | ConvertRaw { .. } => StatusCode::Unexpected,


@@ -80,7 +80,7 @@ mod tests {
async fn test_table_manifest() {
let (_dir, object_store) = test_util::new_test_object_store("test_table_manifest").await;
let manifest = TableManifest::new("manifest/", object_store, None);
let manifest = TableManifest::create("manifest/", object_store);
let mut iter = manifest.scan(0, 100).await.unwrap();
assert!(iter.next_action().await.unwrap().is_none());


@@ -471,7 +471,7 @@ impl<R: Region> MitoTable<R> {
regions: HashMap<RegionNumber, R>,
object_store: ObjectStore,
) -> Result<MitoTable<R>> {
let manifest = TableManifest::new(&table_manifest_dir(table_dir), object_store, None);
let manifest = TableManifest::create(&table_manifest_dir(table_dir), object_store);
// TODO(dennis): save manifest version into catalog?
let _manifest_version = manifest
@@ -487,7 +487,7 @@ impl<R: Region> MitoTable<R> {
}
pub(crate) fn build_manifest(table_dir: &str, object_store: ObjectStore) -> TableManifest {
TableManifest::new(&table_manifest_dir(table_dir), object_store, None)
TableManifest::create(&table_manifest_dir(table_dir), object_store)
}
pub(crate) async fn recover_table_info(
@@ -558,7 +558,7 @@ impl<R: Region> MitoTable<R> {
}
/// Create [`AlterOperation`] according to given `alter_kind`.
fn create_alter_operation(
pub(crate) fn create_alter_operation(
table_name: &str,
alter_kind: &AlterKind,
table_meta: &mut TableMeta,


@@ -29,7 +29,9 @@ use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use table::engine::{EngineContext, TableEngine};
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder, TableType};
use table::requests::{CreateTableRequest, InsertRequest, TableOptions};
use table::requests::{
AlterKind, AlterTableRequest, CreateTableRequest, InsertRequest, TableOptions,
};
use table::{Table, TableRef};
use crate::config::EngineConfig;
@@ -118,6 +120,15 @@ pub fn new_create_request(schema: SchemaRef) -> CreateTableRequest {
}
}
pub fn new_alter_request(alter_kind: AlterKind) -> AlterTableRequest {
AlterTableRequest {
catalog_name: "greptime".to_string(),
schema_name: "public".to_string(),
table_name: TABLE_NAME.to_string(),
alter_kind,
}
}
pub struct TestEngineComponents {
pub table_engine: MitoEngine<EngineImpl<NoopLogStore>>,
pub storage_engine: EngineImpl<NoopLogStore>,


@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub use opendal::raw::normalize_path as raw_normalize_path;
pub use opendal::raw::oio::Pager;
pub use opendal::{
layers, services, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind, Metakey,
Operator as ObjectStore, Result,
Operator as ObjectStore, Result, Writer,
};
pub mod cache_policy;


@@ -56,12 +56,12 @@ impl EmptyMetric {
let schema = Arc::new(DFSchema::new_with_metadata(
vec![
DFField::new(
None,
Some(""),
&time_index_column_name,
DataType::Timestamp(TimeUnit::Millisecond, None),
false,
),
DFField::new(None, &value_column_name, DataType::Float64, true),
DFField::new(Some(""), &value_column_name, DataType::Float64, true),
],
HashMap::new(),
)?);
@@ -75,7 +75,6 @@ impl EmptyMetric {
}
pub fn to_execution_plan(&self) -> Arc<dyn ExecutionPlan> {
// let schema = self.schema.to
Arc::new(EmptyMetricExec {
start: self.start,
end: self.end,


@@ -33,6 +33,7 @@ use datafusion::physical_plan::{
DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
Statistics,
};
use datafusion::sql::TableReference;
use futures::{Stream, StreamExt};
use crate::extension_plan::Millisecond;
@@ -106,7 +107,7 @@ impl RangeManipulate {
// process the time index column:
// the raw timestamp field is preserved, and a new timestamp_range field is appended at the end.
let Some(ts_col_index) = input_schema.index_of_column_by_name(None, time_index)? else {
return Err(datafusion::common::field_not_found(None, time_index, input_schema.as_ref()))
return Err(datafusion::common::field_not_found(None::<TableReference>, time_index, input_schema.as_ref()))
};
let timestamp_range_field = columns[ts_col_index]
.field()
@@ -119,7 +120,7 @@ impl RangeManipulate {
// process value columns
for name in value_columns {
let Some(index) = input_schema.index_of_column_by_name(None, name)? else {
return Err(datafusion::common::field_not_found(None, name, input_schema.as_ref()))
return Err(datafusion::common::field_not_found(None::<TableReference>, name, input_schema.as_ref()))
};
columns[index] = DFField::from(RangeArray::convert_field(columns[index].field()));
}


@@ -17,19 +17,21 @@ mod changes;
mod deriv;
mod extrapolate_rate;
mod idelta;
mod quantile;
mod resets;
#[cfg(test)]
mod test_util;
pub use aggr_over_time::{
AbsentOverTime, AvgOverTime, CountOverTime, LastOverTime, MaxOverTime, MinOverTime,
PresentOverTime, SumOverTime,
PresentOverTime, StddevOverTime, SumOverTime,
};
use datafusion::arrow::array::ArrayRef;
use datafusion::error::DataFusionError;
use datafusion::physical_plan::ColumnarValue;
pub use extrapolate_rate::{Delta, Increase, Rate};
pub use idelta::IDelta;
pub use quantile::QuantileOverTime;
pub(crate) fn extract_array(columnar_value: &ColumnarValue) -> Result<ArrayRef, DataFusionError> {
if let ColumnarValue::Array(array) = columnar_value {
@@ -40,3 +42,19 @@ pub(crate) fn extract_array(columnar_value: &ColumnarValue) -> Result<ArrayRef,
))
}
}
/// Compensated (Kahan) summation algorithm: a technique for reducing the numerical error
/// in floating-point arithmetic. The algorithm also includes the modification ("Neumaier improvement")
/// that reduces the numerical error further in cases
/// where the numbers being summed have a large difference in magnitude.
/// Prometheus's implementation:
/// https://github.com/prometheus/prometheus/blob/f55ab2217984770aa1eecd0f2d5f54580029b1c0/promql/functions.go#L782
pub(crate) fn compensated_sum_inc(inc: f64, sum: f64, mut compensation: f64) -> (f64, f64) {
let new_sum = sum + inc;
if sum.abs() >= inc.abs() {
compensation += (sum - new_sum) + inc;
} else {
compensation += (inc - new_sum) + sum;
}
(new_sum, compensation)
}
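For intuition, a minimal standalone sketch (illustrative only, not part of this change) that folds compensated_sum_inc over terms of very different magnitude; a naive sum loses the small term entirely, while the compensated sum recovers it:

// Assumes compensated_sum_inc from above is in scope.
fn compensated_sum(values: &[f64]) -> f64 {
    let (mut sum, mut compensation) = (0.0, 0.0);
    for &v in values {
        let (new_sum, new_compensation) = compensated_sum_inc(v, sum, compensation);
        sum = new_sum;
        compensation = new_compensation;
    }
    sum + compensation
}

fn main() {
    let values = [1e16, 1.0, -1e16];
    let naive: f64 = values.iter().sum();
    // Prints "naive: 0, compensated: 1".
    println!("naive: {naive}, compensated: {}", compensated_sum(&values));
}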


@@ -24,7 +24,7 @@ use datatypes::arrow::array::Array;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::DataType;
use crate::functions::extract_array;
use crate::functions::{compensated_sum_inc, extract_array};
use crate::range_array::RangeArray;
/// The average value of all points in the specified interval.
@@ -117,7 +117,42 @@ pub fn present_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -
}
}
// TODO(ruihang): support quantile_over_time, stddev_over_time, and stdvar_over_time
// TODO(ruihang): support quantile_over_time, and stdvar_over_time
/// The population standard deviation of the values in the specified interval.
/// Prometheus's implementation: https://github.com/prometheus/prometheus/blob/f55ab2217984770aa1eecd0f2d5f54580029b1c0/promql/functions.go#L556-L569
#[range_fn(
name = "StddevOverTime",
ret = "Float64Array",
display_name = "prom_stddev_over_time"
)]
pub fn stddev_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
if values.is_empty() {
None
} else {
let mut count = 0.0;
let mut mean = 0.0;
let mut comp_mean = 0.0;
let mut deviations_sum_sq = 0.0;
let mut comp_deviations_sum_sq = 0.0;
for v in values {
count += 1.0;
let current_value = v.unwrap();
let delta = current_value - (mean + comp_mean);
let (new_mean, new_comp_mean) = compensated_sum_inc(delta / count, mean, comp_mean);
mean = new_mean;
comp_mean = new_comp_mean;
let (new_deviations_sum_sq, new_comp_deviations_sum_sq) = compensated_sum_inc(
delta * (current_value - (mean + comp_mean)),
deviations_sum_sq,
comp_deviations_sum_sq,
);
deviations_sum_sq = new_deviations_sum_sq;
comp_deviations_sum_sq = new_comp_deviations_sum_sq;
}
Some(((deviations_sum_sq + comp_deviations_sum_sq) / count).sqrt())
}
}
#[cfg(test)]
mod test {
@@ -332,4 +367,50 @@ mod test {
],
);
}
#[test]
fn calculate_std_dev_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
StddevOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(37.6543215),
Some(28.442923895289123),
Some(0.0),
None,
None,
Some(18.12081352042062),
Some(11.983172291869804),
Some(11.441953741554055),
Some(0.0),
None,
],
);
// add more assertions
let ts_array = Arc::new(TimestampMillisecondArray::from_iter(
[1000i64, 3000, 5000, 7000, 9000, 11000, 13000, 15000]
.into_iter()
.map(Some),
));
let values_array = Arc::new(Float64Array::from_iter([
1.5990505637277868,
1.5990505637277868,
1.5990505637277868,
0.0,
8.0,
8.0,
2.0,
3.0,
]));
let ranges = [(0, 3), (3, 5)];
simple_range_udf_runner(
StddevOverTime::scalar_udf(),
RangeArray::from_ranges(ts_array, ranges).unwrap(),
RangeArray::from_ranges(values_array, ranges).unwrap(),
vec![Some(0.0), Some(3.249615361854384)],
);
}
}
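For reference, the streaming, compensated computation above should agree with the plain two-pass population standard deviation; a minimal standalone sketch (illustrative only), applied to the values of the second range in the test above:

fn stddev_two_pass(values: &[f64]) -> Option<f64> {
    if values.is_empty() {
        return None;
    }
    let n = values.len() as f64;
    let mean = values.iter().sum::<f64>() / n;
    // Population variance: mean of squared deviations, divided by n (not n - 1).
    let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n;
    Some(variance.sqrt())
}

fn main() {
    // The second range in the test above covers [0.0, 8.0, 8.0, 2.0, 3.0].
    // Prints approximately 3.2496, matching Some(3.249615361854384) asserted above.
    println!("{:?}", stddev_two_pass(&[0.0, 8.0, 8.0, 2.0, 3.0]));
}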


@@ -0,0 +1,210 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::{ScalarUDF, Signature, TypeSignature, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datatypes::arrow::array::Array;
use datatypes::arrow::datatypes::DataType;
use crate::error;
use crate::functions::extract_array;
use crate::range_array::RangeArray;
pub struct QuantileOverTime {
quantile: f64,
}
impl QuantileOverTime {
fn new(quantile: f64) -> Self {
Self { quantile }
}
pub const fn name() -> &'static str {
"prom_quantile_over_time"
}
pub fn scalar_udf(quantile: f64) -> ScalarUDF {
ScalarUDF {
name: Self::name().to_string(),
signature: Signature::new(
TypeSignature::Exact(Self::input_type()),
Volatility::Immutable,
),
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
fun: Arc::new(move |input| Self::new(quantile).calc(input)),
}
}
// time index column and value column
fn input_type() -> Vec<DataType> {
vec![
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
RangeArray::convert_data_type(DataType::Float64),
]
}
fn return_type() -> DataType {
DataType::Float64
}
fn calc(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
// construct the matrix from the input.
// The third input is the quantile parameter, which is included in the input fields.
assert_eq!(input.len(), 3);
let ts_array = extract_array(&input[0])?;
let value_array = extract_array(&input[1])?;
let ts_range: RangeArray = RangeArray::try_new(ts_array.data().clone().into())?;
let value_range: RangeArray = RangeArray::try_new(value_array.data().clone().into())?;
error::ensure(
ts_range.len() == value_range.len(),
DataFusionError::Execution(format!(
"{}: input arrays should have the same length, found {} and {}",
Self::name(),
ts_range.len(),
value_range.len()
)),
)?;
error::ensure(
ts_range.value_type() == DataType::Timestamp(TimeUnit::Millisecond, None),
DataFusionError::Execution(format!(
"{}: expect TimestampMillisecond as time index array's type, found {}",
Self::name(),
ts_range.value_type()
)),
)?;
error::ensure(
value_range.value_type() == DataType::Float64,
DataFusionError::Execution(format!(
"{}: expect Float64 as value array's type, found {}",
Self::name(),
value_range.value_type()
)),
)?;
// calculation
let mut result_array = Vec::with_capacity(ts_range.len());
for index in 0..ts_range.len() {
let timestamps = ts_range.get(index).unwrap();
let values = value_range.get(index).unwrap();
let values = values
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.values();
error::ensure(
timestamps.len() == values.len(),
DataFusionError::Execution(format!(
"{}: input arrays should have the same length, found {} and {}",
Self::name(),
timestamps.len(),
values.len()
)),
)?;
let result = quantile_impl(values, self.quantile);
result_array.push(result);
}
let result = ColumnarValue::Array(Arc::new(Float64Array::from_iter(result_array)));
Ok(result)
}
}
/// Refer to https://github.com/prometheus/prometheus/blob/6e2905a4d4ff9b47b1f6d201333f5bd53633f921/promql/quantile.go#L357-L386
fn quantile_impl(values: &[f64], quantile: f64) -> Option<f64> {
if quantile.is_nan() || values.is_empty() {
return Some(f64::NAN);
}
if quantile < 0.0 {
return Some(f64::NEG_INFINITY);
}
if quantile > 1.0 {
return Some(f64::INFINITY);
}
let mut values = values.to_vec();
values.sort_unstable_by(f64::total_cmp);
let length = values.len();
let rank = quantile * (length - 1) as f64;
let lower_index = 0.max(rank.floor() as usize);
let upper_index = (length - 1).min(lower_index + 1);
let weight = rank - rank.floor();
let result = values[lower_index] * (1.0 - weight) + values[upper_index] * weight;
Some(result)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_quantile_impl_empty() {
let values = &[];
let q = 0.5;
assert!(quantile_impl(values, q).unwrap().is_nan());
}
#[test]
fn test_quantile_impl_nan() {
let values = &[1.0, 2.0, 3.0];
let q = f64::NAN;
assert!(quantile_impl(values, q).unwrap().is_nan());
}
#[test]
fn test_quantile_impl_negative_quantile() {
let values = &[1.0, 2.0, 3.0];
let q = -0.5;
assert_eq!(quantile_impl(values, q).unwrap(), f64::NEG_INFINITY);
}
#[test]
fn test_quantile_impl_greater_than_one_quantile() {
let values = &[1.0, 2.0, 3.0];
let q = 1.5;
assert_eq!(quantile_impl(values, q).unwrap(), f64::INFINITY);
}
#[test]
fn test_quantile_impl_single_element() {
let values = &[1.0];
let q = 0.8;
assert_eq!(quantile_impl(values, q).unwrap(), 1.0);
}
#[test]
fn test_quantile_impl_even_length() {
let values = &[3.0, 1.0, 5.0, 2.0];
let q = 0.5;
assert_eq!(quantile_impl(values, q).unwrap(), 2.5);
}
#[test]
fn test_quantile_impl_odd_length() {
let values = &[4.0, 1.0, 3.0, 2.0, 5.0];
let q = 0.25;
assert_eq!(quantile_impl(values, q).unwrap(), 2.0);
}
}
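As a worked example of the interpolation in quantile_impl: for sorted values [1.0, 2.0, 3.0, 5.0] and quantile 0.5, the rank is 0.5 * (4 - 1) = 1.5, so lower_index = 1, upper_index = 2 and weight = 0.5, giving 2.0 * 0.5 + 3.0 * 0.5 = 2.5, which is exactly what test_quantile_impl_even_length asserts.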


@@ -30,6 +30,7 @@ use datafusion::logical_expr::{
use datafusion::optimizer::utils;
use datafusion::prelude::{Column, Expr as DfExpr, JoinType};
use datafusion::scalar::ScalarValue;
use datafusion::sql::TableReference;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use promql_parser::label::{MatchOp, Matchers, METRIC_NAME};
use promql_parser::parser::{
@@ -43,15 +44,15 @@ use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
CatalogSnafu, DataFusionPlanningSnafu, ExpectExprSnafu, ExpectRangeSelectorSnafu,
MultipleVectorSnafu, Result, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu,
UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu, ValueNotFoundSnafu,
ZeroRangeSelectorSnafu,
UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu,
ValueNotFoundSnafu, ZeroRangeSelectorSnafu,
};
use crate::extension_plan::{
EmptyMetric, InstantManipulate, Millisecond, RangeManipulate, SeriesDivide, SeriesNormalize,
};
use crate::functions::{
AbsentOverTime, AvgOverTime, CountOverTime, Delta, IDelta, Increase, LastOverTime, MaxOverTime,
MinOverTime, PresentOverTime, Rate, SumOverTime,
MinOverTime, PresentOverTime, QuantileOverTime, Rate, SumOverTime,
};
const LEFT_PLAN_JOIN_ALIAS: &str = "lhs";
@@ -562,15 +563,13 @@ impl PromPlanner {
table_name: &str,
filter: Vec<DfExpr>,
) -> Result<LogicalPlan> {
let table_ref = OwnedTableReference::Bare {
table: table_name.to_string(),
};
let table_ref = OwnedTableReference::bare(table_name.to_string());
let provider = self
.table_provider
.resolve_table(table_ref)
.resolve_table(table_ref.clone())
.await
.context(CatalogSnafu)?;
let result = LogicalPlanBuilder::scan_with_filters(table_name, provider, None, filter)
let result = LogicalPlanBuilder::scan_with_filters(table_ref, provider, None, filter)
.context(DataFusionPlanningSnafu)?
.build()
.context(DataFusionPlanningSnafu)?;
@@ -586,9 +585,7 @@ impl PromPlanner {
.context(TableNameNotFoundSnafu)?;
let table = self
.table_provider
.resolve_table(OwnedTableReference::Bare {
table: table_name.to_string(),
})
.resolve_table(TableReference::bare(&table_name))
.await
.context(CatalogSnafu)?
.as_any()
@@ -695,6 +692,16 @@ impl PromPlanner {
"last_over_time" => ScalarFunc::Udf(LastOverTime::scalar_udf()),
"absent_over_time" => ScalarFunc::Udf(AbsentOverTime::scalar_udf()),
"present_over_time" => ScalarFunc::Udf(PresentOverTime::scalar_udf()),
"quantile_over_time" => {
let quantile_expr = match other_input_exprs.get(0) {
Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => *quantile,
other => UnexpectedPlanExprSnafu {
desc: format!("expect f64 literal as quantile, but found {:?}", other),
}
.fail()?,
};
ScalarFunc::Udf(QuantileOverTime::scalar_udf(quantile_expr))
}
_ => ScalarFunc::DataFusionBuiltin(
BuiltinScalarFunction::from_str(func.name).map_err(|_| {
UnsupportedExprSnafu {


@@ -14,6 +14,7 @@
// An extended "array" based on [DictionaryArray].
use datafusion::arrow::buffer::NullBuffer;
use datafusion::arrow::datatypes::Field;
use datatypes::arrow::array::{Array, ArrayData, ArrayRef, DictionaryArray, Int64Array};
use datatypes::arrow::datatypes::{DataType, Int64Type};
@@ -124,10 +125,10 @@ impl RangeArray {
.len(key_array.len())
.add_buffer(key_array.data().buffers()[0].clone())
.add_child_data(values.data().clone());
match key_array.data().null_buffer() {
match key_array.data().nulls() {
Some(buffer) if key_array.data().null_count() > 0 => {
data = data
.null_bit_buffer(Some(buffer.clone()))
.nulls(Some(buffer.clone()))
.null_count(key_array.data().null_count());
}
_ => data = data.null_count(0),
@@ -223,6 +224,18 @@ impl Array for RangeArray {
fn into_data(self) -> ArrayData {
self.array.into_data()
}
fn to_data(&self) -> ArrayData {
self.array.to_data()
}
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
self.array.slice(offset, length)
}
fn nulls(&self) -> Option<&NullBuffer> {
self.array.nulls()
}
}
impl std::fmt::Debug for RangeArray {
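The `RangeArray` changes above track arrow-rs replacing raw null buffers with `NullBuffer` (exposed via `Array::nulls()`) and `Array::data()` with `Array::to_data()`. A small sketch of those accessors, assuming an arrow-rs version with this API (the one this diff upgrades to) and using the arrow crate directly rather than the project's `datatypes::arrow` re-export.

use arrow::array::{Array, Int64Array};

fn inspect_nulls() {
    let array = Int64Array::from(vec![Some(1), None, Some(3)]);
    // to_data() replaces the older data() accessor and returns an owned ArrayData.
    let data = array.to_data();
    assert_eq!(data.null_count(), 1);
    // nulls() exposes the validity bitmap as a NullBuffer instead of a raw Buffer.
    if let Some(nulls) = array.nulls() {
        assert!(!nulls.is_valid(1)); // position 1 is the None above
    }
}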

View File

@@ -58,7 +58,7 @@ use crate::physical_planner::PhysicalPlanner;
use crate::plan::LogicalPlan;
use crate::planner::{DfLogicalPlanner, LogicalPlanner};
use crate::query_engine::{QueryEngineContext, QueryEngineState};
use crate::{metric, QueryEngine};
use crate::{metrics, QueryEngine};
pub struct DatafusionQueryEngine {
state: Arc<QueryEngineState>,
@@ -93,10 +93,7 @@ impl DatafusionQueryEngine {
let default_catalog = query_ctx.current_catalog();
let default_schema = query_ctx.current_schema();
let table_name = dml
.table_name
.as_table_reference()
.resolve(&default_catalog, &default_schema);
let table_name = dml.table_name.resolve(&default_catalog, &default_schema);
let table = self.find_table(&table_name).await?;
let output = self
@@ -257,7 +254,7 @@ impl QueryEngine for DatafusionQueryEngine {
impl LogicalOptimizer for DatafusionQueryEngine {
fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
let _timer = timer!(metric::METRIC_OPTIMIZE_LOGICAL_ELAPSED);
let _timer = timer!(metrics::METRIC_OPTIMIZE_LOGICAL_ELAPSED);
match plan {
LogicalPlan::DfPlan(df_plan) => {
let optimized_plan = self
@@ -283,7 +280,7 @@ impl PhysicalPlanner for DatafusionQueryEngine {
ctx: &mut QueryEngineContext,
logical_plan: &LogicalPlan,
) -> Result<Arc<dyn PhysicalPlan>> {
let _timer = timer!(metric::METRIC_CREATE_PHYSICAL_ELAPSED);
let _timer = timer!(metrics::METRIC_CREATE_PHYSICAL_ELAPSED);
match logical_plan {
LogicalPlan::DfPlan(df_plan) => {
let state = ctx.state();
@@ -318,7 +315,7 @@ impl PhysicalOptimizer for DatafusionQueryEngine {
ctx: &mut QueryEngineContext,
plan: Arc<dyn PhysicalPlan>,
) -> Result<Arc<dyn PhysicalPlan>> {
let _timer = timer!(metric::METRIC_OPTIMIZE_PHYSICAL_ELAPSED);
let _timer = timer!(metrics::METRIC_OPTIMIZE_PHYSICAL_ELAPSED);
let mut new_plan = plan
.as_any()
@@ -345,7 +342,7 @@ impl QueryExecutor for DatafusionQueryEngine {
ctx: &QueryEngineContext,
plan: &Arc<dyn PhysicalPlan>,
) -> Result<SendableRecordBatchStream> {
let _timer = timer!(metric::METRIC_EXEC_PLAN_ELAPSED);
let _timer = timer!(metrics::METRIC_EXEC_PLAN_ELAPSED);
match plan.output_partitioning().partition_count() {
0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))),
1 => Ok(plan
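Beyond renaming the `metric` module to `metrics`, each engine phase keeps the same pattern: a `timer!` guard keyed by a static metric name is held for the duration of the phase. A minimal sketch of that pattern, assuming `common_telemetry::timer!` behaves as used above; the metric name below is made up for illustration.

use common_telemetry::timer;

pub static METRIC_EXAMPLE_ELAPSED: &str = "query.example_elapsed";

fn timed_work() {
    // The timer starts here and records the elapsed time when `_timer`
    // is dropped at the end of the scope.
    let _timer = timer!(METRIC_EXAMPLE_ELAPSED);
    // ... work being measured ...
}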

View File

@@ -79,7 +79,7 @@ async fn resolve_tables(
for table_name in table_names {
let resolved_name = table_provider
.resolve_table_ref(table_name.as_table_reference())
.resolve_table_ref(table_name.clone())
.context(CatalogSnafu)?;
if let Entry::Vacant(v) = tables.entry(resolved_name.to_string()) {

View File

@@ -16,7 +16,7 @@ pub mod datafusion;
pub mod error;
pub mod executor;
pub mod logical_optimizer;
mod metric;
mod metrics;
mod optimizer;
pub mod parser;
pub mod physical_optimizer;

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! query engine metrics
pub static METRIC_PARSE_SQL_ELAPSED: &str = "query.parse_sql_elapsed";
pub static METRIC_PARSE_PROMQL_ELAPSED: &str = "query.parse_promql_elapsed";
pub static METRIC_OPTIMIZE_LOGICAL_ELAPSED: &str = "query.optimize_logicalplan_elapsed";

View File

@@ -18,8 +18,8 @@ use std::sync::Arc;
use common_time::timestamp::{TimeUnit, Timestamp};
use datafusion::optimizer::optimizer::OptimizerRule;
use datafusion::optimizer::OptimizerConfig;
use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter};
use datafusion_common::{DFSchemaRef, DataFusionError, Result, ScalarValue};
use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter};
use datafusion_expr::{
Between, BinaryExpr, Expr, ExprSchemable, Filter, LogicalPlan, Operator, TableScan,
};
@@ -200,7 +200,9 @@ impl<'a> TypeConverter<'a> {
}
}
impl<'a> ExprRewriter for TypeConverter<'a> {
impl<'a> TreeNodeRewriter for TypeConverter<'a> {
type N = Expr;
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
let new_expr = match expr {
Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op {
@@ -299,6 +301,7 @@ mod tests {
use std::collections::HashMap;
use datafusion_common::{Column, DFField, DFSchema};
use datafusion_sql::TableReference;
use super::*;
@@ -358,7 +361,7 @@ mod tests {
let schema_ref = Arc::new(
DFSchema::new_with_metadata(
vec![DFField::new(
None,
None::<TableReference>,
"ts",
DataType::Timestamp(ArrowTimeUnit::Millisecond, None),
true,
@@ -390,7 +393,12 @@ mod tests {
let col_name = "is_valid";
let schema_ref = Arc::new(
DFSchema::new_with_metadata(
vec![DFField::new(None, col_name, DataType::Boolean, false)],
vec![DFField::new(
None::<TableReference>,
col_name,
DataType::Boolean,
false,
)],
HashMap::new(),
)
.unwrap(),
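The `TypeConverter` change above follows DataFusion's move from `ExprRewriter` to `TreeNodeRewriter`: the rewriter now names its node type via `type N` and keeps `mutate` as the rewrite hook. A minimal sketch of a rewriter under that trait, assuming the DataFusion version this diff migrates to (where `TreeNode::rewrite` accepts a `TreeNodeRewriter`); the boolean-negating rule is a toy example, not the type-conversion logic above.

use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{lit, Expr};

struct NegateBoolLiterals;

impl TreeNodeRewriter for NegateBoolLiterals {
    type N = Expr;

    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
        // Flip bare boolean literals; leave every other expression untouched.
        match expr {
            Expr::Literal(ScalarValue::Boolean(Some(b))) => Ok(lit(!b)),
            other => Ok(other),
        }
    }
}

fn rewrite_example() -> Result<Expr> {
    // TreeNode::rewrite walks the expression tree and applies mutate to each node.
    lit(true).rewrite(&mut NegateBoolLiterals)
}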

View File

@@ -28,7 +28,7 @@ use sql::statements::statement::Statement;
use crate::error::{
MultipleStatementsSnafu, ParseFloatSnafu, ParseTimestampSnafu, QueryParseSnafu, Result,
};
use crate::metric::{METRIC_PARSE_PROMQL_ELAPSED, METRIC_PARSE_SQL_ELAPSED};
use crate::metrics::{METRIC_PARSE_PROMQL_ELAPSED, METRIC_PARSE_SQL_ELAPSED};
const DEFAULT_LOOKBACK: u64 = 5 * 60; // 5m

View File

@@ -61,7 +61,7 @@ impl DfLogicalPlanner {
)
.await?;
let config_options = self.session_state.config().config_options();
let config_options = self.session_state.config().options();
let parser_options = ParserOptions {
enable_ident_normalization: config_options.sql_parser.enable_ident_normalization,
parse_float_as_decimal: config_options.sql_parser.parse_float_as_decimal,

View File

@@ -18,4 +18,4 @@ pub mod error;
pub mod manager;
#[cfg(feature = "python")]
pub mod python;
mod table;
pub mod table;

View File

@@ -99,7 +99,9 @@ async fn integrated_py_copr_test() {
actual_result.insert(col_sch.name.clone(), col.clone());
}
for (name, col) in expect_result {
let actual_col = actual_result.get(&name).expect("Column with this name");
let actual_col = actual_result.get(&name).unwrap_or_else(|| {
panic!("Expect column with name: {name} in {actual_result:?}")
});
if !check_equal(col.clone(), actual_col.clone()) {
panic!("Column {name} doesn't match, expect {col:?}, found {actual_col:?}")
}

View File

@@ -19,7 +19,9 @@ use std::sync::Arc;
use datatypes::prelude::ScalarVector;
#[cfg(feature = "pyo3_backend")]
use datatypes::vectors::UInt32Vector;
use datatypes::vectors::{BooleanVector, Float64Vector, Int32Vector, Int64Vector, VectorRef};
use datatypes::vectors::{
BooleanVector, Float64Vector, Int32Vector, Int64Vector, StringVector, VectorRef,
};
use crate::python::ffi_types::pair_tests::{CodeBlockTestCase, CoprTestCase};
macro_rules! vector {
@@ -37,6 +39,363 @@ macro_rules! ronish {
}
pub(super) fn generate_copr_intgrate_tests() -> Vec<CoprTestCase> {
vec![
// The first cases are examples from the docs
// hello.py: test returning a single string
CoprTestCase {
script: r#"
@coprocessor(returns=['msg'])
def hello() -> vector[str]:
return "hello, GreptimeDB"
"#
.to_string(),
expect: Some(ronish!("msg": vector!(StringVector, &["hello, GreptimeDB"]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@coprocessor(returns=['msg'], backend="pyo3")
def hello() -> vector[str]:
return "hello, GreptimeDB"
"#
.to_string(),
expect: Some(ronish!("msg": vector!(StringVector, &["hello, GreptimeDB"]))),
},
// add_vectors.py
CoprTestCase {
script: r#"
@copr(args=["n1", "n2"],
returns=["value"],
sql="select number as n1,number as n2 from numbers limit 5")
def add_vectors(n1, n2) -> vector[i32]:
return n1 + n2
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int32Vector, [0, 2, 4, 6, 8]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(args=["n1", "n2"],
returns=["value"],
sql="select number as n1,number as n2 from numbers limit 5",
backend="pyo3")
def add_vectors(n1, n2) -> vector[i32]:
return n1 + n2
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int32Vector, [0, 2, 4, 6, 8]))),
},
// answer.py
CoprTestCase {
script: r#"
@copr(returns=["value"])
def answer() -> vector[i64]:
return 42
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(returns=["value"], backend="pyo3")
def answer() -> vector[i64]:
return 42
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42]))),
},
// answer_list.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def answer() -> (vector[i64]):
return vector([42, 43, 44])
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def answer() -> (vector[i64]):
return vector([42, 43, 44])
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
// boolean_array.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def boolean_array() -> vector[f64]:
v = vector([1.0, 2.0, 3.0])
# This returns a vector([2.0])
return v[(v > 1) & (v< 3)]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, [2.0]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def boolean_array() -> vector[f64]:
v = vector([1.0, 2.0, 3.0])
# This returns a vector([2.0])
return v[(v > 1) & (v< 3)]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, [2.0]))),
},
// compare.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def compare() -> vector[bool]:
# This returns a vector([False, False, True])
return vector([1.0, 2.0, 3.0]) > 2.0
"#
.to_string(),
expect: Some(ronish!("value": vector!(BooleanVector, &[false, false, true]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def compare() -> vector[bool]:
# This returns a vector([False, False, True])
return vector([1.0, 2.0, 3.0]) > 2.0
"#
.to_string(),
expect: Some(ronish!("value": vector!(BooleanVector, &[false, false, true]))),
},
// compare_vectors.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def compare_vectors() -> vector[bool]:
# This returns a vector([False, False, True])
return vector([1.0, 2.0, 3.0]) > vector([1.0, 2.0, 2.0])
"#
.to_string(),
expect: Some(ronish!("value": vector!(BooleanVector, &[false, false, true]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def compare_vectors() -> vector[bool]:
# This returns a vector([False, False, True])
return vector([1.0, 2.0, 3.0]) > vector([1.0, 2.0, 2.0])
"#
.to_string(),
expect: Some(ronish!("value": vector!(BooleanVector, &[false, false, true]))),
},
// list_comprehension.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def list_comprehension() -> (vector[f64]):
a = vector([1.0, 2.0, 3.0])
# This returns a vector([3.0, 4.0])
return [x+1 for x in a if x >= 2.0]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, &[3.0 ,4.0]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def list_comprehension() -> (vector[f64]):
a = vector([1.0, 2.0, 3.0])
# This returns a vector([3.0, 4.0])
return [x+1 for x in a if x >= 2.0]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, &[3.0 ,4.0]))),
},
// select_elements.py
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"])
def select_elements() -> (vector[f64]):
a = vector([1.0, 2.0, 3.0])
# This returns a vector([2.0, 3.0])
return a[a>=2.0]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, &[2.0, 3.0]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@copr(returns=["value"], backend="pyo3")
def select_elements() -> (vector[f64]):
a = vector([1.0, 2.0, 3.0])
# This returns a vector([2.0, 3.0])
return a[a>=2.0]
"#
.to_string(),
expect: Some(ronish!("value": vector!(Float64Vector, &[2.0, 3.0]))),
},
// args.py
CoprTestCase {
script: r#"
@coprocessor(args=["a", "b"],
returns=["value"],
sql="select number as a,number as b from numbers limit 5")
def add_vectors(a, b) -> vector[i64]:
return a + b
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, &[0, 2, 4, 6, 8]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@coprocessor(args=["a", "b"],
returns=["value"],
sql="select number as a,number as b from numbers limit 5",
backend="pyo3")
def add_vectors(a, b) -> vector[i64]:
return a + b
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, &[0, 2, 4, 6, 8]))),
},
// numbers.py
CoprTestCase {
script: r#"
@coprocessor(args=["number", "number", "number"],
sql="select number from numbers limit 5",
returns=["value"])
def normalize(n1, n2, n3) -> vector[i64]:
# returns [0,1,8,27,64]
return n1 * n2 * n3
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, &[0, 1, 8, 27, 64]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@coprocessor(args=["number", "number", "number"],
sql="select number from numbers limit 5",
returns=["value"],
backend="pyo3")
def normalize(n1, n2, n3) -> vector[i64]:
# returns [0,1,8,27,64]
return n1 * n2 * n3
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, &[0, 1, 8, 27, 64]))),
},
// return_multi_vectors1.py
CoprTestCase {
script: r#"
from greptime import vector
@coprocessor(returns=["a", "b", "c"])
def return_vectors() -> (vector[i64], vector[str], vector[f64]):
a = vector([1, 2, 3])
b = vector(["a", "b", "c"])
c = vector([42.0, 43.0, 44.0])
return a, b, c
"#
.to_string(),
expect: Some(ronish!(
"a": vector!(Int64Vector, &[1, 2, 3]),
"b": vector!(StringVector, &["a", "b", "c"]),
"c": vector!(Float64Vector, &[42.0, 43.0, 44.0])
)),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@coprocessor(returns=["a", "b", "c"], backend="pyo3")
def return_vectors() -> (vector[i64], vector[str], vector[f64]):
a = vector([1, 2, 3])
b = vector(["a", "b", "c"])
c = vector([42.0, 43.0, 44.0])
return a, b, c
"#
.to_string(),
expect: Some(ronish!(
"a": vector!(Int64Vector, &[1, 2, 3]),
"b": vector!(StringVector, &["a", "b", "c"]),
"c": vector!(Float64Vector, &[42.0, 43.0, 44.0])
)),
},
// return_multi_vectors2.py
CoprTestCase {
script: r#"
from greptime import vector
@coprocessor(returns=["a", "b", "c"])
def return_vectors() -> (vector[i64], vector[str], vector[i64]):
a = 1
b = "Hello, GreptimeDB!"
c = 42
return a, b, c
"#
.to_string(),
expect: Some(ronish!(
"a": vector!(Int64Vector, &[1]),
"b": vector!(StringVector, &["Hello, GreptimeDB!"]),
"c": vector!(Float64Vector, &[42.0])
)),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
from greptime import vector
@coprocessor(returns=["a", "b", "c"], backend="pyo3")
def return_vectors() -> (vector[i64], vector[str], vector[i64]):
a = 1
b = "Hello, GreptimeDB!"
c = 42
return [a], [b], [c]
"#
.to_string(),
expect: Some(ronish!(
"a": vector!(Int64Vector, &[1]),
"b": vector!(StringVector, &["Hello, GreptimeDB!"]),
"c": vector!(Float64Vector, &[42.0])
)),
},
// The following are some random tests covering most features of coprocessor
CoprTestCase {
script: r#"
from greptime import vector
@@ -288,6 +647,7 @@ def answer() -> vector[i64]:
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [43]))),
},
// normalize.py
CoprTestCase {
script: r#"
import math
@@ -316,7 +676,6 @@ def normalize(v) -> vector[i64]:
CoprTestCase {
script: r#"
import math
from greptime import vector
def normalize0(x):
if x is None or math.isnan(x):
@@ -330,7 +689,7 @@ def normalize0(x):
@coprocessor(args=["number"], sql="select number from numbers limit 10", returns=["value"], backend="pyo3")
def normalize(v) -> vector[i64]:
return vector([normalize0(x) for x in v])
return [normalize0(x) for x in v]
"#
.to_string(),

View File

@@ -349,7 +349,7 @@ fn approx_percentile_cont(py: Python<'_>, values: &PyVector, percent: f64) -> Py
Arc::new(percent) as _,
],
"ApproxPercentileCont",
values.arrow_data_type().to_owned(),
values.arrow_data_type(),
)
.map_err(|e| PyValueError::new_err(format!("{e:?}")))?,
&[values.to_arrow_array()],

View File

@@ -16,9 +16,11 @@ use std::collections::HashMap;
use common_recordbatch::RecordBatch;
use common_telemetry::timer;
use datafusion_common::ScalarValue;
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{Helper, VectorRef};
use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::types::{PyDict, PyList, PyModule, PyTuple};
use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyModule, PyString, PyTuple};
use pyo3::{pymethods, PyAny, PyCell, PyObject, PyResult, Python, ToPyObject};
use snafu::{ensure, Backtrace, GenerateImplicitData, ResultExt};
@@ -167,40 +169,119 @@ coprocessor = copr
/// Cast the return value of a py script to `Vec<VectorRef>`;
/// constants will be broadcast to the length of `col_len`.
/// Accept and convert `obj` if it is one of the following:
/// 1. a tuple of PyVector/PyList of literals/single literals of the same type,
/// or a mixed tuple of PyVector and PyList of same-type literals
/// 2. a single PyVector
/// 3. a PyList of same-type literals
/// 4. a single constant, which will be expanded to a PyVector of length `col_len`
fn py_any_to_vec(obj: &PyAny, col_len: usize) -> PyResult<Vec<VectorRef>> {
// check if obj is of two types:
// 1. tuples of PyVector
// 2. a single PyVector
let is_literal = |obj: &PyAny| -> PyResult<bool> {
Ok(obj.is_instance_of::<PyInt>()?
|| obj.is_instance_of::<PyFloat>()?
|| obj.is_instance_of::<PyString>()?
|| obj.is_instance_of::<PyBool>()?)
};
let check = if obj.is_instance_of::<PyTuple>()? {
let tuple = obj.downcast::<PyTuple>()?;
(0..tuple.len())
.map(|idx| tuple.get_item(idx).map(|i| i.is_instance_of::<PyVector>()))
.map(|idx| {
tuple.get_item(idx).map(|i| -> PyResult<bool> {
Ok(i.is_instance_of::<PyVector>()?
|| i.is_instance_of::<PyList>()?
|| is_literal(i)?)
})
})
.all(|i| matches!(i, Ok(Ok(true))))
} else {
obj.is_instance_of::<PyVector>()?
obj.is_instance_of::<PyVector>()? || obj.is_instance_of::<PyList>()? || is_literal(obj)?
};
if !check {
return Err(PyRuntimeError::new_err(format!(
"Expect a tuple of vectors or one single vector, found {obj}"
"Expect a tuple of vectors(or lists) or one single vector or a list of same type literals, found {obj}"
)));
}
if let Ok(tuple) = obj.downcast::<PyTuple>() {
let len = tuple.len();
let v = (0..len)
.map(|idx| tuple.get_item(idx))
.map(|elem| {
elem.map(|any| py_obj_broadcast_to_vec(any, col_len))
.and_then(|v| v)
elem.map(|any| {
if let Ok(list) = any.downcast::<PyList>() {
py_list_to_vec(list)
} else {
py_obj_broadcast_to_vec(any, col_len)
}
})
.and_then(|v| v)
})
.collect::<PyResult<Vec<_>>>()?;
Ok(v)
} else if let Ok(list) = obj.downcast::<PyList>() {
let ret = py_list_to_vec(list)?;
Ok(vec![ret])
} else {
let ret = py_obj_broadcast_to_vec(obj, col_len)?;
Ok(vec![ret])
}
}
/// Convert a Python list whose elements are all of the same type (bool/int/float/string) into a [`VectorRef`]
fn py_list_to_vec(list: &PyList) -> PyResult<VectorRef> {
/// Make sure the elements of the list are all of the same type: bool/int/float/string
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
enum ExpectType {
Bool,
Int,
Float,
String,
}
let mut expected_type = None;
let mut v = Vec::with_capacity(list.len());
for (idx, elem) in list.iter().enumerate() {
let (elem_ty, con_type) = if elem.is_instance_of::<PyBool>()? {
(ExpectType::Bool, ConcreteDataType::boolean_datatype())
} else if elem.is_instance_of::<PyInt>()? {
(ExpectType::Int, ConcreteDataType::int64_datatype())
} else if elem.is_instance_of::<PyFloat>()? {
(ExpectType::Float, ConcreteDataType::float64_datatype())
} else if elem.is_instance_of::<PyString>()? {
(ExpectType::String, ConcreteDataType::string_datatype())
} else {
return Err(PyRuntimeError::new_err(format!(
"Expect list contains bool or int or float or string, found <{list}>"
)));
};
if let Some(ty) = expected_type {
if ty != elem_ty {
return Err(PyRuntimeError::new_err(format!(
"Expect a list of same type elements, found {list} in position {idx} in list"
)));
}
} else {
expected_type = Some(elem_ty);
}
// push into a vector buffer
let val = pyo3_obj_try_to_typed_val(elem, Some(con_type))?;
let scalar = val.try_to_scalar_value(&val.data_type()).map_err(|err| {
PyRuntimeError::new_err(format!("Can't convert value to scalar value: {}", err))
})?;
v.push(scalar);
}
let array = ScalarValue::iter_to_array(v.into_iter()).map_err(|err| {
PyRuntimeError::new_err(format!("Can't convert scalar value list to array: {}", err))
})?;
let ret = Helper::try_into_vector(array).map_err(|err| {
PyRuntimeError::new_err(format!("Can't convert array to vector: {}", err))
})?;
Ok(ret)
}
/// Broadcast a single Python object to a vector of that object with length `col_len`.
/// `obj` is either:
/// 1. a PyVector
/// 2. a single literal
fn py_obj_broadcast_to_vec(obj: &PyAny, col_len: usize) -> PyResult<VectorRef> {
if let Ok(v) = obj.extract::<PyVector>() {
Ok(v.as_vector_ref())

View File

@@ -205,7 +205,7 @@ impl ScriptsTable {
}
/// Build scripts table
fn build_scripts_schema() -> RawSchema {
pub fn build_scripts_schema() -> RawSchema {
let cols = vec![
ColumnSchema::new(
"schema".to_string(),

View File

@@ -60,6 +60,7 @@ use self::influxdb::{influxdb_health, influxdb_ping, influxdb_write};
use crate::auth::UserProviderRef;
use crate::error::{AlreadyStartedSnafu, Result, StartHttpSnafu};
use crate::http::admin::flush;
use crate::metrics_handler::MetricsHandler;
use crate::query_handler::grpc::ServerGrpcQueryHandlerRef;
use crate::query_handler::sql::ServerSqlQueryHandlerRef;
use crate::query_handler::{
@@ -99,9 +100,10 @@ pub const HTTP_API_PREFIX: &str = "/v1/";
// TODO(fys): This is a temporary workaround, it will be improved later
pub static PUBLIC_APIS: [&str; 2] = ["/v1/influxdb/ping", "/v1/influxdb/health"];
#[derive(Default)]
pub struct HttpServer {
sql_handler: ServerSqlQueryHandlerRef,
grpc_handler: ServerGrpcQueryHandlerRef,
sql_handler: Option<ServerSqlQueryHandlerRef>,
grpc_handler: Option<ServerGrpcQueryHandlerRef>,
options: HttpOptions,
influxdb_handler: Option<InfluxdbLineProtocolHandlerRef>,
opentsdb_handler: Option<OpentsdbProtocolHandlerRef>,
@@ -109,9 +111,11 @@ pub struct HttpServer {
script_handler: Option<ScriptHandlerRef>,
shutdown_tx: Mutex<Option<Sender<()>>>,
user_provider: Option<UserProviderRef>,
metrics_handler: Option<MetricsHandler>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct HttpOptions {
pub addr: String,
#[serde(with = "humantime_serde")]
@@ -354,65 +358,74 @@ pub struct ApiState {
pub script_handler: Option<ScriptHandlerRef>,
}
impl HttpServer {
pub fn new(
sql_handler: ServerSqlQueryHandlerRef,
grpc_handler: ServerGrpcQueryHandlerRef,
options: HttpOptions,
) -> Self {
#[derive(Default)]
pub struct HttpServerBuilder {
inner: HttpServer,
}
impl HttpServerBuilder {
pub fn new(options: HttpOptions) -> Self {
Self {
sql_handler,
grpc_handler,
options,
opentsdb_handler: None,
influxdb_handler: None,
prom_handler: None,
user_provider: None,
script_handler: None,
shutdown_tx: Mutex::new(None),
inner: HttpServer {
sql_handler: None,
grpc_handler: None,
options,
opentsdb_handler: None,
influxdb_handler: None,
prom_handler: None,
user_provider: None,
script_handler: None,
metrics_handler: None,
shutdown_tx: Mutex::new(None),
},
}
}
pub fn set_opentsdb_handler(&mut self, handler: OpentsdbProtocolHandlerRef) {
debug_assert!(
self.opentsdb_handler.is_none(),
"OpenTSDB handler can be set only once!"
);
self.opentsdb_handler.get_or_insert(handler);
pub fn with_sql_handler(&mut self, handler: ServerSqlQueryHandlerRef) -> &mut Self {
self.inner.sql_handler.get_or_insert(handler);
self
}
pub fn set_script_handler(&mut self, handler: ScriptHandlerRef) {
debug_assert!(
self.script_handler.is_none(),
"Script handler can be set only once!"
);
self.script_handler.get_or_insert(handler);
pub fn with_grpc_handler(&mut self, handler: ServerGrpcQueryHandlerRef) -> &mut Self {
self.inner.grpc_handler.get_or_insert(handler);
self
}
pub fn set_influxdb_handler(&mut self, handler: InfluxdbLineProtocolHandlerRef) {
debug_assert!(
self.influxdb_handler.is_none(),
"Influxdb line protocol handler can be set only once!"
);
self.influxdb_handler.get_or_insert(handler);
pub fn with_opentsdb_handler(&mut self, handler: OpentsdbProtocolHandlerRef) -> &mut Self {
self.inner.opentsdb_handler.get_or_insert(handler);
self
}
pub fn set_prom_handler(&mut self, handler: PrometheusProtocolHandlerRef) {
debug_assert!(
self.prom_handler.is_none(),
"Prometheus protocol handler can be set only once!"
);
self.prom_handler.get_or_insert(handler);
pub fn with_script_handler(&mut self, handler: ScriptHandlerRef) -> &mut Self {
self.inner.script_handler.get_or_insert(handler);
self
}
pub fn set_user_provider(&mut self, user_provider: UserProviderRef) {
debug_assert!(
self.user_provider.is_none(),
"User provider can be set only once!"
);
self.user_provider.get_or_insert(user_provider);
pub fn with_influxdb_handler(&mut self, handler: InfluxdbLineProtocolHandlerRef) -> &mut Self {
self.inner.influxdb_handler.get_or_insert(handler);
self
}
pub fn with_prom_handler(&mut self, handler: PrometheusProtocolHandlerRef) -> &mut Self {
self.inner.prom_handler.get_or_insert(handler);
self
}
pub fn with_user_provider(&mut self, user_provider: UserProviderRef) -> &mut Self {
self.inner.user_provider.get_or_insert(user_provider);
self
}
pub fn with_metrics_handler(&mut self, handler: MetricsHandler) -> &mut Self {
self.inner.metrics_handler.get_or_insert(handler);
self
}
pub fn build(&mut self) -> HttpServer {
std::mem::take(self).inner
}
}
impl HttpServer {
pub fn make_app(&self) -> Router {
let mut api = OpenApi {
info: Info {
@@ -428,19 +441,25 @@ impl HttpServer {
..OpenApi::default()
};
let sql_router = self
.route_sql(ApiState {
sql_handler: self.sql_handler.clone(),
script_handler: self.script_handler.clone(),
})
.finish_api(&mut api)
.layer(Extension(api));
let mut router = Router::new();
let mut router = Router::new().nest(&format!("/{HTTP_API_VERSION}"), sql_router);
router = router.nest(
&format!("/{HTTP_API_VERSION}/admin"),
self.route_admin(self.grpc_handler.clone()),
);
if let Some(sql_handler) = self.sql_handler.clone() {
let sql_router = self
.route_sql(ApiState {
sql_handler,
script_handler: self.script_handler.clone(),
})
.finish_api(&mut api)
.layer(Extension(api));
router = router.nest(&format!("/{HTTP_API_VERSION}"), sql_router);
}
if let Some(grpc_handler) = self.grpc_handler.clone() {
router = router.nest(
&format!("/{HTTP_API_VERSION}/admin"),
self.route_admin(grpc_handler.clone()),
);
}
if let Some(opentsdb_handler) = self.opentsdb_handler.clone() {
router = router.nest(
@@ -472,7 +491,9 @@ impl HttpServer {
);
}
router = router.route("/metrics", routing::get(handler::metrics));
if let Some(metrics_handler) = self.metrics_handler {
router = router.nest("", self.route_metrics(metrics_handler));
}
router = router.route(
"/health",
@@ -498,6 +519,12 @@ impl HttpServer {
)
}
fn route_metrics<S>(&self, metrics_handler: MetricsHandler) -> Router<S> {
Router::new()
.route("/metrics", routing::get(handler::metrics))
.with_state(metrics_handler)
}
fn route_sql<S>(&self, api_state: ApiState) -> ApiRouter<S> {
ApiRouter::new()
.api_route(
@@ -680,8 +707,10 @@ mod test {
let instance = Arc::new(DummyInstance { _tx: tx });
let sql_instance = ServerSqlQueryHandlerAdaptor::arc(instance.clone());
let grpc_instance = ServerGrpcQueryHandlerAdaptor::arc(instance);
let server = HttpServer::new(sql_instance, grpc_instance, HttpOptions::default());
let server = HttpServerBuilder::new(HttpOptions::default())
.with_sql_handler(sql_instance)
.with_grpc_handler(grpc_instance)
.build();
server.make_app().route(
"/test/timeout",
get(forever.layer(
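With the builder in place, a server registers only the handlers a node actually needs, which is what lets a datanode in distributed mode expose just the /metrics route. A minimal sketch of that metrics-only case, assuming the module paths below match the servers crate layout shown in this diff; the MetricsHandler value is taken as a parameter because its constructor is not part of this excerpt.

use servers::http::{HttpOptions, HttpServer, HttpServerBuilder};
use servers::metrics_handler::MetricsHandler;

// Build an HTTP server that serves only /metrics: no SQL, gRPC, or protocol
// handlers are registered, so make_app() skips those routes.
fn metrics_only_server(metrics_handler: MetricsHandler) -> HttpServer {
    HttpServerBuilder::new(HttpOptions::default())
        .with_metrics_handler(metrics_handler)
        .build()
}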

Some files were not shown because too many files have changed in this diff.