Compare commits


18 Commits

Author SHA1 Message Date
dimbtp
f735f739e5 feat: add information_schema.key_column_usage (#3057)
* feat: add information_schema.key_column_usage

* fix: follow #3057 review comments

* fix: add sql test for `key_column_usage` table

* fix: fix spell typo

* fix: resolve conflict in sql test result
2023-12-31 12:29:06 +00:00
dimbtp
6070e88077 feat: add information_schema.files (#3054)
* feat: add information_schema.files

* fix: update information_schema.result

* fix: change `EXTRA` field type to string
2023-12-31 02:08:16 +00:00
niebayes
9db168875c fix(remote_wal): some known issues (#3052)
* fix: some known issues

* fix: CR

* fix: CR

* chore: replace Mutex with RwLock
2023-12-30 15:28:10 +00:00
AntiTopQuark
4460af800f feat(TableRouteValue): add panic notes and type checks (#3031)
* refactor(TableRouteValue): add panic notes and type checks

* chore: add deprecate develop branch warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add error defines and checks

* Update README.md

* update code format and fix tests

* update name of error

* delete unused note

* fix unsafe .expect() for region_route()

* update error name

* update unwrap

* update code format

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-30 13:02:26 +00:00
Zhenchi
69a53130c2 feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/applier/builder.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: remove unwrap

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: error source

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-12-30 07:32:32 +00:00
Ning Sun
1c94d4c506 ci: fix duplicated doc issue (#3056) 2023-12-30 13:36:14 +08:00
Ning Sun
41e51d4ab3 chore: attempt to add doc issue in label task (#3021)
* chore: attempt to add doc issue in label task

* ci: check pr body for doc issue creation
2023-12-29 20:17:34 +08:00
dennis zhuang
11ae85b1cd feat: adds information_schema.schemata (#3051)
* feat: improve information_schema.columns

* feat: adds information_schema.schemata

* fix: instance test

* fix: comment
2023-12-29 09:22:31 +00:00
LFC
7551432cff refactor: merge standalone and metasrv table metadata allocators (#3035)
* refactor: merge standalone and metasrv table metadata allocators

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: niebayes <niebayes@gmail.com>

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

---------

Co-authored-by: niebayes <niebayes@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2023-12-29 08:50:59 +00:00
Weny Xu
e16f093282 test(remote_wal): add sqlness with kafka wal (#3027)
* feat(sqlness): add kafka wal config

* chore: add sqlness with kafka wal ci config

* fix: fix config

* chore: apply suggestions from CR

* fix: add metasrv config to sqlness with kafka

* fix: replay memtable should from flushed_entry_id + 1

* fix: should set append flag to fopen

* feat: start wal allocator in standalone meta mode

* feat: append a noop record after kafka topic initialization

* test: ignore tests temporarily

* test: change sqlness kafka wal config
2023-12-29 08:17:22 +00:00
Weny Xu
301ffc1d91 feat(remote_wal): append a noop record after kafka topic initialization (#3040)
* feat: append a noop record after kafka topic initialization

* chore: apply suggestions from CR

* feat: ignore the noop record during the read
2023-12-29 07:46:48 +00:00
Weny Xu
d22072f68b feat: expose region migration http endpoint (#3032)
* feat: add region migration endpoint

* feat: implement naive peer registry

* chore: apply suggestions from CR

* chore: rename `ContextFactoryImpl` to `DefaultContextFactory`

* chore: rename unregister to deregister

* refactor: use lease-based alive datanode checking
2023-12-29 06:57:00 +00:00
Weny Xu
b526d159c3 fix: replay memtable should from flushed_entry_id + 1 (#3038)
* fix: replay memtable should from flushed_entry_id + 1

* chore: apply suggestions from CR
2023-12-28 16:12:07 +00:00
ZonaHe
7152407428 feat: update dashboard to v0.4.5 (#3033)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2023-12-28 11:51:43 +00:00
Ruihang Xia
b58296de22 feat: Implement OR for PromQL (#3024)
* with anti-join

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl UnionDistinctOn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* unify schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add UTs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/promql/src/planner.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2023-12-28 06:56:17 +00:00
Yingwen
1d80a0f2d6 chore: Update CI badge in README.md (#3028)
chore: Update README.md

Fix CI badge
2023-12-28 05:59:27 +00:00
Ruihang Xia
286b9af661 chore: change all reference from develop to main (#3026)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-28 04:11:00 +00:00
dennis zhuang
af13eeaad3 feat: adds character_sets, collations and events etc. (#3017)
feat: adds character_sets, collations and events etc. to information_schema
2023-12-28 04:01:42 +00:00
103 changed files with 3713 additions and 831 deletions

View File

@@ -1,7 +1,7 @@
on:
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
- 'config/**'

View File

@@ -11,7 +11,6 @@ on:
- '.gitignore'
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
@@ -105,6 +104,37 @@ jobs:
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
sqlness-kafka-wal:
name: Sqlness Test with Kafka Wal
if: github.event.pull_request.draft == false
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04-8-cores ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: cargo sqlness -w kafka -k 127.0.0.1:9092
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
fmt:
name: Rustfmt
if: github.event.pull_request.draft == false

View File

@@ -18,3 +18,14 @@ jobs:
enable-versioned-regex: false
repo-token: ${{ secrets.GITHUB_TOKEN }}
sync-labels: 1
- name: create an issue in doc repo
uses: dacbd/create-issue-action@main
if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
with:
owner: GreptimeTeam
repo: docs
token: ${{ secrets.DOCS_REPO_TOKEN }}
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
body: |
A document change request is generated from
${{ github.event.issue.html_url || github.event.pull_request.html_url }}

View File

@@ -11,7 +11,6 @@ on:
- '.gitignore'
push:
branches:
- develop
- main
paths:
- 'docs/**'

View File

@@ -3,7 +3,7 @@ name: License checker
on:
push:
branches:
- develop
- main
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
jobs:

View File

@@ -10,7 +10,7 @@ Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get th
It can feel intimidating to contribute to a complex project, but it can also be exciting and fun. These general notes will help everyone participate in this communal activity.
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md)
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md)
- Small changes make huge differences. We will happily accept a PR making a single character change if it helps move forward. Don't wait to have everything working.
- Check the closed issues before opening your issue.
- Try to follow the existing style of the code.
@@ -26,7 +26,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such
## Code of Conduct
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
## License

Cargo.lock (generated)
View File

@@ -4029,7 +4029,7 @@ dependencies = [
"prost 0.12.3",
"rand",
"regex",
"regex-automata 0.1.10",
"regex-automata 0.2.0",
"snafu",
"tokio",
"tokio-util",
@@ -4977,6 +4977,7 @@ dependencies = [
"datatypes",
"futures",
"humantime-serde",
"index",
"lazy_static",
"log-store",
"memcomparable",
@@ -6541,6 +6542,7 @@ dependencies = [
name = "promql"
version = "0.5.0"
dependencies = [
"ahash 0.8.6",
"async-recursion",
"async-trait",
"bytemuck",
@@ -7133,8 +7135,18 @@ name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782"
dependencies = [
"fst",
"memchr",
"regex-syntax 0.6.29",
]
@@ -8858,6 +8870,7 @@ dependencies = [
"common-recordbatch",
"common-time",
"serde",
"serde_json",
"sqlness",
"tinytemplate",
"tokio",

View File

@@ -111,7 +111,7 @@ prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
regex-automata = { version = "0.2", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
@@ -169,6 +169,7 @@ datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" }
frontend = { path = "src/frontend" }
index = { path = "src/index" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }

View File

@@ -1,8 +1,8 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png" width="400px">
</picture>
</p>
@@ -12,11 +12,11 @@
</h3>
<p align="center">
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/develop/graph/badge.svg?token=FITFDI3J3C"></img></a>
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>
&nbsp;
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>
&nbsp;
<a href="https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
</p>
<p align="center">
@@ -27,9 +27,6 @@
<a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
</p>
> [!WARNING]
> Our default branch has changed from `develop` to `main` (issue [#3025](https://github.com/GreptimeTeam/greptimedb/issues/3025)). Please update your local repository to use the `main` branch.
## What is GreptimeDB
GreptimeDB is an open-source time-series database with a special focus on
@@ -171,7 +168,7 @@ In addition, you may:
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between
open contributions and allowing you to use the software however you want.
[1]: <https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE>
[1]: <https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE>
## Contributing

View File

@@ -13,7 +13,9 @@
// limitations under the License.
mod columns;
mod key_column_usage;
mod memory_table;
mod schemata;
mod table_names;
mod tables;
@@ -40,7 +42,9 @@ pub use table_names::*;
use self::columns::InformationSchemaColumns;
use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager;
@@ -51,6 +55,12 @@ lazy_static! {
COLUMN_PRIVILEGES,
COLUMN_STATISTICS,
BUILD_INFO,
CHARACTER_SETS,
COLLATIONS,
COLLATION_CHARACTER_SET_APPLICABILITY,
CHECK_CONSTRAINTS,
EVENTS,
FILES,
];
}
@@ -121,11 +131,16 @@ impl InformationSchemaProvider {
fn build_tables(&mut self) {
let mut tables = HashMap::new();
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
tables.insert(
KEY_COLUMN_USAGE.to_string(),
self.build_table(KEY_COLUMN_USAGE).unwrap(),
);
// Add memory tables
for name in MEMORY_TABLES.iter() {
tables.insert((*name).to_string(), self.build_table(name).unwrap());
tables.insert((*name).to_string(), self.build_table(name).expect(name));
}
self.tables = tables;
@@ -156,6 +171,22 @@ impl InformationSchemaProvider {
COLUMN_PRIVILEGES => setup_memory_table!(COLUMN_PRIVILEGES),
COLUMN_STATISTICS => setup_memory_table!(COLUMN_STATISTICS),
BUILD_INFO => setup_memory_table!(BUILD_INFO),
CHARACTER_SETS => setup_memory_table!(CHARACTER_SETS),
COLLATIONS => setup_memory_table!(COLLATIONS),
COLLATION_CHARACTER_SET_APPLICABILITY => {
setup_memory_table!(COLLATION_CHARACTER_SET_APPLICABILITY)
}
CHECK_CONSTRAINTS => setup_memory_table!(CHECK_CONSTRAINTS),
EVENTS => setup_memory_table!(EVENTS),
FILES => setup_memory_table!(FILES),
KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
SCHEMATA => Some(Arc::new(InformationSchemaSchemata::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
_ => None,
}
}
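
Taken together, the registration above means the new tables should show up alongside the existing ones. A minimal sanity-check sketch, assuming the pre-existing `information_schema.tables` table exposes the standard `table_schema` and `table_name` columns:

-- List the information_schema tables added in this range (names per table_names.rs below).
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'information_schema'
  AND table_name IN ('key_column_usage', 'schemata', 'files', 'character_sets',
                     'collations', 'collation_character_set_applicability',
                     'check_constraints', 'events');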

View File

@@ -51,6 +51,10 @@ const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type";
const COLUMN_DEFAULT: &str = "column_default";
const IS_NULLABLE: &str = "is_nullable";
const COLUMN_TYPE: &str = "column_type";
const COLUMN_COMMENT: &str = "column_comment";
impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
@@ -69,6 +73,10 @@ impl InformationSchemaColumns {
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
]))
}
@@ -126,6 +134,11 @@ struct InformationSchemaColumnsBuilder {
column_names: StringVectorBuilder,
data_types: StringVectorBuilder,
semantic_types: StringVectorBuilder,
column_defaults: StringVectorBuilder,
is_nullables: StringVectorBuilder,
column_types: StringVectorBuilder,
column_comments: StringVectorBuilder,
}
impl InformationSchemaColumnsBuilder {
@@ -144,6 +157,10 @@ impl InformationSchemaColumnsBuilder {
column_names: StringVectorBuilder::with_capacity(42),
data_types: StringVectorBuilder::with_capacity(42),
semantic_types: StringVectorBuilder::with_capacity(42),
column_defaults: StringVectorBuilder::with_capacity(42),
is_nullables: StringVectorBuilder::with_capacity(42),
column_types: StringVectorBuilder::with_capacity(42),
column_comments: StringVectorBuilder::with_capacity(42),
}
}
@@ -187,9 +204,8 @@ impl InformationSchemaColumnsBuilder {
&catalog_name,
&schema_name,
&table_name,
&column.name,
&column.data_type.name(),
semantic_type,
column,
);
}
} else {
@@ -206,16 +222,31 @@ impl InformationSchemaColumnsBuilder {
catalog_name: &str,
schema_name: &str,
table_name: &str,
column_name: &str,
data_type: &str,
semantic_type: &str,
column_schema: &ColumnSchema,
) {
let data_type = &column_schema.data_type.name();
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.column_names.push(Some(column_name));
self.column_names.push(Some(&column_schema.name));
self.data_types.push(Some(data_type));
self.semantic_types.push(Some(semantic_type));
self.column_defaults.push(
column_schema
.default_constraint()
.map(|s| format!("{}", s))
.as_deref(),
);
if column_schema.is_nullable() {
self.is_nullables.push(Some("Yes"));
} else {
self.is_nullables.push(Some("No"));
}
self.column_types.push(Some(data_type));
self.column_comments
.push(column_schema.column_comment().map(|x| x.as_ref()));
}
fn finish(&mut self) -> Result<RecordBatch> {
@@ -226,6 +257,10 @@ impl InformationSchemaColumnsBuilder {
Arc::new(self.column_names.finish()),
Arc::new(self.data_types.finish()),
Arc::new(self.semantic_types.finish()),
Arc::new(self.column_defaults.finish()),
Arc::new(self.is_nullables.finish()),
Arc::new(self.column_types.finish()),
Arc::new(self.column_comments.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
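
With the builder extended as above, `information_schema.columns` should report the four new fields for every column. A hedged usage sketch; the table name `monitor` is hypothetical, not taken from this changeset:

-- Inspect the metadata fields added to information_schema.columns in #3051.
SELECT column_name, data_type, semantic_type,
       column_default, is_nullable, column_type, column_comment
FROM information_schema.columns
WHERE table_name = 'monitor';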

View File

@@ -0,0 +1,338 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use super::KEY_COLUMN_USAGE;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::InformationTable;
use crate::CatalogManager;
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaKeyColumnUsage {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(
"constraint_catalog",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
"constraint_schema",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
"constraint_name",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("column_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
"ordinal_position",
ConcreteDataType::uint32_datatype(),
false,
),
ColumnSchema::new(
"position_in_unique_constraint",
ConcreteDataType::uint32_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_schema",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_name",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_column_name",
ConcreteDataType::string_datatype(),
true,
),
]))
}
fn builder(&self) -> InformationSchemaKeyColumnUsageBuilder {
InformationSchemaKeyColumnUsageBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaKeyColumnUsage {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID
}
fn table_name(&self) -> &'static str {
KEY_COLUMN_USAGE
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_key_column_usage()
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
/// Builds the `information_schema.KEY_COLUMN_USAGE` table row by row
///
/// Columns are based on <https://dev.mysql.com/doc/refman/8.2/en/information-schema-key-column-usage-table.html>
struct InformationSchemaKeyColumnUsageBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
constraint_catalog: StringVectorBuilder,
constraint_schema: StringVectorBuilder,
constraint_name: StringVectorBuilder,
table_catalog: StringVectorBuilder,
table_schema: StringVectorBuilder,
table_name: StringVectorBuilder,
column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder,
referenced_table_schema: StringVectorBuilder,
referenced_table_name: StringVectorBuilder,
referenced_column_name: StringVectorBuilder,
}
impl InformationSchemaKeyColumnUsageBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
constraint_catalog: StringVectorBuilder::with_capacity(42),
constraint_schema: StringVectorBuilder::with_capacity(42),
constraint_name: StringVectorBuilder::with_capacity(42),
table_catalog: StringVectorBuilder::with_capacity(42),
table_schema: StringVectorBuilder::with_capacity(42),
table_name: StringVectorBuilder::with_capacity(42),
column_name: StringVectorBuilder::with_capacity(42),
ordinal_position: UInt32VectorBuilder::with_capacity(42),
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(42),
referenced_table_schema: StringVectorBuilder::with_capacity(42),
referenced_table_name: StringVectorBuilder::with_capacity(42),
referenced_column_name: StringVectorBuilder::with_capacity(42),
}
}
/// Construct the `information_schema.KEY_COLUMN_USAGE` virtual table
async fn make_key_column_usage(&mut self) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let mut time_index_constraints = vec![];
let mut primary_constraints = vec![];
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() {
if column.is_time_index() {
time_index_constraints.push((
schema_name.clone(),
table_name.clone(),
column.name.clone(),
));
}
if keys.contains(&idx) {
primary_constraints.push((
schema_name.clone(),
table_name.clone(),
column.name.clone(),
));
}
// TODO(dimbtp): foreign key constraint not supported yet
}
} else {
unreachable!();
}
}
}
for (i, (schema_name, table_name, column_name)) in
time_index_constraints.into_iter().enumerate()
{
self.add_key_column_usage(
&schema_name,
"TIME INDEX",
&schema_name,
&table_name,
&column_name,
i as u32 + 1,
);
}
for (i, (schema_name, table_name, column_name)) in
primary_constraints.into_iter().enumerate()
{
self.add_key_column_usage(
&schema_name,
"PRIMARY",
&schema_name,
&table_name,
&column_name,
i as u32 + 1,
);
}
self.finish()
}
// TODO(dimbtp): foreign key constraints would fill the last 4 fields with
// non-`None` values, but foreign keys are not supported yet.
fn add_key_column_usage(
&mut self,
constraint_schema: &str,
constraint_name: &str,
table_schema: &str,
table_name: &str,
column_name: &str,
ordinal_position: u32,
) {
self.constraint_catalog.push(Some("def"));
self.constraint_schema.push(Some(constraint_schema));
self.constraint_name.push(Some(constraint_name));
self.table_catalog.push(Some("def"));
self.table_schema.push(Some(table_schema));
self.table_name.push(Some(table_name));
self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None);
self.referenced_table_schema.push(None);
self.referenced_table_name.push(None);
self.referenced_column_name.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.constraint_catalog.finish()),
Arc::new(self.constraint_schema.finish()),
Arc::new(self.constraint_name.finish()),
Arc::new(self.table_catalog.finish()),
Arc::new(self.table_schema.finish()),
Arc::new(self.table_name.finish()),
Arc::new(self.column_name.finish()),
Arc::new(self.ordinal_position.finish()),
Arc::new(self.position_in_unique_constraint.finish()),
Arc::new(self.referenced_table_schema.finish()),
Arc::new(self.referenced_table_name.finish()),
Arc::new(self.referenced_column_name.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaKeyColumnUsage {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_key_column_usage()
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}
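
Given the builder logic above (one "TIME INDEX" row per time index column, one "PRIMARY" row per primary key column, and `constraint_catalog`/`table_catalog` fixed to "def"), an end-to-end sketch might look like the following; the table definition is hypothetical:

-- A table with a time index and a primary key column...
CREATE TABLE monitor (
  host STRING,
  ts TIMESTAMP TIME INDEX,
  cpu DOUBLE,
  PRIMARY KEY (host)
);

-- ...should yield one 'TIME INDEX' row for `ts` and one 'PRIMARY' row for `host`.
SELECT constraint_name, table_schema, table_name, column_name, ordinal_position
FROM information_schema.key_column_usage
WHERE table_name = 'monitor';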

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::MITO_ENGINE;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::StringVector;
use datatypes::vectors::{Int64Vector, StringVector};
use crate::information_schema::table_names::*;
@@ -97,6 +97,136 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
],
),
CHARACTER_SETS => (
vec![
string_column("CHARACTER_SET_NAME"),
string_column("DEFAULT_COLLATE_NAME"),
string_column("DESCRIPTION"),
bigint_column("MAXLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["UTF-8 Unicode"])),
Arc::new(Int64Vector::from_slice([4])),
],
),
COLLATIONS => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
bigint_column("ID"),
string_column("IS_DEFAULT"),
string_column("IS_COMPILED"),
bigint_column("SORTLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(Int64Vector::from_slice([1])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(Int64Vector::from_slice([1])),
],
),
COLLATION_CHARACTER_SET_APPLICABILITY => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
],
),
CHECK_CONSTRAINTS => (
string_columns(&[
"CONSTRAINT_CATALOG",
"CONSTRAINT_SCHEMA",
"CONSTRAINT_NAME",
"CHECK_CLAUSE",
]),
// Check constraints are not supported yet
vec![],
),
EVENTS => (
vec![
string_column("EVENT_CATALOG"),
string_column("EVENT_SCHEMA"),
string_column("EVENT_NAME"),
string_column("DEFINER"),
string_column("TIME_ZONE"),
string_column("EVENT_BODY"),
string_column("EVENT_DEFINITION"),
string_column("EVENT_TYPE"),
datetime_column("EXECUTE_AT"),
bigint_column("INTERVAL_VALUE"),
string_column("INTERVAL_FIELD"),
string_column("SQL_MODE"),
datetime_column("STARTS"),
datetime_column("ENDS"),
string_column("STATUS"),
string_column("ON_COMPLETION"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
datetime_column("LAST_EXECUTED"),
string_column("EVENT_COMMENT"),
bigint_column("ORIGINATOR"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
FILES => (
vec![
bigint_column("FILE_ID"),
string_column("FILE_NAME"),
string_column("FILE_TYPE"),
string_column("TABLESPACE_NAME"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("TABLE_NAME"),
string_column("LOGFILE_GROUP_NAME"),
bigint_column("LOGFILE_GROUP_NUMBER"),
string_column("ENGINE"),
string_column("FULLTEXT_KEYS"),
bigint_column("DELETED_ROWS"),
bigint_column("UPDATE_COUNT"),
bigint_column("FREE_EXTENTS"),
bigint_column("TOTAL_EXTENTS"),
bigint_column("EXTENT_SIZE"),
bigint_column("INITIAL_SIZE"),
bigint_column("MAXIMUM_SIZE"),
bigint_column("AUTOEXTEND_SIZE"),
datetime_column("CREATION_TIME"),
datetime_column("LAST_UPDATE_TIME"),
datetime_column("LAST_ACCESS_TIME"),
datetime_column("RECOVER_TIME"),
bigint_column("TRANSACTION_COUNTER"),
string_column("VERSION"),
string_column("ROW_FORMAT"),
bigint_column("TABLE_ROWS"),
bigint_column("AVG_ROW_LENGTH"),
bigint_column("DATA_LENGTH"),
bigint_column("MAX_DATA_LENGTH"),
bigint_column("INDEX_LENGTH"),
bigint_column("DATA_FREE"),
datetime_column("CREATE_TIME"),
datetime_column("UPDATE_TIME"),
datetime_column("CHECK_TIME"),
string_column("CHECKSUM"),
string_column("STATUS"),
string_column("EXTRA"),
],
vec![],
),
_ => unreachable!("Unknown table in information_schema: {}", table_name),
};
@@ -115,6 +245,22 @@ fn string_column(name: &str) -> ColumnSchema {
)
}
fn bigint_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::int64_datatype(),
false,
)
}
fn datetime_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::datetime_datatype(),
false,
)
}
#[cfg(test)]
mod tests {
use super::*;
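
The memory tables above are served from the static vectors shown here, so their contents are fixed. A hedged sketch of what querying them should return:

-- character_sets and collations each expose a single utf8 / utf8_bin row;
-- check_constraints, events and files are defined but currently return no rows.
SELECT * FROM information_schema.character_sets;
SELECT * FROM information_schema.collations;
SELECT * FROM information_schema.files;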

View File

@@ -0,0 +1,210 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_SCHEMATA_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::StringVectorBuilder;
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use super::SCHEMATA;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::InformationTable;
use crate::CatalogManager;
/// The `information_schema.schemata` table implementation.
pub(super) struct InformationSchemaSchemata {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaSchemata {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new("catalog_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("schema_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
"default_character_set_name",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
"default_collation_name",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new("sql_path", ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaSchemataBuilder {
InformationSchemaSchemataBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaSchemata {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_SCHEMATA_TABLE_ID
}
fn table_name(&self) -> &'static str {
SCHEMATA
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_schemata()
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
/// Builds the `information_schema.schemata` table row by row
///
/// Columns are based on <https://docs.pingcap.com/tidb/stable/information-schema-schemata>
struct InformationSchemaSchemataBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder,
charset_names: StringVectorBuilder,
collation_names: StringVectorBuilder,
sql_paths: StringVectorBuilder,
}
impl InformationSchemaSchemataBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(42),
schema_names: StringVectorBuilder::with_capacity(42),
charset_names: StringVectorBuilder::with_capacity(42),
collation_names: StringVectorBuilder::with_capacity(42),
sql_paths: StringVectorBuilder::with_capacity(42),
}
}
/// Construct the `information_schema.schemata` virtual table
async fn make_schemata(&mut self) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
self.add_schema(&catalog_name, &schema_name);
}
self.finish()
}
fn add_schema(&mut self, catalog_name: &str, schema_name: &str) {
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.charset_names.push(Some("utf8"));
self.collation_names.push(Some("utf8_bin"));
self.sql_paths.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.catalog_names.finish()),
Arc::new(self.schema_names.finish()),
Arc::new(self.charset_names.finish()),
Arc::new(self.collation_names.finish()),
Arc::new(self.sql_paths.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaSchemata {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_schemata()
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}
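
Since the builder walks every schema in the catalog and hard-codes the charset and collation, a hedged query sketch:

-- Each existing schema appears once, with utf8/utf8_bin defaults and a NULL sql_path.
SELECT catalog_name, schema_name,
       default_character_set_name, default_collation_name, sql_path
FROM information_schema.schemata;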

View File

@@ -20,3 +20,11 @@ pub const ENGINES: &str = "engines";
pub const COLUMN_PRIVILEGES: &str = "column_privileges";
pub const COLUMN_STATISTICS: &str = "column_statistics";
pub const BUILD_INFO: &str = "build_info";
pub const CHARACTER_SETS: &str = "character_sets";
pub const COLLATIONS: &str = "collations";
pub const COLLATION_CHARACTER_SET_APPLICABILITY: &str = "collation_character_set_applicability";
pub const CHECK_CONSTRAINTS: &str = "check_constraints";
pub const EVENTS: &str = "events";
pub const KEY_COLUMN_USAGE: &str = "key_column_usage";
pub const FILES: &str = "files";
pub const SCHEMATA: &str = "schemata";

View File

@@ -39,7 +39,7 @@ use crate::from_grpc_response;
/// ```
///
/// If you want to see a concrete usage example, please see
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/main/src/client/examples/stream_ingest.rs).
pub struct StreamInserter {
sender: mpsc::Sender<GreptimeRequest>,

View File

@@ -22,7 +22,8 @@ use common_config::wal::StandaloneWalConfig;
use common_config::{metadata_store_dir, KvBackendConfig};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::{DdlTaskExecutorRef, TableMetadataAllocatorRef};
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
@@ -38,7 +39,6 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataAllocator;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::service_config::{
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
@@ -406,10 +406,8 @@ impl StartCommand {
opts.wal_meta.clone(),
kv_backend.clone(),
));
let table_meta_allocator = Arc::new(StandaloneTableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),
));
let table_meta_allocator =
TableMetadataAllocator::new(table_id_sequence, wal_options_allocator.clone());
let ddl_task_executor = Self::create_ddl_task_executor(
kv_backend.clone(),
@@ -446,7 +444,7 @@ impl StartCommand {
kv_backend: KvBackendRef,
procedure_manager: ProcedureManagerRef,
datanode_manager: DatanodeManagerRef,
table_meta_allocator: TableMetadataAllocatorRef,
table_meta_allocator: TableMetadataAllocator,
) -> Result<DdlTaskExecutorRef> {
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;

View File

@@ -44,6 +44,22 @@ pub const INFORMATION_SCHEMA_COLUMN_PRIVILEGES_TABLE_ID: u32 = 6;
pub const INFORMATION_SCHEMA_COLUMN_STATISTICS_TABLE_ID: u32 = 7;
/// id for information_schema.build_info
pub const INFORMATION_SCHEMA_BUILD_INFO_TABLE_ID: u32 = 8;
/// id for information_schema.CHARACTER_SETS
pub const INFORMATION_SCHEMA_CHARACTER_SETS_TABLE_ID: u32 = 9;
/// id for information_schema.COLLATIONS
pub const INFORMATION_SCHEMA_COLLATIONS_TABLE_ID: u32 = 10;
/// id for information_schema.COLLATION_CHARACTER_SET_APPLICABILITY
pub const INFORMATION_SCHEMA_COLLATION_CHARACTER_SET_APPLICABILITY_TABLE_ID: u32 = 11;
/// id for information_schema.CHECK_CONSTRAINTS
pub const INFORMATION_SCHEMA_CHECK_CONSTRAINTS_TABLE_ID: u32 = 12;
/// id for information_schema.EVENTS
pub const INFORMATION_SCHEMA_EVENTS_TABLE_ID: u32 = 13;
/// id for information_schema.FILES
pub const INFORMATION_SCHEMA_FILES_TABLE_ID: u32 = 14;
/// id for information_schema.SCHEMATA
pub const INFORMATION_SCHEMA_SCHEMATA_TABLE_ID: u32 = 15;
/// id for information_schema.KEY_COLUMN_USAGE
pub const INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID: u32 = 16;
/// ----- End of information_schema tables -----
pub const MITO_ENGINE: &str = "mito";

View File

@@ -42,7 +42,7 @@ pub struct KafkaConfig {
#[serde(skip)]
#[serde(default)]
pub compression: RsKafkaCompression,
/// The maximum log size a kakfa batch producer could buffer.
/// The maximum log size a kafka batch producer could buffer.
pub max_batch_size: ReadableSize,
/// The linger duration of a kafka batch producer.
#[serde(with = "humantime_serde")]

View File

@@ -24,11 +24,12 @@ use crate::error::Result;
use crate::key::table_route::TableRouteValue;
use crate::key::TableMetadataManagerRef;
use crate::region_keeper::MemoryRegionKeeperRef;
use crate::rpc::ddl::{CreateTableTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
pub mod alter_table;
pub mod create_table;
pub mod drop_table;
pub mod table_meta;
pub mod truncate_table;
pub mod utils;
@@ -64,17 +65,6 @@ pub struct TableMetadata {
pub region_wal_options: HashMap<RegionNumber, String>,
}
#[async_trait::async_trait]
pub trait TableMetadataAllocator: Send + Sync {
async fn create(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> Result<TableMetadata>;
}
pub type TableMetadataAllocatorRef = Arc<dyn TableMetadataAllocator>;
#[derive(Clone)]
pub struct DdlContext {
pub datanode_manager: DatanodeManagerRef,

View File

@@ -191,7 +191,7 @@ impl AlterTableProcedure {
.await?
.context(TableRouteNotFoundSnafu { table_id })?
.into_inner();
let region_routes = table_route.region_routes();
let region_routes = table_route.region_routes()?;
let leaders = find_leaders(region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());

View File

@@ -217,7 +217,7 @@ impl CreateTableProcedure {
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
let region_routes = physical_table_route.region_routes();
let region_routes = physical_table_route.region_routes()?;
let request_builder = self.new_region_request_builder(Some(physical_table_id))?;

View File

@@ -116,7 +116,7 @@ impl DropTableProcedure {
/// Registers dropping regions if they don't exist yet.
fn register_dropping_regions(&mut self) -> Result<()> {
let region_routes = self.data.region_routes();
let region_routes = self.data.region_routes()?;
let dropping_regions = operating_leader_regions(region_routes);
@@ -190,7 +190,7 @@ impl DropTableProcedure {
pub async fn on_datanode_drop_regions(&self) -> Result<Status> {
let table_id = self.data.table_id();
let region_routes = &self.data.region_routes();
let region_routes = &self.data.region_routes()?;
let leaders = find_leaders(region_routes);
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
@@ -306,7 +306,7 @@ impl DropTableData {
self.task.table_ref()
}
fn region_routes(&self) -> &Vec<RegionRoute> {
fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
self.table_route_value.region_routes()
}

View File

@@ -0,0 +1,190 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use common_catalog::consts::METRIC_ENGINE;
use common_telemetry::{debug, info};
use snafu::ensure;
use store_api::storage::{RegionId, RegionNumber, TableId};
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::{Result, UnsupportedSnafu};
use crate::key::table_route::{LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue};
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{Region, RegionRoute};
use crate::sequence::SequenceRef;
use crate::wal::{allocate_region_wal_options, WalOptionsAllocatorRef};
pub struct TableMetadataAllocator {
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
peer_allocator: PeerAllocatorRef,
}
impl TableMetadataAllocator {
pub fn new(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
) -> Self {
Self::with_peer_allocator(
table_id_sequence,
wal_options_allocator,
Arc::new(NoopPeerAllocator),
)
}
pub fn with_peer_allocator(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
peer_allocator: PeerAllocatorRef,
) -> Self {
Self {
table_id_sequence,
wal_options_allocator,
peer_allocator,
}
}
async fn allocate_table_id(&self, task: &CreateTableTask) -> Result<TableId> {
let table_id = if let Some(table_id) = &task.create_table.table_id {
let table_id = table_id.id;
ensure!(
!self
.table_id_sequence
.min_max()
.await
.contains(&(table_id as u64)),
UnsupportedSnafu {
operation: format!(
"create table by id {} that is reserved in this node",
table_id
)
}
);
info!(
"Received explicitly allocated table id {}, will use it directly.",
table_id
);
table_id
} else {
self.table_id_sequence.next().await? as TableId
};
Ok(table_id)
}
fn create_wal_options(
&self,
table_route: &TableRouteValue,
) -> Result<HashMap<RegionNumber, String>> {
match table_route {
TableRouteValue::Physical(x) => {
let region_numbers = x
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
}
TableRouteValue::Logical(_) => Ok(HashMap::new()),
}
}
async fn create_table_route(
&self,
ctx: &TableMetadataAllocatorContext,
table_id: TableId,
task: &CreateTableTask,
) -> Result<TableRouteValue> {
let regions = task.partitions.len();
let table_route = if task.create_table.engine == METRIC_ENGINE {
TableRouteValue::Logical(LogicalTableRouteValue {})
} else {
let peers = self.peer_allocator.alloc(ctx, regions).await?;
let region_routes = task
.partitions
.iter()
.enumerate()
.map(|(i, partition)| {
let region = Region {
id: RegionId::new(table_id, i as u32),
partition: Some(partition.clone().into()),
..Default::default()
};
let peer = peers[i % peers.len()].clone();
RegionRoute {
region,
leader_peer: Some(peer),
..Default::default()
}
})
.collect::<Vec<_>>();
TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
};
Ok(table_route)
}
pub async fn create(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(task).await?;
let table_route = self.create_table_route(ctx, table_id, task).await?;
let region_wal_options = self.create_wal_options(&table_route)?;
debug!(
"Allocated region wal options {:?} for table {}",
region_wal_options, table_id
);
Ok(TableMetadata {
table_id,
table_route,
region_wal_options,
})
}
}
pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
/// [PeerAllocator] allocates [Peer]s for creating regions.
#[async_trait]
pub trait PeerAllocator: Send + Sync {
/// Allocates `regions` size [Peer]s.
async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
-> Result<Vec<Peer>>;
}
struct NoopPeerAllocator;
#[async_trait]
impl PeerAllocator for NoopPeerAllocator {
async fn alloc(
&self,
_ctx: &TableMetadataAllocatorContext,
regions: usize,
) -> Result<Vec<Peer>> {
Ok(vec![Peer::default(); regions])
}
}

View File

@@ -26,10 +26,10 @@ use crate::datanode_manager::DatanodeManagerRef;
use crate::ddl::alter_table::AlterTableProcedure;
use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::table_meta::TableMetadataAllocator;
use crate::ddl::truncate_table::TruncateTableProcedure;
use crate::ddl::{
DdlContext, DdlTaskExecutor, ExecutorContext, TableMetadata, TableMetadataAllocatorContext,
TableMetadataAllocatorRef,
};
use crate::error::{
self, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu, TableNotFoundSnafu,
@@ -54,7 +54,7 @@ pub struct DdlManager {
datanode_manager: DatanodeManagerRef,
cache_invalidator: CacheInvalidatorRef,
table_metadata_manager: TableMetadataManagerRef,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
memory_region_keeper: MemoryRegionKeeperRef,
}
@@ -65,7 +65,7 @@ impl DdlManager {
datanode_clients: DatanodeManagerRef,
cache_invalidator: CacheInvalidatorRef,
table_metadata_manager: TableMetadataManagerRef,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
memory_region_keeper: MemoryRegionKeeperRef,
) -> Result<Self> {
let manager = Self {
@@ -278,7 +278,7 @@ async fn handle_truncate_table_task(
let table_route_value =
table_route_value.context(error::TableRouteNotFoundSnafu { table_id })?;
let table_route = table_route_value.into_inner().region_routes().clone();
let table_route = table_route_value.into_inner().region_routes()?.clone();
let id = ddl_manager
.submit_truncate_table_task(
@@ -461,15 +461,15 @@ mod tests {
use crate::ddl::alter_table::AlterTableProcedure;
use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::table_meta::TableMetadataAllocator;
use crate::ddl::truncate_table::TruncateTableProcedure;
use crate::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use crate::error::Result;
use crate::key::TableMetadataManager;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer;
use crate::region_keeper::MemoryRegionKeeper;
use crate::rpc::ddl::CreateTableTask;
use crate::sequence::SequenceBuilder;
use crate::state_store::KvStateStore;
use crate::wal::WalOptionsAllocator;
/// A dummy implemented [DatanodeManager].
pub struct DummyDatanodeManager;
@@ -481,26 +481,12 @@ mod tests {
}
}
/// A dummy implemented [TableMetadataAllocator].
pub struct DummyTableMetadataAllocator;
#[async_trait::async_trait]
impl TableMetadataAllocator for DummyTableMetadataAllocator {
async fn create(
&self,
_ctx: &TableMetadataAllocatorContext,
_task: &CreateTableTask,
) -> Result<TableMetadata> {
unimplemented!()
}
}
#[test]
fn test_try_new() {
let kv_backend = Arc::new(MemoryKvBackend::new());
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
let state_store = Arc::new(KvStateStore::new(kv_backend));
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
let procedure_manager = Arc::new(LocalManager::new(Default::default(), state_store));
let _ = DdlManager::try_new(
@@ -508,7 +494,10 @@ mod tests {
Arc::new(DummyDatanodeManager),
Arc::new(DummyCacheInvalidator),
table_metadata_manager,
Arc::new(DummyTableMetadataAllocator),
TableMetadataAllocator::new(
Arc::new(SequenceBuilder::new("test", kv_backend).build()),
Arc::new(WalOptionsAllocator::default()),
),
Arc::new(MemoryRegionKeeper::default()),
);

View File

@@ -321,6 +321,27 @@ pub enum Error {
error: rskafka::client::error::Error,
},
#[snafu(display(
"Failed to build a Kafka partition client, topic: {}, partition: {}",
topic,
partition
))]
BuildKafkaPartitionClient {
topic: String,
partition: i32,
location: Location,
#[snafu(source)]
error: rskafka::client::error::Error,
},
#[snafu(display("Failed to produce records to Kafka, topic: {}", topic))]
ProduceRecord {
topic: String,
location: Location,
#[snafu(source)]
error: rskafka::client::error::Error,
},
#[snafu(display("Failed to create a Kafka wal topic"))]
CreateKafkaWalTopic {
location: Location,
@@ -330,6 +351,9 @@ pub enum Error {
#[snafu(display("The topic pool is empty"))]
EmptyTopicPool { location: Location },
#[snafu(display("Unexpected table route type: {}", err_msg))]
UnexpectedLogicalRouteTable { location: Location, err_msg: String },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -368,8 +392,11 @@ impl ErrorExt for Error {
| EncodeWalOptions { .. }
| BuildKafkaClient { .. }
| BuildKafkaCtrlClient { .. }
| BuildKafkaPartitionClient { .. }
| ProduceRecord { .. }
| CreateKafkaWalTopic { .. }
| EmptyTopicPool { .. } => StatusCode::Unexpected,
| EmptyTopicPool { .. }
| UnexpectedLogicalRouteTable { .. } => StatusCode::Unexpected,
SendMessage { .. }
| GetKvCache { .. }

View File

@@ -483,7 +483,7 @@ impl TableMetadataManager {
.build_delete_txn(table_id, table_info_value)?;
// Deletes datanode table key value pairs.
let distribution = region_distribution(table_route_value.region_routes())?;
let distribution = region_distribution(table_route_value.region_routes()?)?;
let delete_datanode_txn = self
.datanode_table_manager()
.build_delete_txn(table_id, distribution)?;
@@ -608,7 +608,7 @@ impl TableMetadataManager {
) -> Result<()> {
// Updates the datanode table key value pairs.
let current_region_distribution =
region_distribution(current_table_route_value.region_routes())?;
region_distribution(current_table_route_value.region_routes()?)?;
let new_region_distribution = region_distribution(&new_region_routes)?;
let update_datanode_table_txn = self.datanode_table_manager().build_update_txn(
@@ -621,7 +621,7 @@ impl TableMetadataManager {
)?;
// Updates the table_route.
let new_table_route_value = current_table_route_value.update(new_region_routes);
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
let (update_table_route_txn, on_update_table_route_failure) = self
.table_route_manager()
@@ -656,7 +656,7 @@ impl TableMetadataManager {
where
F: Fn(&RegionRoute) -> Option<Option<RegionStatus>>,
{
let mut new_region_routes = current_table_route_value.region_routes().clone();
let mut new_region_routes = current_table_route_value.region_routes()?.clone();
let mut updated = 0;
for route in &mut new_region_routes {
@@ -673,7 +673,7 @@ impl TableMetadataManager {
}
// Updates the table_route.
let new_table_route_value = current_table_route_value.update(new_region_routes);
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
let (update_table_route_txn, on_update_table_route_failure) = self
.table_route_manager()
@@ -897,7 +897,11 @@ mod tests {
table_info
);
assert_eq!(
remote_table_route.unwrap().into_inner().region_routes(),
remote_table_route
.unwrap()
.into_inner()
.region_routes()
.unwrap(),
region_routes
);
}
@@ -978,7 +982,7 @@ mod tests {
.unwrap()
.unwrap()
.into_inner();
assert_eq!(removed_table_route.region_routes(), region_routes);
assert_eq!(removed_table_route.region_routes().unwrap(), region_routes);
}
#[tokio::test]
@@ -1173,11 +1177,11 @@ mod tests {
.unwrap();
assert_eq!(
updated_route_value.region_routes()[0].leader_status,
updated_route_value.region_routes().unwrap()[0].leader_status,
Some(RegionStatus::Downgraded)
);
assert_eq!(
updated_route_value.region_routes()[1].leader_status,
updated_route_value.region_routes().unwrap()[1].leader_status,
Some(RegionStatus::Downgraded)
);
}
@@ -1271,7 +1275,8 @@ mod tests {
let current_table_route_value = DeserializedValueWithBytes::from_inner(
current_table_route_value
.inner
.update(new_region_routes.clone()),
.update(new_region_routes.clone())
.unwrap(),
);
let new_region_routes = vec![new_region_route(2, 4), new_region_route(5, 5)];
// it should be ok.
@@ -1295,13 +1300,16 @@ mod tests {
// if the current_table_route_value is wrong, it should return an error.
// The ABA problem.
let wrong_table_route_value =
DeserializedValueWithBytes::from_inner(current_table_route_value.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
]));
let wrong_table_route_value = DeserializedValueWithBytes::from_inner(
current_table_route_value
.update(vec![
new_region_route(1, 1),
new_region_route(2, 2),
new_region_route(3, 3),
new_region_route(4, 4),
])
.unwrap(),
);
assert!(table_metadata_manager
.update_table_route(
table_id,

View File

@@ -16,12 +16,12 @@ use std::collections::HashMap;
use std::fmt::Display;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use snafu::{ensure, ResultExt};
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use super::{DeserializedValueWithBytes, TableMetaValue};
use crate::error::{Result, SerdeJsonSnafu};
use crate::error::{Result, SerdeJsonSnafu, UnexpectedLogicalRouteTableSnafu};
use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
@@ -62,29 +62,48 @@ impl TableRouteValue {
}
/// Returns a new version [TableRouteValue] with `region_routes`.
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Self {
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Result<Self> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
}
);
let version = self.physical_table_route().version;
Self::Physical(PhysicalTableRouteValue {
Ok(Self::Physical(PhysicalTableRouteValue {
region_routes,
version: version + 1,
})
}))
}
/// Returns the version.
///
/// For test purposes.
#[cfg(any(test, feature = "testing"))]
pub fn version(&self) -> u64 {
self.physical_table_route().version
pub fn version(&self) -> Result<u64> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
}
);
Ok(self.physical_table_route().version)
}
/// Returns the corresponding [RegionRoute].
pub fn region_route(&self, region_id: RegionId) -> Option<RegionRoute> {
self.physical_table_route()
pub fn region_route(&self, region_id: RegionId) -> Result<Option<RegionRoute>> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
}
);
Ok(self
.physical_table_route()
.region_routes
.iter()
.find(|route| route.region.id == region_id)
.cloned()
.cloned())
}
/// Returns true if it's [TableRouteValue::Physical].
@@ -93,11 +112,14 @@ impl TableRouteValue {
}
/// Gets the [RegionRoute]s of this [TableRouteValue::Physical].
///
/// # Panics
/// The route type is not the [TableRouteValue::Physical].
pub fn region_routes(&self) -> &Vec<RegionRoute> {
&self.physical_table_route().region_routes
pub fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
ensure!(
self.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
}
);
Ok(&self.physical_table_route().region_routes)
}
fn physical_table_route(&self) -> &PhysicalTableRouteValue {
@@ -354,7 +376,7 @@ impl TableRouteManager {
) -> Result<Option<RegionDistribution>> {
self.get(table_id)
.await?
.map(|table_route| region_distribution(table_route.region_routes()))
.map(|table_route| region_distribution(table_route.region_routes()?))
.transpose()
}
}
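Note on the change above: `region_routes()`, `region_route()` and `version()` are now fallible instead of panicking on logical table routes. A minimal caller sketch, assuming the `common_meta` and `store_api` crates from this repository as dependencies (the helper names are illustrative):

use common_meta::error::Result;
use common_meta::key::table_route::TableRouteValue;
use common_meta::rpc::router::RegionRoute;
use store_api::storage::RegionId;

// Counts leaders by propagating UnexpectedLogicalRouteTable instead of panicking.
fn leader_count(route: &TableRouteValue) -> Result<usize> {
    let region_routes: &Vec<RegionRoute> = route.region_routes()?;
    Ok(region_routes.iter().filter(|r| r.leader_peer.is_some()).count())
}

// region_route() is fallible too; `?` surfaces the non-physical case to the caller.
fn find_route(route: &TableRouteValue, region_id: RegionId) -> Result<Option<RegionRoute>> {
    route.region_route(region_id)
}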

View File

@@ -27,6 +27,7 @@ pub use crate::wal::kafka::topic_manager::TopicManager;
/// Configurations for kafka wal.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct KafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,

View File

@@ -21,13 +21,16 @@ use common_telemetry::{debug, error, info};
use rskafka::client::controller::ControllerClient;
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
use rskafka::client::ClientBuilder;
use rskafka::client::partition::{Compression, UnknownTopicHandling};
use rskafka::client::{Client, ClientBuilder};
use rskafka::record::Record;
use rskafka::BackoffConfig;
use snafu::{ensure, AsErrorSource, ResultExt};
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu, DecodeJsonSnafu,
EncodeJsonSnafu, InvalidNumTopicsSnafu, Result,
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
ProduceRecordSnafu, Result,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
@@ -37,6 +40,10 @@ use crate::wal::kafka::KafkaConfig;
const CREATED_TOPICS_KEY: &str = "__created_wal_topics/kafka/";
// Each topic only has one partition for now.
// The `DEFAULT_PARTITION` refers to the index of the partition.
const DEFAULT_PARTITION: i32 = 0;
/// Manages topic initialization and selection.
pub struct TopicManager {
config: KafkaConfig,
@@ -117,14 +124,20 @@ impl TopicManager {
.await
.with_context(|_| BuildKafkaClientSnafu {
broker_endpoints: self.config.broker_endpoints.clone(),
})?
})?;
let control_client = client
.controller_client()
.context(BuildKafkaCtrlClientSnafu)?;
// Try to create missing topics.
let tasks = to_be_created
.iter()
.map(|i| self.try_create_topic(&topics[*i], &client))
.map(|i| async {
self.try_create_topic(&topics[*i], &control_client).await?;
self.try_append_noop_record(&topics[*i], &client).await?;
Ok(())
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await.map(|_| ())
}
@@ -141,6 +154,31 @@ impl TopicManager {
.collect()
}
async fn try_append_noop_record(&self, topic: &Topic, client: &Client) -> Result<()> {
let partition_client = client
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
.await
.context(BuildKafkaPartitionClientSnafu {
topic,
partition: DEFAULT_PARTITION,
})?;
partition_client
.produce(
vec![Record {
key: None,
value: None,
timestamp: rskafka::chrono::Utc::now(),
headers: Default::default(),
}],
Compression::NoCompression,
)
.await
.context(ProduceRecordSnafu { topic })?;
Ok(())
}
async fn try_create_topic(&self, topic: &Topic, client: &ControllerClient) -> Result<()> {
match client
.create_topic(

View File

@@ -109,6 +109,11 @@ impl ColumnSchema {
&mut self.metadata
}
/// Retrieve the column comment
pub fn column_comment(&self) -> Option<&String> {
self.metadata.get(COMMENT_KEY)
}
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
self.is_time_index = is_time_index;
if is_time_index {
@@ -315,12 +320,16 @@ mod tests {
#[test]
fn test_column_schema_with_metadata() {
let metadata = Metadata::from([("k1".to_string(), "v1".to_string())]);
let metadata = Metadata::from([
("k1".to_string(), "v1".to_string()),
(COMMENT_KEY.to_string(), "test comment".to_string()),
]);
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
.with_metadata(metadata)
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
.unwrap();
assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
assert_eq!("test comment", column_schema.column_comment().unwrap());
assert!(column_schema
.metadata()
.get(DEFAULT_CONSTRAINT_KEY)

View File

@@ -12,33 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use async_trait::async_trait;
use client::region::check_response_header;
use common_catalog::consts::METRIC_ENGINE;
use common_error::ext::BoxedError;
use common_meta::datanode_manager::{AffectedRows, Datanode, DatanodeManager, DatanodeRef};
use common_meta::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use common_meta::error::{self as meta_error, Result as MetaResult, UnsupportedSnafu};
use common_meta::key::table_route::{
LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue,
};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_meta::peer::Peer;
use common_meta::rpc::ddl::CreateTableTask;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::sequence::SequenceRef;
use common_meta::wal::options_allocator::allocate_region_wal_options;
use common_meta::wal::WalOptionsAllocatorRef;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::tracing;
use common_telemetry::tracing_context::{FutureExt, TracingContext};
use common_telemetry::{debug, info, tracing};
use datanode::region_server::RegionServer;
use servers::grpc::region_server::RegionServerHandler;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{RegionId, RegionNumber, TableId};
use snafu::{OptionExt, ResultExt};
use crate::error::{InvalidRegionRequestSnafu, InvokeRegionServerSnafu, Result};
@@ -109,121 +97,3 @@ impl Datanode for RegionInvoker {
.context(meta_error::ExternalSnafu)
}
}
pub struct StandaloneTableMetadataAllocator {
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
}
impl StandaloneTableMetadataAllocator {
pub fn new(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
) -> Self {
Self {
table_id_sequence,
wal_options_allocator,
}
}
async fn allocate_table_id(&self, task: &CreateTableTask) -> MetaResult<TableId> {
let table_id = if let Some(table_id) = &task.create_table.table_id {
let table_id = table_id.id;
ensure!(
!self
.table_id_sequence
.min_max()
.await
.contains(&(table_id as u64)),
UnsupportedSnafu {
operation: format!(
"create table by id {} that is reserved in this node",
table_id
)
}
);
info!(
"Received explicitly allocated table id {}, will use it directly.",
table_id
);
table_id
} else {
self.table_id_sequence.next().await? as TableId
};
Ok(table_id)
}
fn create_wal_options(
&self,
table_route: &TableRouteValue,
) -> MetaResult<HashMap<RegionNumber, String>> {
match table_route {
TableRouteValue::Physical(x) => {
let region_numbers = x
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
}
TableRouteValue::Logical(_) => Ok(HashMap::new()),
}
}
}
fn create_table_route(table_id: TableId, task: &CreateTableTask) -> TableRouteValue {
if task.create_table.engine == METRIC_ENGINE {
TableRouteValue::Logical(LogicalTableRouteValue {})
} else {
let region_routes = task
.partitions
.iter()
.enumerate()
.map(|(i, partition)| {
let region = Region {
id: RegionId::new(table_id, i as u32),
partition: Some(partition.clone().into()),
..Default::default()
};
// It's only a placeholder.
let peer = Peer::default();
RegionRoute {
region,
leader_peer: Some(peer),
follower_peers: vec![],
leader_status: None,
}
})
.collect::<Vec<_>>();
TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
}
}
#[async_trait]
impl TableMetadataAllocator for StandaloneTableMetadataAllocator {
async fn create(
&self,
_ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> MetaResult<TableMetadata> {
let table_id = self.allocate_table_id(task).await?;
let table_route = create_table_route(table_id, task);
let region_wal_options = self.create_wal_options(&table_route)?;
debug!(
"Allocated region wal options {:?} for table {}",
region_wal_options, table_id
);
Ok(TableMetadata {
table_id,
table_route,
region_wal_options,
})
}
}

View File

@@ -113,7 +113,7 @@ pub enum Error {
#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
error: Box<regex_automata::dfa::Error>,
location: Location,
},

View File

@@ -48,7 +48,7 @@
//! More detailed information regarding the encoding of the inverted indices can be found in the [RFC].
//!
//! [`InvertedIndexMetas`]: https://github.com/GreptimeTeam/greptime-proto/blob/2aaee38de81047537dfa42af9df63bcfb866e06c/proto/greptime/v1/index/inverted_index.proto#L32-L64
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/develop/docs/rfcs/2023-11-03-inverted-index.md
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/main/docs/rfcs/2023-11-03-inverted-index.md
pub mod reader;
pub mod writer;

View File

@@ -14,7 +14,7 @@
use fst::map::OpBuilder;
use fst::{IntoStreamer, Streamer};
use regex_automata::DenseDFA;
use regex_automata::dfa::dense::DFA;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
@@ -24,15 +24,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
use crate::inverted_index::search::predicate::{Predicate, Range};
use crate::inverted_index::FstMap;
type Dfa = DenseDFA<Vec<usize>, usize>;
/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
pub struct IntersectionFstApplier {
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
ranges: Vec<Range>,
/// A list of `Dfa` compiled from regular expression patterns.
dfas: Vec<Dfa>,
dfas: Vec<DFA<Vec<u32>>>,
}
impl FstApplier for IntersectionFstApplier {
@@ -88,8 +86,8 @@ impl IntersectionFstApplier {
match predicate {
Predicate::Range(range) => ranges.push(range.range),
Predicate::RegexMatch(regex) => {
let dfa = DenseDFA::new(&regex.pattern);
let dfa = dfa.context(ParseDFASnafu)?;
let dfa = DFA::new(&regex.pattern);
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
dfas.push(dfa);
}
// Rejection of `InList` predicates is enforced here.
@@ -210,47 +208,67 @@ mod tests {
#[test]
fn test_intersection_fst_applier_with_valid_pattern() {
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();
let applier = create_applier_from_pattern("a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let cases = vec![
("1", vec![1]),
("2", vec![1]),
("3", vec![1]),
("^1", vec![1]),
("^2", vec![]),
("^3", vec![]),
("^1.*", vec![1]),
("^.*2", vec![1]),
("^.*3", vec![1]),
("1$", vec![]),
("2$", vec![]),
("3$", vec![1]),
("1.*$", vec![1]),
("2.*$", vec![1]),
("3.*$", vec![1]),
("^1..$", vec![1]),
("^.2.$", vec![1]),
("^..3$", vec![1]),
("^[0-9]", vec![1]),
("^[0-9]+$", vec![1]),
("^[0-9][0-9]$", vec![]),
("^[0-9][0-9][0-9]$", vec![1]),
("^123$", vec![1]),
("a", vec![2]),
("b", vec![2]),
("c", vec![2]),
("^a", vec![2]),
("^b", vec![]),
("^c", vec![]),
("^a.*", vec![2]),
("^.*b", vec![2]),
("^.*c", vec![2]),
("a$", vec![]),
("b$", vec![]),
("c$", vec![2]),
("a.*$", vec![2]),
("b.*$", vec![2]),
("c.*$", vec![2]),
("^.[a-z]", vec![2]),
("^abc$", vec![2]),
("^ab$", vec![]),
("abc$", vec![2]),
("^a.c$", vec![2]),
("^..c$", vec![2]),
("ab", vec![2]),
(".*", vec![1, 2]),
("", vec![1, 2]),
("^$", vec![]),
("1|a", vec![1, 2]),
("^123$|^abc$", vec![1, 2]),
("^123$|d", vec![1]),
];
let applier = create_applier_from_pattern("b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("a.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern("b.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("d.?").unwrap();
let results = applier.apply(&test_fst);
assert!(results.is_empty());
let applier = create_applier_from_pattern("a.?|b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2]);
let applier = create_applier_from_pattern("d.?|a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern(".*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2, 3]);
for (pattern, expected) in cases {
let applier = create_applier_from_pattern(pattern).unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, expected);
}
}
#[test]

View File

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use common_config::wal::{KafkaConfig, KafkaWalTopic as Topic};
use dashmap::mapref::entry::Entry as DashMapEntry;
use dashmap::DashMap;
use rskafka::client::partition::{PartitionClient, UnknownTopicHandling};
use rskafka::client::producer::aggregator::RecordAggregator;
use rskafka::client::producer::{BatchProducer, BatchProducerBuilder};
use rskafka::client::{Client as RsKafkaClient, ClientBuilder};
use rskafka::BackoffConfig;
use snafu::ResultExt;
use tokio::sync::RwLock;
use crate::error::{BuildClientSnafu, BuildPartitionClientSnafu, Result};
@@ -67,7 +67,7 @@ pub(crate) struct ClientManager {
client_factory: RsKafkaClient,
/// A pool maintaining a collection of clients.
/// Key: a topic. Value: the associated client of the topic.
client_pool: DashMap<Topic, Client>,
client_pool: RwLock<HashMap<Topic, Client>>,
}
impl ClientManager {
@@ -91,18 +91,28 @@ impl ClientManager {
Ok(Self {
config: config.clone(),
client_factory: client,
client_pool: DashMap::new(),
client_pool: RwLock::new(HashMap::new()),
})
}
/// Gets the client associated with the topic. If the client does not exist, a new one will
/// be created and returned.
pub(crate) async fn get_or_insert(&self, topic: &Topic) -> Result<Client> {
match self.client_pool.entry(topic.to_string()) {
DashMapEntry::Occupied(entry) => Ok(entry.get().clone()),
DashMapEntry::Vacant(entry) => {
let topic_client = self.try_create_client(topic).await?;
Ok(entry.insert(topic_client).clone())
let client_pool = self.client_pool.read().await;
if let Some(client) = client_pool.get(topic) {
return Ok(client.clone());
}
// Manually releases the read lock.
drop(client_pool);
// Acquires the write lock.
let mut client_pool = self.client_pool.write().await;
match client_pool.get(topic) {
Some(client) => Ok(client.clone()),
None => {
let client = self.try_create_client(topic).await?;
client_pool.insert(topic.clone(), client.clone());
Ok(client)
}
}
}
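The read-then-write sequence above is the usual double-checked pattern for an async RwLock: take the cheap read lock first, and re-check under the write lock because another task may have inserted in between. A self-contained sketch of the same pattern with generic types (not this repository's `Client`), assuming tokio:

use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::RwLock;

struct Cache {
    inner: RwLock<HashMap<String, Arc<String>>>,
}

impl Cache {
    async fn get_or_insert_with<F>(&self, key: &str, create: F) -> Arc<String>
    where
        F: FnOnce() -> String,
    {
        // Fast path: shared read lock only.
        if let Some(v) = self.inner.read().await.get(key) {
            return v.clone();
        }
        // Slow path: the read guard is dropped, then re-check under the write lock.
        let mut guard = self.inner.write().await;
        if let Some(v) = guard.get(key) {
            return v.clone();
        }
        let v = Arc::new(create());
        guard.insert(key.to_string(), v.clone());
        v
    }
}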

View File

@@ -100,25 +100,24 @@ impl LogStore for KafkaLogStore {
.push(entry);
}
// Builds a record from entries belonging to a region and produces them to the Kafka server.
let region_ids = producers.keys().cloned().collect::<Vec<_>>();
// Produces entries for each region and gets the offset those entries were written to.
// The returned offset is then converted into an entry id.
let last_entry_ids = futures::future::try_join_all(producers.into_iter().map(
|(region_id, producer)| async move {
let entry_id = producer
.produce(&self.client_manager)
.await
.map(TryInto::try_into)??;
Ok((region_id, entry_id))
},
))
.await?
.into_iter()
.collect::<HashMap<_, _>>();
let tasks = producers
.into_values()
.map(|producer| producer.produce(&self.client_manager))
.collect::<Vec<_>>();
// Each produce operation returns a kafka offset of the produced record.
// The offsets are then converted to entry ids.
let entry_ids = futures::future::try_join_all(tasks)
.await?
.into_iter()
.map(TryInto::try_into)
.collect::<Result<Vec<_>>>()?;
debug!("The entries are appended at offsets {:?}", entry_ids);
debug!("Append batch result: {:?}", last_entry_ids);
Ok(AppendBatchResponse {
last_entry_ids: region_ids.into_iter().zip(entry_ids).collect(),
})
Ok(AppendBatchResponse { last_entry_ids })
}
/// Creates a new `EntryStream` to asynchronously generate `Entry` with entry ids
@@ -186,6 +185,10 @@ impl LogStore for KafkaLogStore {
record_offset, ns_clone, high_watermark
);
// Ignores noop records.
if record.record.value.is_none() {
continue;
}
let entries = decode_from_record(record.record)?;
// Filters entries by region id.
@@ -193,8 +196,6 @@ impl LogStore for KafkaLogStore {
&& entry.ns.region_id == region_id
{
yield Ok(entries);
} else {
yield Ok(vec![]);
}
// Terminates the stream if the entry with the end offset was read.
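Two points in this hunk are easy to miss: the skipped records are the empty-value noop records appended by the topic manager earlier in this comparison, and each region's Kafka offset is reused as its last entry id. A stand-alone sketch of that bookkeeping, with plain integers standing in for the real id types:

use std::collections::HashMap;

type RegionId = u64; // stand-in for store_api::storage::RegionId
type EntryId = u64;

// Each region's producer reports the Kafka offset of the record it wrote; that
// offset becomes the region's last entry id. Offsets are i64, so the conversion
// is fallible, mirroring the `map(TryInto::try_into)` above.
fn build_last_entry_ids(
    produced: Vec<(RegionId, i64)>,
) -> Result<HashMap<RegionId, EntryId>, std::num::TryFromIntError> {
    produced
        .into_iter()
        .map(|(region_id, offset)| Ok((region_id, EntryId::try_from(offset)?)))
        .collect()
}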

View File

@@ -79,6 +79,17 @@ impl MetaPeerClient {
to_stat_kv_map(kvs)
}
// Get kv information from the leader's in_mem kv store.
pub async fn get(&self, key: Vec<u8>) -> Result<Option<KeyValue>> {
let mut kvs = self.range(key, vec![], false).await?;
Ok(if kvs.is_empty() {
None
} else {
debug_assert_eq!(kvs.len(), 1);
Some(kvs.remove(0))
})
}
// Range kv information from the leader's in_mem kv store
pub async fn range(
&self,
@@ -228,7 +239,7 @@ impl MetaPeerClient {
// Check if the meta node is a leader node.
// Note: when self.election is None, we also consider the meta node to be the leader
fn is_leader(&self) -> bool {
pub(crate) fn is_leader(&self) -> bool {
self.election
.as_ref()
.map(|election| election.is_leader())

View File

@@ -32,6 +32,9 @@ use crate::pubsub::Message;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("The target peer is unavailable temporally: {}", peer_id))]
PeerUnavailable { location: Location, peer_id: u64 },
#[snafu(display("Another migration procedure is running for region: {}", region_id))]
MigrationRunning {
location: Location,
@@ -599,6 +602,13 @@ pub enum Error {
#[snafu(display("Weight array is not set"))]
NotSetWeightArray { location: Location },
#[snafu(display("Unexpected table route type: {}", err_msg))]
UnexpectedLogicalRouteTable {
location: Location,
err_msg: String,
source: common_meta::error::Error,
},
}
impl Error {
@@ -650,7 +660,8 @@ impl ErrorExt for Error {
| Error::Join { .. }
| Error::WeightArray { .. }
| Error::NotSetWeightArray { .. }
| Error::Unsupported { .. } => StatusCode::Internal,
| Error::Unsupported { .. }
| Error::PeerUnavailable { .. } => StatusCode::Internal,
Error::TableAlreadyExists { .. } => StatusCode::TableAlreadyExists,
Error::EmptyKey { .. }
| Error::MissingRequiredParameter { .. }
@@ -713,7 +724,8 @@ impl ErrorExt for Error {
| Error::TableMetadataManager { source, .. }
| Error::KvBackend { source, .. }
| Error::UpdateTableRoute { source, .. }
| Error::GetFullTableInfo { source, .. } => source.status_code(),
| Error::GetFullTableInfo { source, .. }
| Error::UnexpectedLogicalRouteTable { source, .. } => source.status_code(),
Error::InitMetadata { source, .. } | Error::InitDdlManager { source, .. } => {
source.status_code()

View File

@@ -229,7 +229,7 @@ impl HeartbeatHandlerGroup {
let _ = self.pushers.insert(key.to_string(), pusher).await;
}
pub async fn unregister(&self, key: impl AsRef<str>) -> Option<Pusher> {
pub async fn deregister(&self, key: impl AsRef<str>) -> Option<Pusher> {
let key = key.as_ref();
METRIC_META_HEARTBEAT_CONNECTION_NUM.dec();
info!("Pusher unregister: {}", key);

View File

@@ -14,27 +14,57 @@
use std::collections::HashMap;
use common_meta::util;
use common_meta::peer::Peer;
use common_meta::{util, ClusterId};
use common_time::util as time_util;
use crate::cluster::MetaPeerClientRef;
use crate::error::Result;
use crate::keys::{LeaseKey, LeaseValue, DN_LEASE_PREFIX};
fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseKey, &LeaseValue) -> bool {
move |_: &LeaseKey, v: &LeaseValue| {
((time_util::current_time_millis() - v.timestamp_millis) as u64) < lease_secs * 1000
}
}
pub async fn lookup_alive_datanode_peer(
cluster_id: ClusterId,
datanode_id: u64,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<Option<Peer>> {
let lease_filter = build_lease_filter(lease_secs);
let lease_key = LeaseKey {
cluster_id,
node_id: datanode_id,
};
let Some(kv) = meta_peer_client.get(lease_key.clone().try_into()?).await? else {
return Ok(None);
};
let lease_value: LeaseValue = kv.value.try_into()?;
if lease_filter(&lease_key, &lease_value) {
Ok(Some(Peer {
id: lease_key.node_id,
addr: lease_value.node_addr,
}))
} else {
Ok(None)
}
}
pub async fn alive_datanodes(
cluster_id: u64,
cluster_id: ClusterId,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<HashMap<LeaseKey, LeaseValue>> {
let lease_filter = |_: &LeaseKey, v: &LeaseValue| {
((time_util::current_time_millis() - v.timestamp_millis) as u64) < lease_secs * 1000
};
let lease_filter = build_lease_filter(lease_secs);
filter_datanodes(cluster_id, meta_peer_client, lease_filter).await
}
pub async fn filter_datanodes<P>(
cluster_id: u64,
cluster_id: ClusterId,
meta_peer_client: &MetaPeerClientRef,
predicate: P,
) -> Result<HashMap<LeaseKey, LeaseValue>>
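The extracted build_lease_filter reduces to a single wall-clock comparison. A self-contained sketch of that check, with plain integers standing in for LeaseValue and the telemetry clock:

// A heartbeat observed at `timestamp_millis` is still alive at `now_millis`
// if it falls inside the lease window; mirrors build_lease_filter above.
fn lease_is_alive(now_millis: i64, timestamp_millis: i64, lease_secs: u64) -> bool {
    ((now_millis - timestamp_millis) as u64) < lease_secs * 1000
}

// Example: a datanode that heartbeated 15 s ago with a 20 s lease is still alive:
// lease_is_alive(20_000, 5_000, 20) == true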

View File

@@ -18,13 +18,13 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::Peer;
use common_base::Plugins;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_grpc::channel_manager;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef};
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeperRef;
use common_meta::wal::options_allocator::WalOptionsAllocatorRef;
use common_meta::wal::WalConfig;
@@ -48,6 +48,7 @@ use crate::error::{
use crate::failure_detector::PhiAccrualFailureDetectorOptions;
use crate::handler::HeartbeatHandlerGroup;
use crate::lock::DistLockRef;
use crate::procedure::region_migration::manager::RegionMigrationManagerRef;
use crate::pubsub::{PublishRef, SubscribeManagerRef};
use crate::selector::{Selector, SelectorType};
use crate::service::mailbox::MailboxRef;
@@ -249,6 +250,7 @@ pub struct MetaSrv {
table_metadata_manager: TableMetadataManagerRef,
memory_region_keeper: MemoryRegionKeeperRef,
greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
region_migration_manager: RegionMigrationManagerRef,
plugins: Plugins,
}
@@ -328,6 +330,9 @@ impl MetaSrv {
info!("MetaSrv stopped");
});
} else {
if let Err(e) = self.wal_options_allocator.start().await {
error!(e; "Failed to start wal options allocator");
}
// Always load kv into cached kv store.
self.leader_cached_kv_backend
.load()
@@ -411,6 +416,10 @@ impl MetaSrv {
&self.memory_region_keeper
}
pub fn region_migration_manager(&self) -> &RegionMigrationManagerRef {
&self.region_migration_manager
}
pub fn publish(&self) -> Option<PublishRef> {
self.plugins.get::<PublishRef>()
}

View File

@@ -21,7 +21,7 @@ use common_base::Plugins;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_grpc::channel_manager::ChannelConfig;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::TableMetadataAllocatorRef;
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl_manager::{DdlManager, DdlManagerRef};
use common_meta::distributed_time_constants;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
@@ -57,12 +57,14 @@ use crate::metasrv::{
ElectionRef, MetaSrv, MetaSrvOptions, MetasrvInfo, SelectorContext, SelectorRef, TABLE_ID_SEQ,
};
use crate::procedure::region_failover::RegionFailoverManager;
use crate::procedure::region_migration::manager::RegionMigrationManager;
use crate::procedure::region_migration::DefaultContextFactory;
use crate::pubsub::PublishRef;
use crate::selector::lease_based::LeaseBasedSelector;
use crate::service::mailbox::MailboxRef;
use crate::service::store::cached_kv::{CheckLeader, LeaderCachedKvBackend};
use crate::state::State;
use crate::table_meta_alloc::MetaSrvTableMetadataAllocator;
use crate::table_meta_alloc::MetasrvPeerAllocator;
// TODO(fys): try use derive_builder macro
pub struct MetaSrvBuilder {
@@ -76,7 +78,7 @@ pub struct MetaSrvBuilder {
lock: Option<DistLockRef>,
datanode_manager: Option<DatanodeManagerRef>,
plugins: Option<Plugins>,
table_metadata_allocator: Option<TableMetadataAllocatorRef>,
table_metadata_allocator: Option<TableMetadataAllocator>,
}
impl MetaSrvBuilder {
@@ -148,7 +150,7 @@ impl MetaSrvBuilder {
pub fn table_metadata_allocator(
mut self,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
) -> Self {
self.table_metadata_allocator = Some(table_metadata_allocator);
self
@@ -216,12 +218,15 @@ impl MetaSrvBuilder {
.step(10)
.build(),
);
Arc::new(MetaSrvTableMetadataAllocator::new(
let peer_allocator = Arc::new(MetasrvPeerAllocator::new(
selector_ctx.clone(),
selector.clone(),
sequence.clone(),
));
TableMetadataAllocator::with_peer_allocator(
sequence,
wal_options_allocator.clone(),
))
peer_allocator,
)
});
let opening_region_keeper = Arc::new(MemoryRegionKeeper::default());
@@ -236,6 +241,17 @@ impl MetaSrvBuilder {
&opening_region_keeper,
)?;
let region_migration_manager = Arc::new(RegionMigrationManager::new(
procedure_manager.clone(),
DefaultContextFactory::new(
table_metadata_manager.clone(),
opening_region_keeper.clone(),
mailbox.clone(),
options.server_addr.clone(),
),
));
region_migration_manager.try_start()?;
let handler_group = match handler_group {
Some(handler_group) => handler_group,
None => {
@@ -323,6 +339,7 @@ impl MetaSrvBuilder {
.await,
plugins: plugins.unwrap_or_else(Plugins::default),
memory_region_keeper: opening_region_keeper,
region_migration_manager,
})
}
}
@@ -368,7 +385,7 @@ fn build_ddl_manager(
procedure_manager: &ProcedureManagerRef,
mailbox: &MailboxRef,
table_metadata_manager: &TableMetadataManagerRef,
table_metadata_allocator: TableMetadataAllocatorRef,
table_metadata_allocator: TableMetadataAllocator,
memory_region_keeper: &MemoryRegionKeeperRef,
) -> Result<DdlManagerRef> {
let datanode_clients = datanode_clients.unwrap_or_else(|| {

View File

@@ -383,12 +383,13 @@ mod tests {
use std::sync::Mutex;
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage, Peer, RequestHeader};
use api::v1::meta::{HeartbeatResponse, MailboxMessage, RequestHeader};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_meta::ddl::utils::region_storage_path;
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::peer::Peer;
use common_meta::sequence::SequenceBuilder;
use common_meta::DatanodeId;
use common_procedure::{BoxedProcedure, ProcedureId};

View File

@@ -208,6 +208,7 @@ mod tests {
let should_downgraded = table_route_value
.region_routes()
.unwrap()
.iter()
.find(|route| route.region.id.region_number() == failed_region.region_number)
.unwrap();

View File

@@ -59,7 +59,7 @@ impl RegionFailoverStart {
.iter()
.filter_map(|p| {
if p.id != failed_region.datanode_id {
Some(p.clone().into())
Some(p.clone())
} else {
None
}

View File

@@ -85,7 +85,12 @@ impl UpdateRegionMetadata {
.context(error::TableMetadataManagerSnafu)?
.context(TableRouteNotFoundSnafu { table_id })?;
let mut new_region_routes = table_route_value.region_routes().clone();
let mut new_region_routes = table_route_value
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?
.clone();
for region_route in new_region_routes.iter_mut() {
if region_route.region.id.region_number() == failed_region.region_number {
@@ -234,6 +239,7 @@ mod tests {
.unwrap()
.into_inner()
.region_routes()
.unwrap()
.clone()
}
@@ -396,8 +402,8 @@ mod tests {
.unwrap()
.into_inner();
let peers = &extract_all_peers(table_route_value.region_routes());
let actual = table_route_value.region_routes();
let peers = &extract_all_peers(table_route_value.region_routes().unwrap());
let actual = table_route_value.region_routes().unwrap();
let expected = &vec![
new_region_route(1, peers, 2),
new_region_route(2, peers, 3),
@@ -416,7 +422,7 @@ mod tests {
.unwrap()
.into_inner();
let map = region_distribution(table_route_value.region_routes()).unwrap();
let map = region_distribution(table_route_value.region_routes().unwrap()).unwrap();
assert_eq!(map.len(), 2);
assert_eq!(map.get(&2), Some(&vec![1, 3]));
assert_eq!(map.get(&3), Some(&vec![2, 4]));

View File

@@ -127,7 +127,7 @@ pub trait ContextFactory {
/// Default implementation.
#[derive(Clone)]
pub struct ContextFactoryImpl {
pub struct DefaultContextFactory {
volatile_ctx: VolatileContext,
table_metadata_manager: TableMetadataManagerRef,
opening_region_keeper: MemoryRegionKeeperRef,
@@ -135,7 +135,25 @@ pub struct ContextFactoryImpl {
server_addr: String,
}
impl ContextFactory for ContextFactoryImpl {
impl DefaultContextFactory {
/// Returns a [DefaultContextFactory].
pub fn new(
table_metadata_manager: TableMetadataManagerRef,
opening_region_keeper: MemoryRegionKeeperRef,
mailbox: MailboxRef,
server_addr: String,
) -> Self {
Self {
volatile_ctx: VolatileContext::default(),
table_metadata_manager,
opening_region_keeper,
mailbox,
server_addr,
}
}
}
impl ContextFactory for DefaultContextFactory {
fn new_context(self, persistent_ctx: PersistentContext) -> Context {
Context {
persistent_ctx,
@@ -735,7 +753,7 @@ mod tests {
.unwrap()
.version();
// Should be unchanged.
assert_eq!(table_routes_version, 0);
assert_eq!(table_routes_version.unwrap(), 0);
}
#[tokio::test]

View File

@@ -21,21 +21,23 @@ use common_meta::key::table_route::TableRouteValue;
use common_meta::peer::Peer;
use common_meta::rpc::router::RegionRoute;
use common_meta::ClusterId;
use common_procedure::{watcher, ProcedureManagerRef, ProcedureWithId};
use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId};
use common_telemetry::{error, info};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use crate::error::{self, Result};
use crate::procedure::region_migration::{
ContextFactoryImpl, PersistentContext, RegionMigrationProcedure,
DefaultContextFactory, PersistentContext, RegionMigrationProcedure,
};
pub type RegionMigrationManagerRef = Arc<RegionMigrationManager>;
/// Manager of region migration procedure.
pub(crate) struct RegionMigrationManager {
pub struct RegionMigrationManager {
procedure_manager: ProcedureManagerRef,
running_procedures: Arc<RwLock<HashMap<RegionId, RegionMigrationProcedureTask>>>,
context_factory: ContextFactoryImpl,
context_factory: DefaultContextFactory,
}
/// The guard of running [RegionMigrationProcedureTask].
@@ -55,10 +57,10 @@ impl Drop for RegionMigrationProcedureGuard {
#[derive(Debug, Clone)]
pub(crate) struct RegionMigrationProcedureTask {
cluster_id: ClusterId,
region_id: RegionId,
from_peer: Peer,
to_peer: Peer,
pub(crate) cluster_id: ClusterId,
pub(crate) region_id: RegionId,
pub(crate) from_peer: Peer,
pub(crate) to_peer: Peer,
}
impl Display for RegionMigrationProcedureTask {
@@ -93,7 +95,7 @@ impl RegionMigrationManager {
/// Returns a new [RegionMigrationManager]
pub(crate) fn new(
procedure_manager: ProcedureManagerRef,
context_factory: ContextFactoryImpl,
context_factory: DefaultContextFactory,
) -> Self {
Self {
procedure_manager,
@@ -221,7 +223,10 @@ impl RegionMigrationManager {
}
/// Submits a new region migration procedure.
pub(crate) async fn submit_procedure(&self, task: RegionMigrationProcedureTask) -> Result<()> {
pub(crate) async fn submit_procedure(
&self,
task: RegionMigrationProcedureTask,
) -> Result<Option<ProcedureId>> {
let Some(guard) = self.insert_running_procedure(&task) else {
return error::MigrationRunningSnafu {
region_id: task.region_id,
@@ -239,11 +244,14 @@ impl RegionMigrationManager {
// Safety: checked before.
let region_route = table_route
.region_route(region_id)
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?
.context(error::RegionRouteNotFoundSnafu { region_id })?;
if self.has_migrated(&region_route, &task)? {
info!("Skipping region migration task: {task}");
return Ok(());
return Ok(None);
}
self.verify_region_leader_peer(&region_route, &task)?;
@@ -274,7 +282,7 @@ impl RegionMigrationManager {
info!("Region migration procedure {procedure_id} for {task} is finished successfully!");
});
Ok(())
Ok(Some(procedure_id))
}
}
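A short caller sketch inside the metasrv crate (mirroring the admin handler later in this comparison): Ok(None) means the region was already migrated, Ok(Some(id)) means a new procedure was started.

use common_telemetry::info;

use crate::error::Result;
use crate::procedure::region_migration::manager::{
    RegionMigrationManagerRef, RegionMigrationProcedureTask,
};

async fn submit_and_log(
    manager: &RegionMigrationManagerRef,
    task: RegionMigrationProcedureTask,
) -> Result<()> {
    match manager.submit_procedure(task).await? {
        Some(procedure_id) => info!("Submitted region migration procedure {procedure_id}"),
        None => info!("Region was already migrated; nothing to submit"),
    }
    Ok(())
}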

View File

@@ -18,7 +18,7 @@ use common_meta::peer::Peer;
use common_meta::rpc::router::RegionRoute;
use common_procedure::Status;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use super::migration_end::RegionMigrationEnd;
@@ -85,6 +85,9 @@ impl RegionMigrationStart {
let region_route = table_route
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?
.iter()
.find(|route| route.region.id == region_id)
.cloned()

View File

@@ -43,7 +43,7 @@ use tokio::sync::mpsc::{Receiver, Sender};
use super::migration_abort::RegionMigrationAbort;
use super::upgrade_candidate_region::UpgradeCandidateRegion;
use super::{Context, ContextFactory, ContextFactoryImpl, State, VolatileContext};
use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext};
use crate::error::{self, Error, Result};
use crate::handler::{HeartbeatMailbox, Pusher, Pushers};
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
@@ -120,8 +120,8 @@ impl TestingEnv {
}
/// Returns a context factory for the region migration procedure.
pub fn context_factory(&self) -> ContextFactoryImpl {
ContextFactoryImpl {
pub fn context_factory(&self) -> DefaultContextFactory {
DefaultContextFactory {
table_metadata_manager: self.table_metadata_manager.clone(),
opening_region_keeper: self.opening_region_keeper.clone(),
volatile_ctx: Default::default(),
@@ -419,7 +419,7 @@ impl ProcedureMigrationTestSuite {
.unwrap()
.unwrap()
.into_inner();
let region_routes = table_route.region_routes();
let region_routes = table_route.region_routes().unwrap();
let expected_leader_id = self.context.persistent_ctx.to_peer.id;
let removed_follower_id = self.context.persistent_ctx.from_peer.id;

View File

@@ -208,8 +208,8 @@ mod tests {
.unwrap();
// It should remain unchanged.
assert_eq!(latest_table_route.version(), 0);
assert!(!latest_table_route.region_routes()[0].is_leader_downgraded());
assert_eq!(latest_table_route.version().unwrap(), 0);
assert!(!latest_table_route.region_routes().unwrap()[0].is_leader_downgraded());
assert!(ctx.volatile_ctx.table_route.is_none());
}
@@ -249,7 +249,7 @@ mod tests {
.unwrap()
.unwrap();
assert!(latest_table_route.region_routes()[0].is_leader_downgraded());
assert!(latest_table_route.region_routes().unwrap()[0].is_leader_downgraded());
assert!(ctx.volatile_ctx.table_route.is_none());
}
}

View File

@@ -170,7 +170,10 @@ mod tests {
.unwrap()
.unwrap()
.into_inner();
assert_eq!(&expected_region_routes, table_route.region_routes());
assert_eq!(
&expected_region_routes,
table_route.region_routes().unwrap()
);
}
#[tokio::test]
@@ -231,6 +234,9 @@ mod tests {
.unwrap()
.unwrap()
.into_inner();
assert_eq!(&expected_region_routes, table_route.region_routes());
assert_eq!(
&expected_region_routes,
table_route.region_routes().unwrap()
);
}
}

View File

@@ -33,7 +33,12 @@ impl UpdateMetadata {
let region_id = ctx.region_id();
let table_route_value = ctx.get_table_route_value().await?.clone();
let mut region_routes = table_route_value.region_routes().clone();
let mut region_routes = table_route_value
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?
.clone();
let region_route = region_routes
.iter_mut()
.find(|route| route.region.id == region_id)
@@ -81,7 +86,12 @@ impl UpdateMetadata {
let region_id = ctx.region_id();
let table_route_value = ctx.get_table_route_value().await?.clone();
let region_routes = table_route_value.region_routes().clone();
let region_routes = table_route_value
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?
.clone();
let region_route = region_routes
.into_iter()
.find(|route| route.region.id == region_id)
@@ -465,7 +475,7 @@ mod tests {
.unwrap()
.unwrap()
.into_inner();
let region_routes = table_route.region_routes();
let region_routes = table_route.region_routes().unwrap();
assert!(ctx.volatile_ctx.table_route.is_none());
assert!(ctx.volatile_ctx.opening_region_guard.is_none());

View File

@@ -127,7 +127,7 @@ impl RegionLeaseKeeper {
}
if let Some(table_route) = table_metadata.get(&region_id.table_id()) {
if let Some(region_route) = table_route.region_route(region_id) {
if let Ok(Some(region_route)) = table_route.region_route(region_id) {
return renew_region_lease_via_region_route(&region_route, datanode_id, region_id);
}
}

View File

@@ -14,7 +14,7 @@
use std::collections::HashSet;
use api::v1::meta::Peer;
use common_meta::peer::Peer;
use snafu::ensure;
use super::weighted_choose::{WeightedChoose, WeightedItem};
@@ -92,7 +92,7 @@ where
mod tests {
use std::collections::HashSet;
use api::v1::meta::Peer;
use common_meta::peer::Peer;
use crate::selector::common::choose_peers;
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::meta::Peer;
use common_meta::peer::Peer;
use crate::error::Result;
use crate::lease;

View File

@@ -14,8 +14,8 @@
use std::collections::HashMap;
use api::v1::meta::Peer;
use common_meta::key::TableMetadataManager;
use common_meta::peer::Peer;
use common_meta::rpc::router::find_leaders;
use common_telemetry::{debug, info};
use parking_lot::RwLock;
@@ -142,13 +142,19 @@ async fn get_leader_peer_ids(
.await
.context(error::TableMetadataManagerSnafu)
.map(|route| {
route.map_or_else(Vec::new, |route| {
find_leaders(route.region_routes())
.into_iter()
.map(|peer| peer.id)
.collect()
})
})
route.map_or_else(
|| Ok(Vec::new()),
|route| {
let region_routes = route
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu { err_msg: "" })?;
Ok(find_leaders(region_routes)
.into_iter()
.map(|peer| peer.id)
.collect())
},
)
})?
}
#[cfg(test)]

View File

@@ -14,7 +14,7 @@
use std::collections::HashMap;
use api::v1::meta::Peer;
use common_meta::peer::Peer;
use itertools::{Itertools, MinMaxResult};
use crate::keys::{StatKey, StatValue};
@@ -92,7 +92,7 @@ impl WeightCompute for RegionNumsBasedWeightCompute {
mod tests {
use std::collections::HashMap;
use api::v1::meta::Peer;
use common_meta::peer::Peer;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;

View File

@@ -93,6 +93,12 @@ pub fn make_admin_service(meta_srv: MetaSrv) -> Admin {
.route("/route", handler.clone())
.route("/route/help", handler);
let handler = region_migration::SubmitRegionMigrationTaskHandler {
region_migration_manager: meta_srv.region_migration_manager().clone(),
meta_peer_client: meta_srv.meta_peer_client().clone(),
};
let router = router.route("/region-migration", handler);
let router = Router::nest("/admin", router);
Admin::new(router)

View File

@@ -17,22 +17,24 @@ use std::num::ParseIntError;
use std::str::FromStr;
use common_meta::peer::Peer;
use common_meta::ClusterId;
use common_meta::{distributed_time_constants, ClusterId};
use serde::Serialize;
use snafu::ResultExt;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use tonic::codegen::http;
use super::HttpHandler;
use crate::cluster::MetaPeerClientRef;
use crate::error::{self, Error, Result};
pub trait PeerLookup: Send + Sync {
fn peer(&self, peer_id: u64) -> Option<Peer>;
}
use crate::lease::lookup_alive_datanode_peer;
use crate::procedure::region_migration::manager::{
RegionMigrationManagerRef, RegionMigrationProcedureTask,
};
/// The handler for submitting a region migration task.
pub struct SubmitRegionMigrationTaskHandler {
// TODO(weny): waits for https://github.com/GreptimeTeam/greptimedb/pull/3014
pub region_migration_manager: RegionMigrationManagerRef,
pub meta_peer_client: MetaPeerClientRef,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -45,7 +47,8 @@ struct SubmitRegionMigrationTaskRequest {
#[derive(Debug, Serialize)]
struct SubmitRegionMigrationTaskResponse {
procedure_id: String,
/// `None` means the region has already been migrated.
procedure_id: Option<String>,
}
fn parse_num_parameter_with_default<T, F>(
@@ -96,13 +99,63 @@ impl TryFrom<&HashMap<String, String>> for SubmitRegionMigrationTaskRequest {
}
impl SubmitRegionMigrationTaskHandler {
fn is_leader(&self) -> bool {
self.meta_peer_client.is_leader()
}
/// Checks whether the peer is available.
async fn lookup_peer(&self, cluster_id: ClusterId, peer_id: u64) -> Result<Option<Peer>> {
lookup_alive_datanode_peer(
cluster_id,
peer_id,
&self.meta_peer_client,
distributed_time_constants::DATANODE_LEASE_SECS,
)
.await
}
/// Submits a region migration task, returns the procedure id.
async fn handle_submit(
&self,
_task: SubmitRegionMigrationTaskRequest,
task: SubmitRegionMigrationTaskRequest,
) -> Result<SubmitRegionMigrationTaskResponse> {
// TODO(weny): waits for https://github.com/GreptimeTeam/greptimedb/pull/3014
todo!()
ensure!(
self.is_leader(),
error::UnexpectedSnafu {
violated: "Trying to submit a region migration procedure to non-leader meta server"
}
);
let SubmitRegionMigrationTaskRequest {
cluster_id,
region_id,
from_peer_id,
to_peer_id,
} = task;
let from_peer = self.lookup_peer(cluster_id, from_peer_id).await?.context(
error::PeerUnavailableSnafu {
peer_id: from_peer_id,
},
)?;
let to_peer = self.lookup_peer(cluster_id, to_peer_id).await?.context(
error::PeerUnavailableSnafu {
peer_id: to_peer_id,
},
)?;
let procedure_id = self
.region_migration_manager
.submit_procedure(RegionMigrationProcedureTask {
cluster_id,
region_id,
from_peer,
to_peer,
})
.await?;
Ok(SubmitRegionMigrationTaskResponse {
procedure_id: procedure_id.map(|id| id.to_string()),
})
}
}

View File

@@ -113,7 +113,7 @@ impl heartbeat_server::Heartbeat for MetaSrv {
);
if let Some(key) = pusher_key {
let _ = handler_group.unregister(&key).await;
let _ = handler_group.deregister(&key).await;
}
});

View File

@@ -12,154 +12,72 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_catalog::consts::METRIC_ENGINE;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_meta::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use common_meta::ddl::table_meta::PeerAllocator;
use common_meta::ddl::TableMetadataAllocatorContext;
use common_meta::error::{ExternalSnafu, Result as MetaResult};
use common_meta::key::table_route::{
LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue,
};
use common_meta::rpc::ddl::CreateTableTask;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::sequence::SequenceRef;
use common_meta::wal::{allocate_region_wal_options, WalOptionsAllocatorRef};
use common_meta::ClusterId;
use common_telemetry::debug;
use common_meta::peer::Peer;
use snafu::{ensure, ResultExt};
use store_api::storage::{RegionId, RegionNumber, TableId, MAX_REGION_SEQ};
use store_api::storage::MAX_REGION_SEQ;
use crate::error::{self, Result, TooManyPartitionsSnafu};
use crate::metasrv::{SelectorContext, SelectorRef};
use crate::selector::SelectorOptions;
pub struct MetaSrvTableMetadataAllocator {
pub struct MetasrvPeerAllocator {
ctx: SelectorContext,
selector: SelectorRef,
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
}
impl MetaSrvTableMetadataAllocator {
pub fn new(
ctx: SelectorContext,
selector: SelectorRef,
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
) -> Self {
Self {
ctx,
selector,
table_id_sequence,
wal_options_allocator,
}
impl MetasrvPeerAllocator {
pub fn new(ctx: SelectorContext, selector: SelectorRef) -> Self {
Self { ctx, selector }
}
async fn create_table_route(
&self,
cluster_id: ClusterId,
table_id: TableId,
task: &CreateTableTask,
) -> Result<TableRouteValue> {
let table_route = if task.create_table.engine == METRIC_ENGINE {
TableRouteValue::Logical(LogicalTableRouteValue {})
} else {
let regions = task.partitions.len();
ensure!(regions <= MAX_REGION_SEQ as usize, TooManyPartitionsSnafu);
let mut peers = self
.selector
.select(
cluster_id,
&self.ctx,
SelectorOptions {
min_required_items: regions,
allow_duplication: true,
},
)
.await?;
ensure!(
peers.len() >= regions,
error::NoEnoughAvailableDatanodeSnafu {
required: regions,
available: peers.len(),
}
);
peers.truncate(regions);
let region_routes = task
.partitions
.iter()
.enumerate()
.map(|(i, partition)| {
let region = Region {
id: RegionId::new(table_id, i as RegionNumber),
partition: Some(partition.clone().into()),
..Default::default()
};
let peer = peers[i % peers.len()].clone();
RegionRoute {
region,
leader_peer: Some(peer.into()),
..Default::default()
}
})
.collect::<Vec<_>>();
TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
};
Ok(table_route)
}
fn create_wal_options(
&self,
table_route: &TableRouteValue,
) -> MetaResult<HashMap<RegionNumber, String>> {
match table_route {
TableRouteValue::Physical(x) => {
let region_numbers = x
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
}
TableRouteValue::Logical(_) => Ok(HashMap::new()),
}
}
}
#[async_trait::async_trait]
impl TableMetadataAllocator for MetaSrvTableMetadataAllocator {
async fn create(
async fn alloc(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> MetaResult<TableMetadata> {
let table_id = self.table_id_sequence.next().await? as TableId;
regions: usize,
) -> Result<Vec<Peer>> {
ensure!(regions <= MAX_REGION_SEQ as usize, TooManyPartitionsSnafu);
let table_route = self
.create_table_route(ctx.cluster_id, table_id, task)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let mut peers = self
.selector
.select(
ctx.cluster_id,
&self.ctx,
SelectorOptions {
min_required_items: regions,
allow_duplication: true,
},
)
.await?;
let region_wal_options = self.create_wal_options(&table_route)?;
debug!(
"Allocated region wal options {:?} for table {}",
region_wal_options, table_id
ensure!(
peers.len() >= regions,
error::NoEnoughAvailableDatanodeSnafu {
required: regions,
available: peers.len(),
}
);
Ok(TableMetadata {
table_id,
table_route,
region_wal_options,
})
peers.truncate(regions);
Ok(peers)
}
}
#[async_trait]
impl PeerAllocator for MetasrvPeerAllocator {
async fn alloc(
&self,
ctx: &TableMetadataAllocatorContext,
regions: usize,
) -> MetaResult<Vec<Peer>> {
self.alloc(ctx, regions)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)
}
}
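A hedged sketch of how a PeerAllocator plugs into the shared TableMetadataAllocator via with_peer_allocator (seen in the metasrv builder hunk earlier); the fixed-list allocator below is illustrative only, not this repository's implementation:

use async_trait::async_trait;
use common_meta::ddl::table_meta::PeerAllocator;
use common_meta::ddl::TableMetadataAllocatorContext;
use common_meta::error::Result as MetaResult;
use common_meta::peer::Peer;

// Hands out peers from a fixed, assumed non-empty list, round-robin.
struct FixedPeerAllocator {
    peers: Vec<Peer>,
}

#[async_trait]
impl PeerAllocator for FixedPeerAllocator {
    async fn alloc(
        &self,
        _ctx: &TableMetadataAllocatorContext,
        regions: usize,
    ) -> MetaResult<Vec<Peer>> {
        Ok((0..regions)
            .map(|i| self.peers[i % self.peers.len()].clone())
            .collect())
    }
}

// Wiring, with `sequence` and `wal_options_allocator` built as in the metasrv builder:
// TableMetadataAllocator::with_peer_allocator(sequence, wal_options_allocator, Arc::new(FixedPeerAllocator { peers }))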

View File

@@ -39,6 +39,7 @@ datafusion.workspace = true
datatypes.workspace = true
futures.workspace = true
humantime-serde.workspace = true
index.workspace = true
lazy_static = "1.4"
log-store = { workspace = true, optional = true }
memcomparable = "0.2"

View File

@@ -423,6 +423,23 @@ pub enum Error {
#[snafu(source)]
error: parquet::errors::ParquetError,
},
#[snafu(display("Column not found, column: {column}"))]
ColumnNotFound { column: String, location: Location },
#[snafu(display("Failed to build index applier"))]
BuildIndexApplier {
#[snafu(source)]
source: index::inverted_index::error::Error,
location: Location,
},
#[snafu(display("Failed to convert value"))]
ConvertValue {
#[snafu(source)]
source: datatypes::error::Error,
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -468,6 +485,7 @@ impl ErrorExt for Error {
| InvalidRequest { .. }
| FillDefault { .. }
| ConvertColumnDataType { .. }
| ColumnNotFound { .. }
| InvalidMetadata { .. } => StatusCode::InvalidArguments,
RegionMetadataNotFound { .. }
| Join { .. }
@@ -504,6 +522,8 @@ impl ErrorExt for Error {
JsonOptions { .. } => StatusCode::InvalidArguments,
EmptyRegionDir { .. } | EmptyManifestDir { .. } => StatusCode::RegionNotFound,
ArrowReader { .. } => StatusCode::StorageUnavailable,
BuildIndexApplier { source, .. } => source.status_code(),
ConvertValue { source, .. } => source.status_code(),
}
}

View File

@@ -257,8 +257,9 @@ impl RegionOpener {
let version_control = Arc::new(VersionControl::new(version));
if !self.skip_wal_replay {
info!(
"Start replaying memtable at flushed_entry_id {} for region {}",
flushed_entry_id, region_id
"Start replaying memtable at flushed_entry_id + 1 {} for region {}",
flushed_entry_id + 1,
region_id
);
replay_memtable(
wal,
@@ -380,9 +381,12 @@ pub(crate) async fn replay_memtable<S: LogStore>(
// data in the WAL.
let mut last_entry_id = flushed_entry_id;
let mut region_write_ctx = RegionWriteCtx::new(region_id, version_control, wal_options.clone());
let mut wal_stream = wal.scan(region_id, flushed_entry_id, wal_options)?;
let replay_from_entry_id = flushed_entry_id + 1;
let mut wal_stream = wal.scan(region_id, replay_from_entry_id, wal_options)?;
while let Some(res) = wal_stream.next().await {
let (entry_id, entry) = res?;
debug_assert!(entry_id > flushed_entry_id);
last_entry_id = last_entry_id.max(entry_id);
for mutation in entry.mutations {
rows_replayed += mutation
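A note on the change above: entries with id <= flushed_entry_id are already persisted by the last flush, so replay must begin at the next id; the debug_assert enforces that invariant. A tiny stand-alone sketch:

// Replay resumes right after the last flushed entry, never at it.
fn replay_start(flushed_entry_id: u64) -> u64 {
    flushed_entry_id + 1
}

// e.g. flushed_entry_id = 10 -> the WAL is scanned from entry id 11 onward,
// so every replayed entry satisfies entry_id > flushed_entry_id.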

View File

@@ -84,7 +84,11 @@ impl SortField {
}
impl SortField {
fn serialize(&self, serializer: &mut Serializer<&mut Vec<u8>>, value: &ValueRef) -> Result<()> {
pub(crate) fn serialize(
&self,
serializer: &mut Serializer<&mut Vec<u8>>,
value: &ValueRef,
) -> Result<()> {
macro_rules! cast_value_and_serialize {
(
$self: ident;

View File

@@ -16,5 +16,6 @@
pub mod file;
pub mod file_purger;
mod index;
pub mod parquet;
pub(crate) mod version;

View File

@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(dead_code)]
pub mod applier;
mod codec;

View File

@@ -0,0 +1,47 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod builder;
use index::inverted_index::search::index_apply::IndexApplier;
use object_store::ObjectStore;
/// The [`SstIndexApplier`] is responsible for applying predicates to the provided SST files
/// and returning the relevant row group ids for further scan.
pub struct SstIndexApplier {
/// The root directory of the region.
region_dir: String,
/// Object store responsible for accessing SST files.
object_store: ObjectStore,
/// Predefined index applier used to apply predicates to index files
/// and return the relevant row group ids for further scan.
index_applier: Box<dyn IndexApplier>,
}
impl SstIndexApplier {
/// Creates a new [`SstIndexApplier`].
pub fn new(
region_dir: String,
object_store: ObjectStore,
index_applier: Box<dyn IndexApplier>,
) -> Self {
Self {
region_dir,
object_store,
index_applier,
}
}
}

View File

@@ -0,0 +1,261 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod between;
// TODO(zhongzc): This PR is too large. The following modules are coming soon.
// mod comparison;
// mod eq_list;
// mod in_list;
// mod regex_match;
use std::collections::HashMap;
use api::v1::SemanticType;
use common_query::logical_plan::Expr;
use common_telemetry::warn;
use datafusion_common::ScalarValue;
use datafusion_expr::Expr as DfExpr;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use index::inverted_index::search::index_apply::PredicatesIndexApplier;
use index::inverted_index::search::predicate::Predicate;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadata;
use crate::error::{BuildIndexApplierSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, Result};
use crate::row_converter::SortField;
use crate::sst::index::applier::SstIndexApplier;
use crate::sst::index::codec::IndexValueCodec;
type ColumnName = String;
/// Constructs an [`SstIndexApplier`] which applies predicates to SST files during scan.
pub struct SstIndexApplierBuilder<'a> {
/// Directory of the region, required argument for constructing [`SstIndexApplier`].
region_dir: String,
/// Object store, required argument for constructing [`SstIndexApplier`].
object_store: ObjectStore,
/// Metadata of the region, used to get metadata like column type.
metadata: &'a RegionMetadata,
/// Stores predicates during traversal on the Expr tree.
output: HashMap<ColumnName, Vec<Predicate>>,
}
impl<'a> SstIndexApplierBuilder<'a> {
/// Creates a new [`SstIndexApplierBuilder`].
pub fn new(
region_dir: String,
object_store: ObjectStore,
metadata: &'a RegionMetadata,
) -> Self {
Self {
region_dir,
object_store,
metadata,
output: HashMap::default(),
}
}
/// Consumes the builder and constructs an [`SstIndexApplier`] from the provided expressions.
/// Returns `None` if no predicate can be collected from them.
pub fn build(mut self, exprs: &[Expr]) -> Result<Option<SstIndexApplier>> {
for expr in exprs {
self.traverse_and_collect(expr.df_expr());
}
if self.output.is_empty() {
return Ok(None);
}
let predicates = self.output.into_iter().collect();
let applier = PredicatesIndexApplier::try_from(predicates);
Ok(Some(SstIndexApplier::new(
self.region_dir,
self.object_store,
Box::new(applier.context(BuildIndexApplierSnafu)?),
)))
}
/// Recursively traverses expressions to collect predicates.
/// Results are stored in `self.output`.
fn traverse_and_collect(&mut self, expr: &DfExpr) {
let res = match expr {
DfExpr::Between(between) => self.collect_between(between),
// TODO(zhongzc): This PR is too large. The following arms are coming soon.
// DfExpr::InList(in_list) => self.collect_inlist(in_list),
// DfExpr::BinaryExpr(BinaryExpr { left, op, right }) => match op {
// Operator::And => {
// self.traverse_and_collect(left);
// self.traverse_and_collect(right);
// Ok(())
// }
// Operator::Or => self.collect_or_eq_list(left, right),
// Operator::Eq => self.collect_eq(left, right),
// Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq => {
// self.collect_comparison_expr(left, op, right)
// }
// Operator::RegexMatch => self.collect_regex_match(left, right),
// _ => Ok(()),
// },
// TODO(zhongzc): support more expressions, e.g. IsNull, IsNotNull, ...
_ => Ok(()),
};
if let Err(err) = res {
warn!(err; "Failed to collect predicates, ignore it. expr: {expr}");
}
}
/// Helper function to add a predicate to the output.
fn add_predicate(&mut self, column_name: &str, predicate: Predicate) {
match self.output.get_mut(column_name) {
Some(predicates) => predicates.push(predicate),
None => {
self.output.insert(column_name.to_string(), vec![predicate]);
}
}
}
/// Helper function to get the column type of a tag column.
/// Returns `None` if the column is not a tag column.
fn tag_column_type(&self, column_name: &str) -> Result<Option<ConcreteDataType>> {
let column = self
.metadata
.column_by_name(column_name)
.context(ColumnNotFoundSnafu {
column: column_name,
})?;
Ok((column.semantic_type == SemanticType::Tag)
.then(|| column.column_schema.data_type.clone()))
}
/// Helper function to get a non-null literal.
fn nonnull_lit(expr: &DfExpr) -> Option<&ScalarValue> {
match expr {
DfExpr::Literal(lit) if !lit.is_null() => Some(lit),
_ => None,
}
}
/// Helper function to get the column name of a column expression.
fn column_name(expr: &DfExpr) -> Option<&str> {
match expr {
DfExpr::Column(column) => Some(&column.name),
_ => None,
}
}
/// Helper function to encode a literal into bytes.
fn encode_lit(lit: &ScalarValue, data_type: ConcreteDataType) -> Result<Vec<u8>> {
let value = Value::try_from(lit.clone()).context(ConvertValueSnafu)?;
let mut bytes = vec![];
let field = SortField::new(data_type);
IndexValueCodec::encode_value(value.as_value_ref(), &field, &mut bytes)?;
Ok(bytes)
}
}
#[cfg(test)]
mod tests {
use api::v1::SemanticType;
use datafusion_common::Column;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use object_store::services::Memory;
use object_store::ObjectStore;
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
use super::*;
pub(crate) fn test_region_metadata() -> RegionMetadata {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1234, 5678));
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new("a", ConcreteDataType::string_datatype(), false),
semantic_type: SemanticType::Tag,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new("b", ConcreteDataType::string_datatype(), false),
semantic_type: SemanticType::Field,
column_id: 2,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"c",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 3,
})
.primary_key(vec![1]);
builder.build().unwrap()
}
pub(crate) fn test_object_store() -> ObjectStore {
ObjectStore::new(Memory::default()).unwrap().finish()
}
pub(crate) fn tag_column() -> DfExpr {
DfExpr::Column(Column {
relation: None,
name: "a".to_string(),
})
}
pub(crate) fn field_column() -> DfExpr {
DfExpr::Column(Column {
relation: None,
name: "b".to_string(),
})
}
pub(crate) fn nonexistent_column() -> DfExpr {
DfExpr::Column(Column {
relation: None,
name: "nonexistent".to_string(),
})
}
pub(crate) fn string_lit(s: impl Into<String>) -> DfExpr {
DfExpr::Literal(ScalarValue::Utf8(Some(s.into())))
}
pub(crate) fn int64_lit(i: impl Into<i64>) -> DfExpr {
DfExpr::Literal(ScalarValue::Int64(Some(i.into())))
}
pub(crate) fn encoded_string(s: impl Into<String>) -> Vec<u8> {
let mut bytes = vec![];
IndexValueCodec::encode_value(
Value::from(s.into()).as_value_ref(),
&SortField::new(ConcreteDataType::string_datatype()),
&mut bytes,
)
.unwrap();
bytes
}
}
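
A minimal usage sketch of the builder above, not part of the diff. It assumes the surrounding mito2 crate context shown in the imports; the `build_index_applier` helper and the `scan_exprs` parameter are hypothetical names standing in for whatever the scan path passes down.

```rust
use common_query::logical_plan::Expr;
use object_store::ObjectStore;
use store_api::metadata::RegionMetadata;

use crate::error::Result;
use crate::sst::index::applier::builder::SstIndexApplierBuilder;
use crate::sst::index::applier::SstIndexApplier;

// Hypothetical glue: builds an index applier for a scan, if any predicate is usable.
fn build_index_applier(
    region_dir: String,
    object_store: ObjectStore,
    metadata: &RegionMetadata,
    scan_exprs: &[Expr],
) -> Result<Option<SstIndexApplier>> {
    // `build` walks each expression tree, keeps only predicates on tag columns,
    // and returns `Ok(None)` when nothing can be pushed down to the index.
    SstIndexApplierBuilder::new(region_dir, object_store, metadata).build(scan_exprs)
}
```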

View File

@@ -0,0 +1,171 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use datafusion_expr::Between;
use index::inverted_index::search::predicate::{Bound, Predicate, Range, RangePredicate};
use crate::error::Result;
use crate::sst::index::applier::builder::SstIndexApplierBuilder;
impl<'a> SstIndexApplierBuilder<'a> {
/// Collects a `BETWEEN` expression in the form of `column BETWEEN lit AND lit`.
pub(crate) fn collect_between(&mut self, between: &Between) -> Result<()> {
if between.negated {
return Ok(());
}
let Some(column_name) = Self::column_name(&between.expr) else {
return Ok(());
};
let Some(data_type) = self.tag_column_type(column_name)? else {
return Ok(());
};
let Some(low) = Self::nonnull_lit(&between.low) else {
return Ok(());
};
let Some(high) = Self::nonnull_lit(&between.high) else {
return Ok(());
};
let predicate = Predicate::Range(RangePredicate {
range: Range {
lower: Some(Bound {
inclusive: true,
value: Self::encode_lit(low, data_type.clone())?,
}),
upper: Some(Bound {
inclusive: true,
value: Self::encode_lit(high, data_type)?,
}),
},
});
self.add_predicate(column_name, predicate);
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::Error;
use crate::sst::index::applier::builder::tests::{
encoded_string, field_column, int64_lit, nonexistent_column, string_lit, tag_column,
test_object_store, test_region_metadata,
};
#[test]
fn test_collect_between_basic() {
let metadata = test_region_metadata();
let mut builder =
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);
let between = Between {
negated: false,
expr: Box::new(tag_column()),
low: Box::new(string_lit("abc")),
high: Box::new(string_lit("def")),
};
builder.collect_between(&between).unwrap();
let predicates = builder.output.get("a").unwrap();
assert_eq!(predicates.len(), 1);
assert_eq!(
predicates[0],
Predicate::Range(RangePredicate {
range: Range {
lower: Some(Bound {
inclusive: true,
value: encoded_string("abc"),
}),
upper: Some(Bound {
inclusive: true,
value: encoded_string("def"),
}),
}
})
);
}
#[test]
fn test_collect_between_negated() {
let metadata = test_region_metadata();
let mut builder =
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);
let between = Between {
negated: true,
expr: Box::new(tag_column()),
low: Box::new(string_lit("abc")),
high: Box::new(string_lit("def")),
};
builder.collect_between(&between).unwrap();
assert!(builder.output.is_empty());
}
#[test]
fn test_collect_between_field_column() {
let metadata = test_region_metadata();
let mut builder =
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);
let between = Between {
negated: false,
expr: Box::new(field_column()),
low: Box::new(string_lit("abc")),
high: Box::new(string_lit("def")),
};
builder.collect_between(&between).unwrap();
assert!(builder.output.is_empty());
}
#[test]
fn test_collect_between_type_mismatch() {
let metadata = test_region_metadata();
let mut builder =
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);
let between = Between {
negated: false,
expr: Box::new(tag_column()),
low: Box::new(int64_lit(123)),
high: Box::new(int64_lit(456)),
};
let res = builder.collect_between(&between);
assert!(matches!(res, Err(Error::FieldTypeMismatch { .. })));
assert!(builder.output.is_empty());
}
#[test]
fn test_collect_between_nonexistent_column() {
let metadata = test_region_metadata();
let mut builder =
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);
let between = Between {
negated: false,
expr: Box::new(nonexistent_column()),
low: Box::new(string_lit("abc")),
high: Box::new(string_lit("def")),
};
let res = builder.collect_between(&between);
assert!(matches!(res, Err(Error::ColumnNotFound { .. })));
assert!(builder.output.is_empty());
}
}

View File

@@ -0,0 +1,65 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use datatypes::value::ValueRef;
use memcomparable::Serializer;
use crate::error::Result;
use crate::row_converter::SortField;
/// Encodes index values according to their data types for sorting and storage use.
pub struct IndexValueCodec;
impl IndexValueCodec {
/// Serializes a `ValueRef` using the data type defined in `SortField` and writes
/// the result into a buffer.
///
/// # Arguments
/// * `value` - The value to be encoded.
/// * `field` - Contains data type to guide serialization.
/// * `buffer` - Destination buffer for the serialized value.
pub fn encode_value(value: ValueRef, field: &SortField, buffer: &mut Vec<u8>) -> Result<()> {
buffer.reserve(field.estimated_size());
let mut serializer = Serializer::new(buffer);
field.serialize(&mut serializer, &value)
}
}
#[cfg(test)]
mod tests {
use datatypes::data_type::ConcreteDataType;
use super::*;
use crate::error::Error;
#[test]
fn test_encode_value_basic() {
let value = ValueRef::from("hello");
let field = SortField::new(ConcreteDataType::string_datatype());
let mut buffer = Vec::new();
IndexValueCodec::encode_value(value, &field, &mut buffer).unwrap();
assert!(!buffer.is_empty());
}
#[test]
fn test_encode_value_type_mismatch() {
let value = ValueRef::from("hello");
let field = SortField::new(ConcreteDataType::int64_datatype());
let mut buffer = Vec::new();
let res = IndexValueCodec::encode_value(value, &field, &mut buffer);
assert!(matches!(res, Err(Error::FieldTypeMismatch { .. })));
}
}
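
A small sketch (not part of the diff) of why the codec above matters: memcomparable encoding keeps byte-wise ordering aligned with value ordering, so encoded index values can be range-compared directly. The helper names below are hypothetical; only `IndexValueCodec::encode_value` and `SortField::new` come from the code above, and the crate-internal imports assume the mito2 context.

```rust
use datatypes::data_type::ConcreteDataType;
use datatypes::value::ValueRef;

use crate::error::Result;
use crate::row_converter::SortField;
use crate::sst::index::codec::IndexValueCodec;

// Hypothetical helper: encodes a string value the same way the index does.
fn encode_str(s: &str) -> Result<Vec<u8>> {
    let field = SortField::new(ConcreteDataType::string_datatype());
    let mut buffer = Vec::new();
    IndexValueCodec::encode_value(ValueRef::from(s), &field, &mut buffer)?;
    Ok(buffer)
}

// "abc" sorts before "abd", and so do their encodings, which is the property
// the BETWEEN-to-range translation in the applier builder relies on.
fn ordering_is_preserved() -> Result<bool> {
    Ok(encode_str("abc")? < encode_str("abd")?)
}
```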

View File

@@ -119,6 +119,13 @@ pub enum Error {
region_id: RegionId,
location: Location,
},
#[snafu(display("Unexpected table route type: {}", err_msg))]
UnexpectedLogicalRouteTable {
location: Location,
err_msg: String,
source: common_meta::error::Error,
},
}
impl ErrorExt for Error {
@@ -138,6 +145,7 @@ impl ErrorExt for Error {
Error::FindDatanode { .. } => StatusCode::InvalidArguments,
Error::TableRouteManager { source, .. } => source.status_code(),
Error::MissingDefaultValue { .. } => StatusCode::Internal,
Error::UnexpectedLogicalRouteTable { source, .. } => source.status_code(),
}
}

View File

@@ -75,8 +75,13 @@ impl PartitionRuleManager {
.context(error::TableRouteManagerSnafu)?
.context(error::FindTableRoutesSnafu { table_id })?
.into_inner();
Ok(RegionRoutes(route.region_routes().clone()))
let region_routes =
route
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?;
Ok(RegionRoutes(region_routes.clone()))
}
pub async fn find_table_partitions(&self, table_id: TableId) -> Result<Vec<PartitionInfo>> {
@@ -87,7 +92,12 @@ impl PartitionRuleManager {
.context(error::TableRouteManagerSnafu)?
.context(error::FindTableRoutesSnafu { table_id })?
.into_inner();
let region_routes = route.region_routes();
let region_routes =
route
.region_routes()
.context(error::UnexpectedLogicalRouteTableSnafu {
err_msg: "{self:?} is a non-physical TableRouteValue.",
})?;
ensure!(
!region_routes.is_empty(),

View File

@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
ahash.workspace = true
async-recursion = "1.0"
async-trait.workspace = true
bytemuck.workspace = true

View File

@@ -19,6 +19,9 @@ mod normalize;
mod planner;
mod range_manipulate;
mod series_divide;
#[cfg(test)]
mod test_util;
mod union_distinct_on;
use datafusion::arrow::datatypes::{ArrowPrimitiveType, TimestampMillisecondType};
pub use empty_metric::{build_special_time_expr, EmptyMetric, EmptyMetricExec, EmptyMetricStream};
@@ -28,5 +31,6 @@ pub use normalize::{SeriesNormalize, SeriesNormalizeExec, SeriesNormalizeStream}
pub use planner::PromExtensionPlanner;
pub use range_manipulate::{RangeManipulate, RangeManipulateExec, RangeManipulateStream};
pub use series_divide::{SeriesDivide, SeriesDivideExec, SeriesDivideStream};
pub use union_distinct_on::{UnionDistinctOn, UnionDistinctOnExec, UnionDistinctOnStream};
pub(crate) type Millisecond = <TimestampMillisecondType as ArrowPrimitiveType>::Native;

View File

@@ -445,40 +445,12 @@ impl InstantManipulateStream {
#[cfg(test)]
mod test {
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Schema, TimestampMillisecondType,
};
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::prelude::SessionContext;
use datatypes::arrow::array::TimestampMillisecondArray;
use datatypes::arrow_array::StringArray;
use super::*;
const TIME_INDEX_COLUMN: &str = "timestamp";
fn prepare_test_data() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
Field::new("path", DataType::Utf8, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
180_000, 240_000, // every 60s
241_000, 271_000, 291_000, // others
])) as _;
let field_column = Arc::new(Float64Array::from(vec![1.0; 10])) as _;
let path_column = Arc::new(StringArray::from(vec!["foo"; 10])) as _;
let data = RecordBatch::try_new(
schema.clone(),
vec![timestamp_column, field_column, path_column],
)
.unwrap();
MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}
use crate::extension_plan::test_util::{
prepare_test_data, prepare_test_data_with_nan, TIME_INDEX_COLUMN,
};
async fn do_normalize_test(
start: Millisecond,
@@ -749,22 +721,6 @@ mod test {
do_normalize_test(190_000, 300_000, 30_000, 10_000, expected, false).await;
}
fn prepare_test_data_with_nan() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
])) as _;
let field_column =
Arc::new(Float64Array::from(vec![0.0, f64::NAN, 6.0, f64::NAN, 12.0])) as _;
let data =
RecordBatch::try_new(schema.clone(), vec![timestamp_column, field_column]).unwrap();
MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}
#[tokio::test]
async fn lookback_10s_interval_10s_with_nan() {
let expected = String::from(

View File

@@ -21,7 +21,7 @@ use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode};
use datafusion::physical_plan::ExecutionPlan;
use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner};
use super::HistogramFold;
use super::{HistogramFold, UnionDistinctOn};
use crate::extension_plan::{
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
};
@@ -50,6 +50,11 @@ impl ExtensionPlanner for PromExtensionPlanner {
Ok(Some(node.to_execution_plan(session_state, planner)?))
} else if let Some(node) = node.as_any().downcast_ref::<HistogramFold>() {
Ok(Some(node.to_execution_plan(physical_inputs[0].clone())))
} else if let Some(node) = node.as_any().downcast_ref::<UnionDistinctOn>() {
Ok(Some(node.to_execution_plan(
physical_inputs[0].clone(),
physical_inputs[1].clone(),
)))
} else {
Ok(None)
}

View File

@@ -0,0 +1,64 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Utils for testing extension plan
use std::sync::Arc;
use common_recordbatch::DfRecordBatch as RecordBatch;
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Schema, TimestampMillisecondType,
};
use datafusion::physical_plan::memory::MemoryExec;
use datatypes::arrow::array::TimestampMillisecondArray;
use datatypes::arrow_array::StringArray;
pub(crate) const TIME_INDEX_COLUMN: &str = "timestamp";
pub(crate) fn prepare_test_data() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
Field::new("path", DataType::Utf8, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
180_000, 240_000, // every 60s
241_000, 271_000, 291_000, // others
])) as _;
let field_column = Arc::new(Float64Array::from(vec![1.0; 10])) as _;
let path_column = Arc::new(StringArray::from(vec!["foo"; 10])) as _;
let data = RecordBatch::try_new(
schema.clone(),
vec![timestamp_column, field_column, path_column],
)
.unwrap();
MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}
pub(crate) fn prepare_test_data_with_nan() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
])) as _;
let field_column = Arc::new(Float64Array::from(vec![0.0, f64::NAN, 6.0, f64::NAN, 12.0])) as _;
let data = RecordBatch::try_new(schema.clone(), vec![timestamp_column, field_column]).unwrap();
MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}

View File

@@ -0,0 +1,576 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use ahash::{HashMap, RandomState};
use datafusion::arrow::array::UInt64Array;
use datafusion::arrow::datatypes::SchemaRef;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::DFSchemaRef;
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
use datafusion::physical_expr::PhysicalSortExpr;
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
hash_utils, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
RecordBatchStream, SendableRecordBatchStream, Statistics,
};
use datatypes::arrow::compute;
use futures::future::BoxFuture;
use futures::{ready, Stream, StreamExt, TryStreamExt};
/// A special kind of `UNION` (`OR` in PromQL) operator, for a PromQL-specific use case.
///
/// This operator is similar to `UNION` from SQL, but it only accepts two inputs. The
/// main difference is that it treats the left child and the right child differently:
/// - All columns from the left child are output.
/// - Collisions (non-distinct rows) are checked only on the columns specified by `compare_keys`.
/// - When there is a collision:
/// - If the collision comes from the right child itself, only the first observed row is
/// preserved. All others are discarded.
/// - If the collision comes from the left child, the row in the right child is discarded.
/// - The output order is not maintained. This plan outputs the left child first, then the right child.
/// - The output schema contains all columns from the left and right child plans.
///
/// From the implementation perspective, this operator is similar to `HashJoin`, but the
/// build side is the right child and the probe side is the left child. Another difference
/// is that probing removes matching rows instead of keeping them.
///
/// This plan exhausts the right child first to build the probe hash table, then streams
/// the left side and uses it to "mask" the hash table.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct UnionDistinctOn {
left: LogicalPlan,
right: LogicalPlan,
/// The columns to compare for equality.
/// TIME INDEX is included.
compare_keys: Vec<String>,
ts_col: String,
output_schema: DFSchemaRef,
}
impl UnionDistinctOn {
pub fn name() -> &'static str {
"UnionDistinctOn"
}
pub fn new(
left: LogicalPlan,
right: LogicalPlan,
compare_keys: Vec<String>,
ts_col: String,
output_schema: DFSchemaRef,
) -> Self {
Self {
left,
right,
compare_keys,
ts_col,
output_schema,
}
}
pub fn to_execution_plan(
&self,
left_exec: Arc<dyn ExecutionPlan>,
right_exec: Arc<dyn ExecutionPlan>,
) -> Arc<dyn ExecutionPlan> {
Arc::new(UnionDistinctOnExec {
left: left_exec,
right: right_exec,
compare_keys: self.compare_keys.clone(),
ts_col: self.ts_col.clone(),
output_schema: Arc::new(self.output_schema.as_ref().into()),
metric: ExecutionPlanMetricsSet::new(),
random_state: RandomState::new(),
})
}
}
impl UserDefinedLogicalNodeCore for UnionDistinctOn {
fn name(&self) -> &str {
Self::name()
}
fn inputs(&self) -> Vec<&LogicalPlan> {
vec![&self.left, &self.right]
}
fn schema(&self) -> &DFSchemaRef {
&self.output_schema
}
fn expressions(&self) -> Vec<Expr> {
vec![]
}
fn fmt_for_explain(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"UnionDistinctOn: on col=[{:?}], ts_col=[{}]",
self.compare_keys, self.ts_col
)
}
fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self {
assert_eq!(inputs.len(), 2);
let left = inputs[0].clone();
let right = inputs[1].clone();
Self {
left,
right,
compare_keys: self.compare_keys.clone(),
ts_col: self.ts_col.clone(),
output_schema: self.output_schema.clone(),
}
}
}
#[derive(Debug)]
pub struct UnionDistinctOnExec {
left: Arc<dyn ExecutionPlan>,
right: Arc<dyn ExecutionPlan>,
compare_keys: Vec<String>,
ts_col: String,
output_schema: SchemaRef,
metric: ExecutionPlanMetricsSet,
/// Shared `RandomState` for the hashing algorithm
random_state: RandomState,
}
impl ExecutionPlan for UnionDistinctOnExec {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.output_schema.clone()
}
fn required_input_distribution(&self) -> Vec<Distribution> {
vec![Distribution::SinglePartition, Distribution::SinglePartition]
}
fn output_partitioning(&self) -> Partitioning {
Partitioning::UnknownPartitioning(1)
}
/// [UnionDistinctOnExec] will output left first, then right.
/// So the order of the output is not maintained.
fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
None
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
vec![self.left.clone(), self.right.clone()]
}
fn with_new_children(
self: Arc<Self>,
children: Vec<Arc<dyn ExecutionPlan>>,
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
assert_eq!(children.len(), 2);
let left = children[0].clone();
let right = children[1].clone();
Ok(Arc::new(UnionDistinctOnExec {
left,
right,
compare_keys: self.compare_keys.clone(),
ts_col: self.ts_col.clone(),
output_schema: self.output_schema.clone(),
metric: self.metric.clone(),
random_state: self.random_state.clone(),
}))
}
fn execute(
&self,
partition: usize,
context: Arc<TaskContext>,
) -> DataFusionResult<SendableRecordBatchStream> {
let left_stream = self.left.execute(partition, context.clone())?;
let right_stream = self.right.execute(partition, context.clone())?;
// Convert column name to column index. Add one for the time column.
let mut key_indices = Vec::with_capacity(self.compare_keys.len() + 1);
for key in &self.compare_keys {
let index = self
.output_schema
.column_with_name(key)
.map(|(i, _)| i)
.ok_or_else(|| DataFusionError::Internal(format!("Column {} not found", key)))?;
key_indices.push(index);
}
let ts_index = self
.output_schema
.column_with_name(&self.ts_col)
.map(|(i, _)| i)
.ok_or_else(|| {
DataFusionError::Internal(format!("Column {} not found", self.ts_col))
})?;
key_indices.push(ts_index);
// Build right hash table future.
let hashed_data_future = HashedDataFut::Pending(Box::pin(HashedData::new(
right_stream,
self.random_state.clone(),
key_indices.clone(),
)));
let baseline_metric = BaselineMetrics::new(&self.metric, partition);
Ok(Box::pin(UnionDistinctOnStream {
left: left_stream,
right: hashed_data_future,
compare_keys: key_indices,
output_schema: self.output_schema.clone(),
metric: baseline_metric,
}))
}
fn metrics(&self) -> Option<MetricsSet> {
Some(self.metric.clone_inner())
}
fn statistics(&self) -> Statistics {
Statistics::default()
}
}
impl DisplayAs for UnionDistinctOnExec {
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
write!(
f,
"UnionDistinctOnExec: on col=[{:?}], ts_col=[{}]",
self.compare_keys, self.ts_col
)
}
}
}
}
// TODO(ruihang): some unused fields are for metrics, which will be implemented later.
#[allow(dead_code)]
pub struct UnionDistinctOnStream {
left: SendableRecordBatchStream,
right: HashedDataFut,
/// Includes the time index
compare_keys: Vec<usize>,
output_schema: SchemaRef,
metric: BaselineMetrics,
}
impl UnionDistinctOnStream {
fn poll_impl(&mut self, cx: &mut Context<'_>) -> Poll<Option<<Self as Stream>::Item>> {
// resolve the right stream
let right = match self.right {
HashedDataFut::Pending(ref mut fut) => {
let right = ready!(fut.as_mut().poll(cx))?;
self.right = HashedDataFut::Ready(right);
let HashedDataFut::Ready(right_ref) = &mut self.right else {
unreachable!()
};
right_ref
}
HashedDataFut::Ready(ref mut right) => right,
HashedDataFut::Empty => return Poll::Ready(None),
};
// poll left and probe with right
let next_left = ready!(self.left.poll_next_unpin(cx));
match next_left {
Some(Ok(left)) => {
// observe left batch and return it
right.update_map(&left)?;
Poll::Ready(Some(Ok(left)))
}
Some(Err(e)) => Poll::Ready(Some(Err(e))),
None => {
// left stream is exhausted, so we can send the right part
let right = std::mem::replace(&mut self.right, HashedDataFut::Empty);
let HashedDataFut::Ready(data) = right else {
unreachable!()
};
Poll::Ready(Some(data.finish()))
}
}
}
}
impl RecordBatchStream for UnionDistinctOnStream {
fn schema(&self) -> SchemaRef {
self.output_schema.clone()
}
}
impl Stream for UnionDistinctOnStream {
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
self.poll_impl(cx)
}
}
/// Simple future state for [HashedData]
enum HashedDataFut {
/// The result is not ready
Pending(BoxFuture<'static, DataFusionResult<HashedData>>),
/// The result is ready
Ready(HashedData),
/// The result is taken
Empty,
}
/// All input batches and their hash table
struct HashedData {
// TODO(ruihang): use `JoinHashMap` instead after upgrading to DF 34.0
/// Hash table over all input batches. The key is the hash value, and the value
/// is the row index into `batch`.
hash_map: HashMap<u64, usize>,
/// Output batch.
batch: RecordBatch,
/// The indices of the columns to be hashed.
hash_key_indices: Vec<usize>,
random_state: RandomState,
}
impl HashedData {
pub async fn new(
input: SendableRecordBatchStream,
random_state: RandomState,
hash_key_indices: Vec<usize>,
) -> DataFusionResult<Self> {
// Collect all batches from the input stream
let initial = (Vec::new(), 0);
let (batches, _num_rows) = input
.try_fold(initial, |mut acc, batch| async {
// Update rowcount
acc.1 += batch.num_rows();
// Push batch to output
acc.0.push(batch);
Ok(acc)
})
.await?;
// Create hash for each batch
let mut hash_map = HashMap::default();
let mut hashes_buffer = Vec::new();
let mut interleave_indices = Vec::new();
for (batch_number, batch) in batches.iter().enumerate() {
hashes_buffer.resize(batch.num_rows(), 0);
// get columns for hashing
let arrays = hash_key_indices
.iter()
.map(|i| batch.column(*i).clone())
.collect::<Vec<_>>();
// compute hash
let hash_values =
hash_utils::create_hashes(&arrays, &random_state, &mut hashes_buffer)?;
for (row_number, hash_value) in hash_values.iter().enumerate() {
// Only keeps the first observed row for each hash value
if hash_map
.try_insert(*hash_value, interleave_indices.len())
.is_ok()
{
interleave_indices.push((batch_number, row_number));
}
}
}
// Finalize the hash map
let batch = interleave_batches(batches, interleave_indices)?;
Ok(Self {
hash_map,
batch,
hash_key_indices,
random_state,
})
}
/// Removes from the hash map the rows whose hash values are present in the
/// input record batch.
pub fn update_map(&mut self, input: &RecordBatch) -> DataFusionResult<()> {
// get columns for hashing
let mut hashes_buffer = Vec::new();
let arrays = self
.hash_key_indices
.iter()
.map(|i| input.column(*i).clone())
.collect::<Vec<_>>();
// compute hash
hashes_buffer.resize(input.num_rows(), 0);
let hash_values =
hash_utils::create_hashes(&arrays, &self.random_state, &mut hashes_buffer)?;
// remove those hashes
for hash in hash_values {
self.hash_map.remove(hash);
}
Ok(())
}
pub fn finish(self) -> DataFusionResult<RecordBatch> {
let valid_indices = self.hash_map.values().copied().collect::<Vec<_>>();
let result = take_batch(&self.batch, &valid_indices)?;
Ok(result)
}
}
/// Utility function to interleave batches. Based on [interleave](datafusion::arrow::compute::interleave)
fn interleave_batches(
batches: Vec<RecordBatch>,
indices: Vec<(usize, usize)>,
) -> DataFusionResult<RecordBatch> {
let schema = batches[0].schema();
// transform batches into arrays
let mut arrays = vec![vec![]; schema.fields().len()];
for batch in &batches {
for (i, array) in batch.columns().iter().enumerate() {
arrays[i].push(array.as_ref());
}
}
// interleave arrays
let mut interleaved_arrays = Vec::with_capacity(arrays.len());
for array in arrays {
interleaved_arrays.push(compute::interleave(&array, &indices)?);
}
// assemble new record batch
RecordBatch::try_new(schema.clone(), interleaved_arrays).map_err(DataFusionError::ArrowError)
}
/// Utility function to take rows from a record batch. Based on [take](datafusion::arrow::compute::take)
fn take_batch(batch: &RecordBatch, indices: &[usize]) -> DataFusionResult<RecordBatch> {
// fast path
if batch.num_rows() == indices.len() {
return Ok(batch.clone());
}
let schema = batch.schema();
let indices_array = UInt64Array::from_iter(indices.iter().map(|i| *i as u64));
let arrays = batch
.columns()
.iter()
.map(|array| compute::take(array, &indices_array, None))
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(DataFusionError::ArrowError)?;
let result = RecordBatch::try_new(schema, arrays).map_err(DataFusionError::ArrowError)?;
Ok(result)
}
#[cfg(test)]
mod test {
use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use super::*;
#[test]
fn test_interleave_batches() {
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
]);
let batch1 = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(Int32Array::from(vec![1, 2, 3])),
Arc::new(Int32Array::from(vec![4, 5, 6])),
],
)
.unwrap();
let batch2 = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(Int32Array::from(vec![7, 8, 9])),
Arc::new(Int32Array::from(vec![10, 11, 12])),
],
)
.unwrap();
let batch3 = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(Int32Array::from(vec![13, 14, 15])),
Arc::new(Int32Array::from(vec![16, 17, 18])),
],
)
.unwrap();
let batches = vec![batch1, batch2, batch3];
let indices = vec![(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1)];
let result = interleave_batches(batches, indices).unwrap();
let expected = RecordBatch::try_new(
Arc::new(schema),
vec![
Arc::new(Int32Array::from(vec![1, 7, 13, 2, 8, 14])),
Arc::new(Int32Array::from(vec![4, 10, 16, 5, 11, 17])),
],
)
.unwrap();
assert_eq!(result, expected);
}
#[test]
fn test_take_batch() {
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
]);
let batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(Int32Array::from(vec![1, 2, 3])),
Arc::new(Int32Array::from(vec![4, 5, 6])),
],
)
.unwrap();
let indices = vec![0, 2];
let result = take_batch(&batch, &indices).unwrap();
let expected = RecordBatch::try_new(
Arc::new(schema),
vec![
Arc::new(Int32Array::from(vec![1, 3])),
Arc::new(Int32Array::from(vec![4, 6])),
],
)
.unwrap();
assert_eq!(result, expected);
}
}
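
To make the collision rules in the doc comment above concrete, here is a standalone sketch, not part of the diff, that reduces the operator to plain key/value rows instead of hashed `RecordBatch`es; all names are hypothetical.

```rust
use std::collections::HashMap;

// Left rows always pass through; a right row survives only if its key was
// neither seen earlier on the right nor present anywhere on the left.
fn union_distinct_on(
    left: Vec<(&'static str, i64)>,
    right: Vec<(&'static str, i64)>,
) -> Vec<(&'static str, i64)> {
    // Right-vs-right collisions: keep only the first observed row per key.
    let mut first_right: HashMap<&'static str, (&'static str, i64)> = HashMap::new();
    for row in right {
        first_right.entry(row.0).or_insert(row);
    }
    let mut out = Vec::new();
    for row in left {
        // Left-vs-right collisions: the right row is discarded ("masked").
        first_right.remove(row.0);
        out.push(row);
    }
    // Surviving right rows are appended after the left side.
    out.extend(first_right.into_values());
    out
}

fn main() {
    let left = vec![("a", 1), ("b", 2)];
    let right = vec![("b", 20), ("c", 30), ("c", 31)];
    // Prints the left rows first, then the one surviving right row ("c", 30).
    println!("{:?}", union_distinct_on(left, right));
}
```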

View File

@@ -14,6 +14,7 @@
#![feature(option_get_or_insert_default)]
#![feature(let_chains)]
#![feature(map_try_insert)]
pub mod error;
pub mod extension_plan;

View File

@@ -51,7 +51,7 @@ use crate::error::{
};
use crate::extension_plan::{
build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond,
RangeManipulate, SeriesDivide, SeriesNormalize,
RangeManipulate, SeriesDivide, SeriesNormalize, UnionDistinctOn,
};
use crate::functions::{
AbsentOverTime, AvgOverTime, Changes, CountOverTime, Delta, Deriv, HoltWinters, IDelta,
@@ -1489,6 +1489,7 @@ impl PromPlanner {
.context(DataFusionPlanningSnafu)
}
/// Build a set operator (AND/OR/UNLESS)
fn set_op_on_non_field_columns(
&self,
left: LogicalPlan,
@@ -1501,6 +1502,10 @@ impl PromPlanner {
let mut left_tag_col_set = left_tag_cols.into_iter().collect::<HashSet<_>>();
let mut right_tag_col_set = right_tag_cols.into_iter().collect::<HashSet<_>>();
if matches!(op.id(), token::T_LOR) {
return self.or_operator(left, right, left_tag_col_set, right_tag_col_set, modifier);
}
// apply modifier
if let Some(modifier) = modifier {
// one-to-many and many-to-one are not supported
@@ -1545,7 +1550,8 @@ impl PromPlanner {
)
};
let join_keys = left_tag_col_set
.into_iter()
.iter()
.cloned()
.chain([self.ctx.time_index_column.clone().unwrap()])
.collect::<Vec<_>>();
@@ -1579,17 +1585,122 @@ impl PromPlanner {
.build()
.context(DataFusionPlanningSnafu),
token::T_LOR => {
// `OR` cannot be expressed by `UNION` precisely.
// it will generate unexpected results when schemas don't match
UnsupportedExprSnafu {
name: "set operation `OR`",
}
.fail()
self.or_operator(left, right, left_tag_col_set, right_tag_col_set, modifier)
}
_ => UnexpectedTokenSnafu { token: op }.fail(),
}
}
// TODO(ruihang): change function name
fn or_operator(
&self,
left: LogicalPlan,
right: LogicalPlan,
left_tag_cols_set: HashSet<String>,
right_tag_cols_set: HashSet<String>,
modifier: &Option<BinModifier>,
) -> Result<LogicalPlan> {
// prepare hash sets
let all_tags = left_tag_cols_set
.union(&right_tag_cols_set)
.cloned()
.collect::<HashSet<_>>();
let tags_not_in_left = all_tags
.difference(&left_tag_cols_set)
.cloned()
.collect::<Vec<_>>();
let tags_not_in_right = all_tags
.difference(&right_tag_cols_set)
.cloned()
.collect::<Vec<_>>();
let left_qualifier = left.schema().field(0).qualifier().cloned();
let right_qualifier = right.schema().field(0).qualifier().cloned();
let left_qualifier_string = left_qualifier
.as_ref()
.map(|l| l.to_string())
.unwrap_or_default();
let right_qualifier_string = right_qualifier
.as_ref()
.map(|r| r.to_string())
.unwrap_or_default();
// step 0: fill all columns in output schema
let all_columns_set = left
.schema()
.fields()
.iter()
.chain(right.schema().fields().iter())
.map(|field| field.name().clone())
.collect::<HashSet<_>>();
let mut all_columns = all_columns_set.into_iter().collect::<Vec<_>>();
// sort to ensure the generated schema is not volatile
all_columns.sort_unstable();
// step 1: align schema using project, fill non-exist columns with null
let left_proj_exprs = all_columns.iter().map(|col| {
if tags_not_in_left.contains(col) {
DfExpr::Literal(ScalarValue::Utf8(None)).alias(col.to_string())
} else {
DfExpr::Column(Column::new(left_qualifier.clone(), col))
}
});
let right_proj_exprs = all_columns.iter().map(|col| {
if tags_not_in_right.contains(col) {
DfExpr::Literal(ScalarValue::Utf8(None)).alias(col.to_string())
} else {
DfExpr::Column(Column::new(right_qualifier.clone(), col))
}
});
let left_projected = LogicalPlanBuilder::from(left)
.project(left_proj_exprs)
.context(DataFusionPlanningSnafu)?
.alias(left_qualifier_string.clone())
.context(DataFusionPlanningSnafu)?
.build()
.context(DataFusionPlanningSnafu)?;
let right_projected = LogicalPlanBuilder::from(right)
.project(right_proj_exprs)
.context(DataFusionPlanningSnafu)?
.alias(right_qualifier_string.clone())
.context(DataFusionPlanningSnafu)?
.build()
.context(DataFusionPlanningSnafu)?;
// step 2: compute match columns
let mut match_columns = if let Some(modifier) = modifier
&& let Some(matching) = &modifier.matching
{
match matching {
// keeps columns mentioned in `on`
LabelModifier::Include(on) => on.labels.clone(),
// removes columns mentioned in `ignoring`
LabelModifier::Exclude(ignoring) => {
let ignoring = ignoring.labels.iter().cloned().collect::<HashSet<_>>();
all_tags.difference(&ignoring).cloned().collect()
}
}
} else {
all_tags.iter().cloned().collect()
};
// sort to ensure the generated plan is not volatile
match_columns.sort_unstable();
// step 3: build `UnionDistinctOn` plan
let schema = left_projected.schema().clone();
let union_distinct_on = UnionDistinctOn::new(
left_projected,
right_projected,
match_columns,
self.ctx.time_index_column.clone().unwrap(),
schema,
);
let result = LogicalPlan::Extension(Extension {
node: Arc::new(union_distinct_on),
});
Ok(result)
}
/// Build a projection that project and perform operation expr for every value columns.
/// Non-value columns (tag and timestamp) will be preserved in the projection.
///

View File

@@ -1 +1 @@
v0.4.4
v0.4.5

View File

@@ -0,0 +1,19 @@
## Starts a standalone Kafka
```bash
docker compose -f docker-compose-standalone.yml up kafka -d
```
## Lists running services
```bash
docker compose -f docker-compose-standalone.yml ps
```
## Stops the standalone Kafka
```bash
docker compose -f docker-compose-standalone.yml stop kafka
```
## Stops and removes the standalone Kafka
```bash
docker compose -f docker-compose-standalone.yml down kafka
```
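Not part of the committed README: while testing, the standard compose `logs` subcommand can be used in the same way to follow the broker output.
```bash
docker compose -f docker-compose-standalone.yml logs -f kafka
```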

View File

@@ -521,11 +521,15 @@ CREATE TABLE {table_name} (
.unwrap()
.into_inner();
let region_to_dn_map = region_distribution(table_route_value.region_routes())
.unwrap()
.iter()
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
let region_to_dn_map = region_distribution(
table_route_value
.region_routes()
.expect("physical table route"),
)
.unwrap()
.iter()
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
assert!(region_to_dn_map.len() <= instance.datanodes().len());
let stmt = QueryLanguageParser::parse_sql(&format!(

View File

@@ -216,11 +216,15 @@ mod tests {
.unwrap()
.into_inner();
let region_to_dn_map = region_distribution(table_route_value.region_routes())
.unwrap()
.iter()
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
let region_to_dn_map = region_distribution(
table_route_value
.region_routes()
.expect("region routes should be physical"),
)
.unwrap()
.iter()
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
assert!(region_to_dn_map.len() <= instance.datanodes().len());
let stmt = QueryLanguageParser::parse_sql("SELECT ts, host FROM demo ORDER BY ts").unwrap();

View File

@@ -19,6 +19,7 @@ use common_base::Plugins;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_config::KvBackendConfig;
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::TableMetadataManager;
use common_meta::region_keeper::MemoryRegionKeeper;
@@ -30,7 +31,6 @@ use datanode::config::DatanodeOptions;
use datanode::datanode::DatanodeBuilder;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataAllocator;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use crate::test_util::{self, create_tmp_dir_and_datanode_opts, StorageType, TestGuard};
@@ -123,10 +123,8 @@ impl GreptimeDbStandaloneBuilder {
wal_meta.clone(),
kv_backend.clone(),
));
let table_meta_allocator = Arc::new(StandaloneTableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),
));
let table_meta_allocator =
TableMetadataAllocator::new(table_id_sequence, wal_options_allocator.clone());
let ddl_task_executor = Arc::new(
DdlManager::try_new(

View File

@@ -1775,45 +1775,53 @@ async fn test_information_schema_dot_columns(instance: Arc<dyn MockInstance>) {
let output = execute_sql(&instance, sql).await;
let expected = "\
+---------------+--------------------+------------+---------------+-----------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+---------------+--------------------+------------+---------------+-----------+---------------+
| greptime | information_schema | columns | table_catalog | String | FIELD |
| greptime | information_schema | columns | table_schema | String | FIELD |
| greptime | information_schema | columns | table_name | String | FIELD |
| greptime | information_schema | columns | column_name | String | FIELD |
| greptime | information_schema | columns | data_type | String | FIELD |
| greptime | information_schema | columns | semantic_type | String | FIELD |
| greptime | public | numbers | number | UInt32 | TAG |
| greptime | information_schema | tables | table_catalog | String | FIELD |
| greptime | information_schema | tables | table_schema | String | FIELD |
| greptime | information_schema | tables | table_name | String | FIELD |
| greptime | information_schema | tables | table_type | String | FIELD |
| greptime | information_schema | tables | table_id | UInt32 | FIELD |
| greptime | information_schema | tables | engine | String | FIELD |
+---------------+--------------------+------------+---------------+-----------+---------------+";
+---------------+--------------------+------------+----------------+-----------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+---------------+--------------------+------------+----------------+-----------+---------------+
| greptime | information_schema | columns | table_catalog | String | FIELD |
| greptime | information_schema | columns | table_schema | String | FIELD |
| greptime | information_schema | columns | table_name | String | FIELD |
| greptime | information_schema | columns | column_name | String | FIELD |
| greptime | information_schema | columns | data_type | String | FIELD |
| greptime | information_schema | columns | semantic_type | String | FIELD |
| greptime | information_schema | columns | column_default | String | FIELD |
| greptime | information_schema | columns | is_nullable | String | FIELD |
| greptime | information_schema | columns | column_type | String | FIELD |
| greptime | information_schema | columns | column_comment | String | FIELD |
| greptime | public | numbers | number | UInt32 | TAG |
| greptime | information_schema | tables | table_catalog | String | FIELD |
| greptime | information_schema | tables | table_schema | String | FIELD |
| greptime | information_schema | tables | table_name | String | FIELD |
| greptime | information_schema | tables | table_type | String | FIELD |
| greptime | information_schema | tables | table_id | UInt32 | FIELD |
| greptime | information_schema | tables | engine | String | FIELD |
+---------------+--------------------+------------+----------------+-----------+---------------+";
check_output_stream(output, expected).await;
let output = execute_sql_with(&instance, sql, query_ctx).await;
let expected = "\
+-----------------+--------------------+---------------+---------------+----------------------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+-----------------+--------------------+---------------+---------------+----------------------+---------------+
| another_catalog | another_schema | another_table | i | TimestampMillisecond | TIMESTAMP |
| another_catalog | information_schema | columns | table_catalog | String | FIELD |
| another_catalog | information_schema | columns | table_schema | String | FIELD |
| another_catalog | information_schema | columns | table_name | String | FIELD |
| another_catalog | information_schema | columns | column_name | String | FIELD |
| another_catalog | information_schema | columns | data_type | String | FIELD |
| another_catalog | information_schema | columns | semantic_type | String | FIELD |
| another_catalog | information_schema | tables | table_catalog | String | FIELD |
| another_catalog | information_schema | tables | table_schema | String | FIELD |
| another_catalog | information_schema | tables | table_name | String | FIELD |
| another_catalog | information_schema | tables | table_type | String | FIELD |
| another_catalog | information_schema | tables | table_id | UInt32 | FIELD |
| another_catalog | information_schema | tables | engine | String | FIELD |
+-----------------+--------------------+---------------+---------------+----------------------+---------------+";
+-----------------+--------------------+---------------+----------------+----------------------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+-----------------+--------------------+---------------+----------------+----------------------+---------------+
| another_catalog | another_schema | another_table | i | TimestampMillisecond | TIMESTAMP |
| another_catalog | information_schema | columns | table_catalog | String | FIELD |
| another_catalog | information_schema | columns | table_schema | String | FIELD |
| another_catalog | information_schema | columns | table_name | String | FIELD |
| another_catalog | information_schema | columns | column_name | String | FIELD |
| another_catalog | information_schema | columns | data_type | String | FIELD |
| another_catalog | information_schema | columns | semantic_type | String | FIELD |
| another_catalog | information_schema | columns | column_default | String | FIELD |
| another_catalog | information_schema | columns | is_nullable | String | FIELD |
| another_catalog | information_schema | columns | column_type | String | FIELD |
| another_catalog | information_schema | columns | column_comment | String | FIELD |
| another_catalog | information_schema | tables | table_catalog | String | FIELD |
| another_catalog | information_schema | tables | table_schema | String | FIELD |
| another_catalog | information_schema | tables | table_name | String | FIELD |
| another_catalog | information_schema | tables | table_type | String | FIELD |
| another_catalog | information_schema | tables | table_id | UInt32 | FIELD |
| another_catalog | information_schema | tables | engine | String | FIELD |
+-----------------+--------------------+---------------+----------------+----------------------+---------------+";
check_output_stream(output, expected).await;
}

View File

@@ -15,11 +15,11 @@
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::Peer;
use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_meta::key::table_route::TableRouteKey;
use common_meta::key::{RegionDistribution, TableMetaKey};
use common_meta::peer::Peer;
use common_meta::{distributed_time_constants, RegionIdent};
use common_procedure::{watcher, ProcedureWithId};
use common_query::Output;

View File

@@ -130,10 +130,21 @@ tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g, job)
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} or http_requests{g="production"};
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
+------------+----------+-----+---------------------+-------+
| g | instance | job | ts | val |
+------------+----------+-----+---------------------+-------+
| canary | 0 | api | 1970-01-01T00:50:00 | 300.0 |
| canary | 0 | app | 1970-01-01T00:50:00 | 700.0 |
| canary | 1 | api | 1970-01-01T00:50:00 | 400.0 |
| canary | 1 | app | 1970-01-01T00:50:00 | 800.0 |
| production | 0 | api | 1970-01-01T00:50:00 | 100.0 |
| production | 0 | app | 1970-01-01T00:50:00 | 500.0 |
| production | 1 | api | 1970-01-01T00:50:00 | 200.0 |
| production | 1 | app | 1970-01-01T00:50:00 | 600.0 |
+------------+----------+-----+---------------------+-------+
-- # On overlap the rhs samples must be dropped.
-- eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"}
@@ -143,10 +154,10 @@ Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- {group="canary", instance="1", job="app-server"} 801
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or http_requests{instance="1"};
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
Error: 1004(InvalidArguments), Internal error during building DataFusion plan: No field named http_requests.val. Valid fields are http_requests.job, http_requests.instance, http_requests.g, http_requests.ts, "val + Float64(1)".
-- # Matching only on instance excludes everything that has instance=0/1 but includes
-- # entries without the instance label.
@@ -161,7 +172,7 @@ Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a);
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
Error: 1004(InvalidArguments), Internal error during building DataFusion plan: No field named cpu_count.val. Valid fields are cpu_count.ts.
-- eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a)
-- {group="canary", instance="0", job="api-server"} 301
@@ -174,7 +185,7 @@ Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or ignoring(l, g, job) (http_requests or cpu_count or vector_matching_a);
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
Error: 1004(InvalidArguments), Internal error during building DataFusion plan: No field named cpu_count.val. Valid fields are cpu_count.ts.
-- eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
@@ -268,3 +279,128 @@ drop table vector_matching_a;
Affected Rows: 0
-- the following cases are not from Prometheus.
create table t1 (ts timestamp time index, job string primary key, val double);
Affected Rows: 0
insert into t1 values (0, "a", 1.0), (500000, "b", 2.0), (1000000, "a", 3.0), (1500000, "c", 4.0);
Affected Rows: 4
create table t2 (ts timestamp time index, val double);
Affected Rows: 0
insert into t2 values (0, 0), (300000, 0), (600000, 0), (900000, 0), (1200000, 0), (1500000, 0), (1800000, 0);
Affected Rows: 7
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or t2;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:00:00 | 0.0 |
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:13:20 | 0.0 |
| | 1970-01-01T00:20:00 | 0.0 |
| | 1970-01-01T00:26:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
| a | 1970-01-01T00:00:00 | 1.0 |
| a | 1970-01-01T00:20:00 | 3.0 |
| b | 1970-01-01T00:13:20 | 2.0 |
| c | 1970-01-01T00:26:40 | 4.0 |
+-----+---------------------+-----+
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or on () t2;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
| a | 1970-01-01T00:00:00 | 1.0 |
| a | 1970-01-01T00:20:00 | 3.0 |
| b | 1970-01-01T00:13:20 | 2.0 |
| c | 1970-01-01T00:26:40 | 4.0 |
+-----+---------------------+-----+
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or on (job) t2;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:00:00 | 0.0 |
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:13:20 | 0.0 |
| | 1970-01-01T00:20:00 | 0.0 |
| | 1970-01-01T00:26:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
| a | 1970-01-01T00:00:00 | 1.0 |
| a | 1970-01-01T00:20:00 | 3.0 |
| b | 1970-01-01T00:13:20 | 2.0 |
| c | 1970-01-01T00:26:40 | 4.0 |
+-----+---------------------+-----+
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or t1;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:00:00 | 0.0 |
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:13:20 | 0.0 |
| | 1970-01-01T00:20:00 | 0.0 |
| | 1970-01-01T00:26:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
| a | 1970-01-01T00:00:00 | 1.0 |
| a | 1970-01-01T00:20:00 | 3.0 |
| b | 1970-01-01T00:13:20 | 2.0 |
| c | 1970-01-01T00:26:40 | 4.0 |
+-----+---------------------+-----+
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or on () t1;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:00:00 | 0.0 |
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:13:20 | 0.0 |
| | 1970-01-01T00:20:00 | 0.0 |
| | 1970-01-01T00:26:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
+-----+---------------------+-----+
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or on(job) t1;
+-----+---------------------+-----+
| job | ts | val |
+-----+---------------------+-----+
| | 1970-01-01T00:00:00 | 0.0 |
| | 1970-01-01T00:06:40 | 0.0 |
| | 1970-01-01T00:13:20 | 0.0 |
| | 1970-01-01T00:20:00 | 0.0 |
| | 1970-01-01T00:26:40 | 0.0 |
| | 1970-01-01T00:33:20 | 0.0 |
| a | 1970-01-01T00:00:00 | 1.0 |
| a | 1970-01-01T00:20:00 | 3.0 |
| b | 1970-01-01T00:13:20 | 2.0 |
| c | 1970-01-01T00:26:40 | 4.0 |
+-----+---------------------+-----+
drop table t1;
Affected Rows: 0
drop table t2;
Affected Rows: 0
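These added cases exercise the Prometheus-style `or` set operation: every left-hand series is kept, and a right-hand series is added only when no left-hand series carries the same matching labels (all labels by default, or the subset given by `on (...)`). A rough SQL analogue of `t1 or on (job) t2` for a single evaluation step, written against the t1/t2 schemas created above; this sketch is illustrative only and is not part of the test suite.
-- Illustrative only, not part of the test: approximate `t1 or on (job) t2`
-- at the evaluation step ts = 0. Keep every t1 row, then add t2 rows whose
-- `job` label value (absent here, i.e. NULL) does not occur on the left.
SELECT ts, job, val FROM t1 WHERE ts = '1970-01-01 00:00:00'
UNION ALL
SELECT ts, CAST(NULL AS STRING) AS job, val FROM t2
WHERE ts = '1970-01-01 00:00:00'
  AND NOT EXISTS (
    SELECT 1 FROM t1
    WHERE t1.ts = '1970-01-01 00:00:00' AND t1.job IS NULL
  );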

View File

@@ -79,7 +79,7 @@ tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g, job)
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} or http_requests{g="production"};
-- # On overlap the rhs samples must be dropped.
@@ -90,7 +90,7 @@ tql eval (3000, 3000, '1s') http_requests{g="canary"} or http_requests{g="produc
-- {group="canary", instance="1", job="app-server"} 801
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or http_requests{instance="1"};
@@ -173,3 +173,35 @@ drop table http_requests;
drop table cpu_count;
drop table vector_matching_a;
-- the following cases are not from Prometheus.
create table t1 (ts timestamp time index, job string primary key, val double);
insert into t1 values (0, "a", 1.0), (500000, "b", 2.0), (1000000, "a", 3.0), (1500000, "c", 4.0);
create table t2 (ts timestamp time index, val double);
insert into t2 values (0, 0), (300000, 0), (600000, 0), (900000, 0), (1200000, 0), (1500000, 0), (1800000, 0);
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or t2;
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or on () t2;
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t1 or on (job) t2;
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or t1;
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or on () t1;
-- SQLNESS SORT_RESULT 3 1
tql eval (0, 2000, '400') t2 or on(job) t1;
drop table t1;
drop table t2;

View File

@@ -17,14 +17,22 @@ Affected Rows: 0
show tables;
+-------------------+
| Tables |
+-------------------+
| build_info |
| column_privileges |
| column_statistics |
| columns |
| engines |
| tables |
+-------------------+
+---------------------------------------+
| Tables |
+---------------------------------------+
| build_info |
| character_sets |
| check_constraints |
| collation_character_set_applicability |
| collations |
| column_privileges |
| column_statistics |
| columns |
| engines |
| events |
| files |
| key_column_usage |
| schemata |
| tables |
+---------------------------------------+

View File

@@ -9,59 +9,166 @@ from information_schema.tables
where table_name != 'scripts'
order by table_schema, table_name;
+---------------+--------------------+-------------------+-----------------+----------+-------------+
| table_catalog | table_schema | table_name | table_type | table_id | engine |
+---------------+--------------------+-------------------+-----------------+----------+-------------+
| greptime | information_schema | build_info | LOCAL TEMPORARY | 8 | |
| greptime | information_schema | column_privileges | LOCAL TEMPORARY | 6 | |
| greptime | information_schema | column_statistics | LOCAL TEMPORARY | 7 | |
| greptime | information_schema | columns | LOCAL TEMPORARY | 4 | |
| greptime | information_schema | engines | LOCAL TEMPORARY | 5 | |
| greptime | information_schema | tables | LOCAL TEMPORARY | 3 | |
| greptime | public | numbers | LOCAL TEMPORARY | 2 | test_engine |
+---------------+--------------------+-------------------+-----------------+----------+-------------+
+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+
| table_catalog | table_schema | table_name | table_type | table_id | engine |
+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+
| greptime | information_schema | build_info | LOCAL TEMPORARY | 8 | |
| greptime | information_schema | character_sets | LOCAL TEMPORARY | 9 | |
| greptime | information_schema | check_constraints | LOCAL TEMPORARY | 12 | |
| greptime | information_schema | collation_character_set_applicability | LOCAL TEMPORARY | 11 | |
| greptime | information_schema | collations | LOCAL TEMPORARY | 10 | |
| greptime | information_schema | column_privileges | LOCAL TEMPORARY | 6 | |
| greptime | information_schema | column_statistics | LOCAL TEMPORARY | 7 | |
| greptime | information_schema | columns | LOCAL TEMPORARY | 4 | |
| greptime | information_schema | engines | LOCAL TEMPORARY | 5 | |
| greptime | information_schema | events | LOCAL TEMPORARY | 13 | |
| greptime | information_schema | files | LOCAL TEMPORARY | 14 | |
| greptime | information_schema | key_column_usage | LOCAL TEMPORARY | 16 | |
| greptime | information_schema | schemata | LOCAL TEMPORARY | 15 | |
| greptime | information_schema | tables | LOCAL TEMPORARY | 3 | |
| greptime | public | numbers | LOCAL TEMPORARY | 2 | test_engine |
+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+
select * from information_schema.columns order by table_schema, table_name;
+---------------+--------------------+-------------------+------------------+-----------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+---------------+--------------------+-------------------+------------------+-----------+---------------+
| greptime | information_schema | build_info | pkg_version | String | FIELD |
| greptime | information_schema | build_info | git_dirty | String | FIELD |
| greptime | information_schema | build_info | git_commit_short | String | FIELD |
| greptime | information_schema | build_info | git_commit | String | FIELD |
| greptime | information_schema | build_info | git_branch | String | FIELD |
| greptime | information_schema | column_privileges | grantee | String | FIELD |
| greptime | information_schema | column_privileges | is_grantable | String | FIELD |
| greptime | information_schema | column_privileges | privilege_type | String | FIELD |
| greptime | information_schema | column_privileges | column_name | String | FIELD |
| greptime | information_schema | column_privileges | table_name | String | FIELD |
| greptime | information_schema | column_privileges | table_schema | String | FIELD |
| greptime | information_schema | column_privileges | table_catalog | String | FIELD |
| greptime | information_schema | column_statistics | histogram | String | FIELD |
| greptime | information_schema | column_statistics | column_name | String | FIELD |
| greptime | information_schema | column_statistics | table_name | String | FIELD |
| greptime | information_schema | column_statistics | schema_name | String | FIELD |
| greptime | information_schema | columns | table_name | String | FIELD |
| greptime | information_schema | columns | semantic_type | String | FIELD |
| greptime | information_schema | columns | data_type | String | FIELD |
| greptime | information_schema | columns | column_name | String | FIELD |
| greptime | information_schema | columns | table_schema | String | FIELD |
| greptime | information_schema | columns | table_catalog | String | FIELD |
| greptime | information_schema | engines | savepoints | String | FIELD |
| greptime | information_schema | engines | xa | String | FIELD |
| greptime | information_schema | engines | transactions | String | FIELD |
| greptime | information_schema | engines | comment | String | FIELD |
| greptime | information_schema | engines | support | String | FIELD |
| greptime | information_schema | engines | engine | String | FIELD |
| greptime | information_schema | tables | table_schema | String | FIELD |
| greptime | information_schema | tables | table_catalog | String | FIELD |
| greptime | information_schema | tables | engine | String | FIELD |
| greptime | information_schema | tables | table_id | UInt32 | FIELD |
| greptime | information_schema | tables | table_type | String | FIELD |
| greptime | information_schema | tables | table_name | String | FIELD |
| greptime | public | numbers | number | UInt32 | TAG |
+---------------+--------------------+-------------------+------------------+-----------+---------------+
+---------------+--------------------+---------------------------------------+-------------------------------+-----------+---------------+----------------+-------------+-------------+----------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type | column_default | is_nullable | column_type | column_comment |
+---------------+--------------------+---------------------------------------+-------------------------------+-----------+---------------+----------------+-------------+-------------+----------------+
| greptime | information_schema | build_info | git_branch | String | FIELD | | No | String | |
| greptime | information_schema | build_info | git_commit | String | FIELD | | No | String | |
| greptime | information_schema | build_info | git_commit_short | String | FIELD | | No | String | |
| greptime | information_schema | build_info | git_dirty | String | FIELD | | No | String | |
| greptime | information_schema | build_info | pkg_version | String | FIELD | | No | String | |
| greptime | information_schema | character_sets | description | String | FIELD | | No | String | |
| greptime | information_schema | character_sets | character_set_name | String | FIELD | | No | String | |
| greptime | information_schema | character_sets | default_collate_name | String | FIELD | | No | String | |
| greptime | information_schema | character_sets | maxlen | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | check_constraints | check_clause | String | FIELD | | No | String | |
| greptime | information_schema | check_constraints | constraint_name | String | FIELD | | No | String | |
| greptime | information_schema | check_constraints | constraint_schema | String | FIELD | | No | String | |
| greptime | information_schema | check_constraints | constraint_catalog | String | FIELD | | No | String | |
| greptime | information_schema | collation_character_set_applicability | character_set_name | String | FIELD | | No | String | |
| greptime | information_schema | collation_character_set_applicability | collation_name | String | FIELD | | No | String | |
| greptime | information_schema | collations | collation_name | String | FIELD | | No | String | |
| greptime | information_schema | collations | character_set_name | String | FIELD | | No | String | |
| greptime | information_schema | collations | id | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | collations | is_default | String | FIELD | | No | String | |
| greptime | information_schema | collations | is_compiled | String | FIELD | | No | String | |
| greptime | information_schema | collations | sortlen | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | column_privileges | table_schema | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | grantee | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | table_catalog | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | table_name | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | column_name | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | privilege_type | String | FIELD | | No | String | |
| greptime | information_schema | column_privileges | is_grantable | String | FIELD | | No | String | |
| greptime | information_schema | column_statistics | histogram | String | FIELD | | No | String | |
| greptime | information_schema | column_statistics | schema_name | String | FIELD | | No | String | |
| greptime | information_schema | column_statistics | table_name | String | FIELD | | No | String | |
| greptime | information_schema | column_statistics | column_name | String | FIELD | | No | String | |
| greptime | information_schema | columns | column_type | String | FIELD | | No | String | |
| greptime | information_schema | columns | table_catalog | String | FIELD | | No | String | |
| greptime | information_schema | columns | column_comment | String | FIELD | | Yes | String | |
| greptime | information_schema | columns | is_nullable | String | FIELD | | No | String | |
| greptime | information_schema | columns | column_default | String | FIELD | | Yes | String | |
| greptime | information_schema | columns | semantic_type | String | FIELD | | No | String | |
| greptime | information_schema | columns | data_type | String | FIELD | | No | String | |
| greptime | information_schema | columns | column_name | String | FIELD | | No | String | |
| greptime | information_schema | columns | table_name | String | FIELD | | No | String | |
| greptime | information_schema | columns | table_schema | String | FIELD | | No | String | |
| greptime | information_schema | engines | xa | String | FIELD | | No | String | |
| greptime | information_schema | engines | transactions | String | FIELD | | No | String | |
| greptime | information_schema | engines | comment | String | FIELD | | No | String | |
| greptime | information_schema | engines | support | String | FIELD | | No | String | |
| greptime | information_schema | engines | engine | String | FIELD | | No | String | |
| greptime | information_schema | engines | savepoints | String | FIELD | | No | String | |
| greptime | information_schema | events | starts | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | events | event_comment | String | FIELD | | No | String | |
| greptime | information_schema | events | database_collation | String | FIELD | | No | String | |
| greptime | information_schema | events | collation_connection | String | FIELD | | No | String | |
| greptime | information_schema | events | character_set_client | String | FIELD | | No | String | |
| greptime | information_schema | events | originator | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | events | event_catalog | String | FIELD | | No | String | |
| greptime | information_schema | events | event_schema | String | FIELD | | No | String | |
| greptime | information_schema | events | event_name | String | FIELD | | No | String | |
| greptime | information_schema | events | definer | String | FIELD | | No | String | |
| greptime | information_schema | events | time_zone | String | FIELD | | No | String | |
| greptime | information_schema | events | event_body | String | FIELD | | No | String | |
| greptime | information_schema | events | event_definition | String | FIELD | | No | String | |
| greptime | information_schema | events | event_type | String | FIELD | | No | String | |
| greptime | information_schema | events | execute_at | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | events | interval_value | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | events | interval_field | String | FIELD | | No | String | |
| greptime | information_schema | events | sql_mode | String | FIELD | | No | String | |
| greptime | information_schema | events | ends | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | events | status | String | FIELD | | No | String | |
| greptime | information_schema | events | on_completion | String | FIELD | | No | String | |
| greptime | information_schema | events | created | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | events | last_altered | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | events | last_executed | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | free_extents | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | row_format | String | FIELD | | No | String | |
| greptime | information_schema | files | extra | String | FIELD | | No | String | |
| greptime | information_schema | files | status | String | FIELD | | No | String | |
| greptime | information_schema | files | checksum | String | FIELD | | No | String | |
| greptime | information_schema | files | check_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | file_id | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | file_name | String | FIELD | | No | String | |
| greptime | information_schema | files | file_type | String | FIELD | | No | String | |
| greptime | information_schema | files | tablespace_name | String | FIELD | | No | String | |
| greptime | information_schema | files | table_catalog | String | FIELD | | No | String | |
| greptime | information_schema | files | table_schema | String | FIELD | | No | String | |
| greptime | information_schema | files | table_name | String | FIELD | | No | String | |
| greptime | information_schema | files | logfile_group_name | String | FIELD | | No | String | |
| greptime | information_schema | files | logfile_group_number | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | engine | String | FIELD | | No | String | |
| greptime | information_schema | files | fulltext_keys | String | FIELD | | No | String | |
| greptime | information_schema | files | deleted_rows | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | update_count | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | update_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | total_extents | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | extent_size | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | initial_size | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | maximum_size | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | autoextend_size | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | creation_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | last_update_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | last_access_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | recover_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | transaction_counter | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | version | String | FIELD | | No | String | |
| greptime | information_schema | files | create_time | DateTime | FIELD | | No | DateTime | |
| greptime | information_schema | files | table_rows | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | avg_row_length | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | data_length | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | max_data_length | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | index_length | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | files | data_free | Int64 | FIELD | | No | Int64 | |
| greptime | information_schema | key_column_usage | ordinal_position | UInt32 | FIELD | | No | UInt32 | |
| greptime | information_schema | key_column_usage | constraint_schema | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | referenced_column_name | String | FIELD | | Yes | String | |
| greptime | information_schema | key_column_usage | referenced_table_name | String | FIELD | | Yes | String | |
| greptime | information_schema | key_column_usage | referenced_table_schema | String | FIELD | | Yes | String | |
| greptime | information_schema | key_column_usage | position_in_unique_constraint | UInt32 | FIELD | | Yes | UInt32 | |
| greptime | information_schema | key_column_usage | constraint_catalog | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | column_name | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | table_schema | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | table_catalog | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | constraint_name | String | FIELD | | No | String | |
| greptime | information_schema | key_column_usage | table_name | String | FIELD | | No | String | |
| greptime | information_schema | schemata | catalog_name | String | FIELD | | No | String | |
| greptime | information_schema | schemata | default_collation_name | String | FIELD | | No | String | |
| greptime | information_schema | schemata | default_character_set_name | String | FIELD | | No | String | |
| greptime | information_schema | schemata | sql_path | String | FIELD | | Yes | String | |
| greptime | information_schema | schemata | schema_name | String | FIELD | | No | String | |
| greptime | information_schema | tables | table_schema | String | FIELD | | No | String | |
| greptime | information_schema | tables | table_catalog | String | FIELD | | No | String | |
| greptime | information_schema | tables | engine | String | FIELD | | Yes | String | |
| greptime | information_schema | tables | table_name | String | FIELD | | No | String | |
| greptime | information_schema | tables | table_type | String | FIELD | | No | String | |
| greptime | information_schema | tables | table_id | UInt32 | FIELD | | Yes | UInt32 | |
| greptime | public | numbers | number | UInt32 | TAG | | No | UInt32 | |
+---------------+--------------------+---------------------------------------+-------------------------------+-----------+---------------+----------------+-------------+-------------+----------------+
create database my_db;
@@ -128,6 +235,32 @@ use information_schema;
Affected Rows: 0
-- schemata --
desc table schemata;
+----------------------------+--------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+----------------------------+--------+-----+------+---------+---------------+
| catalog_name | String | | NO | | FIELD |
| schema_name | String | | NO | | FIELD |
| default_character_set_name | String | | NO | | FIELD |
| default_collation_name | String | | NO | | FIELD |
| sql_path | String | | YES | | FIELD |
+----------------------------+--------+-----+------+---------+---------------+
select * from schemata where catalog_name = 'greptime' and schema_name != 'public' order by catalog_name, schema_name;
+--------------+-----------------------+----------------------------+------------------------+----------+
| catalog_name | schema_name | default_character_set_name | default_collation_name | sql_path |
+--------------+-----------------------+----------------------------+------------------------+----------+
| greptime | greptime_private | utf8 | utf8_bin | |
| greptime | illegal-database | utf8 | utf8_bin | |
| greptime | information_schema | utf8 | utf8_bin | |
| greptime | my_db | utf8 | utf8_bin | |
| greptime | test_public_schema | utf8 | utf8_bin | |
| greptime | upper_case_table_name | utf8 | utf8_bin | |
+--------------+-----------------------+----------------------------+------------------------+----------+
-- test engines
select * from engines;
@@ -157,6 +290,34 @@ select count(*) from build_info;
| 1 |
+----------+
desc table key_column_usage;
+-------------------------------+--------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+-------------------------------+--------+-----+------+---------+---------------+
| constraint_catalog | String | | NO | | FIELD |
| constraint_schema | String | | NO | | FIELD |
| constraint_name | String | | NO | | FIELD |
| table_catalog | String | | NO | | FIELD |
| table_schema | String | | NO | | FIELD |
| table_name | String | | NO | | FIELD |
| column_name | String | | NO | | FIELD |
| ordinal_position | UInt32 | | NO | | FIELD |
| position_in_unique_constraint | UInt32 | | YES | | FIELD |
| referenced_table_schema | String | | YES | | FIELD |
| referenced_table_name | String | | YES | | FIELD |
| referenced_column_name | String | | YES | | FIELD |
+-------------------------------+--------+-----+------+---------+---------------+
select * from key_column_usage;
+--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+
| constraint_catalog | constraint_schema | constraint_name | table_catalog | table_schema | table_name | column_name | ordinal_position | position_in_unique_constraint | referenced_table_schema | referenced_table_name | referenced_column_name |
+--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+
| def | my_db | TIME INDEX | def | my_db | foo | ts | 1 | | | | |
| def | public | PRIMARY | def | public | numbers | number | 1 | | | | |
+--------------------+-------------------+-----------------+---------------+--------------+------------+-------------+------------------+-------------------------------+-------------------------+-----------------------+------------------------+
-- tables not implemented
desc table COLUMN_PRIVILEGES;
@@ -197,6 +358,48 @@ select * from COLUMN_STATISTICS;
+-------------+------------+-------------+-----------+
+-------------+------------+-------------+-----------+
select * from CHARACTER_SETS;
+--------------------+----------------------+---------------+--------+
| character_set_name | default_collate_name | description | maxlen |
+--------------------+----------------------+---------------+--------+
| utf8 | utf8_bin | UTF-8 Unicode | 4 |
+--------------------+----------------------+---------------+--------+
select * from COLLATIONS;
+----------------+--------------------+----+------------+-------------+---------+
| collation_name | character_set_name | id | is_default | is_compiled | sortlen |
+----------------+--------------------+----+------------+-------------+---------+
| utf8_bin | utf8 | 1 | Yes | Yes | 1 |
+----------------+--------------------+----+------------+-------------+---------+
select * from COLLATION_CHARACTER_SET_APPLICABILITY;
+----------------+--------------------+
| collation_name | character_set_name |
+----------------+--------------------+
| utf8_bin | utf8 |
+----------------+--------------------+
desc table CHECK_CONSTRAINTS;
+--------------------+--------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------------------+--------+-----+------+---------+---------------+
| constraint_catalog | String | | NO | | FIELD |
| constraint_schema | String | | NO | | FIELD |
| constraint_name | String | | NO | | FIELD |
| check_clause | String | | NO | | FIELD |
+--------------------+--------+-----+------+---------+---------------+
select * from CHECK_CONSTRAINTS;
+--------------------+-------------------+-----------------+--------------+
| constraint_catalog | constraint_schema | constraint_name | check_clause |
+--------------------+-------------------+-----------------+--------------+
+--------------------+-------------------+-----------------+--------------+
use public;
Affected Rows: 0
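As a usage note, the new information_schema.key_column_usage table (see the desc and select above) can be queried much like its MySQL counterpart to look up the key columns of a table. A minimal sketch against the built-in public.numbers table whose PRIMARY constraint appears in the result above; illustrative only, not part of the test.
-- Illustrative only: find the key columns of a given table.
SELECT constraint_name, column_name, ordinal_position
FROM information_schema.key_column_usage
WHERE table_schema = 'public' AND table_name = 'numbers'
ORDER BY constraint_name, ordinal_position;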

View File

@@ -44,6 +44,12 @@ drop schema my_db;
use information_schema;
-- schemata --
desc table schemata;
select * from schemata where catalog_name = 'greptime' and schema_name != 'public' order by catalog_name, schema_name;
-- test engines
select * from engines;
@@ -51,6 +57,10 @@ desc table build_info;
select count(*) from build_info;
desc table key_column_usage;
select * from key_column_usage;
-- tables not implemented
desc table COLUMN_PRIVILEGES;
@@ -60,4 +70,14 @@ desc table COLUMN_STATISTICS;
select * from COLUMN_STATISTICS;
select * from CHARACTER_SETS;
select * from COLLATIONS;
select * from COLLATION_CHARACTER_SET_APPLICABILITY;
desc table CHECK_CONSTRAINTS;
select * from CHECK_CONSTRAINTS;
use public;

View File

@@ -126,102 +126,22 @@ SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
| 128 | 128 | 10000 | 1280000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable;
Affected Rows: 128
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 256 | 256 | 10000 | 2560000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable;
Affected Rows: 256
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 512 | 512 | 10000 | 5120000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable;
Affected Rows: 512
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 1024 | 1024 | 10000 | 10240000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable;
Affected Rows: 1024
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 2048 | 2048 | 10000 | 20480000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
Affected Rows: 2048
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 4096 | 4096 | 10000 | 40960000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable;
Affected Rows: 4096
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 8192 | 8192 | 10000 | 81920000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
Affected Rows: 8192
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 16384 | 16384 | 10000 | 163840000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
Affected Rows: 16384
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| COUNT(*) | COUNT(bigtable.a) | MAX(character_length(bigtable.a)) | SUM(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 32768 | 32768 | 10000 | 327680000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
DROP TABLE test;
Affected Rows: 0
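Each re-enabled `INSERT INTO bigtable SELECT a, to_unixtime(ts) * N FROM bigtable` doubles the row count (the results above confirm 128, 256, ..., 32768), and since every value of `a` is 10000 characters long, SUM(LENGTH(a)) stays exactly 10000 * COUNT(*) (32768 * 10000 = 327680000 in the final result). A minimal sanity-check query for that invariant, illustrative only and not part of the test.
-- Illustrative only: the aggregate invariant the checks above rely on.
SELECT COUNT(*) * 10000 = SUM(LENGTH(a)) AS sum_tracks_count FROM bigtable;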

View File

@@ -51,38 +51,38 @@ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 51 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
-- INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
DROP TABLE test;

View File

@@ -6,12 +6,18 @@ rpc_hostname = '127.0.0.1'
rpc_runtime_size = 8
[wal]
{{ if is_raft_engine }}
provider = "raft_engine"
file_size = '1GB'
purge_interval = '10m'
purge_threshold = '10GB'
read_batch_size = 128
sync_write = false
{{ else }}
provider = "kafka"
broker_endpoints = {kafka_wal_broker_endpoints | unescaped}
linger = "5ms"
{{ endif }}
[storage]
type = 'File'

View File

@@ -0,0 +1,10 @@
[wal]
{{ if is_raft_engine }}
provider = "raft_engine"
{{ else }}
provider = "kafka"
broker_endpoints = {kafka_wal_broker_endpoints | unescaped}
num_topics = 64
selector_type = "round_robin"
topic_name_prefix = "distributed_test_greptimedb_wal_topic"
{{ endif }}

View File

@@ -3,12 +3,18 @@ enable_memory_catalog = false
require_lease_before_startup = true
[wal]
{{ if is_raft_engine }}
provider = "raft_engine"
file_size = '1GB'
purge_interval = '10m'
purge_threshold = '10GB'
read_batch_size = 128
sync_write = false
{{ else }}
provider = "kafka"
broker_endpoints = {kafka_wal_broker_endpoints | unescaped}
linger = "5ms"
{{ endif }}
[storage]
type = 'File'

Some files were not shown because too many files have changed in this diff.