Compare commits

..

24 Commits

Author SHA1 Message Date
Niwaka
358d5e1d63 fix: support alter table ~ add ~ custom_type (#5165) 2024-12-15 09:05:29 +00:00
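The fix above (#5165) concerns `ALTER TABLE ... ADD ...` statements whose new column uses a custom (extension) type. A hypothetical illustration of the statement shape, assuming the JSON extension type and made-up table/column names:

```sql
-- Hypothetical example of the statement shape targeted by #5165;
-- the table name, column name and JSON type are assumptions.
ALTER TABLE app_logs ADD COLUMN attributes JSON;
```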
Yingwen
579059d99f ci: use 4xlarge for nightly build (#5158) 2024-12-13 12:53:11 +00:00
localhost
53d55c0b6b fix: loki write row len error (#5161) 2024-12-13 10:10:59 +00:00
Yingwen
bef6896280 docs: Add index panels to standalone grafana dashboard (#5140)
* docs: Add index panels to standalone grafana dashboard

* docs: fix flush/compaction op
2024-12-13 08:17:49 +00:00
Yohan Wal
4b4c6dbb66 refactor: cache inverted index with fixed-size page (#5114)
* feat: cache inverted index by page instead of file

* fix: add unit test and fix bugs

* chore: typo

* chore: ci

* fix: math

* chore: apply review comments

* chore: renames

* test: add unit test for index key calculation

* refactor: use ReadableSize

* feat: add config for inverted index page size

* chore: update config file

* refactor: handle multiple range read and fix some related bugs

* fix: add config

* test: turn to a fs reader to match behaviors of object store
2024-12-13 07:34:24 +00:00
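The refactor above (#5114) switches the inverted-index cache from whole-file entries to fixed-size pages with a configurable page size. A minimal sketch of the page-mapping idea, not the actual implementation (the function name and the 4 KiB page size are assumptions):

```rust
/// Sketch: map a requested byte range onto the fixed-size pages that cover it,
/// so each page can be cached and fetched independently.
fn covering_pages(offset: u64, len: u64, page_size: u64) -> std::ops::Range<u64> {
    assert!(page_size > 0 && len > 0);
    let first = offset / page_size;            // index of the first page touched
    let last = (offset + len - 1) / page_size; // index of the last page touched
    first..last + 1                            // half-open range of page indices
}

fn main() {
    // A 5000-byte read starting at byte 4000 with 4 KiB pages spans pages 0, 1 and 2.
    assert_eq!(covering_pages(4000, 5000, 4096), 0..3);
}
```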
localhost
e8e9526738 chore: pipeline dryrun api can now receive pipeline raw content (#5142)
* chore: pipeline dryrun api can now receive pipeline raw content

* chore: remove dryrun v1 and add test

* chore: change dryrun pipeline api body schema

* chore: remove useless struct PipelineInfo

* chore: update PipelineDryrunParams doc

* chore: increase code readability

* chore: add some comment for pipeline dryrun test

* Apply suggestions from code review

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* chore: format code

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-12-12 11:47:21 +00:00
Yingwen
fee75a1fad feat: collect reader metrics from prune reader (#5152) 2024-12-12 11:27:22 +00:00
localhost
b8a78b7838 chore: decide tag columns in log api following table schema if table exists (#5138)
* chore: decide tag columns in log api following table schema if table exists

* chore: add more test for greptime_identity pipeline

* chore: change pipeline get_table function signature

* chore: change identity_pipeline_inner tag_column_names type
2024-12-12 09:01:21 +00:00
Weny Xu
2137c53274 feat(index): add file_size_hint for remote blob reader (#5147)
feat(index): add file_size_hint for remote blob reader
2024-12-12 04:45:40 +00:00
Yohan Wal
03ad6e2a8d feat(fuzz): add alter table options for alter fuzzer (#5074)
* feat(fuzz): add set table options to alter fuzzer

* chore: clippy is happy, I'm sad

* chore: happy ci happy

* fix: unit test

* feat(fuzz): add unset table options to alter fuzzer

* fix: unit test

* feat(fuzz): add table option validator

* fix: make clippy happy

* chore: add comments

* chore: apply review comments

* fix: unit test

* feat(fuzz): add more ttl options

* fix: #5108

* chore: add comments

* chore: add comments
2024-12-12 04:21:38 +00:00
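The fuzzer changes above (#5074) generate table-option statements, including TTL settings. Hedged examples of the kind of statements exercised, assuming GreptimeDB's `ALTER TABLE ... SET/UNSET` option syntax and a made-up table name:

```sql
-- Hypothetical statements of the kind the alter fuzzer now generates.
ALTER TABLE metrics SET 'ttl' = '7d';
ALTER TABLE metrics UNSET 'ttl';
```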
Weny Xu
d53fbcb936 feat: introduce PuffinMetadataCache (#5148)
* feat: introduce `PuffinMetadataCache`

* refactor: remove too_many_arguments

* chore: fmt toml
2024-12-12 04:09:36 +00:00
Weny Xu
8c1959c580 feat: add prefetch support to InvertedIndexFooterReader for reduced I/O time (#5146)
* feat: add prefetch support to `InvertedIndeFooterReader`

* chore: correct struct name

* chore: apply suggestions from CR
2024-12-12 03:49:54 +00:00
Weny Xu
e2a41ccaec feat: add prefetch support to PuffinFileFooterReader for reduced I/O time (#5145)
* feat: introduce `PuffinFileFooterReader`

* refactor: remove `SyncReader` trait and impl

* refactor: replace `FooterParser` with `PuffinFileFooterReader`

* chore: remove unused errors
2024-12-12 03:13:36 +00:00
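Both footer-reader changes above (#5145, #5146) replace several small tail reads with a single prefetch of the file's last bytes, then parse the footer from that buffer. A minimal sketch of the range arithmetic only; the names and the 64 KiB prefetch size are assumptions, not the actual API:

```rust
/// Sketch: the single (offset, length) tail read expected to contain the footer.
fn footer_prefetch_range(file_size: u64, prefetch_size: u64) -> (u64, u64) {
    let len = prefetch_size.min(file_size); // never read before the start of the file
    (file_size - len, len)
}

fn main() {
    // A 10 MiB file with a 64 KiB prefetch is read with one 64 KiB request at the tail.
    let (offset, len) = footer_prefetch_range(10 << 20, 64 << 10);
    assert_eq!((offset, len), ((10 << 20) - (64 << 10), 64 << 10));
}
```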
Niwaka
a8012147ab feat: support push down IN filter (#5129)
* feat: support push down IN filter

* chore: move tests to prune.sql
2024-12-11 13:46:23 +00:00
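For the IN-filter pushdown above (#5129), a hypothetical query whose predicate can now be pushed down and used for pruning (table, column and values are made up):

```sql
-- Hypothetical query; the IN predicate can now be pushed down for pruning.
SELECT *
FROM http_requests
WHERE host IN ('web-01', 'web-02')
  AND ts >= '2024-12-01 00:00:00';
```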
Ruihang Xia
60f8dbf7f0 feat: implement v1/sql/parse endpoint to parse GreptimeDB's SQL dialect (#5144)
* derive ser/de

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl method

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deserialize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-11 13:33:54 +00:00
ZonaHe
9da2e17d0e feat: update dashboard to v0.7.2 (#5141)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2024-12-11 12:47:59 +00:00
Yohan Wal
1a8e77a480 test: part of parser test migrated from duckdb (#5125)
* test: update test

* fix: fix test
2024-12-11 09:28:13 +00:00
Zhenchi
e1e39993f7 feat(vector): add scalar add function (#5119)
* refactor: extract implicit conversion helper functions of vector

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(vector): add scalar add function

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-11 09:25:56 +00:00
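The new scalar add function (#5119) is registered as `scalar_add::ScalarAddFunction` in a diff further down; by analogy with the `vec_scalar_mul` documentation also shown later in this compare, its usage presumably looks like the following (function name and result are assumptions, not taken from the commit):

```sql
-- Hypothetical usage, mirroring the vec_scalar_mul doc example in this diff.
SELECT vec_to_string(vec_scalar_add(1, '[1, 2, 3]')) AS result;
-- expected: [2,3,4]
```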
Lei, HUANG
a30d918df2 perf: avoid cache during compaction (#5135)
* Revert "refactor: Avoid wrapping Option for CacheManagerRef (#4996)"

This reverts commit 42bf7e9965.

* fix: memory usage during log ingestion

* fix: fmt
2024-12-11 08:24:41 +00:00
dennis zhuang
2c4ac76754 feat: adjust WAL purge default configurations (#5107)
* feat: adjust WAL purge default configurations

* fix: config

* feat: change raft engine file_size default to 128MiB
2024-12-11 08:08:05 +00:00
jeremyhi
a6893aad42 chore: set store_key_prefix for all kvbackend (#5132) 2024-12-11 08:04:02 +00:00
discord9
d91517688a chore: fix aws_lc not in depend tree check in CI (#5121)
* chore: fix aws_lc check in CI

* chore: update lock file
2024-12-11 07:02:03 +00:00
shuiyisong
3d1b8c4fac chore: add /ready api for health checking (#5124)
* chore: add ready endpoint for health checking

* chore: add test
2024-12-11 02:56:48 +00:00
Yingwen
7c69ca0502 chore: bump main branch version to 0.12 (#5133)
chore: bump version to v0.12.0
2024-12-10 13:10:37 +00:00
198 changed files with 1960 additions and 7311 deletions

View File

@@ -18,8 +18,6 @@ runs:
--set controller.replicaCount=${{ inputs.controller-replicas }} \
--set controller.resources.requests.cpu=50m \
--set controller.resources.requests.memory=128Mi \
--set controller.resources.limits.cpu=2000m \
--set controller.resources.limits.memory=2Gi \
--set listeners.controller.protocol=PLAINTEXT \
--set listeners.client.protocol=PLAINTEXT \
--create-namespace \

View File

@@ -4,8 +4,7 @@ I hereby agree to the terms of the [GreptimeDB CLA](https://github.com/GreptimeT
## What's changed and what's your intention?
<!--
__!!! DO NOT LEAVE THIS BLOCK EMPTY !!!__
__!!! DO NOT LEAVE THIS BLOCK EMPTY !!!__
Please explain IN DETAIL what the changes are in this PR and why they are needed:
@@ -13,14 +12,9 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
- How does this PR work? Need a brief introduction for the changed logic (optional)
- Describe clearly one logical change and avoid lazy messages (optional)
- Describe any limitations of the current code (optional)
- Describe if this PR will break **API or data compatibility** (optional)
-->
## PR Checklist
Please convert it to a draft if some of the following conditions are not met.
## Checklist
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.
- [ ] This PR requires documentation updates.
- [ ] API changes are backward compatible.
- [ ] Schema or data changes are backward compatible.

View File

@@ -29,7 +29,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.8xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G

View File

@@ -323,6 +323,8 @@ jobs:
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
uses: ./.github/actions/setup-etcd-cluster
- name: Setup Postgres cluser
uses: ./.github/actions/setup-postgres-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3
with:
@@ -472,6 +474,8 @@ jobs:
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
uses: ./.github/actions/setup-etcd-cluster
- name: Setup Postgres cluser
uses: ./.github/actions/setup-postgres-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3
with:

View File

@@ -27,7 +27,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.8xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G

View File

@@ -114,17 +114,6 @@ jobs:
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"
cleanbuild-linux-nix:
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
needs: [coverage, fmt, clippy, check]
steps:
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-unstable
- run: nix-shell --pure --run "cargo build"
check-status:
name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows]

View File

@@ -31,7 +31,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.8xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ubuntu-2204-32-cores-arm
- ec2-c6g.xlarge-arm64 # 4C8G
@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.11.0
NEXT_RELEASE_VERSION: v0.12.0
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:

.gitignore (vendored): 6 changes
View File

@@ -47,10 +47,6 @@ benchmarks/data
venv/
# Fuzz tests
# Fuzz tests
tests-fuzz/artifacts/
tests-fuzz/corpus/
# Nix
.direnv
.envrc

Cargo.lock (generated): 502 changes

File diff suppressed because it is too large

View File

@@ -68,7 +68,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.11.1"
version = "0.12.0"
edition = "2021"
license = "Apache-2.0"

View File

@@ -15,8 +15,8 @@ RUN apt-get update && \
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \
tzdata \
protobuf-compiler \
curl \
unzip \
ca-certificates \
git \
build-essential \
@@ -24,20 +24,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
python3.10 \
python3.10-dev
ARG TARGETPLATFORM
RUN echo "target platform: $TARGETPLATFORM"
# Install protobuf, because the one in the apt is too old (v3.12).
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-aarch_64.zip && \
unzip protoc-29.1-linux-aarch_64.zip -d protoc3; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-x86_64.zip && \
unzip protoc-29.1-linux-x86_64.zip -d protoc3; \
fi
RUN mv protoc3/bin/* /usr/local/bin/
RUN mv protoc3/include/* /usr/local/include/
# https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106
# `aws-lc-sys` require gcc >= 10.3.0 to work, hence alias to use gcc-10
RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \
@@ -63,7 +49,7 @@ RUN apt-get -y purge python3.8 && \
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that have prepared the submodules.
RUN git config --global --add safe.directory '*'
RUN git config --global --add safe.directory *
# Install Python dependencies.
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt

File diff suppressed because it is too large

View File

@@ -1,3 +1,2 @@
[toolchain]
channel = "nightly-2024-10-19"
components = ["rust-analyzer"]

View File

@@ -58,10 +58,8 @@ def main():
if not check_snafu_in_files(branch_name, other_rust_files)
]
if unused_snafu:
print("Unused error variants:")
for name in unused_snafu:
print(name)
for name in unused_snafu:
print(name)
if unused_snafu:
raise SystemExit(1)

View File

@@ -1,27 +0,0 @@
let
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {};
pkgs = import nixpkgs { config = {}; overlays = []; };
in
pkgs.mkShell rec {
nativeBuildInputs = with pkgs; [
pkg-config
git
clang
gcc
protobuf
mold
(fenix.fromToolchainFile {
dir = ./.;
})
cargo-nextest
taplo
];
buildInputs = with pkgs; [
libgit2
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;
}

View File

@@ -16,7 +16,7 @@ use std::collections::HashMap;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY,
};
use greptime_proto::v1::Analyzer;
use snafu::ResultExt;
@@ -29,8 +29,6 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
const FULLTEXT_GRPC_KEY: &str = "fulltext";
/// Key used to store inverted index options in gRPC column options.
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
/// Key used to store skip index options in gRPC column options.
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
@@ -62,9 +60,6 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone());
}
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone());
}
}
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
@@ -89,11 +84,6 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
.options
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
}
if let Some(skipping_index) = column_schema.metadata().get(SKIPPING_INDEX_KEY) {
options
.options
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
}
(!options.options.is_empty()).then_some(options)
}

View File

@@ -11,3 +11,4 @@ common-macro.workspace = true
common-meta.workspace = true
moka.workspace = true
snafu.workspace = true
substrait.workspace = true

View File

@@ -18,6 +18,7 @@ async-stream.workspace = true
async-trait = "0.1"
bytes.workspace = true
common-catalog.workspace = true
common-config.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-meta.workspace = true
@@ -57,5 +58,7 @@ catalog = { workspace = true, features = ["testing"] }
chrono.workspace = true
common-meta = { workspace = true, features = ["testing"] }
common-query = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
log-store.workspace = true
object-store.workspace = true
tokio.workspace = true

View File

@@ -64,13 +64,6 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Failed to list flow stats"))]
ListFlowStats {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to list flows in catalog {catalog}"))]
ListFlows {
#[snafu(implicit)]
@@ -333,7 +326,6 @@ impl ErrorExt for Error {
| Error::ListSchemas { source, .. }
| Error::ListTables { source, .. }
| Error::ListFlows { source, .. }
| Error::ListFlowStats { source, .. }
| Error::ListProcedures { source, .. }
| Error::ListRegionStats { source, .. }
| Error::ConvertProtoData { source, .. } => source.status_code(),

View File

@@ -17,7 +17,6 @@ use common_error::ext::BoxedError;
use common_meta::cluster::{ClusterInfo, NodeInfo};
use common_meta::datanode::RegionStat;
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
use common_meta::key::flow::flow_state::FlowStat;
use common_meta::rpc::procedure;
use common_procedure::{ProcedureInfo, ProcedureState};
use meta_client::MetaClientRef;
@@ -90,12 +89,4 @@ impl InformationExtension for DistributedInformationExtension {
.map_err(BoxedError::new)
.context(error::ListRegionStatsSnafu)
}
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
self.meta_client
.list_flow_stats()
.await
.map_err(BoxedError::new)
.context(crate::error::ListFlowStatsSnafu)
}
}

View File

@@ -35,7 +35,6 @@ use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME
use common_error::ext::ErrorExt;
use common_meta::cluster::NodeInfo;
use common_meta::datanode::RegionStat;
use common_meta::key::flow::flow_state::FlowStat;
use common_meta::key::flow::FlowMetadataManager;
use common_procedure::ProcedureInfo;
use common_recordbatch::SendableRecordBatchStream;
@@ -193,7 +192,6 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
)) as _),
FLOWS => Some(Arc::new(InformationSchemaFlows::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
self.flow_metadata_manager.clone(),
)) as _),
PROCEDURE_INFO => Some(
@@ -340,9 +338,6 @@ pub trait InformationExtension {
/// Gets the region statistics.
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
/// Get the flow statistics. If no flownode is available, return `None`.
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error>;
}
pub struct NoopInformationExtension;
@@ -362,8 +357,4 @@ impl InformationExtension for NoopInformationExtension {
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
Ok(vec![])
}
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
Ok(None)
}
}

View File

@@ -12,12 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use std::sync::Arc;
use common_catalog::consts::INFORMATION_SCHEMA_FLOW_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::key::flow::flow_info::FlowInfoValue;
use common_meta::key::flow::flow_state::FlowStat;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::FlowId;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
@@ -29,9 +28,7 @@ use datatypes::prelude::ConcreteDataType as CDT;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
};
use datatypes::vectors::{Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
@@ -41,8 +38,6 @@ use crate::error::{
};
use crate::information_schema::{Predicates, FLOWS};
use crate::system_schema::information_schema::InformationTable;
use crate::system_schema::utils;
use crate::CatalogManager;
const INIT_CAPACITY: usize = 42;
@@ -50,7 +45,6 @@ const INIT_CAPACITY: usize = 42;
// pk is (flow_name, flow_id, table_catalog)
pub const FLOW_NAME: &str = "flow_name";
pub const FLOW_ID: &str = "flow_id";
pub const STATE_SIZE: &str = "state_size";
pub const TABLE_CATALOG: &str = "table_catalog";
pub const FLOW_DEFINITION: &str = "flow_definition";
pub const COMMENT: &str = "comment";
@@ -61,24 +55,20 @@ pub const FLOWNODE_IDS: &str = "flownode_ids";
pub const OPTIONS: &str = "options";
/// The `information_schema.flows` to provides information about flows in databases.
///
pub(super) struct InformationSchemaFlows {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
flow_metadata_manager: Arc<FlowMetadataManager>,
}
impl InformationSchemaFlows {
pub(super) fn new(
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
flow_metadata_manager: Arc<FlowMetadataManager>,
) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
flow_metadata_manager,
}
}
@@ -90,7 +80,6 @@ impl InformationSchemaFlows {
vec![
(FLOW_NAME, CDT::string_datatype(), false),
(FLOW_ID, CDT::uint32_datatype(), false),
(STATE_SIZE, CDT::uint64_datatype(), true),
(TABLE_CATALOG, CDT::string_datatype(), false),
(FLOW_DEFINITION, CDT::string_datatype(), false),
(COMMENT, CDT::string_datatype(), true),
@@ -110,7 +99,6 @@ impl InformationSchemaFlows {
InformationSchemaFlowsBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
&self.flow_metadata_manager,
)
}
@@ -156,12 +144,10 @@ impl InformationTable for InformationSchemaFlows {
struct InformationSchemaFlowsBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
flow_metadata_manager: Arc<FlowMetadataManager>,
flow_names: StringVectorBuilder,
flow_ids: UInt32VectorBuilder,
state_sizes: UInt64VectorBuilder,
table_catalogs: StringVectorBuilder,
raw_sqls: StringVectorBuilder,
comments: StringVectorBuilder,
@@ -176,18 +162,15 @@ impl InformationSchemaFlowsBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
flow_metadata_manager: &Arc<FlowMetadataManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
flow_metadata_manager: flow_metadata_manager.clone(),
flow_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
flow_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
state_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
table_catalogs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
raw_sqls: StringVectorBuilder::with_capacity(INIT_CAPACITY),
comments: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -212,11 +195,6 @@ impl InformationSchemaFlowsBuilder {
.flow_names(&catalog_name)
.await;
let flow_stat = {
let information_extension = utils::information_extension(&self.catalog_manager)?;
information_extension.flow_stats().await?
};
while let Some((flow_name, flow_id)) = stream
.try_next()
.await
@@ -235,7 +213,7 @@ impl InformationSchemaFlowsBuilder {
catalog_name: catalog_name.to_string(),
flow_name: flow_name.to_string(),
})?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info)?;
}
self.finish()
@@ -246,7 +224,6 @@ impl InformationSchemaFlowsBuilder {
predicates: &Predicates,
flow_id: FlowId,
flow_info: FlowInfoValue,
flow_stat: &Option<FlowStat>,
) -> Result<()> {
let row = [
(FLOW_NAME, &Value::from(flow_info.flow_name().to_string())),
@@ -261,11 +238,6 @@ impl InformationSchemaFlowsBuilder {
}
self.flow_names.push(Some(flow_info.flow_name()));
self.flow_ids.push(Some(flow_id));
self.state_sizes.push(
flow_stat
.as_ref()
.and_then(|state| state.state_size.get(&flow_id).map(|v| *v as u64)),
);
self.table_catalogs.push(Some(flow_info.catalog_name()));
self.raw_sqls.push(Some(flow_info.raw_sql()));
self.comments.push(Some(flow_info.comment()));
@@ -298,7 +270,6 @@ impl InformationSchemaFlowsBuilder {
let columns: Vec<VectorRef> = vec![
Arc::new(self.flow_names.finish()),
Arc::new(self.flow_ids.finish()),
Arc::new(self.state_sizes.finish()),
Arc::new(self.table_catalogs.finish()),
Arc::new(self.raw_sqls.finish()),
Arc::new(self.comments.finish()),

View File

@@ -54,10 +54,6 @@ const INIT_CAPACITY: usize = 42;
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
/// Time index constraint name
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
/// Inverted index constraint name
pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
/// Fulltext index constraint name
pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
pub(super) struct InformationSchemaKeyColumnUsage {
@@ -220,13 +216,14 @@ impl InformationSchemaKeyColumnUsageBuilder {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let mut primary_constraints = vec![];
let table_info = table.table_info();
let table_name = &table_info.name;
let keys = &table_info.meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![];
if column.is_time_index() {
self.add_key_column_usage(
&predicates,
@@ -239,31 +236,30 @@ impl InformationSchemaKeyColumnUsageBuilder {
1, //always 1 for time index
);
}
// TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) {
constraints.push(PRI_CONSTRAINT_NAME);
}
if column.is_inverted_indexed() {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
primary_constraints.push((
catalog_name.clone(),
schema_name.clone(),
table_name.to_string(),
column.name.clone(),
));
}
// TODO(dimbtp): foreign key constraint not supported yet
}
if column.has_fulltext_index_key() {
constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
}
if !constraints.is_empty() {
let aggregated_constraints = constraints.join(", ");
self.add_key_column_usage(
&predicates,
&schema_name,
&aggregated_constraints,
&catalog_name,
&schema_name,
table_name,
&column.name,
idx as u32 + 1,
);
}
for (i, (catalog_name, schema_name, table_name, column_name)) in
primary_constraints.into_iter().enumerate()
{
self.add_key_column_usage(
&predicates,
&schema_name,
PRI_CONSTRAINT_NAME,
&catalog_name,
&schema_name,
&table_name,
&column_name,
i as u32 + 1,
);
}
}
}

View File

@@ -23,6 +23,7 @@ common-error.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-options.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
@@ -60,4 +61,5 @@ client = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
common-version.workspace = true
serde.workspace = true
temp-env = "0.3"
tempfile.workspace = true

View File

@@ -34,7 +34,7 @@ use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::debug;
use either::Either;
use meta_client::client::{ClusterKvBackend, MetaClientBuilder};
use meta_client::client::MetaClientBuilder;
use query::datafusion::DatafusionQueryEngine;
use query::parser::QueryLanguageParser;
use query::query_engine::{DefaultSerializer, QueryEngineState};

View File

@@ -42,6 +42,8 @@ tonic.workspace = true
[dev-dependencies]
common-grpc-expr.workspace = true
datanode.workspace = true
derive-new = "0.5"
tracing = "0.1"
[dev-dependencies.substrait_proto]

View File

@@ -59,6 +59,10 @@ impl Instance {
}
}
pub fn datanode_mut(&mut self) -> &mut Datanode {
&mut self.datanode
}
pub fn datanode(&self) -> &Datanode {
&self.datanode
}

View File

@@ -63,6 +63,10 @@ impl Instance {
}
}
pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
&mut self.flownode
}
pub fn flownode(&self) -> &FlownodeInstance {
&self.flownode
}

View File

@@ -34,7 +34,6 @@ use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRe
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
use common_meta::ddl_manager::DdlManager;
use common_meta::key::flow::flow_state::FlowStat;
use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
@@ -71,7 +70,7 @@ use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use tokio::sync::{broadcast, RwLock};
use tokio::sync::broadcast;
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
@@ -508,7 +507,7 @@ impl StartCommand {
procedure_manager.clone(),
));
let catalog_manager = KvBackendCatalogManager::new(
information_extension.clone(),
information_extension,
kv_backend.clone(),
layered_cache_registry.clone(),
Some(procedure_manager.clone()),
@@ -533,14 +532,6 @@ impl StartCommand {
.context(OtherSnafu)?,
);
// set the ref to query for the local flow state
{
let flow_worker_manager = flownode.flow_worker_manager();
information_extension
.set_flow_worker_manager(flow_worker_manager.clone())
.await;
}
let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(),
flow_server: flownode.flow_worker_manager(),
@@ -678,7 +669,6 @@ pub struct StandaloneInformationExtension {
region_server: RegionServer,
procedure_manager: ProcedureManagerRef,
start_time_ms: u64,
flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
}
impl StandaloneInformationExtension {
@@ -687,15 +677,8 @@ impl StandaloneInformationExtension {
region_server,
procedure_manager,
start_time_ms: common_time::util::current_time_millis() as u64,
flow_worker_manager: RwLock::new(None),
}
}
/// Set the flow worker manager for the standalone instance.
pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
let mut guard = self.flow_worker_manager.write().await;
*guard = Some(flow_worker_manager);
}
}
#[async_trait::async_trait]
@@ -767,18 +750,6 @@ impl InformationExtension for StandaloneInformationExtension {
.collect::<Vec<_>>();
Ok(stats)
}
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
Ok(Some(
self.flow_worker_manager
.read()
.await
.as_ref()
.unwrap()
.gen_state_report()
.await,
))
}
}
#[cfg(test)]

View File

@@ -17,7 +17,6 @@ common-macro.workspace = true
futures.workspace = true
paste = "1.0"
pin-project.workspace = true
rand.workspace = true
serde = { version = "1.0", features = ["derive"] }
snafu.workspace = true
tokio.workspace = true

View File

@@ -8,5 +8,10 @@ license.workspace = true
workspace = true
[dependencies]
common-error.workspace = true
common-macro.workspace = true
snafu.workspace = true
[dev-dependencies]
chrono.workspace = true
tokio.workspace = true

View File

@@ -48,4 +48,5 @@ url = "2.3"
[dev-dependencies]
common-telemetry.workspace = true
common-test-util.workspace = true
dotenv.workspace = true
uuid.workspace = true

View File

@@ -5,7 +5,12 @@ edition.workspace = true
license.workspace = true
[dependencies]
api.workspace = true
async-trait.workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-query.workspace = true
session.workspace = true
snafu.workspace = true
sql.workspace = true

View File

@@ -51,5 +51,6 @@ wkt = { version = "0.11", optional = true }
[dev-dependencies]
approx = "0.5"
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }
tokio.workspace = true

View File

@@ -16,7 +16,6 @@ mod convert;
mod distance;
pub(crate) mod impl_conv;
mod scalar_add;
mod scalar_mul;
use std::sync::Arc;
@@ -37,6 +36,5 @@ impl VectorFunction {
// scalar calculation
registry.register(Arc::new(scalar_add::ScalarAddFunction));
registry.register(Arc::new(scalar_mul::ScalarMulFunction));
}
}

View File

@@ -1,173 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
const NAME: &str = "vec_scalar_mul";
/// Multiples a scalar to each element of a vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_scalar_mul(2, "[1, 2, 3]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [2,4,6] |
/// +---------+
///
/// -- 1/scalar to simulate division
/// SELECT vec_to_string(vec_scalar_mul(0.5, "[2, 4, 6]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [1,2,3] |
/// +---------+
/// ```
#[derive(Debug, Clone, Default)]
pub struct ScalarMulFunction;
impl Function for ScalarMulFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::binary_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![ConcreteDataType::float64_datatype()],
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
let len = arg0.len();
let mut result = BinaryVectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
}
let arg1_const = as_veclit_if_const(arg1)?;
for i in 0..len {
let arg0 = arg0.get(i).as_f64_lossy();
let Some(arg0) = arg0 else {
result.push_null();
continue;
};
let arg1 = match arg1_const.as_ref() {
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
None => as_veclit(arg1.get_ref(i))?,
};
let Some(arg1) = arg1 else {
result.push_null();
continue;
};
let vec = DVectorView::from_slice(&arg1, arg1.len());
let vec_res = vec.scale(arg0 as _);
let veclit = vec_res.as_slice();
let binlit = veclit_to_binlit(veclit);
result.push(Some(&binlit));
}
Ok(result.to_vector())
}
}
impl Display for ScalarMulFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::{Float32Vector, StringVector};
use super::*;
#[test]
fn test_scalar_mul() {
let func = ScalarMulFunction;
let input0 = Arc::new(Float32Vector::from(vec![
Some(2.0),
Some(-0.5),
None,
Some(3.0),
]));
let input1 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[8.0,10.0,12.0]".to_string()),
Some("[7.0,8.0,9.0]".to_string()),
None,
]));
let result = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[2.0, 4.0, 6.0]).as_slice())
);
assert_eq!(
result.get_ref(1).as_binary().unwrap(),
Some(veclit_to_binlit(&[-4.0, -5.0, -6.0]).as_slice())
);
assert!(result.get_ref(2).is_null());
assert!(result.get_ref(3).is_null());
}
}

View File

@@ -49,6 +49,14 @@ impl TableRoute {
TableRoute::Logical(_) => None,
}
}
/// Returns [LogicalTableRouteValue] reference if it's [TableRoute::Logical]; Otherwise it returns [None].
pub fn as_logical_table_route_ref(&self) -> Option<&Arc<LogicalTableRouteValue>> {
match self {
TableRoute::Physical(_) => None,
TableRoute::Logical(table_route) => Some(table_route),
}
}
}
/// [TableRouteCache] caches the [TableId] to [TableRoute] mapping.

View File

@@ -137,7 +137,6 @@ use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue};
use self::table_route::{TableRouteManager, TableRouteValue};
use self::tombstone::TombstoneManager;
use crate::error::{self, Result, SerdeJsonSnafu};
use crate::key::flow::flow_state::FlowStateValue;
use crate::key::node_address::NodeAddressValue;
use crate::key::table_route::TableRouteKey;
use crate::key::txn_helper::TxnOpGetResponseSet;
@@ -1263,8 +1262,7 @@ impl_metadata_value! {
FlowRouteValue,
TableFlowValue,
NodeAddressValue,
SchemaNameValue,
FlowStateValue
SchemaNameValue
}
impl_optional_metadata_value! {

View File

@@ -15,7 +15,6 @@
pub mod flow_info;
pub(crate) mod flow_name;
pub(crate) mod flow_route;
pub mod flow_state;
pub(crate) mod flownode_flow;
pub(crate) mod table_flow;
@@ -36,7 +35,6 @@ use crate::ensure_values;
use crate::error::{self, Result};
use crate::key::flow::flow_info::FlowInfoManager;
use crate::key::flow::flow_name::FlowNameManager;
use crate::key::flow::flow_state::FlowStateManager;
use crate::key::flow::flownode_flow::FlownodeFlowManager;
pub use crate::key::flow::table_flow::{TableFlowManager, TableFlowManagerRef};
use crate::key::txn_helper::TxnOpGetResponseSet;
@@ -104,8 +102,6 @@ pub struct FlowMetadataManager {
flownode_flow_manager: FlownodeFlowManager,
table_flow_manager: TableFlowManager,
flow_name_manager: FlowNameManager,
/// only metasrv have access to itself's memory backend, so for other case it should be None
flow_state_manager: Option<FlowStateManager>,
kv_backend: KvBackendRef,
}
@@ -118,7 +114,6 @@ impl FlowMetadataManager {
flow_name_manager: FlowNameManager::new(kv_backend.clone()),
flownode_flow_manager: FlownodeFlowManager::new(kv_backend.clone()),
table_flow_manager: TableFlowManager::new(kv_backend.clone()),
flow_state_manager: None,
kv_backend,
}
}
@@ -128,10 +123,6 @@ impl FlowMetadataManager {
&self.flow_name_manager
}
pub fn flow_state_manager(&self) -> Option<&FlowStateManager> {
self.flow_state_manager.as_ref()
}
/// Returns the [`FlowInfoManager`].
pub fn flow_info_manager(&self) -> &FlowInfoManager {
&self.flow_info_manager

View File

@@ -1,162 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::error::{self, Result};
use crate::key::flow::FlowScoped;
use crate::key::{FlowId, MetadataKey, MetadataValue};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
/// The entire FlowId to Flow Size's Map is stored directly in the value part of the key.
const FLOW_STATE_KEY: &str = "state";
/// The key of flow state.
#[derive(Debug, Clone, Copy, PartialEq)]
struct FlowStateKeyInner;
impl FlowStateKeyInner {
pub fn new() -> Self {
Self
}
}
impl<'a> MetadataKey<'a, FlowStateKeyInner> for FlowStateKeyInner {
fn to_bytes(&self) -> Vec<u8> {
FLOW_STATE_KEY.as_bytes().to_vec()
}
fn from_bytes(bytes: &'a [u8]) -> Result<FlowStateKeyInner> {
let key = std::str::from_utf8(bytes).map_err(|e| {
error::InvalidMetadataSnafu {
err_msg: format!(
"FlowInfoKeyInner '{}' is not a valid UTF8 string: {e}",
String::from_utf8_lossy(bytes)
),
}
.build()
})?;
if key != FLOW_STATE_KEY {
return Err(error::InvalidMetadataSnafu {
err_msg: format!("Invalid FlowStateKeyInner '{key}'"),
}
.build());
}
Ok(FlowStateKeyInner::new())
}
}
/// The key stores the state size of the flow.
///
/// The layout: `__flow/state`.
pub struct FlowStateKey(FlowScoped<FlowStateKeyInner>);
impl FlowStateKey {
/// Returns the [FlowStateKey].
pub fn new() -> FlowStateKey {
let inner = FlowStateKeyInner::new();
FlowStateKey(FlowScoped::new(inner))
}
}
impl Default for FlowStateKey {
fn default() -> Self {
Self::new()
}
}
impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey {
fn to_bytes(&self) -> Vec<u8> {
self.0.to_bytes()
}
fn from_bytes(bytes: &'a [u8]) -> Result<FlowStateKey> {
Ok(FlowStateKey(FlowScoped::<FlowStateKeyInner>::from_bytes(
bytes,
)?))
}
}
/// The value of flow state size
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FlowStateValue {
/// For each key, the bytes of the state in memory
pub state_size: BTreeMap<FlowId, usize>,
}
impl FlowStateValue {
pub fn new(state_size: BTreeMap<FlowId, usize>) -> Self {
Self { state_size }
}
}
pub type FlowStateManagerRef = Arc<FlowStateManager>;
/// The manager of [FlowStateKey]. Since state size changes frequently, we store it in memory.
///
/// This is only used in distributed mode. When meta-srv use heartbeat to update the flow stat report
/// and frontned use get to get the latest flow stat report.
pub struct FlowStateManager {
in_memory: KvBackendRef,
}
impl FlowStateManager {
pub fn new(in_memory: KvBackendRef) -> Self {
Self { in_memory }
}
pub async fn get(&self) -> Result<Option<FlowStateValue>> {
let key = FlowStateKey::new().to_bytes();
self.in_memory
.get(&key)
.await?
.map(|x| FlowStateValue::try_from_raw_value(&x.value))
.transpose()
}
pub async fn put(&self, value: FlowStateValue) -> Result<()> {
let key = FlowStateKey::new().to_bytes();
let value = value.try_as_raw_value()?;
let req = PutRequest::new().with_key(key).with_value(value);
self.in_memory.put(req).await?;
Ok(())
}
}
/// Flow's state report, send regularly through heartbeat message
#[derive(Debug, Clone)]
pub struct FlowStat {
/// For each key, the bytes of the state in memory
pub state_size: BTreeMap<u32, usize>,
}
impl From<FlowStateValue> for FlowStat {
fn from(value: FlowStateValue) -> Self {
Self {
state_size: value.state_size,
}
}
}
impl From<FlowStat> for FlowStateValue {
fn from(value: FlowStat) -> Self {
Self {
state_size: value.state_size,
}
}
}

View File

@@ -290,6 +290,28 @@ impl TableRouteManager {
}
}
/// Returns the [`PhysicalTableRouteValue`] in the first level,
/// It won't follow the [`LogicalTableRouteValue`] to find the next level [`PhysicalTableRouteValue`].
///
/// Returns an error if the first level value is not a [`PhysicalTableRouteValue`].
pub async fn try_get_physical_table_route(
&self,
table_id: TableId,
) -> Result<Option<PhysicalTableRouteValue>> {
match self.storage.get(table_id).await? {
Some(route) => {
ensure!(
route.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{route:?} is a non-physical TableRouteValue.")
}
);
Ok(Some(route.into_physical_table_route()))
}
None => Ok(None),
}
}
/// Returns the [TableId] recursively.
///
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
@@ -547,6 +569,37 @@ impl TableRouteStorage {
.transpose()
}
/// Returns the physical `DeserializedValueWithBytes<TableRouteValue>` recursively.
///
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
/// - the physical table(`logical_or_physical_table_id`) does not exist
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
pub async fn get_physical_table_route_with_raw_bytes(
&self,
logical_or_physical_table_id: TableId,
) -> Result<(TableId, DeserializedValueWithBytes<TableRouteValue>)> {
let table_route = self
.get_with_raw_bytes(logical_or_physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: logical_or_physical_table_id,
})?;
match table_route.get_inner_ref() {
TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)),
TableRouteValue::Logical(x) => {
let physical_table_id = x.physical_table_id();
let physical_table_route = self
.get_with_raw_bytes(physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
Ok((physical_table_id, physical_table_route))
}
}
}
/// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`.
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
let mut table_routes = self.batch_get_inner(table_ids).await?;

View File

@@ -36,7 +36,7 @@ pub mod postgres;
pub mod test;
pub mod txn;
pub type KvBackendRef<E = Error> = Arc<dyn KvBackend<Error = E> + Send + Sync>;
pub type KvBackendRef = Arc<dyn KvBackend<Error = Error> + Send + Sync>;
#[async_trait]
pub trait KvBackend: TxnService
@@ -161,9 +161,6 @@ where
Self::Error: ErrorExt,
{
fn reset(&self);
/// Upcast as `KvBackendRef`. Since https://github.com/rust-lang/rust/issues/65991 is not yet stable.
fn as_kv_backend_ref(self: Arc<Self>) -> KvBackendRef<Self::Error>;
}
pub type ResettableKvBackendRef<E = Error> = Arc<dyn ResettableKvBackend<Error = E> + Send + Sync>;
pub type ResettableKvBackendRef = Arc<dyn ResettableKvBackend<Error = Error> + Send + Sync>;

View File

@@ -15,7 +15,6 @@
use std::any::Any;
use std::sync::Arc;
use common_telemetry::info;
use etcd_client::{
Client, DeleteOptions, GetOptions, PutOptions, Txn, TxnOp, TxnOpResponse, TxnResponse,
};
@@ -56,7 +55,6 @@ impl EtcdStore {
}
pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef {
info!("Connected to etcd");
Arc::new(Self {
client,
max_txn_ops,

View File

@@ -16,13 +16,13 @@ use std::any::Any;
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::marker::PhantomData;
use std::sync::{Arc, RwLock};
use std::sync::RwLock;
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use serde::Serializer;
use super::{KvBackendRef, ResettableKvBackend};
use super::ResettableKvBackend;
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
use crate::kv_backend::{KvBackend, TxnService};
use crate::metrics::METRIC_META_TXN_REQUEST;
@@ -311,10 +311,6 @@ impl<T: ErrorExt + Send + Sync + 'static> ResettableKvBackend for MemoryKvBacken
fn reset(&self) {
self.clear();
}
fn as_kv_backend_ref(self: Arc<Self>) -> KvBackendRef<T> {
self
}
}
#[cfg(test)]

View File

@@ -89,6 +89,39 @@ pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap<Re
.collect::<HashMap<_, _>>()
}
/// Returns the HashMap<[RegionNumber], HashSet<DatanodeId>>
pub fn convert_to_region_peer_map(
region_routes: &[RegionRoute],
) -> HashMap<RegionNumber, HashSet<u64>> {
region_routes
.iter()
.map(|x| {
let set = x
.follower_peers
.iter()
.map(|p| p.id)
.chain(x.leader_peer.as_ref().map(|p| p.id))
.collect::<HashSet<_>>();
(x.region.id.region_number(), set)
})
.collect::<HashMap<_, _>>()
}
/// Returns the HashMap<[RegionNumber], [LeaderState]>;
pub fn convert_to_region_leader_state_map(
region_routes: &[RegionRoute],
) -> HashMap<RegionNumber, LeaderState> {
region_routes
.iter()
.filter_map(|x| {
x.leader_state
.as_ref()
.map(|state| (x.region.id.region_number(), *state))
})
.collect::<HashMap<_, _>>()
}
pub fn find_region_leader(
region_routes: &[RegionRoute],
region_number: RegionNumber,
@@ -114,6 +147,19 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve
.collect()
}
pub fn extract_all_peers(region_routes: &[RegionRoute]) -> Vec<Peer> {
let mut peers = region_routes
.iter()
.flat_map(|x| x.leader_peer.iter().chain(x.follower_peers.iter()))
.collect::<HashSet<_>>()
.into_iter()
.cloned()
.collect::<Vec<_>>();
peers.sort_by_key(|x| x.id);
peers
}
impl TableRoute {
pub fn new(table: Table, region_routes: Vec<RegionRoute>) -> Self {
let region_leaders = region_routes

View File

@@ -26,6 +26,7 @@ use std::sync::Arc;
use adapter::RecordBatchMetrics;
use arc_swap::ArcSwapOption;
use datafusion::physical_plan::memory::MemoryStream;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::arrow::compute::SortOptions;
pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
@@ -169,6 +170,19 @@ impl RecordBatches {
index: 0,
})
}
pub fn into_df_stream(self) -> DfSendableRecordBatchStream {
let df_record_batches = self
.batches
.into_iter()
.map(|batch| batch.into_df_record_batch())
.collect();
// unwrap safety: `MemoryStream::try_new` won't fail
Box::pin(
MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None)
.unwrap(),
)
}
}
impl IntoIterator for RecordBatches {

View File

@@ -35,6 +35,8 @@ serde_json.workspace = true
snafu.workspace = true
tempfile.workspace = true
tokio.workspace = true
tokio-metrics = "0.3"
tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" }
tokio-util.workspace = true
[dev-dependencies]

View File

@@ -29,6 +29,10 @@ pub fn format_utc_datetime(utc: &NaiveDateTime, pattern: &str) -> String {
}
}
pub fn system_datetime_to_utc(local: &NaiveDateTime) -> LocalResult<NaiveDateTime> {
datetime_to_utc(local, get_timezone(None))
}
/// Cast a [`NaiveDateTime`] with the given timezone.
pub fn datetime_to_utc(
datetime: &NaiveDateTime,

View File

@@ -370,51 +370,6 @@ impl ConcreteDataType {
_ => None,
}
}
/// Return the datatype name in postgres type system
pub fn postgres_datatype_name(&self) -> &'static str {
match self {
&ConcreteDataType::Null(_) => "UNKNOWN",
&ConcreteDataType::Boolean(_) => "BOOL",
&ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
&ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
&ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
&ConcreteDataType::Float32(_) => "FLOAT4",
&ConcreteDataType::Float64(_) => "FLOAT8",
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
&ConcreteDataType::String(_) => "VARCHAR",
&ConcreteDataType::Date(_) => "DATE",
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
&ConcreteDataType::Time(_) => "TIME",
&ConcreteDataType::Interval(_) => "INTERVAL",
&ConcreteDataType::Decimal128(_) => "NUMERIC",
&ConcreteDataType::Json(_) => "JSON",
ConcreteDataType::List(list) => match list.item_type() {
&ConcreteDataType::Null(_) => "UNKNOWN",
&ConcreteDataType::Boolean(_) => "_BOOL",
&ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
&ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
&ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
&ConcreteDataType::Float32(_) => "_FLOAT4",
&ConcreteDataType::Float64(_) => "_FLOAT8",
&ConcreteDataType::Binary(_) => "_BYTEA",
&ConcreteDataType::String(_) => "_VARCHAR",
&ConcreteDataType::Date(_) => "_DATE",
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
&ConcreteDataType::Time(_) => "_TIME",
&ConcreteDataType::Interval(_) => "_INTERVAL",
&ConcreteDataType::Decimal128(_) => "_NUMERIC",
&ConcreteDataType::Json(_) => "_JSON",
&ConcreteDataType::Duration(_)
| &ConcreteDataType::Dictionary(_)
| &ConcreteDataType::Vector(_)
| &ConcreteDataType::List(_) => "UNKNOWN",
},
&ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN",
}
}
}
impl From<&ConcreteDataType> for ConcreteDataType {

View File

@@ -232,12 +232,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid skipping index option: {}", msg))]
InvalidSkippingIndexOption {
msg: String,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for Error {
@@ -258,8 +252,7 @@ impl ErrorExt for Error {
| InvalidPrecisionOrScale { .. }
| InvalidJson { .. }
| InvalidVector { .. }
| InvalidFulltextOption { .. }
| InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments,
| InvalidFulltextOption { .. } => StatusCode::InvalidArguments,
ValueExceedsPrecision { .. }
| CastType { .. }

View File

@@ -28,11 +28,10 @@ use snafu::{ensure, ResultExt};
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
use crate::prelude::ConcreteDataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata,
COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
TIME_INDEX_KEY,
};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;

View File

@@ -39,20 +39,12 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Key used to store skip options in arrow field's metadata.
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
/// Keys used in fulltext options
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
/// Keys used in SKIPPING index options
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
pub const DEFAULT_GRANULARITY: u32 = 10240;
/// Schema of a column, used as an immutable struct.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
@@ -164,10 +156,6 @@ impl ColumnSchema {
.unwrap_or(false)
}
pub fn has_fulltext_index_key(&self) -> bool {
self.metadata.contains_key(FULLTEXT_KEY)
}
pub fn has_inverted_index_key(&self) -> bool {
self.metadata.contains_key(INVERTED_INDEX_KEY)
}
@@ -310,34 +298,6 @@ impl ColumnSchema {
);
Ok(())
}
/// Retrieves the skipping index options for the column.
pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
match self.metadata.get(SKIPPING_INDEX_KEY) {
None => Ok(None),
Some(json) => {
let options =
serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
Ok(Some(options))
}
}
}
pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
self.metadata.insert(
SKIPPING_INDEX_KEY.to_string(),
serde_json::to_string(&options).context(error::SerializeSnafu)?,
);
Ok(self)
}
pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
self.metadata.insert(
SKIPPING_INDEX_KEY.to_string(),
serde_json::to_string(options).context(error::SerializeSnafu)?,
);
Ok(())
}
}
/// Column extended type set in column schema's metadata.
@@ -535,76 +495,6 @@ impl fmt::Display for FulltextAnalyzer {
}
}
/// Skipping options for a column.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
/// The granularity of the skip index.
pub granularity: u32,
/// The type of the skip index.
#[serde(default)]
pub index_type: SkipIndexType,
}
impl fmt::Display for SkippingIndexOptions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "granularity={}", self.granularity)?;
write!(f, ", index_type={}", self.index_type)?;
Ok(())
}
}
/// Skip index types.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
pub enum SkipIndexType {
#[default]
BloomFilter,
}
impl fmt::Display for SkipIndexType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SkipIndexType::BloomFilter => write!(f, "BLOOM"),
}
}
}
impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
type Error = Error;
fn try_from(options: HashMap<String, String>) -> Result<Self> {
// Parse granularity with default value 1
let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
Some(value) => value.parse::<u32>().map_err(|_| {
error::InvalidSkippingIndexOptionSnafu {
msg: format!("Invalid granularity: {value}, expected: positive integer"),
}
.build()
})?,
None => DEFAULT_GRANULARITY,
};
// Parse index type with default value BloomFilter
let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
Some(typ) => match typ.to_ascii_uppercase().as_str() {
"BLOOM" => SkipIndexType::BloomFilter,
_ => {
return error::InvalidSkippingIndexOptionSnafu {
msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
}
.fail();
}
},
None => SkipIndexType::default(),
};
Ok(SkippingIndexOptions {
granularity,
index_type,
})
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;

View File

@@ -38,4 +38,5 @@ tokio.workspace = true
[dev-dependencies]
api.workspace = true
common-procedure-test.workspace = true
common-test-util.workspace = true

View File

@@ -40,8 +40,6 @@ datatypes.workspace = true
enum-as-inner = "0.6.0"
enum_dispatch = "0.3"
futures = "0.3"
get-size-derive2 = "0.1.2"
get-size2 = "0.1.2"
greptime-proto.workspace = true
# This fork of hydroflow is simply for keeping our dependency in our org, and pin the version
# otherwise it is the same with upstream repo
@@ -49,6 +47,7 @@ hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "m
itertools.workspace = true
lazy_static.workspace = true
meta-client.workspace = true
minstant = "0.1.7"
nom = "7.1.3"
num-traits = "0.2"
operator.workspace = true

View File

@@ -60,7 +60,6 @@ use crate::repr::{self, DiffRow, Row, BATCH_SIZE};
mod flownode_impl;
mod parse_expr;
mod stat;
#[cfg(test)]
mod tests;
mod util;
@@ -70,7 +69,6 @@ pub(crate) mod node_context;
mod table_source;
use crate::error::Error;
use crate::utils::StateReportHandler;
use crate::FrontendInvoker;
// `GREPTIME_TIMESTAMP` is not used to distinguish when table is created automatically by flow
@@ -139,8 +137,6 @@ pub struct FlowWorkerManager {
///
/// So that a series of events like `inserts -> flush` can be handled correctly
flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>,
}
/// Building FlownodeManager
@@ -174,15 +170,9 @@ impl FlowWorkerManager {
tick_manager,
node_id,
flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None),
}
}
pub async fn with_state_report_handler(self, handler: StateReportHandler) -> Self {
*self.state_report_handler.write().await = Some(handler);
self
}
/// Create a flownode manager with one worker
pub fn new_with_worker<'s>(
node_id: Option<u32>,
@@ -216,6 +206,28 @@ impl DiffRequest {
}
}
/// Iterate through the diff rows and form contiguous requests with the same diff type
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
let mut reqs = Vec::new();
for (row, ts, diff) in rows {
let last = reqs.last_mut();
match (last, diff) {
(Some(DiffRequest::Insert(rows)), 1) => {
rows.push((row, ts));
}
(Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
(Some(DiffRequest::Delete(rows)), -1) => {
rows.push((row, ts));
}
(Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
(None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
(None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
_ => {}
}
}
reqs
}
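// Editor's sketch (not part of this change): how `diff_row_to_request` groups
// consecutive updates of the same sign — two inserts followed by a delete
// become one `Insert` request and one `Delete` request.
#[cfg(test)]
mod diff_row_to_request_example {
    use super::*;
    use crate::repr::Row;

    #[test]
    fn groups_consecutive_diffs() {
        let rows = vec![
            (Row::new(vec![1i64.into()]), 0, 1),
            (Row::new(vec![2i64.into()]), 1, 1),
            (Row::new(vec![1i64.into()]), 2, -1),
        ];
        let reqs = diff_row_to_request(rows);
        assert_eq!(reqs.len(), 2);
        assert!(matches!(&reqs[0], DiffRequest::Insert(rows) if rows.len() == 2));
        assert!(matches!(&reqs[1], DiffRequest::Delete(rows) if rows.len() == 1));
    }
}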
pub fn batches_to_rows_req(batches: Vec<Batch>) -> Result<Vec<DiffRequest>, Error> {
let mut reqs = Vec::new();
for batch in batches {
@@ -510,27 +522,6 @@ impl FlowWorkerManager {
/// Flow Runtime related methods
impl FlowWorkerManager {
/// Start the state report handler, which receives a sender from `HeartbeatTask` and sends the state size report back
///
/// If the heartbeat task is shut down, this future exits too
async fn start_state_report_handler(self: Arc<Self>) -> Option<JoinHandle<()>> {
let state_report_handler = self.state_report_handler.write().await.take();
if let Some(mut handler) = state_report_handler {
let zelf = self.clone();
let handler = common_runtime::spawn_global(async move {
while let Some(ret_handler) = handler.recv().await {
let state_report = zelf.gen_state_report().await;
ret_handler.send(state_report).unwrap_or_else(|err| {
common_telemetry::error!(err; "Send state size report error");
});
}
});
Some(handler)
} else {
None
}
}
/// run in common_runtime background runtime
pub fn run_background(
self: Arc<Self>,
@@ -538,7 +529,6 @@ impl FlowWorkerManager {
) -> JoinHandle<()> {
info!("Starting flownode manager's background task");
common_runtime::spawn_global(async move {
let _state_report_handler = self.clone().start_state_report_handler().await;
self.run(shutdown).await;
})
}
@@ -565,8 +555,6 @@ impl FlowWorkerManager {
let default_interval = Duration::from_secs(1);
let mut avg_spd = 0; // rows/sec
let mut since_last_run = tokio::time::Instant::now();
let run_per_trace = 10;
let mut run_cnt = 0;
loop {
// TODO(discord9): only run when new inputs arrive or scheduled to
let row_cnt = self.run_available(true).await.unwrap_or_else(|err| {
@@ -609,19 +597,10 @@ impl FlowWorkerManager {
} else {
(9 * avg_spd + cur_spd) / 10
};
trace!("avg_spd={} r/s, cur_spd={} r/s", avg_spd, cur_spd);
let new_wait = BATCH_SIZE * 1000 / avg_spd.max(1); //in ms
let new_wait = Duration::from_millis(new_wait as u64).min(default_interval);
// print a trace every `run_per_trace` runs so we can see if something is wrong
// without getting flooded with trace logs
if run_cnt >= run_per_trace {
trace!("avg_spd={} r/s, cur_spd={} r/s", avg_spd, cur_spd);
trace!("Wait for {} ms, row_cnt={}", new_wait.as_millis(), row_cnt);
run_cnt = 0;
} else {
run_cnt += 1;
}
trace!("Wait for {} ms, row_cnt={}", new_wait.as_millis(), row_cnt);
METRIC_FLOW_RUN_INTERVAL_MS.set(new_wait.as_millis() as i64);
since_last_run = tokio::time::Instant::now();
tokio::time::sleep(new_wait).await;
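// Editor's sketch (not part of this change) of the pacing math above: an
// exponential moving average of rows/sec decides how long to sleep before the
// next poll, capped at the one-second default interval. The 9:1 weighting and
// the BATCH_SIZE-based wait mirror the loop body above.
fn next_wait_sketch(prev_avg_spd: usize, cur_spd: usize, batch_size: usize) -> (usize, u64) {
    // Blend the previous average with the current sample.
    let avg_spd = (9 * prev_avg_spd + cur_spd) / 10;
    // Time (ms) to accumulate roughly one batch at the averaged speed,
    // never sleeping longer than the 1000 ms default interval.
    let wait_ms = (batch_size * 1000 / avg_spd.max(1)).min(1000) as u64;
    (avg_spd, wait_ms)
}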
@@ -681,18 +660,13 @@ impl FlowWorkerManager {
&self,
region_id: RegionId,
rows: Vec<DiffRow>,
batch_datatypes: &[ConcreteDataType],
) -> Result<(), Error> {
let rows_len = rows.len();
let table_id = region_id.table_id();
let _timer = METRIC_FLOW_INSERT_ELAPSED
.with_label_values(&[table_id.to_string().as_str()])
.start_timer();
self.node_context
.read()
.await
.send(table_id, rows, batch_datatypes)
.await?;
self.node_context.read().await.send(table_id, rows).await?;
trace!(
"Handling write request for table_id={} with {} rows",
table_id,

View File

@@ -28,7 +28,6 @@ use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use super::util::from_proto_to_data_type;
use crate::adapter::{CreateFlowArgs, FlowWorkerManager};
use crate::error::InternalSnafu;
use crate::metrics::METRIC_FLOW_TASK_COUNT;
@@ -207,17 +206,9 @@ impl Flownode for FlowWorkerManager {
})
.map(|r| (r, now, 1))
.collect_vec();
let batch_datatypes = insert_schema
.iter()
.map(from_proto_to_data_type)
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(to_meta_err)?;
self.handle_write_request(region_id.into(), rows, &batch_datatypes)
self.handle_write_request(region_id.into(), rows)
.await
.map_err(|err| {
common_telemetry::error!(err;"Failed to handle write request");
to_meta_err(err)
})?;
.map_err(to_meta_err)?;
}
Ok(Default::default())
}

View File

@@ -19,7 +19,6 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use common_telemetry::trace;
use datatypes::prelude::ConcreteDataType;
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use table::metadata::TableId;
@@ -132,11 +131,7 @@ impl SourceSender {
}
/// Returns the number of rows it actually sends (including what's in the buffer)
pub async fn send_rows(
&self,
rows: Vec<DiffRow>,
batch_datatypes: &[ConcreteDataType],
) -> Result<usize, Error> {
pub async fn send_rows(&self, rows: Vec<DiffRow>) -> Result<usize, Error> {
METRIC_FLOW_INPUT_BUF_SIZE.add(rows.len() as _);
while self.send_buf_row_cnt.load(Ordering::SeqCst) >= BATCH_SIZE * 4 {
tokio::task::yield_now().await;
@@ -144,11 +139,8 @@ impl SourceSender {
// the row count metric is approximate, so relaxed ordering is acceptable
self.send_buf_row_cnt
.fetch_add(rows.len(), Ordering::SeqCst);
let batch = Batch::try_from_rows_with_types(
rows.into_iter().map(|(row, _, _)| row).collect(),
batch_datatypes,
)
.context(EvalSnafu)?;
let batch = Batch::try_from_rows(rows.into_iter().map(|(row, _, _)| row).collect())
.context(EvalSnafu)?;
common_telemetry::trace!("Send one batch to worker with {} rows", batch.row_count());
self.send_buf_tx.send(batch).await.map_err(|e| {
crate::error::InternalSnafu {
@@ -165,19 +157,14 @@ impl FlownodeContext {
/// Returns the number of rows it actually sends (including what's in the buffer)
///
/// TODO(discord9): make this concurrent
pub async fn send(
&self,
table_id: TableId,
rows: Vec<DiffRow>,
batch_datatypes: &[ConcreteDataType],
) -> Result<usize, Error> {
pub async fn send(&self, table_id: TableId, rows: Vec<DiffRow>) -> Result<usize, Error> {
let sender = self
.source_sender
.get(&table_id)
.with_context(|| TableNotFoundSnafu {
name: table_id.to_string(),
})?;
sender.send_rows(rows, batch_datatypes).await
sender.send_rows(rows).await
}
/// flush all sender's buf

View File

@@ -1,40 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use common_meta::key::flow::flow_state::FlowStat;
use crate::FlowWorkerManager;
impl FlowWorkerManager {
pub async fn gen_state_report(&self) -> FlowStat {
let mut full_report = BTreeMap::new();
for worker in self.worker_handles.iter() {
let worker = worker.lock().await;
match worker.get_state_size().await {
Ok(state_size) => {
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)))
}
Err(err) => {
common_telemetry::error!(err; "Get flow stat size error");
}
}
}
FlowStat {
state_size: full_report,
}
}
}

View File

@@ -16,27 +16,12 @@ use api::helper::ColumnDataTypeWrapper;
use api::v1::column_def::options_from_column_schema;
use api::v1::{ColumnDataType, ColumnDataTypeExtension, SemanticType};
use common_error::ext::BoxedError;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use itertools::Itertools;
use snafu::ResultExt;
use crate::error::{Error, ExternalSnafu};
pub fn from_proto_to_data_type(
column_schema: &api::v1::ColumnSchema,
) -> Result<ConcreteDataType, Error> {
let wrapper = ColumnDataTypeWrapper::try_new(
column_schema.datatype,
column_schema.datatype_extension.clone(),
)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cdt = ConcreteDataType::from(wrapper);
Ok(cdt)
}
/// convert `ColumnSchema` lists to it's corresponding proto type
pub fn column_schemas_to_proto(
column_schemas: Vec<ColumnSchema>,

View File

@@ -197,21 +197,6 @@ impl WorkerHandle {
.fail()
}
}
pub async fn get_state_size(&self) -> Result<BTreeMap<FlowId, usize>, Error> {
let ret = self
.itc_client
.call_with_resp(Request::QueryStateSize)
.await?;
ret.into_query_state_size().map_err(|ret| {
InternalSnafu {
reason: format!(
"Flow Node/Worker itc failed, expect Response::QueryStateSize, found {ret:?}"
),
}
.build()
})
}
}
impl Drop for WorkerHandle {
@@ -376,13 +361,6 @@ impl<'s> Worker<'s> {
Some(Response::ContainTask { result: ret })
}
Request::Shutdown => return Err(()),
Request::QueryStateSize => {
let mut ret = BTreeMap::new();
for (flow_id, task_state) in self.task_states.iter() {
ret.insert(*flow_id, task_state.state.get_state_size());
}
Some(Response::QueryStateSize { result: ret })
}
};
Ok(ret)
}
@@ -413,7 +391,6 @@ pub enum Request {
flow_id: FlowId,
},
Shutdown,
QueryStateSize,
}
#[derive(Debug, EnumAsInner)]
@@ -429,10 +406,6 @@ enum Response {
result: bool,
},
RunAvail,
QueryStateSize {
/// each flow task's state size
result: BTreeMap<FlowId, usize>,
},
}
fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) {
@@ -450,12 +423,10 @@ struct InterThreadCallClient {
}
impl InterThreadCallClient {
/// call without response
fn call_no_resp(&self, req: Request) -> Result<(), Error> {
self.arg_sender.send((req, None)).map_err(from_send_error)
}
/// call with response
async fn call_with_resp(&self, req: Request) -> Result<Response, Error> {
let (tx, rx) = oneshot::channel();
self.arg_sender
@@ -556,7 +527,6 @@ mod test {
);
tx.send(Batch::empty()).unwrap();
handle.run_available(0, true).await.unwrap();
assert_eq!(handle.get_state_size().await.unwrap().len(), 1);
assert_eq!(sink_rx.recv().await.unwrap(), Batch::empty());
drop(handle);
worker_thread_handle.join().unwrap();

View File

@@ -30,7 +30,7 @@ use crate::compute::types::{Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu};
use crate::expr::{self, Batch, GlobalId, LocalId};
use crate::plan::{Plan, TypedPlan};
use crate::repr::{self, DiffRow, RelationType};
use crate::repr::{self, DiffRow};
mod map;
mod reduce;
@@ -124,10 +124,10 @@ impl Context<'_, '_> {
/// Like `render_plan` but in Batch Mode
pub fn render_plan_batch(&mut self, plan: TypedPlan) -> Result<CollectionBundle<Batch>, Error> {
match plan.plan {
Plan::Constant { rows } => Ok(self.render_constant_batch(rows, &plan.schema.typ)),
Plan::Constant { rows } => Ok(self.render_constant_batch(rows)),
Plan::Get { id } => self.get_batch_by_id(id),
Plan::Let { id, value, body } => self.eval_batch_let(id, value, body),
Plan::Mfp { input, mfp } => self.render_mfp_batch(input, mfp, &plan.schema.typ),
Plan::Mfp { input, mfp } => self.render_mfp_batch(input, mfp),
Plan::Reduce {
input,
key_val_plan,
@@ -172,11 +172,7 @@ impl Context<'_, '_> {
/// Render a `Constant`, taking all rows whose timestamp is not greater than the current time
/// This function is primarily used for testing
/// It always assumes the input is sorted by timestamp
pub fn render_constant_batch(
&mut self,
rows: Vec<DiffRow>,
output_type: &RelationType,
) -> CollectionBundle<Batch> {
pub fn render_constant_batch(&mut self, rows: Vec<DiffRow>) -> CollectionBundle<Batch> {
let (send_port, recv_port) = self.df.make_edge::<_, Toff<Batch>>("constant_batch");
let mut per_time: BTreeMap<repr::Timestamp, Vec<DiffRow>> = Default::default();
for (key, group) in &rows.into_iter().group_by(|(_row, ts, _diff)| *ts) {
@@ -189,8 +185,6 @@ impl Context<'_, '_> {
let scheduler_inner = scheduler.clone();
let err_collector = self.err_collector.clone();
let output_type = output_type.clone();
let subgraph_id =
self.df
.add_subgraph_source("ConstantBatch", send_port, move |_ctx, send_port| {
@@ -205,14 +199,7 @@ impl Context<'_, '_> {
not_great_than_now.into_iter().for_each(|(_ts, rows)| {
err_collector.run(|| {
let rows = rows.into_iter().map(|(row, _ts, _diff)| row).collect();
let batch = Batch::try_from_rows_with_types(
rows,
&output_type
.column_types
.iter()
.map(|ty| ty.scalar_type().clone())
.collect_vec(),
)?;
let batch = Batch::try_from_rows(rows)?;
send_port.give(vec![batch]);
Ok(())
});

View File

@@ -25,7 +25,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector
use crate::error::{Error, PlanSnafu};
use crate::expr::{Batch, EvalError, MapFilterProject, MfpPlan, ScalarExpr};
use crate::plan::TypedPlan;
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::ArrangeHandler;
impl Context<'_, '_> {
@@ -34,7 +34,6 @@ impl Context<'_, '_> {
&mut self,
input: Box<TypedPlan>,
mfp: MapFilterProject,
_output_type: &RelationType,
) -> Result<CollectionBundle<Batch>, Error> {
let input = self.render_plan_batch(*input)?;

View File

@@ -87,8 +87,6 @@ impl Context<'_, '_> {
})?;
let key_val_plan = key_val_plan.clone();
let output_type = output_type.clone();
let now = self.compute_state.current_time_ref();
let err_collector = self.err_collector.clone();
@@ -120,7 +118,6 @@ impl Context<'_, '_> {
src_data,
&key_val_plan,
&accum_plan,
&output_type,
SubgraphArg {
now,
err_collector: &err_collector,
@@ -357,7 +354,6 @@ fn reduce_batch_subgraph(
src_data: impl IntoIterator<Item = Batch>,
key_val_plan: &KeyValPlan,
accum_plan: &AccumulablePlan,
output_type: &RelationType,
SubgraphArg {
now,
err_collector,
@@ -539,13 +535,17 @@ fn reduce_batch_subgraph(
// this output part is not supposed to be resource intensive
// (each batch usually shouldn't produce that many output rows),
// so we can afford some costly operations here
let output_types = output_type
.column_types
.iter()
.map(|t| t.scalar_type.clone())
.collect_vec();
let output_types = all_output_dict.first_entry().map(|entry| {
entry
.key()
.iter()
.chain(entry.get().iter())
.map(|v| v.data_type())
.collect::<Vec<ConcreteDataType>>()
});
err_collector.run(|| {
if let Some(output_types) = output_types {
err_collector.run(|| {
let column_cnt = output_types.len();
let row_cnt = all_output_dict.len();
@@ -585,6 +585,7 @@ fn reduce_batch_subgraph(
Ok(())
});
}
}
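// Editor's illustration (not part of this change) of the type inference used
// above: when no declared output type is available, column types are recovered
// from the first accumulated key/value entry via `Value::data_type`.
#[cfg(test)]
mod infer_output_types_example {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::value::Value;

    #[test]
    fn infers_from_first_entry() {
        let key = [Value::from(1i64)];
        let vals = [Value::from(true)];
        let types: Vec<ConcreteDataType> = key
            .iter()
            .chain(vals.iter())
            .map(|v| v.data_type())
            .collect();
        assert_eq!(
            types,
            vec![
                ConcreteDataType::int64_datatype(),
                ConcreteDataType::boolean_datatype(),
            ]
        );
    }
}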
/// reduce subgraph, reduce the input data into a single row
@@ -1515,9 +1516,7 @@ mod test {
let mut ctx = harness_test_ctx(&mut df, &mut state);
let rows = vec![
(Row::new(vec![Value::Null]), -1, 1),
(Row::new(vec![1i64.into()]), 0, 1),
(Row::new(vec![Value::Null]), 1, 1),
(Row::new(vec![1i64.into()]), 1, 1),
(Row::new(vec![2i64.into()]), 2, 1),
(Row::new(vec![3i64.into()]), 3, 1),
(Row::new(vec![1i64.into()]), 4, 1),
@@ -1559,15 +1558,13 @@ mod test {
Box::new(input_plan.with_types(typ.into_unnamed())),
&key_val_plan,
&reduce_plan,
&RelationType::new(vec![ColumnType::new(CDT::int64_datatype(), true)]),
&RelationType::empty(),
)
.unwrap();
{
let now_inner = now.clone();
let expected = BTreeMap::<i64, Vec<i64>>::from([
(-1, vec![]),
(0, vec![1i64]),
(1, vec![1i64]),
(2, vec![3i64]),
(3, vec![6i64]),
@@ -1584,11 +1581,7 @@ mod test {
if let Some(expected) = expected.get(&now) {
let batch = expected.iter().map(|v| Value::from(*v)).collect_vec();
let batch = Batch::try_from_rows_with_types(
vec![batch.into()],
&[CDT::int64_datatype()],
)
.unwrap();
let batch = Batch::try_from_rows(vec![batch.into()]).unwrap();
assert_eq!(res.first(), Some(&batch));
}
});

View File

@@ -14,7 +14,7 @@
//! Source and Sink for the dataflow
use std::collections::BTreeMap;
use std::collections::{BTreeMap, VecDeque};
use common_telemetry::{debug, trace};
use hydroflow::scheduled::graph_ext::GraphExt;
@@ -28,7 +28,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::{Batch, EvalError};
use crate::repr::{DiffRow, Row};
use crate::repr::{DiffRow, Row, BROADCAST_CAP};
#[allow(clippy::mutable_key_type)]
impl Context<'_, '_> {
@@ -242,4 +242,44 @@ impl Context<'_, '_> {
},
);
}
/// Render a sink that sends updates to the broadcast channel, with an internal buffer in case the broadcast channel is full
pub fn render_sink(&mut self, bundle: CollectionBundle, sender: broadcast::Sender<DiffRow>) {
let CollectionBundle {
collection,
arranged: _,
} = bundle;
let mut buf = VecDeque::with_capacity(1000);
let schd = self.compute_state.get_scheduler();
let inner_schd = schd.clone();
let now = self.compute_state.current_time_ref();
let sink = self
.df
.add_subgraph_sink("Sink", collection.into_inner(), move |_ctx, recv| {
let data = recv.take_inner();
buf.extend(data.into_iter().flat_map(|i| i.into_iter()));
if sender.len() >= BROADCAST_CAP {
return;
} else {
while let Some(row) = buf.pop_front() {
// if the sender is full, stop sending
if sender.len() >= BROADCAST_CAP {
break;
}
// TODO(discord9): handling tokio broadcast error
let _ = sender.send(row);
}
}
// if buffer is not empty, schedule the next run at next tick
// so the buffer can be drained as soon as possible
if !buf.is_empty() {
inner_schd.schedule_at(*now.borrow() + 1);
}
});
schd.set_cur_subgraph(sink);
}
}
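// Editor's standalone sketch (not part of this change) of the buffering
// strategy above: rows are staged in a VecDeque and only forwarded while the
// broadcast channel still has spare capacity, so a slow receiver never blocks
// the dataflow. `cap` plays the role of BROADCAST_CAP.
fn drain_into_broadcast<T: Clone>(
    buf: &mut std::collections::VecDeque<T>,
    sender: &tokio::sync::broadcast::Sender<T>,
    cap: usize,
) {
    while let Some(item) = buf.pop_front() {
        // Stop once the channel is saturated; the rest stays buffered until the
        // next scheduled run.
        if sender.len() >= cap {
            buf.push_front(item);
            break;
        }
        // A send error only means there is no receiver right now; ignore it,
        // as the sink above does.
        let _ = sender.send(item);
    }
}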

View File

@@ -16,7 +16,6 @@ use std::cell::RefCell;
use std::collections::{BTreeMap, VecDeque};
use std::rc::Rc;
use get_size2::GetSize;
use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::SubgraphId;
@@ -110,10 +109,6 @@ impl DataflowState {
pub fn expire_after(&self) -> Option<Timestamp> {
self.expire_after
}
pub fn get_state_size(&self) -> usize {
self.arrange_used.iter().map(|x| x.read().get_size()).sum()
}
}
#[derive(Debug, Clone)]

View File

@@ -82,6 +82,22 @@ impl Arranged {
writer: self.writer.clone(),
})
}
/// Copy the full arrangement, including the future and the current updates.
///
/// Internally `Rc-ed` so it's cheap to copy
pub fn try_copy_full(&self) -> Option<Self> {
self.arrangement
.clone_full_arrange()
.map(|arrangement| Arranged {
arrangement,
readers: self.readers.clone(),
writer: self.writer.clone(),
})
}
pub fn add_reader(&self, id: SubgraphId) {
self.readers.borrow_mut().push(id)
}
}
/// A bundle of the various ways a collection can be represented.

View File

@@ -24,7 +24,7 @@ mod scalar;
mod signature;
use arrow::compute::FilterBuilder;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::prelude::DataType;
use datatypes::value::Value;
use datatypes::vectors::{BooleanVector, Helper, VectorRef};
pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn};
@@ -85,18 +85,16 @@ impl Default for Batch {
}
impl Batch {
/// Get a batch from rows, trying its best to determine the data types
pub fn try_from_rows_with_types(
rows: Vec<crate::repr::Row>,
batch_datatypes: &[ConcreteDataType],
) -> Result<Self, EvalError> {
pub fn try_from_rows(rows: Vec<crate::repr::Row>) -> Result<Self, EvalError> {
if rows.is_empty() {
return Ok(Self::empty());
}
let len = rows.len();
let mut builder = batch_datatypes
let mut builder = rows
.first()
.unwrap()
.iter()
.map(|ty| ty.create_mutable_vector(len))
.map(|v| v.data_type().create_mutable_vector(len))
.collect_vec();
for row in rows {
ensure!(
@@ -223,25 +221,10 @@ impl Batch {
return Ok(());
}
let dts = {
let max_len = self.batch.len().max(other.batch.len());
let mut dts = Vec::with_capacity(max_len);
for i in 0..max_len {
if let Some(v) = self.batch().get(i)
&& !v.data_type().is_null()
{
dts.push(v.data_type())
} else if let Some(v) = other.batch().get(i)
&& !v.data_type().is_null()
{
dts.push(v.data_type())
} else {
// both are null, so we will push null type
dts.push(datatypes::prelude::ConcreteDataType::null_datatype())
}
}
dts
let dts = if self.batch.is_empty() {
other.batch.iter().map(|v| v.data_type()).collect_vec()
} else {
self.batch.iter().map(|v| v.data_type()).collect_vec()
};
let batch_builders = dts

View File

@@ -21,6 +21,11 @@ use datafusion_common::DataFusionError;
use datatypes::data_type::ConcreteDataType;
use snafu::{Location, Snafu};
fn is_send_sync() {
fn check<T: Send + Sync>() {}
check::<EvalError>();
}
/// EvalError is about errors happen on columnar evaluation
///
/// TODO(discord9): add detailed location of column/operator(instead of code) to errors tp help identify related column

View File

@@ -359,6 +359,14 @@ impl MapFilterProject {
)
}
/// Convert the `MapFilterProject` into a staged evaluation plan.
///
/// The main behavior is to extract temporal predicates, which cannot be evaluated
/// using the standard machinery.
pub fn into_plan(self) -> Result<MfpPlan, Error> {
MfpPlan::create_from(self)
}
/// Lists input columns whose values are used in outputs.
///
/// It is entirely appropriate to determine the demand of an instance
@@ -594,6 +602,26 @@ impl SafeMfpPlan {
}
}
/// A version of `evaluate` which produces an iterator over `Value`
/// as output.
///
/// This version can be useful when one wants to capture the resulting
/// values without packing and then unpacking a row.
#[inline(always)]
pub fn evaluate_iter<'a>(
&'a self,
datums: &'a mut Vec<Value>,
) -> Result<Option<impl Iterator<Item = Value> + 'a>, EvalError> {
let passed_predicates = self.evaluate_inner(datums)?;
if !passed_predicates {
Ok(None)
} else {
Ok(Some(
self.mfp.projection.iter().map(move |i| datums[*i].clone()),
))
}
}
/// Populates `values` with `self.expressions` and tests `self.predicates`.
///
/// This does not apply `self.projection`, which is up to the calling method.
@@ -908,33 +936,20 @@ mod test {
.unwrap()
.unwrap();
assert_eq!(ret, Row::pack(vec![Value::from(false), Value::from(true)]));
let ty = [
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
];
// batch mode
let mut batch = Batch::try_from_rows_with_types(
vec![Row::from(vec![
Value::from(4),
Value::from(2),
Value::from(3),
])],
&ty,
)
let mut batch = Batch::try_from_rows(vec![Row::from(vec![
Value::from(4),
Value::from(2),
Value::from(3),
])])
.unwrap();
let ret = safe_mfp.eval_batch_into(&mut batch).unwrap();
assert_eq!(
ret,
Batch::try_from_rows_with_types(
vec![Row::from(vec![Value::from(false), Value::from(true)])],
&[
ConcreteDataType::boolean_datatype(),
ConcreteDataType::boolean_datatype(),
],
)
.unwrap()
Batch::try_from_rows(vec![Row::from(vec![Value::from(false), Value::from(true)])])
.unwrap()
);
}
@@ -969,15 +984,7 @@ mod test {
.unwrap();
assert_eq!(ret, None);
let input_type = [
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype(),
];
let mut input1_batch =
Batch::try_from_rows_with_types(vec![Row::new(input1)], &input_type).unwrap();
let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch).unwrap();
assert_eq!(
ret_batch,
@@ -995,8 +1002,7 @@ mod test {
.unwrap();
assert_eq!(ret, Some(Row::pack(vec![Value::from(11)])));
let mut input2_batch =
Batch::try_from_rows_with_types(vec![Row::new(input2)], &input_type).unwrap();
let mut input2_batch = Batch::try_from_rows(vec![Row::new(input2)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input2_batch).unwrap();
assert_eq!(
ret_batch,
@@ -1049,14 +1055,7 @@ mod test {
let ret = safe_mfp.evaluate_into(&mut input1.clone(), &mut Row::empty());
assert!(matches!(ret, Err(EvalError::InvalidArgument { .. })));
let input_type = [
ConcreteDataType::int64_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
];
let mut input1_batch =
Batch::try_from_rows_with_types(vec![Row::new(input1)], &input_type).unwrap();
let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch);
assert!(matches!(ret_batch, Err(EvalError::InvalidArgument { .. })));
@@ -1066,13 +1065,7 @@ mod test {
.unwrap();
assert_eq!(ret, Some(Row::new(input2.clone())));
let input_type = [
ConcreteDataType::int64_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
];
let input2_batch =
Batch::try_from_rows_with_types(vec![Row::new(input2)], &input_type).unwrap();
let input2_batch = Batch::try_from_rows(vec![Row::new(input2)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input2_batch.clone()).unwrap();
assert_eq!(ret_batch, input2_batch);
@@ -1082,8 +1075,7 @@ mod test {
.unwrap();
assert_eq!(ret, None);
let input3_batch =
Batch::try_from_rows_with_types(vec![Row::new(input3)], &input_type).unwrap();
let input3_batch = Batch::try_from_rows(vec![Row::new(input3)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input3_batch.clone()).unwrap();
assert_eq!(
ret_batch,
@@ -1119,13 +1111,7 @@ mod test {
let ret = safe_mfp.evaluate_into(&mut input1.clone(), &mut Row::empty());
assert_eq!(ret.unwrap(), Some(Row::new(vec![Value::from(false)])));
let input_type = [
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype(),
];
let mut input1_batch =
Batch::try_from_rows_with_types(vec![Row::new(input1)], &input_type).unwrap();
let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap();
let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch).unwrap();
assert_eq!(

View File

@@ -24,7 +24,6 @@ use common_meta::heartbeat::handler::{
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_meta::key::flow::flow_state::FlowStat;
use common_telemetry::{debug, error, info, warn};
use greptime_proto::v1::meta::NodeInfo;
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
@@ -35,27 +34,8 @@ use tokio::sync::mpsc;
use tokio::time::Duration;
use crate::error::ExternalSnafu;
use crate::utils::SizeReportSender;
use crate::{Error, FlownodeOptions};
async fn query_flow_state(
query_stat_size: &Option<SizeReportSender>,
timeout: Duration,
) -> Option<FlowStat> {
if let Some(report_requester) = query_stat_size.as_ref() {
let ret = report_requester.query(timeout).await;
match ret {
Ok(latest) => Some(latest),
Err(err) => {
error!(err; "Failed to get query stat size");
None
}
}
} else {
None
}
}
/// The flownode heartbeat task which sending `[HeartbeatRequest]` to Metasrv periodically in background.
#[derive(Clone)]
pub struct HeartbeatTask {
@@ -67,14 +47,9 @@ pub struct HeartbeatTask {
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
start_time_ms: u64,
running: Arc<AtomicBool>,
query_stat_size: Option<SizeReportSender>,
}
impl HeartbeatTask {
pub fn with_query_stat_size(mut self, query_stat_size: SizeReportSender) -> Self {
self.query_stat_size = Some(query_stat_size);
self
}
pub fn new(
opts: &FlownodeOptions,
meta_client: Arc<MetaClient>,
@@ -90,7 +65,6 @@ impl HeartbeatTask {
resp_handler_executor,
start_time_ms: common_time::util::current_time_millis() as u64,
running: Arc::new(AtomicBool::new(false)),
query_stat_size: None,
}
}
@@ -138,7 +112,6 @@ impl HeartbeatTask {
message: Option<OutgoingMessage>,
peer: Option<Peer>,
start_time_ms: u64,
latest_report: &Option<FlowStat>,
) -> Option<HeartbeatRequest> {
let mailbox_message = match message.map(outgoing_message_to_mailbox_message) {
Some(Ok(message)) => Some(message),
@@ -148,22 +121,11 @@ impl HeartbeatTask {
}
None => None,
};
let flow_stat = latest_report
.as_ref()
.map(|report| {
report
.state_size
.iter()
.map(|(k, v)| (*k, *v as u64))
.collect()
})
.map(|f| api::v1::meta::FlowStat { flow_stat_size: f });
Some(HeartbeatRequest {
mailbox_message,
peer,
info: Self::build_node_info(start_time_ms),
flow_stat,
..Default::default()
})
}
@@ -189,27 +151,24 @@ impl HeartbeatTask {
addr: self.peer_addr.clone(),
});
let query_stat_size = self.query_stat_size.clone();
common_runtime::spawn_hb(async move {
// note that using an interval causes the first heartbeat
// to be sent immediately
let mut interval = tokio::time::interval(report_interval);
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
let mut latest_report = None;
loop {
let req = tokio::select! {
message = outgoing_rx.recv() => {
if let Some(message) = message {
Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms, &latest_report)
Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms)
} else {
// Receiving None means the Sender was dropped, so break out of the loop
break
}
}
_ = interval.tick() => {
Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms, &latest_report)
Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms)
}
};
@@ -221,10 +180,6 @@ impl HeartbeatTask {
debug!("Send a heartbeat request to metasrv, content: {:?}", req);
}
}
// after sending heartbeat, try to get the latest report
// TODO(discord9): consider a better place to update the size report
// set the timeout to half of the report interval so that it wouldn't delay heartbeat if something went horribly wrong
latest_report = query_flow_state(&query_stat_size, report_interval / 2).await;
}
});
}

View File

@@ -18,8 +18,10 @@
mod join;
mod reduce;
use std::collections::BTreeSet;
use crate::error::Error;
use crate::expr::{Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr};
use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr};
use crate::plan::join::JoinPlan;
pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan};
use crate::repr::{DiffRow, RelationDesc};
@@ -184,6 +186,48 @@ pub enum Plan {
},
}
impl Plan {
/// Find all the collections used in the plan
pub fn find_used_collection(&self) -> BTreeSet<GlobalId> {
fn recur_find_use(plan: &Plan, used: &mut BTreeSet<GlobalId>) {
match plan {
Plan::Get { id } => {
match id {
Id::Local(_) => (),
Id::Global(g) => {
used.insert(*g);
}
};
}
Plan::Let { value, body, .. } => {
recur_find_use(&value.plan, used);
recur_find_use(&body.plan, used);
}
Plan::Mfp { input, .. } => {
recur_find_use(&input.plan, used);
}
Plan::Reduce { input, .. } => {
recur_find_use(&input.plan, used);
}
Plan::Join { inputs, .. } => {
for input in inputs {
recur_find_use(&input.plan, used);
}
}
Plan::Union { inputs, .. } => {
for input in inputs {
recur_find_use(&input.plan, used);
}
}
_ => {}
}
}
let mut ret = Default::default();
recur_find_use(self, &mut ret);
ret
}
}
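// Hypothetical caller sketch (editor's addition, not part of this change):
// before rendering, collect every global collection a plan reads so the
// required sources can be validated up front. Relies only on
// `find_used_collection` above.
fn plan_sources_available(plan: &Plan, existing: &BTreeSet<GlobalId>) -> bool {
    plan.find_used_collection().is_subset(existing)
}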
impl Plan {
pub fn with_types(self, schema: RelationDesc) -> TypedPlan {
TypedPlan { schema, plan: self }

View File

@@ -22,14 +22,12 @@ use api::v1::Row as ProtoRow;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::cast;
use datatypes::value::Value;
use get_size2::GetSize;
use itertools::Itertools;
pub(crate) use relation::{ColumnType, Key, RelationDesc, RelationType};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::expr::error::{CastValueSnafu, EvalError, InvalidArgumentSnafu};
use crate::utils::get_value_heap_size;
/// System-wide Record count difference type. Useful for capture data change
///
@@ -107,12 +105,6 @@ pub struct Row {
pub inner: Vec<Value>,
}
impl GetSize for Row {
fn get_heap_size(&self) -> usize {
self.inner.iter().map(get_value_heap_size).sum()
}
}
impl Row {
/// Create an empty row
pub fn empty() -> Self {

View File

@@ -46,6 +46,14 @@ impl Key {
self.column_indices.push(col);
}
/// Add columns to Key
pub fn add_cols<I>(&mut self, cols: I)
where
I: IntoIterator<Item = usize>,
{
self.column_indices.extend(cols);
}
/// Remove a column from Key
pub fn remove_col(&mut self, col: usize) {
self.column_indices.retain(|&r| r != col);

View File

@@ -55,7 +55,6 @@ use crate::error::{
};
use crate::heartbeat::HeartbeatTask;
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions};
pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER";
@@ -237,8 +236,6 @@ pub struct FlownodeBuilder {
catalog_manager: CatalogManagerRef,
flow_metadata_manager: FlowMetadataManagerRef,
heartbeat_task: Option<HeartbeatTask>,
/// receive a oneshot sender to send state size report
state_report_handler: Option<StateReportHandler>,
}
impl FlownodeBuilder {
@@ -257,20 +254,17 @@ impl FlownodeBuilder {
catalog_manager,
flow_metadata_manager,
heartbeat_task: None,
state_report_handler: None,
}
}
pub fn with_heartbeat_task(self, heartbeat_task: HeartbeatTask) -> Self {
let (sender, receiver) = SizeReportSender::new();
Self {
heartbeat_task: Some(heartbeat_task.with_query_stat_size(sender)),
state_report_handler: Some(receiver),
heartbeat_task: Some(heartbeat_task),
..self
}
}
pub async fn build(mut self) -> Result<FlownodeInstance, Error> {
pub async fn build(self) -> Result<FlownodeInstance, Error> {
// TODO(discord9): does this query engine need those?
let query_engine_factory = QueryEngineFactory::new_with_plugins(
// query engine in flownode is only used for translate plan with resolved table source.
@@ -389,7 +383,7 @@ impl FlownodeBuilder {
/// build [`FlowWorkerManager`], note this doesn't take ownership of `self`,
/// nor does it actually start running the worker.
async fn build_manager(
&mut self,
&self,
query_engine: Arc<dyn QueryEngine>,
) -> Result<FlowWorkerManager, Error> {
let table_meta = self.table_meta.clone();
@@ -408,15 +402,12 @@ impl FlownodeBuilder {
info!("Flow Worker started in new thread");
worker.run();
});
let mut man = rx.await.map_err(|_e| {
let man = rx.await.map_err(|_e| {
UnexpectedSnafu {
reason: "sender is dropped, failed to create flow node manager",
}
.build()
})?;
if let Some(handler) = self.state_report_handler.take() {
man = man.with_state_report_handler(handler).await;
}
info!("Flow Node Manager started");
Ok(man)
}

View File

@@ -18,73 +18,16 @@ use std::collections::{BTreeMap, BTreeSet};
use std::ops::Bound;
use std::sync::Arc;
use common_meta::key::flow::flow_state::FlowStat;
use common_telemetry::trace;
use datatypes::value::Value;
use get_size2::GetSize;
use smallvec::{smallvec, SmallVec};
use tokio::sync::{mpsc, oneshot, RwLock};
use tokio::time::Instant;
use tokio::sync::RwLock;
use crate::error::InternalSnafu;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::{value_to_internal_ts, DiffRow, Duration, KeyValDiffRow, Row, Timestamp};
/// A batch of updates, arranged by key
pub type Batch = BTreeMap<Row, SmallVec<[DiffRow; 2]>>;
/// Get an estimate of the heap size of a value
pub fn get_value_heap_size(v: &Value) -> usize {
match v {
Value::Binary(bin) => bin.len(),
Value::String(s) => s.len(),
Value::List(list) => list.items().iter().map(get_value_heap_size).sum(),
_ => 0,
}
}
#[derive(Clone)]
pub struct SizeReportSender {
inner: mpsc::Sender<oneshot::Sender<FlowStat>>,
}
impl SizeReportSender {
pub fn new() -> (Self, StateReportHandler) {
let (tx, rx) = mpsc::channel(1);
let zelf = Self { inner: tx };
(zelf, rx)
}
/// Query the size report, timing out if no response arrives within the given duration
pub async fn query(&self, timeout: std::time::Duration) -> crate::Result<FlowStat> {
let (tx, rx) = oneshot::channel();
self.inner.send(tx).await.map_err(|_| {
InternalSnafu {
reason: "failed to send size report request due to receiver dropped",
}
.build()
})?;
let timeout = tokio::time::timeout(timeout, rx);
timeout
.await
.map_err(|_elapsed| {
InternalSnafu {
reason: "failed to receive size report after one second timeout",
}
.build()
})?
.map_err(|_| {
InternalSnafu {
reason: "failed to receive size report due to sender dropped",
}
.build()
})
}
}
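// Editor's generic sketch (not part of the removal above) of the
// request/response shape used here: the requester pushes a oneshot sender
// through an mpsc channel and awaits the reply with an overall timeout.
async fn request_with_timeout<T>(
    requests: &tokio::sync::mpsc::Sender<tokio::sync::oneshot::Sender<T>>,
    timeout: std::time::Duration,
) -> Option<T> {
    let (tx, rx) = tokio::sync::oneshot::channel();
    // The handler side (see `StateReportHandler` below) is expected to call
    // `tx.send(report)` once it has gathered the data.
    requests.send(tx).await.ok()?;
    tokio::time::timeout(timeout, rx).await.ok()?.ok()
}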
/// Handle the size report request, and send the report back
pub type StateReportHandler = mpsc::Receiver<oneshot::Sender<FlowStat>>;
/// A spine of batches, arranged by timestamp
/// TODO(discord9): consider internally indexing by key, value, and timestamp for faster lookups
pub type Spine = BTreeMap<Timestamp, Batch>;
@@ -106,24 +49,6 @@ pub struct KeyExpiryManager {
event_timestamp_from_row: Option<ScalarExpr>,
}
impl GetSize for KeyExpiryManager {
fn get_heap_size(&self) -> usize {
let row_size = if let Some(row_size) = &self
.event_ts_to_key
.first_key_value()
.map(|(_, v)| v.first().get_heap_size())
{
*row_size
} else {
0
};
self.event_ts_to_key
.values()
.map(|v| v.len() * row_size + std::mem::size_of::<i64>())
.sum::<usize>()
}
}
impl KeyExpiryManager {
pub fn new(
key_expiration_duration: Option<Duration>,
@@ -229,7 +154,7 @@ impl KeyExpiryManager {
///
/// Note the two-way arrow between the reduce operator and the arrangement: the reduce operator needs to query existing state
/// and also to update it.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
#[derive(Debug, Clone, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Arrangement {
/// A name or identifier for the arrangement which can be used for debugging or logging purposes.
/// This field is not critical to the functionality but aids in monitoring and management of arrangements.
@@ -271,61 +196,6 @@ pub struct Arrangement {
/// The time that the last compaction happened, also known as the current time.
last_compaction_time: Option<Timestamp>,
/// Estimated heap size of the arrangement.
estimated_size: usize,
last_size_update: Instant,
size_update_interval: tokio::time::Duration,
}
impl Arrangement {
fn compute_size(&self) -> usize {
self.spine
.values()
.map(|v| {
let per_entry_size = v
.first_key_value()
.map(|(k, v)| {
k.get_heap_size()
+ v.len() * v.first().map(|r| r.get_heap_size()).unwrap_or(0)
})
.unwrap_or(0);
std::mem::size_of::<i64>() + v.len() * per_entry_size
})
.sum::<usize>()
+ self.expire_state.get_heap_size()
+ self.name.get_heap_size()
}
fn update_and_fetch_size(&mut self) -> usize {
if self.last_size_update.elapsed() > self.size_update_interval {
self.estimated_size = self.compute_size();
self.last_size_update = Instant::now();
}
self.estimated_size
}
}
impl GetSize for Arrangement {
fn get_heap_size(&self) -> usize {
self.estimated_size
}
}
impl Default for Arrangement {
fn default() -> Self {
Self {
spine: Default::default(),
full_arrangement: false,
is_written: false,
expire_state: None,
last_compaction_time: None,
name: Vec::new(),
estimated_size: 0,
last_size_update: Instant::now(),
size_update_interval: tokio::time::Duration::from_secs(3),
}
}
}
impl Arrangement {
@@ -337,9 +207,6 @@ impl Arrangement {
expire_state: None,
last_compaction_time: None,
name,
estimated_size: 0,
last_size_update: Instant::now(),
size_update_interval: tokio::time::Duration::from_secs(3),
}
}
@@ -402,7 +269,6 @@ impl Arrangement {
// without changing the order of updates within same tick
key_updates.sort_by_key(|(_val, ts, _diff)| *ts);
}
self.update_and_fetch_size();
Ok(max_expired_by)
}
@@ -524,7 +390,6 @@ impl Arrangement {
// insert the compacted batch into spine with key being `now`
self.spine.insert(now, compacting_batch);
self.update_and_fetch_size();
Ok(max_expired_by)
}

View File

@@ -25,6 +25,7 @@ common-catalog.workspace = true
common-config.workspace = true
common-datasource.workspace = true
common-error.workspace = true
common-frontend.workspace = true
common-function.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
@@ -70,6 +71,7 @@ common-test-util.workspace = true
datanode.workspace = true
datatypes.workspace = true
futures = "0.3"
meta-srv = { workspace = true, features = ["mock"] }
serde_json.workspace = true
strfmt = "0.2"
tower.workspace = true

View File

@@ -17,7 +17,6 @@ common-error.workspace = true
common-macro.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
fastbloom = "0.8"
fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
@@ -27,7 +26,6 @@ prost.workspace = true
regex.workspace = true
regex-automata.workspace = true
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
tantivy = { version = "0.22", features = ["zstd-compression"] }
tantivy-jieba = "0.11.0"

View File

@@ -1,53 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
pub mod creator;
mod error;
pub type Bytes = Vec<u8>;
pub type BytesRef<'a> = &'a [u8];
/// The Meta information of the bloom filter stored in the file.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BloomFilterMeta {
/// The number of rows per segment.
pub rows_per_segment: usize,
/// The number of segments.
pub seg_count: usize,
/// The number of total rows.
pub row_count: usize,
/// The size of the bloom filter excluding the meta information.
pub bloom_filter_segments_size: usize,
/// Offset and size of bloom filters in the file.
pub bloom_filter_segments: Vec<BloomFilterSegmentLocation>,
}
/// The location of the bloom filter segment in the file.
#[derive(Debug, Serialize, Deserialize)]
pub struct BloomFilterSegmentLocation {
/// The offset of the bloom filter segment in the file.
pub offset: u64,
/// The size of the bloom filter segment in the file.
pub size: u64,
/// The number of elements in the bloom filter segment.
pub elem_count: usize,
}

View File

@@ -1,294 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use fastbloom::BloomFilter;
use futures::{AsyncWrite, AsyncWriteExt};
use snafu::ResultExt;
use super::error::{IoSnafu, SerdeJsonSnafu};
use crate::bloom_filter::error::Result;
use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes};
/// The seed used for the Bloom filter.
const SEED: u128 = 42;
/// The false positive rate of the Bloom filter.
const FALSE_POSITIVE_RATE: f64 = 0.01;
/// `BloomFilterCreator` is responsible for creating and managing bloom filters
/// for a set of elements. It divides the rows into segments and creates
/// a bloom filter for each segment.
///
/// # Format
///
/// The bloom filter creator writes the following format to the writer:
///
/// ```text
/// +--------------------+--------------------+-----+----------------------+----------------------+
/// | Bloom filter 0 | Bloom filter 1 | ... | BloomFilterMeta | Meta size |
/// +--------------------+--------------------+-----+----------------------+----------------------+
/// |<- bytes (size 0) ->|<- bytes (size 1) ->| ... |<- json (meta size) ->|<- u32 LE (4 bytes) ->|
/// ```
///
pub struct BloomFilterCreator {
/// The number of rows per segment set by the user.
rows_per_segment: usize,
/// Row count that added to the bloom filter so far.
accumulated_row_count: usize,
/// A set of distinct elements in the current segment.
cur_seg_distinct_elems: HashSet<Bytes>,
/// The memory usage of the current segment's distinct elements.
cur_seg_distinct_elems_mem_usage: usize,
/// Storage for finalized Bloom filters.
finalized_bloom_filters: FinalizedBloomFilterStorage,
}
impl BloomFilterCreator {
/// Creates a new `BloomFilterCreator` with the specified number of rows per segment.
///
/// # Panics
///
/// Panics if `rows_per_segment` is 0
pub fn new(rows_per_segment: usize) -> Self {
assert!(
rows_per_segment > 0,
"rows_per_segment must be greater than 0"
);
Self {
rows_per_segment,
accumulated_row_count: 0,
cur_seg_distinct_elems: HashSet::default(),
cur_seg_distinct_elems_mem_usage: 0,
finalized_bloom_filters: FinalizedBloomFilterStorage::default(),
}
}
/// Adds a row of elements to the bloom filter. If the number of accumulated rows
/// reaches `rows_per_segment`, it finalizes the current segment.
pub fn push_row_elems(&mut self, elems: impl IntoIterator<Item = Bytes>) {
self.accumulated_row_count += 1;
for elem in elems.into_iter() {
let len = elem.len();
let is_new = self.cur_seg_distinct_elems.insert(elem);
if is_new {
self.cur_seg_distinct_elems_mem_usage += len;
}
}
if self.accumulated_row_count % self.rows_per_segment == 0 {
self.finalize_segment();
}
}
/// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
if !self.cur_seg_distinct_elems.is_empty() {
self.finalize_segment();
}
let mut meta = BloomFilterMeta {
rows_per_segment: self.rows_per_segment,
seg_count: self.finalized_bloom_filters.len(),
row_count: self.accumulated_row_count,
..Default::default()
};
let mut buf = Vec::new();
for segment in self.finalized_bloom_filters.drain() {
let slice = segment.bloom_filter.as_slice();
buf.clear();
write_u64_slice(&mut buf, slice);
writer.write_all(&buf).await.context(IoSnafu)?;
let size = buf.len();
meta.bloom_filter_segments.push(BloomFilterSegmentLocation {
offset: meta.bloom_filter_segments_size as _,
size: size as _,
elem_count: segment.element_count,
});
meta.bloom_filter_segments_size += size;
}
let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?;
writer.write_all(&meta_bytes).await.context(IoSnafu)?;
let meta_size = meta_bytes.len() as u32;
writer
.write_all(&meta_size.to_le_bytes())
.await
.context(IoSnafu)?;
writer.flush().await.unwrap();
Ok(())
}
/// Returns the memory usage of the bloom filter being created.
pub fn memory_usage(&self) -> usize {
self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage()
}
fn finalize_segment(&mut self) {
let elem_count = self.cur_seg_distinct_elems.len();
self.finalized_bloom_filters
.add(self.cur_seg_distinct_elems.drain(), elem_count);
self.cur_seg_distinct_elems_mem_usage = 0;
}
}
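// Editor's reader-side sketch (not part of this file) matching the layout in
// the doc comment above: the last 4 bytes are the little-endian meta size, the
// JSON-encoded `BloomFilterMeta` sits immediately before them, and the bloom
// filter segments occupy the start of the blob at the offsets recorded in the
// meta.
fn parse_bloom_filter_meta(blob: &[u8]) -> Option<BloomFilterMeta> {
    if blob.len() < 4 {
        return None;
    }
    let meta_size_offset = blob.len() - 4;
    let meta_size =
        u32::from_le_bytes(blob[meta_size_offset..].try_into().ok()?) as usize;
    let meta_start = meta_size_offset.checked_sub(meta_size)?;
    serde_json::from_slice(&blob[meta_start..meta_size_offset]).ok()
}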
/// Storage for finalized Bloom filters.
///
/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage.
#[derive(Debug, Default)]
struct FinalizedBloomFilterStorage {
/// Bloom filters that are stored in memory.
in_memory: Vec<FinalizedBloomFilterSegment>,
}
impl FinalizedBloomFilterStorage {
fn memory_usage(&self) -> usize {
self.in_memory.iter().map(|s| s.size).sum()
}
/// Adds a new finalized Bloom filter to the storage.
///
/// TODO(zhongzc): Add support for flushing to disk.
fn add(&mut self, elems: impl IntoIterator<Item = Bytes>, elem_count: usize) {
let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
.seed(&SEED)
.expected_items(elem_count);
for elem in elems.into_iter() {
bf.insert(&elem);
}
let cbf = FinalizedBloomFilterSegment::new(bf, elem_count);
self.in_memory.push(cbf);
}
fn len(&self) -> usize {
self.in_memory.len()
}
fn drain(&mut self) -> impl Iterator<Item = FinalizedBloomFilterSegment> + '_ {
self.in_memory.drain(..)
}
}
/// A finalized Bloom filter segment.
#[derive(Debug)]
struct FinalizedBloomFilterSegment {
/// The underlying Bloom filter.
bloom_filter: BloomFilter,
/// The number of elements in the Bloom filter.
element_count: usize,
/// The occupied memory size of the Bloom filter.
size: usize,
}
impl FinalizedBloomFilterSegment {
fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self {
let memory_usage = std::mem::size_of_val(bloom_filter.as_slice());
Self {
bloom_filter,
element_count: elem_count,
size: memory_usage,
}
}
}
/// Writes a slice of `u64` to the buffer in little-endian order.
fn write_u64_slice(buf: &mut Vec<u8>, slice: &[u64]) {
buf.reserve(std::mem::size_of_val(slice));
for &x in slice {
buf.extend_from_slice(&x.to_le_bytes());
}
}
#[cfg(test)]
mod tests {
use futures::io::Cursor;
use super::*;
fn u64_vec_from_bytes(bytes: &[u8]) -> Vec<u64> {
bytes
.chunks_exact(std::mem::size_of::<u64>())
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
.collect()
}
#[tokio::test]
async fn test_bloom_filter_creator() {
let mut writer = Cursor::new(Vec::new());
let mut creator = BloomFilterCreator::new(2);
creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]);
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
assert!(creator.memory_usage() > 0);
creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]);
// Finalize the first segment
assert!(creator.cur_seg_distinct_elems_mem_usage == 0);
assert!(creator.memory_usage() > 0);
creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]);
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
assert!(creator.memory_usage() > 0);
creator.finish(&mut writer).await.unwrap();
let bytes = writer.into_inner();
let total_size = bytes.len();
let meta_size_offset = total_size - 4;
let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap());
let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4];
let meta: BloomFilterMeta = serde_json::from_slice(meta_bytes).unwrap();
assert_eq!(meta.rows_per_segment, 2);
assert_eq!(meta.seg_count, 2);
assert_eq!(meta.row_count, 3);
assert_eq!(
meta.bloom_filter_segments_size + meta_bytes.len() + 4,
total_size
);
let mut bfs = Vec::new();
for segment in meta.bloom_filter_segments {
let bloom_filter_bytes =
&bytes[segment.offset as usize..(segment.offset + segment.size) as usize];
let v = u64_vec_from_bytes(bloom_filter_bytes);
let bloom_filter = BloomFilter::from_vec(v)
.seed(&SEED)
.expected_items(segment.elem_count);
bfs.push(bloom_filter);
}
assert_eq!(bfs.len(), 2);
assert!(bfs[0].contains(&b"a"));
assert!(bfs[0].contains(&b"b"));
assert!(bfs[0].contains(&b"c"));
assert!(bfs[0].contains(&b"d"));
assert!(bfs[1].contains(&b"e"));
assert!(bfs[1].contains(&b"f"));
}
}

View File

@@ -1,66 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
#[derive(Snafu)]
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("IO error"))]
Io {
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to serde json"))]
SerdeJson {
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected,
External { source, .. } => source.status_code(),
}
}
fn as_any(&self) -> &dyn Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -16,7 +16,6 @@ use std::ops::Range;
use std::sync::Arc;
use async_trait::async_trait;
use bytes::Bytes;
use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::ResultExt;
@@ -36,7 +35,7 @@ pub trait InvertedIndexReader: Send {
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
/// Reads the bytes in the given ranges.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>>;
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>>;
/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;

View File

@@ -16,7 +16,6 @@ use std::ops::Range;
use std::sync::Arc;
use async_trait::async_trait;
use bytes::Bytes;
use common_base::range_read::RangeReader;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};
@@ -61,8 +60,9 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
Ok(buf.into())
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
self.source.read_vec(ranges).await.context(CommonIoSnafu)
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>> {
let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?;
Ok(bufs.into_iter().map(|buf| buf.into()).collect())
}
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {

View File

@@ -15,6 +15,5 @@
#![feature(iter_partition_in_place)]
#![feature(assert_matches)]
pub mod bloom_filter;
pub mod fulltext_index;
pub mod inverted_index;

View File

@@ -25,7 +25,6 @@ use std::sync::Arc;
use api::v1::meta::{ProcedureDetailResponse, Role};
use cluster::Client as ClusterClient;
pub use cluster::ClusterKvBackend;
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{
@@ -34,8 +33,6 @@ use common_meta::cluster::{
use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue, RegionStat};
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
use common_meta::error::{self as meta_error, ExternalSnafu, Result as MetaResult};
use common_meta::key::flow::flow_state::{FlowStat, FlowStateManager};
use common_meta::kv_backend::KvBackendRef;
use common_meta::range_stream::PaginationStream;
use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use common_meta::rpc::procedure::{
@@ -57,8 +54,7 @@ use store::Client as StoreClient;
pub use self::heartbeat::{HeartbeatSender, HeartbeatStream};
use crate::error::{
ConvertMetaRequestSnafu, ConvertMetaResponseSnafu, Error, GetFlowStatSnafu, NotStartedSnafu,
Result,
ConvertMetaRequestSnafu, ConvertMetaResponseSnafu, Error, NotStartedSnafu, Result,
};
pub type Id = (u64, u64);
@@ -351,15 +347,6 @@ fn decode_stats(kv: KeyValue) -> MetaResult<DatanodeStatValue> {
}
impl MetaClient {
pub async fn list_flow_stats(&self) -> Result<Option<FlowStat>> {
let cluster_backend = ClusterKvBackend::new(Arc::new(self.cluster_client()?));
let cluster_backend = Arc::new(cluster_backend) as KvBackendRef;
let flow_state_manager = FlowStateManager::new(cluster_backend);
let res = flow_state_manager.get().await.context(GetFlowStatSnafu)?;
Ok(res.map(|r| r.into()))
}
pub fn new(id: Id) -> Self {
Self {
id,

View File

@@ -40,8 +40,8 @@ use tonic::Status;
use crate::client::ask_leader::AskLeader;
use crate::client::{util, Id};
use crate::error::{
ConvertMetaResponseSnafu, CreateChannelSnafu, Error, IllegalGrpcClientStateSnafu,
ReadOnlyKvBackendSnafu, Result, RetryTimesExceededSnafu,
ConvertMetaResponseSnafu, CreateChannelSnafu, Error, IllegalGrpcClientStateSnafu, Result,
RetryTimesExceededSnafu,
};
#[derive(Clone, Debug)]
@@ -308,75 +308,3 @@ impl Inner {
.map(|res| (res.leader, res.followers))
}
}
/// A client for the cluster info. Read only and corresponding to
/// `in_memory` kvbackend in the meta-srv.
#[derive(Clone, Debug)]
pub struct ClusterKvBackend {
inner: Arc<Client>,
}
impl ClusterKvBackend {
pub fn new(client: Arc<Client>) -> Self {
Self { inner: client }
}
fn unimpl(&self) -> common_meta::error::Error {
let ret: common_meta::error::Result<()> = ReadOnlyKvBackendSnafu {
name: self.name().to_string(),
}
.fail()
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu);
ret.unwrap_err()
}
}
impl TxnService for ClusterKvBackend {
type Error = common_meta::error::Error;
}
#[async_trait::async_trait]
impl KvBackend for ClusterKvBackend {
fn name(&self) -> &str {
"ClusterKvBackend"
}
fn as_any(&self) -> &dyn Any {
self
}
async fn range(&self, req: RangeRequest) -> common_meta::error::Result<RangeResponse> {
self.inner
.range(req)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
async fn batch_get(&self, _: BatchGetRequest) -> common_meta::error::Result<BatchGetResponse> {
Err(self.unimpl())
}
async fn put(&self, _: PutRequest) -> common_meta::error::Result<PutResponse> {
Err(self.unimpl())
}
async fn batch_put(&self, _: BatchPutRequest) -> common_meta::error::Result<BatchPutResponse> {
Err(self.unimpl())
}
async fn delete_range(
&self,
_: DeleteRangeRequest,
) -> common_meta::error::Result<DeleteRangeResponse> {
Err(self.unimpl())
}
async fn batch_delete(
&self,
_: BatchDeleteRequest,
) -> common_meta::error::Result<BatchDeleteResponse> {
Err(self.unimpl())
}
}
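The block above is a read-only kv backend: range reads are forwarded to the inner cluster client, while every write method returns a dedicated read-only error. A minimal, self-contained sketch of the same shape; KvError and InnerClient are simplified stand-ins rather than common_meta's actual traits.

use std::collections::HashMap;

#[derive(Debug)]
enum KvError {
    ReadOnly { backend: &'static str },
}

// Hypothetical inner client that actually holds the data.
struct InnerClient {
    data: HashMap<String, String>,
}

struct ReadOnlyKvBackend {
    inner: InnerClient,
}

impl ReadOnlyKvBackend {
    fn name(&self) -> &'static str {
        "ReadOnlyKvBackend"
    }

    // Reads are forwarded to the inner client, mirroring `range` above.
    fn get(&self, key: &str) -> Result<Option<String>, KvError> {
        Ok(self.inner.data.get(key).cloned())
    }

    // Every write path is uniformly rejected, mirroring the `unimpl()` helper above.
    fn put(&self, _key: String, _value: String) -> Result<(), KvError> {
        Err(KvError::ReadOnly { backend: self.name() })
    }
}

fn main() {
    let backend = ReadOnlyKvBackend {
        inner: InnerClient {
            data: HashMap::from([("k".to_string(), "v".to_string())]),
        },
    };
    assert_eq!(backend.get("k").unwrap().as_deref(), Some("v"));
    match backend.put("k".into(), "x".into()) {
        Err(KvError::ReadOnly { backend }) => println!("write rejected by {backend}"),
        Ok(()) => unreachable!(),
    }
}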

View File

@@ -99,22 +99,8 @@ pub enum Error {
source: common_meta::error::Error,
},
#[snafu(display("Failed to get flow stat"))]
GetFlowStat {
#[snafu(implicit)]
location: Location,
source: common_meta::error::Error,
},
#[snafu(display("Retry exceeded max times({}), message: {}", times, msg))]
RetryTimesExceeded { times: usize, msg: String },
#[snafu(display("Trying to write to a read-only kv backend: {}", name))]
ReadOnlyKvBackend {
name: String,
#[snafu(implicit)]
location: Location,
},
}
#[allow(dead_code)]
@@ -134,15 +120,13 @@ impl ErrorExt for Error {
| Error::SendHeartbeat { .. }
| Error::CreateHeartbeatStream { .. }
| Error::CreateChannel { .. }
| Error::RetryTimesExceeded { .. }
| Error::ReadOnlyKvBackend { .. } => StatusCode::Internal,
| Error::RetryTimesExceeded { .. } => StatusCode::Internal,
Error::MetaServer { code, .. } => *code,
Error::InvalidResponseHeader { source, .. }
| Error::ConvertMetaRequest { source, .. }
| Error::ConvertMetaResponse { source, .. }
| Error::GetFlowStat { source, .. } => source.status_code(),
| Error::ConvertMetaResponse { source, .. } => source.status_code(),
}
}
}

View File

@@ -716,13 +716,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Flow state handler error"))]
FlowStateHandler {
#[snafu(implicit)]
location: Location,
source: common_meta::error::Error,
},
}
impl Error {
@@ -768,8 +761,7 @@ impl ErrorExt for Error {
| Error::Join { .. }
| Error::PeerUnavailable { .. }
| Error::ExceededDeadline { .. }
| Error::ChooseItems { .. }
| Error::FlowStateHandler { .. } => StatusCode::Internal,
| Error::ChooseItems { .. } => StatusCode::Internal,
Error::Unsupported { .. } => StatusCode::Unsupported,

View File

@@ -51,7 +51,6 @@ use tokio::sync::mpsc::Sender;
use tokio::sync::{oneshot, Notify, RwLock};
use crate::error::{self, DeserializeFromJsonSnafu, Result, UnexpectedInstructionReplySnafu};
use crate::handler::flow_state_handler::FlowStateHandler;
use crate::metasrv::Context;
use crate::metrics::{METRIC_META_HANDLER_EXECUTE, METRIC_META_HEARTBEAT_CONNECTION_NUM};
use crate::pubsub::PublisherRef;
@@ -65,7 +64,6 @@ pub mod collect_stats_handler;
pub mod extract_stat_handler;
pub mod failure_handler;
pub mod filter_inactive_region_stats;
pub mod flow_state_handler;
pub mod keep_lease_handler;
pub mod mailbox_handler;
pub mod on_leader_start_handler;
@@ -484,8 +482,6 @@ pub struct HeartbeatHandlerGroupBuilder {
/// based on the number of received heartbeats. When the number of heartbeats
/// reaches this factor, a flush operation is triggered.
flush_stats_factor: Option<usize>,
/// A simple handler for flow internal state report
flow_state_handler: Option<FlowStateHandler>,
/// The plugins.
plugins: Option<Plugins>,
@@ -503,18 +499,12 @@ impl HeartbeatHandlerGroupBuilder {
region_failure_handler: None,
region_lease_handler: None,
flush_stats_factor: None,
flow_state_handler: None,
plugins: None,
pushers,
handlers: vec![],
}
}
pub fn with_flow_state_handler(mut self, handler: Option<FlowStateHandler>) -> Self {
self.flow_state_handler = handler;
self
}
pub fn with_region_lease_handler(mut self, handler: Option<RegionLeaseHandler>) -> Self {
self.region_lease_handler = handler;
self
@@ -574,10 +564,6 @@ impl HeartbeatHandlerGroupBuilder {
}
self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor));
if let Some(flow_state_handler) = self.flow_state_handler.take() {
self.add_handler_last(flow_state_handler);
}
self
}

View File

@@ -1,58 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::meta::{FlowStat, HeartbeatRequest, Role};
use common_meta::key::flow::flow_state::{FlowStateManager, FlowStateValue};
use snafu::ResultExt;
use crate::error::{FlowStateHandlerSnafu, Result};
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
use crate::metasrv::Context;
pub struct FlowStateHandler {
flow_state_manager: FlowStateManager,
}
impl FlowStateHandler {
pub fn new(flow_state_manager: FlowStateManager) -> Self {
Self { flow_state_manager }
}
}
#[async_trait::async_trait]
impl HeartbeatHandler for FlowStateHandler {
fn is_acceptable(&self, role: Role) -> bool {
role == Role::Flownode
}
async fn handle(
&self,
req: &HeartbeatRequest,
_ctx: &mut Context,
_acc: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
if let Some(FlowStat { flow_stat_size }) = &req.flow_stat {
let state_size = flow_stat_size
.iter()
.map(|(k, v)| (*k, *v as usize))
.collect();
let value = FlowStateValue::new(state_size);
self.flow_state_manager
.put(value)
.await
.context(FlowStateHandlerSnafu)?;
}
Ok(HandleControl::Continue)
}
}
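The deleted handler above follows the metasrv heartbeat-handler pattern: a handler declares which node role it accepts, and only matching heartbeats reach its handle method, where the reported flow state sizes are converted and persisted. A minimal, self-contained sketch of that filtering; Role, HeartbeatRequest, and dispatch are simplified stand-ins, not the real metasrv types.

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum Role {
    Datanode,
    Flownode,
}

struct HeartbeatRequest {
    role: Role,
    // Hypothetical payload: reported state size per flow id.
    flow_stat: Option<HashMap<u32, u64>>,
}

trait HeartbeatHandler {
    fn is_acceptable(&self, role: Role) -> bool;
    fn handle(&self, req: &HeartbeatRequest);
}

struct FlowStateHandler;

impl HeartbeatHandler for FlowStateHandler {
    fn is_acceptable(&self, role: Role) -> bool {
        role == Role::Flownode
    }

    fn handle(&self, req: &HeartbeatRequest) {
        if let Some(stat) = &req.flow_stat {
            // Mirrors the deleted handler: convert the sizes and persist them.
            let state_size: HashMap<u32, usize> =
                stat.iter().map(|(k, v)| (*k, *v as usize)).collect();
            println!("persisting flow state sizes: {state_size:?}");
        }
    }
}

// Only handlers that accept the sender's role see the heartbeat.
fn dispatch(handlers: &[Box<dyn HeartbeatHandler>], req: &HeartbeatRequest) {
    for handler in handlers.iter().filter(|h| h.is_acceptable(req.role)) {
        handler.handle(req);
    }
}

fn main() {
    let handlers: Vec<Box<dyn HeartbeatHandler>> = vec![Box::new(FlowStateHandler)];

    let from_flownode = HeartbeatRequest {
        role: Role::Flownode,
        flow_stat: Some([(1u32, 128u64)].into_iter().collect()),
    };
    dispatch(&handlers, &from_flownode);

    // Filtered out: FlowStateHandler only accepts flownode heartbeats.
    let from_datanode = HeartbeatRequest {
        role: Role::Datanode,
        flow_stat: None,
    };
    dispatch(&handlers, &from_datanode);
}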

View File

@@ -204,6 +204,10 @@ impl Context {
pub fn reset_in_memory(&self) {
self.in_memory.reset();
}
pub fn reset_leader_cached_kv_backend(&self) {
self.leader_cached_kv_backend.reset();
}
}
/// The value of the leader. It is used to store the leader's address.

View File

@@ -26,7 +26,6 @@ use common_meta::ddl::{
};
use common_meta::ddl_manager::DdlManager;
use common_meta::distributed_time_constants;
use common_meta::key::flow::flow_state::FlowStateManager;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::maintenance::MaintenanceModeManager;
use common_meta::key::TableMetadataManager;
@@ -48,7 +47,6 @@ use crate::error::{self, Result};
use crate::flow_meta_alloc::FlowPeerAllocator;
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::handler::failure_handler::RegionFailureHandler;
use crate::handler::flow_state_handler::FlowStateHandler;
use crate::handler::region_lease_handler::RegionLeaseHandler;
use crate::handler::{HeartbeatHandlerGroupBuilder, HeartbeatMailbox, Pushers};
use crate::lease::MetaPeerLookupService;
@@ -230,7 +228,6 @@ impl MetasrvBuilder {
peer_allocator,
))
});
let flow_metadata_allocator = {
// for now flownode just use round-robin selector
let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode);
@@ -251,9 +248,6 @@ impl MetasrvBuilder {
peer_allocator,
))
};
let flow_state_handler =
FlowStateHandler::new(FlowStateManager::new(in_memory.clone().as_kv_backend_ref()));
let memory_region_keeper = Arc::new(MemoryRegionKeeper::default());
let node_manager = node_manager.unwrap_or_else(|| {
let datanode_client_channel_config = ChannelConfig::new()
@@ -356,7 +350,6 @@ impl MetasrvBuilder {
.with_region_failure_handler(region_failover_handler)
.with_region_lease_handler(Some(region_lease_handler))
.with_flush_stats_factor(Some(options.flush_stats_factor))
.with_flow_state_handler(Some(flow_state_handler))
.add_default_handlers()
}
};

View File

@@ -52,6 +52,11 @@ pub async fn mock_with_etcdstore(addr: &str) -> MockInfo {
mock(Default::default(), kv_backend, None, None, None).await
}
pub async fn mock_with_memstore_and_selector(selector: SelectorRef) -> MockInfo {
let kv_backend = Arc::new(MemoryKvBackend::new());
mock(Default::default(), kv_backend, Some(selector), None, None).await
}
pub async fn mock(
opts: MetasrvOptions,
kv_backend: KvBackendRef,

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod close_downgraded_region;
pub(crate) mod downgrade_leader_region;
pub(crate) mod manager;
pub(crate) mod migration_abort;
@@ -44,7 +43,6 @@ use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey};
use common_telemetry::info;
use manager::RegionMigrationProcedureGuard;
pub use manager::{
RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker,
@@ -93,9 +91,7 @@ impl PersistentContext {
let lock_key = vec![
CatalogLock::Read(&self.catalog).into(),
SchemaLock::read(&self.catalog, &self.schema).into(),
// The optimistic updating of table route is not working very well,
// so we need to use the write lock here.
TableLock::Write(region_id.table_id()).into(),
TableLock::Read(region_id.table_id()).into(),
RegionLock::Write(region_id).into(),
];
@@ -257,7 +253,7 @@ impl Context {
.await
.context(error::TableMetadataManagerSnafu)
.map_err(BoxedError::new)
.with_context(|_| error::RetryLaterWithSourceSnafu {
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableRoute: {table_id}"),
})?
.context(error::TableRouteNotFoundSnafu { table_id })?;
@@ -321,7 +317,7 @@ impl Context {
.await
.context(error::TableMetadataManagerSnafu)
.map_err(BoxedError::new)
.with_context(|_| error::RetryLaterWithSourceSnafu {
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get TableInfo: {table_id}"),
})?
.context(error::TableInfoNotFoundSnafu { table_id })?;
@@ -354,7 +350,7 @@ impl Context {
.await
.context(error::TableMetadataManagerSnafu)
.map_err(BoxedError::new)
.with_context(|_| error::RetryLaterWithSourceSnafu {
.context(error::RetryLaterWithSourceSnafu {
reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"),
})?
.context(error::DatanodeTableNotFoundSnafu {
@@ -368,6 +364,12 @@ impl Context {
Ok(datanode_value.as_ref().unwrap())
}
/// Removes the `table_info` of [VolatileContext], returns true if any.
pub fn remove_table_info_value(&mut self) -> bool {
let value = self.volatile_ctx.table_info.take();
value.is_some()
}
/// Returns the [RegionId].
pub fn region_id(&self) -> RegionId {
self.persistent_ctx.region_id
@@ -472,48 +474,6 @@ impl RegionMigrationProcedure {
_guard: guard,
})
}
async fn rollback_inner(&mut self) -> Result<()> {
let _timer = METRIC_META_REGION_MIGRATION_EXECUTE
.with_label_values(&["rollback"])
.start_timer();
let table_id = self.context.region_id().table_id();
let region_id = self.context.region_id();
self.context.remove_table_route_value();
let table_metadata_manager = self.context.table_metadata_manager.clone();
let table_route = self.context.get_table_route_value().await?;
// Safety: It must be a physical table route.
let downgraded = table_route
.region_routes()
.unwrap()
.iter()
.filter(|route| route.region.id == region_id)
.any(|route| route.is_leader_downgrading());
if downgraded {
info!("Rollbacking downgraded region leader table route, region: {region_id}");
table_metadata_manager
.update_leader_region_status(table_id, table_route, |route| {
if route.region.id == region_id {
Some(None)
} else {
None
}
})
.await
.context(error::TableMetadataManagerSnafu)
.map_err(BoxedError::new)
.with_context(|_| error::RetryLaterWithSourceSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"),
})?;
}
self.context.register_failure_detectors().await;
Ok(())
}
}
#[async_trait::async_trait]
@@ -522,16 +482,6 @@ impl Procedure for RegionMigrationProcedure {
Self::TYPE_NAME
}
async fn rollback(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<()> {
self.rollback_inner()
.await
.map_err(ProcedureError::external)
}
fn rollback_supported(&self) -> bool {
true
}
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;
@@ -757,12 +707,6 @@ mod tests {
Assertion::simple(assert_update_metadata_upgrade, assert_no_persist),
),
// UpdateMetadata::Upgrade
Step::next(
"Should be the close downgraded region",
None,
Assertion::simple(assert_close_downgraded_region, assert_no_persist),
),
// CloseDowngradedRegion
Step::next(
"Should be the region migration end",
None,
@@ -1133,12 +1077,6 @@ mod tests {
Assertion::simple(assert_update_metadata_upgrade, assert_no_persist),
),
// UpdateMetadata::Upgrade
Step::next(
"Should be the close downgraded region",
None,
Assertion::simple(assert_close_downgraded_region, assert_no_persist),
),
// CloseDowngradedRegion
Step::next(
"Should be the region migration end",
None,
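Several hunks above exchange .with_context(|_| …) and .context(…) on snafu results. The practical difference is evaluation time: context builds its selector (including any format!) on every call, while with_context defers that work until an error actually occurs. A small, self-contained sketch assuming the snafu crate; RetryLater and fallible are illustrative names, not the metasrv error types.

use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
#[snafu(display("Retry later: {reason}"))]
struct RetryLater {
    reason: String,
    source: std::io::Error,
}

fn fallible() -> Result<u32, std::io::Error> {
    Err(std::io::Error::new(std::io::ErrorKind::Other, "boom"))
}

fn eager(table_id: u32) -> Result<u32, RetryLater> {
    // `context` takes the selector by value, so this `format!` runs on every
    // call, even when the underlying operation would have succeeded.
    fallible().context(RetryLaterSnafu {
        reason: format!("Failed to get TableRoute: {table_id}"),
    })
}

fn lazy(table_id: u32) -> Result<u32, RetryLater> {
    // `with_context` takes a closure, so the `format!` only runs on the error path.
    fallible().with_context(|_| RetryLaterSnafu {
        reason: format!("Failed to get TableRoute: {table_id}"),
    })
}

fn main() {
    println!("{}", eager(1024).unwrap_err());
    println!("{}", lazy(1024).unwrap_err());
}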

View File

@@ -1,138 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::distributed_time_constants::MAILBOX_RTT_SECS;
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_meta::key::datanode_table::RegionInfo;
use common_meta::RegionIdent;
use common_procedure::Status;
use common_telemetry::{info, warn};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS);
#[derive(Debug, Serialize, Deserialize)]
pub struct CloseDowngradedRegion;
#[async_trait::async_trait]
#[typetag::serde]
impl State for CloseDowngradedRegion {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
if let Err(err) = self.close_downgraded_leader_region(ctx).await {
let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer;
let region_id = ctx.region_id();
warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode);
}
Ok((Box::new(RegionMigrationEnd), Status::done()))
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl CloseDowngradedRegion {
/// Builds close region instruction.
///
/// Abort(non-retry):
/// - Datanode Table is not found.
async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> {
let pc = &ctx.persistent_ctx;
let downgrade_leader_datanode_id = pc.from_peer.id;
let cluster_id = pc.cluster_id;
let table_id = pc.region_id.table_id();
let region_number = pc.region_id.region_number();
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?;
let RegionInfo { engine, .. } = datanode_table_value.region_info.clone();
Ok(Instruction::CloseRegion(RegionIdent {
cluster_id,
datanode_id: downgrade_leader_datanode_id,
table_id,
region_number,
engine,
}))
}
/// Closes the downgraded leader region.
async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> {
let close_instruction = self.build_close_region_instruction(ctx).await?;
let region_id = ctx.region_id();
let pc = &ctx.persistent_ctx;
let downgrade_leader_datanode = &pc.from_peer;
let msg = MailboxMessage::json_message(
&format!("Close downgraded region: {}", region_id),
&format!("Meta@{}", ctx.server_addr()),
&format!(
"Datanode-{}@{}",
downgrade_leader_datanode.id, downgrade_leader_datanode.addr
),
common_time::util::current_time_millis(),
&close_instruction,
)
.with_context(|_| error::SerializeToJsonSnafu {
input: close_instruction.to_string(),
})?;
let ch = Channel::Datanode(downgrade_leader_datanode.id);
let receiver = ctx
.mailbox
.send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT)
.await?;
match receiver.await? {
Ok(msg) => {
let reply = HeartbeatMailbox::json_reply(&msg)?;
info!(
"Received close downgraded leade region reply: {:?}, region: {}",
reply, region_id
);
let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else {
return error::UnexpectedInstructionReplySnafu {
mailbox_message: msg.to_string(),
reason: "expect close region reply",
}
.fail();
};
if result {
Ok(())
} else {
error::UnexpectedSnafu {
violated: format!(
"Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}",
downgrade_leader_datanode,
),
}
.fail()
}
}
Err(e) => Err(e),
}
}
}
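The deleted state above sends a close-region instruction through the mailbox and then insists on a CloseRegion reply, treating any other variant as unexpected. A minimal, self-contained sketch of that reply handling; SimpleReply and InstructionReply here are simplified stand-ins for common_meta's instruction types.

struct SimpleReply {
    result: bool,
    error: Option<String>,
}

enum InstructionReply {
    CloseRegion(SimpleReply),
    OpenRegion(SimpleReply),
}

fn handle_close_reply(reply: InstructionReply, region_id: u64) -> Result<(), String> {
    // Mirrors the `let … else` in the deleted code: anything other than a
    // CloseRegion reply is treated as an unexpected instruction reply.
    let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else {
        return Err("expect close region reply".to_string());
    };
    if result {
        Ok(())
    } else {
        Err(format!(
            "failed to close downgraded leader region {region_id}, error: {error:?}"
        ))
    }
}

fn main() {
    let ok = InstructionReply::CloseRegion(SimpleReply { result: true, error: None });
    assert!(handle_close_reply(ok, 42).is_ok());

    let unexpected = InstructionReply::OpenRegion(SimpleReply { result: true, error: None });
    assert!(handle_close_reply(unexpected, 42).is_err());
}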

View File

@@ -21,11 +21,11 @@ use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use super::migration_abort::RegionMigrationAbort;
use super::migration_end::RegionMigrationEnd;
use super::open_candidate_region::OpenCandidateRegion;
use super::update_metadata::UpdateMetadata;
use crate::error::{self, Result};
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion;
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{Context, State};
/// The behaviors:

View File

@@ -25,9 +25,9 @@ use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use super::update_metadata::UpdateMetadata;
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
@@ -145,10 +145,7 @@ impl OpenCandidateRegion {
match receiver.await? {
Ok(msg) => {
let reply = HeartbeatMailbox::json_reply(&msg)?;
info!(
"Received open region reply: {:?}, region: {}",
reply, region_id
);
info!("Received open region reply: {:?}", reply);
let InstructionReply::OpenRegion(SimpleReply { result, error }) = reply else {
return error::UnexpectedInstructionReplySnafu {
mailbox_message: msg.to_string(),

View File

@@ -44,21 +44,19 @@ use store_api::storage::RegionId;
use table::metadata::RawTableInfo;
use tokio::sync::mpsc::{Receiver, Sender};
use super::manager::RegionMigrationProcedureTracker;
use super::migration_abort::RegionMigrationAbort;
use super::upgrade_candidate_region::UpgradeCandidateRegion;
use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext};
use crate::cache_invalidator::MetasrvCacheInvalidator;
use crate::error::{self, Error, Result};
use crate::handler::{HeartbeatMailbox, Pusher, Pushers};
use crate::metasrv::MetasrvInfo;
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
use crate::procedure::region_migration::manager::RegionMigrationProcedureTracker;
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion;
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::upgrade_candidate_region::UpgradeCandidateRegion;
use crate::procedure::region_migration::{
Context, ContextFactory, DefaultContextFactory, PersistentContext, State, VolatileContext,
};
use crate::procedure::region_migration::PersistentContext;
use crate::service::mailbox::{Channel, MailboxRef};
pub type MockHeartbeatReceiver = Receiver<std::result::Result<HeartbeatResponse, tonic::Status>>;
@@ -571,14 +569,6 @@ pub(crate) fn assert_region_migration_end(next: &dyn State) {
let _ = next.as_any().downcast_ref::<RegionMigrationEnd>().unwrap();
}
/// Asserts the [State] should be [CloseDowngradedRegion].
pub(crate) fn assert_close_downgraded_region(next: &dyn State) {
let _ = next
.as_any()
.downcast_ref::<CloseDowngradedRegion>()
.unwrap();
}
/// Asserts the [State] should be [RegionMigrationAbort].
pub(crate) fn assert_region_migration_abort(next: &dyn State) {
let _ = next

View File

@@ -22,10 +22,10 @@ use common_procedure::Status;
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use super::migration_abort::RegionMigrationAbort;
use super::migration_end::RegionMigrationEnd;
use crate::error::Result;
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
use crate::procedure::region_migration::{Context, State};
#[derive(Debug, Serialize, Deserialize)]
@@ -58,7 +58,7 @@ impl State for UpdateMetadata {
if let Err(err) = ctx.invalidate_table_cache().await {
warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}");
};
Ok((Box::new(CloseDowngradedRegion), Status::executing(false)))
Ok((Box::new(RegionMigrationEnd), Status::done()))
}
UpdateMetadata::Rollback => {
self.rollback_downgraded_region(ctx).await?;
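The hunk above changes where UpdateMetadata::Upgrade hands off: one side schedules another executing state (CloseDowngradedRegion), the other returns the terminal state together with a done status. A minimal, self-contained sketch of that boxed-state transition; the State and Status types here are simplified stand-ins for common_procedure's API.

#[derive(PartialEq)]
enum Status {
    Executing,
    Done,
}

trait State {
    fn name(&self) -> &'static str;
    fn next(self: Box<Self>) -> (Box<dyn State>, Status);
}

struct UpdateMetadataUpgrade;
struct RegionMigrationEnd;

impl State for UpdateMetadataUpgrade {
    fn name(&self) -> &'static str {
        "UpdateMetadata::Upgrade"
    }

    fn next(self: Box<Self>) -> (Box<dyn State>, Status) {
        // Hand off directly to the terminal state and report `Done`
        // rather than another `Executing` step.
        let next: Box<dyn State> = Box::new(RegionMigrationEnd);
        (next, Status::Done)
    }
}

impl State for RegionMigrationEnd {
    fn name(&self) -> &'static str {
        "RegionMigrationEnd"
    }

    fn next(self: Box<Self>) -> (Box<dyn State>, Status) {
        let next: Box<dyn State> = self;
        (next, Status::Done)
    }
}

fn main() {
    let mut state: Box<dyn State> = Box::new(UpdateMetadataUpgrade);
    loop {
        println!("state: {}", state.name());
        let (next, status) = state.next();
        state = next;
        if status == Status::Done {
            break;
        }
    }
    println!("finished at: {}", state.name());
}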

View File

@@ -195,7 +195,7 @@ mod tests {
use store_api::storage::RegionId;
use crate::error::Error;
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::test_util::{self, TestingEnv};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{ContextFactory, PersistentContext, State};
@@ -443,7 +443,7 @@ mod tests {
}
#[tokio::test]
async fn test_next_close_downgraded_region_state() {
async fn test_next_migration_end_state() {
let mut state = Box::new(UpdateMetadata::Upgrade);
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
@@ -471,10 +471,7 @@ mod tests {
let (next, _) = state.next(&mut ctx).await.unwrap();
let _ = next
.as_any()
.downcast_ref::<CloseDowngradedRegion>()
.unwrap();
let _ = next.as_any().downcast_ref::<RegionMigrationEnd>().unwrap();
let table_route = table_metadata_manager
.table_route_manager()

View File

@@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use tokio::time::{sleep, Instant};
use super::update_metadata::UpdateMetadata;
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
@@ -155,7 +155,7 @@ impl UpgradeCandidateRegion {
exists,
error::UnexpectedSnafu {
violated: format!(
"Candidate region {} doesn't exist on datanode {:?}",
"Expected region {} doesn't exist on datanode {:?}",
region_id, candidate
)
}

Some files were not shown because too many files have changed in this diff.