Compare commits

...

10 Commits

Author SHA1 Message Date
shuiyisong
b2377d4b87 chore: update toolchain to 2025-05-19 (#6124)
* chore: update toolchain to 2025-05-19

* chore: update nix sha

* chore: rebase main and fix
2025-05-20 04:29:40 +00:00
yinheli
8d36ffb4e1 chore: enable github folder typo check and fix typos (#6128) 2025-05-20 04:20:07 +00:00
Yingwen
955ad644f7 ci: add pull requests permissions to semantic check job (#6130)
* ci: add pull requests permissions

* ci: reduce permissions
2025-05-20 03:33:33 +00:00
localhost
c2e3c3d398 chore: Add more data format support to the pipeline dryrun api. (#6115)
* chore: supporting more data type for pipeline dryrun API

* chore: add docs for parse_dryrun_data

* chore: fix by pr comment

* chore: add user-friendly error message

* chore: change EventPayloadResolver content_type field type from owner to ref

* Apply suggestions from code review

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-05-20 03:29:28 +00:00
Zhenchi
400229c384 feat: introduce index result cache (#6110)
* feat: introduce index result cache

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/inverted_index/applier/builder.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* optimize selector_len

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-05-20 01:45:42 +00:00
Ruihang Xia
cd9b6990bf feat: implement clamp_min and clamp_max (#6116)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-05-19 21:32:03 +00:00
Ruihang Xia
a56e6e04c2 chore: remove etcd from acknowledgement as not recommended (#6127)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-05-19 12:42:30 +00:00
Ning Sun
d324439014 ci: fix release job dependencies (#6125) 2025-05-19 11:48:57 +00:00
discord9
038acda7cd fix: flow update use proper update (#6108)
* fix: flow update use proper update

* refactor: per review

* fix: flow cache

* chore: per copilot review

* refactor: rm flow node id

* refactor: per review

* chore: per review

* refactor: per review

* chore: per review
2025-05-19 11:30:10 +00:00
shuiyisong
a0d89c9ed1 feat: Prometheus remote write with pipeline (#5981)
* chore: update nightly version

* chore: sort lint lines

* chore: minor fix

* chore: update nix

* chore: update toolchain to 2024-04-14

* chore: update toolchain to 2024-04-15

* chore: remove unnecessary test

* chore: do not assert oid in sqlness test

* chore: fix margin issue

* chore: fix cr issues

* chore: fix cr issues

* chore: add pipeline handler to prom state

* chore: add prom series processor to merge function

* chore: add run pipeline in decode

* chore: add channel to pipeline ctx

* chore: add pipeline info to remote write handler

* chore: minor update

* chore: minor update

* chore: add test

* chore: add comment

* refactor: simplify identity pipeline params

* fix: test

* refactor: remove is_prometheus

---------

Co-authored-by: Ning Sun <sunning@greptime.com>
2025-05-19 08:00:59 +00:00
73 changed files with 2547 additions and 505 deletions

.github/CODEOWNERS
View File

@@ -4,7 +4,7 @@
* @GreptimeTeam/db-approver
## [Module] Databse Engine
## [Module] Database Engine
/src/index @zhongzc
/src/mito2 @evenyag @v0y4g3r @waynexia
/src/query @evenyag

View File

@@ -8,7 +8,7 @@ set -e
# - If it's a nightly build, the version is 'nightly-YYYYMMDD-$(git rev-parse --short HEAD)', like 'nightly-20230712-e5b243c'.
# create_version ${GIHUB_EVENT_NAME} ${NEXT_RELEASE_VERSION} ${NIGHTLY_RELEASE_PREFIX}
function create_version() {
# Read from envrionment variables.
# Read from environment variables.
if [ -z "$GITHUB_EVENT_NAME" ]; then
echo "GITHUB_EVENT_NAME is empty" >&2
exit 1

View File

@@ -10,7 +10,7 @@ GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
# Ceate a cluster with 1 control-plane node and 5 workers.
# Create a cluster with 1 control-plane node and 5 workers.
function create_kind_cluster() {
cat <<EOF | kind create cluster --name "${CLUSTER}" --image kindest/node:"$KUBERNETES_VERSION" --config=-
kind: Cluster

View File

@@ -41,7 +41,7 @@ function upload_artifacts() {
# Updates the latest version information in AWS S3 if UPDATE_VERSION_INFO is true.
function update_version_info() {
if [ "$UPDATE_VERSION_INFO" == "true" ]; then
# If it's the officail release(like v1.0.0, v1.0.1, v1.0.2, etc.), update latest-version.txt.
# If it's the official release(like v1.0.0, v1.0.1, v1.0.2, etc.), update latest-version.txt.
if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Updating latest-version.txt"
echo "$VERSION" > latest-version.txt

View File

@@ -222,12 +222,12 @@ jobs:
run: |
sudo apt update && sudo apt install -y libfuzzer-14-dev
cargo install cargo-fuzz cargo-gc-bin --force
- name: Download pre-built binariy
- name: Download pre-built binary
uses: actions/download-artifact@v4
with:
name: bin
path: .
- name: Unzip bianry
- name: Unzip binary
run: |
tar -xvf ./bin.tar.gz
rm ./bin.tar.gz
@@ -275,7 +275,7 @@ jobs:
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin --force
- name: Build greptime bianry
- name: Build greptime binary
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
@@ -328,9 +328,9 @@ jobs:
name: Setup Minio
uses: ./.github/actions/setup-minio
- if: matrix.mode.kafka
name: Setup Kafka cluser
name: Setup Kafka cluster
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
- name: Setup Etcd cluster
uses: ./.github/actions/setup-etcd-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3
@@ -475,9 +475,9 @@ jobs:
name: Setup Minio
uses: ./.github/actions/setup-minio
- if: matrix.mode.kafka
name: Setup Kafka cluser
name: Setup Kafka cluster
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
- name: Setup Etcd cluster
uses: ./.github/actions/setup-etcd-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3

View File

@@ -88,7 +88,7 @@ env:
# Controls whether to run tests, include unit-test, integration-test and sqlness.
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
jobs:
@@ -124,7 +124,7 @@ jobs:
# The create-version will create a global variable named 'version' in the global workflows.
# - If it's a tag push release, the version is the tag name(${{ github.ref_name }});
# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nigthly-20230313;
# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nightly-20230313;
# - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-<short-git-sha>-YYYYMMDDSS', like v0.2.0-e5b243c-2023071245;
- name: Create version
id: create-version
@@ -388,7 +388,7 @@ jobs:
### Stop runners ###
# It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'.
# Because we can terminate the specified EC2 instance immediately after the job is finished without uncessary waiting.
# Because we can terminate the specified EC2 instance immediately after the job is finished without unnecessary waiting.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
@@ -444,7 +444,7 @@ jobs:
bump-doc-version:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [publish-github-release]
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
@@ -466,8 +466,8 @@ jobs:
bump-website-version:
name: Bump website version
if: ${{ github.event_name == 'push' }}
needs: [publish-github-release]
if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
@@ -490,7 +490,7 @@ jobs:
bump-helm-charts-version:
name: Bump helm charts version
if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
needs: [publish-github-release]
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
permissions:
contents: write
@@ -511,7 +511,7 @@ jobs:
bump-homebrew-greptime-version:
name: Bump homebrew greptime version
if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
needs: [publish-github-release]
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
permissions:
contents: write

View File

@@ -14,6 +14,8 @@ concurrency:
jobs:
check:
runs-on: ubuntu-latest
permissions:
pull-requests: write # Add permissions to modify PRs
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

.gitignore
View File

@@ -58,3 +58,6 @@ tests-fuzz/corpus/
## default data home
greptimedb_data
# github
!/.github

View File

@@ -79,6 +79,7 @@ clippy.implicit_clone = "warn"
clippy.result_large_err = "allow"
clippy.large_enum_variant = "allow"
clippy.doc_overindented_list_items = "allow"
clippy.uninlined_format_args = "allow"
rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }

View File

@@ -215,4 +215,3 @@ Special thanks to all contributors! See [AUTHORS.md](https://github.com/Greptime
- [Apache Parquet™](https://parquet.apache.org/) (file storage)
- [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/) (query engine)
- [Apache OpenDAL™](https://opendal.apache.org/) (data access abstraction)
- [etcd](https://etcd.io/) (meta service)

View File

@@ -154,6 +154,7 @@
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
| `region_engine.mito.index.result_cache_size` | String | `128MiB` | Cache size for index result. |
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
@@ -494,6 +495,7 @@
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
| `region_engine.mito.index.result_cache_size` | String | `128MiB` | Cache size for index result. |
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |

View File

@@ -499,6 +499,9 @@ content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "64KiB"
## Cache size for index result.
result_cache_size = "128MiB"
## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]

View File

@@ -590,6 +590,9 @@ content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "64KiB"
## Cache size for index result.
result_cache_size = "128MiB"
## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]

View File

@@ -21,7 +21,7 @@
lib = nixpkgs.lib;
rustToolchain = fenix.packages.${system}.fromToolchainName {
name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
sha256 = "sha256-arzEYlWLGGYeOhECHpBxQd2joZ4rPKV3qLNnZ+eql6A=";
sha256 = "sha256-tJJr8oqX3YD+ohhPK7jlt/7kvKBnBqJVjYtoFr520d4=";
};
in
{

View File

@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2025-04-15"
channel = "nightly-2025-05-19"

View File

@@ -163,7 +163,7 @@ impl DfAccumulator for UddSketchState {
}
}
// meaning instantiate as `uddsketch_merge`
DataType::Binary => self.merge_batch(&[array.clone()])?,
DataType::Binary => self.merge_batch(std::slice::from_ref(array))?,
_ => {
return not_impl_err!(
"UDDSketch functions do not support data type: {}",

View File

@@ -468,8 +468,8 @@ mod tests {
let empty_values = vec![""];
let empty_input = Arc::new(StringVector::from_slice(&empty_values)) as VectorRef;
let ipv4_result = ipv4_func.eval(&ctx, &[empty_input.clone()]);
let ipv6_result = ipv6_func.eval(&ctx, &[empty_input.clone()]);
let ipv4_result = ipv4_func.eval(&ctx, std::slice::from_ref(&empty_input));
let ipv6_result = ipv6_func.eval(&ctx, std::slice::from_ref(&empty_input));
assert!(ipv4_result.is_err());
assert!(ipv6_result.is_err());
@@ -478,7 +478,7 @@ mod tests {
let invalid_values = vec!["not an ip", "192.168.1.256", "zzzz::ffff"];
let invalid_input = Arc::new(StringVector::from_slice(&invalid_values)) as VectorRef;
let ipv4_result = ipv4_func.eval(&ctx, &[invalid_input.clone()]);
let ipv4_result = ipv4_func.eval(&ctx, std::slice::from_ref(&invalid_input));
assert!(ipv4_result.is_err());
}

View File

@@ -294,7 +294,7 @@ mod tests {
let input = Arc::new(StringVector::from_slice(&values)) as VectorRef;
// Convert IPv6 addresses to binary
let binary_result = to_num.eval(&ctx, &[input.clone()]).unwrap();
let binary_result = to_num.eval(&ctx, std::slice::from_ref(&input)).unwrap();
// Convert binary to hex string representation (for ipv6_num_to_string)
let mut hex_strings = Vec::new();
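The three file diffs above apply the same clippy-driven pattern: passing a single value as a one-element slice via `std::slice::from_ref` instead of cloning it into a temporary array. A minimal standalone sketch of the difference (names here are illustrative, not from the codebase):

fn takes_slice(values: &[String]) -> usize {
    values.len()
}

fn main() {
    let v = String::from("hello");
    // Before: `takes_slice(&[v.clone()])` clones `v` just to build a length-1 slice.
    // After: borrow it directly as a `&[String]` of length 1, no clone needed.
    assert_eq!(takes_slice(std::slice::from_ref(&v)), 1);
    // `v` is still usable afterwards because it was only borrowed.
    assert_eq!(v.len(), 5);
}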

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod clamp;
pub mod clamp;
mod modulo;
mod pow;
mod rate;
@@ -20,7 +20,7 @@ mod rate;
use std::fmt;
use std::sync::Arc;
pub use clamp::ClampFunction;
pub use clamp::{ClampFunction, ClampMaxFunction, ClampMinFunction};
use common_query::error::{GeneralDataFusionSnafu, Result};
use common_query::prelude::Signature;
use datafusion::error::DataFusionError;
@@ -44,6 +44,8 @@ impl MathFunction {
registry.register(Arc::new(RateFunction));
registry.register(Arc::new(RangeFunction));
registry.register(Arc::new(ClampFunction));
registry.register(Arc::new(ClampMinFunction));
registry.register(Arc::new(ClampMaxFunction));
}
}

View File

@@ -155,6 +155,182 @@ fn clamp_impl<T: LogicalPrimitiveType, const CLAMP_MIN: bool, const CLAMP_MAX: b
Ok(Arc::new(PrimitiveVector::<T>::from(result)))
}
#[derive(Clone, Debug, Default)]
pub struct ClampMinFunction;
const CLAMP_MIN_NAME: &str = "clamp_min";
impl Function for ClampMinFunction {
fn name(&self) -> &str {
CLAMP_MIN_NAME
}
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
// input, min
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly 2, have: {}",
columns.len()
),
}
);
ensure!(
columns[0].data_type().is_numeric(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The first arg's type is not numeric, have: {}",
columns[0].data_type()
),
}
);
ensure!(
columns[0].data_type() == columns[1].data_type(),
InvalidFuncArgsSnafu {
err_msg: format!(
"Arguments don't have identical types: {}, {}",
columns[0].data_type(),
columns[1].data_type()
),
}
);
ensure!(
columns[1].len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (min) should be scalar, have: {:?}",
columns[1]
),
}
);
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
let input = input_array
.as_any()
.downcast_ref::<PrimitiveArray<<$S as LogicalPrimitiveType>::ArrowPrimitive>>()
.unwrap();
let min = TryAsPrimitive::<$S>::try_as_primitive(&columns[1].get(0))
.with_context(|| {
InvalidFuncArgsSnafu {
err_msg: "The second arg (min) should not be none",
}
})?;
// For clamp_min, max is effectively infinity, so we don't use it in the clamp_impl logic.
// We pass a default/dummy value for max.
let max_dummy = <$S as LogicalPrimitiveType>::Native::default();
clamp_impl::<$S, true, false>(input, min, max_dummy)
},{
unreachable!()
})
}
}
impl Display for ClampMinFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", CLAMP_MIN_NAME.to_ascii_uppercase())
}
}
#[derive(Clone, Debug, Default)]
pub struct ClampMaxFunction;
const CLAMP_MAX_NAME: &str = "clamp_max";
impl Function for ClampMaxFunction {
fn name(&self) -> &str {
CLAMP_MAX_NAME
}
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(input_types[0].clone())
}
fn signature(&self) -> Signature {
// input, max
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly 2, have: {}",
columns.len()
),
}
);
ensure!(
columns[0].data_type().is_numeric(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The first arg's type is not numeric, have: {}",
columns[0].data_type()
),
}
);
ensure!(
columns[0].data_type() == columns[1].data_type(),
InvalidFuncArgsSnafu {
err_msg: format!(
"Arguments don't have identical types: {}, {}",
columns[0].data_type(),
columns[1].data_type()
),
}
);
ensure!(
columns[1].len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (max) should be scalar, have: {:?}",
columns[1]
),
}
);
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
let input = input_array
.as_any()
.downcast_ref::<PrimitiveArray<<$S as LogicalPrimitiveType>::ArrowPrimitive>>()
.unwrap();
let max = TryAsPrimitive::<$S>::try_as_primitive(&columns[1].get(0))
.with_context(|| {
InvalidFuncArgsSnafu {
err_msg: "The second arg (max) should not be none",
}
})?;
// For clamp_max, min is effectively -infinity, so we don't use it in the clamp_impl logic.
// We pass a default/dummy value for min.
let min_dummy = <$S as LogicalPrimitiveType>::Native::default();
clamp_impl::<$S, false, true>(input, min_dummy, max)
},{
unreachable!()
})
}
}
impl Display for ClampMaxFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", CLAMP_MAX_NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod test {
@@ -394,4 +570,134 @@ mod test {
let result = func.eval(&FunctionContext::default(), args.as_slice());
assert!(result.is_err());
}
#[test]
fn clamp_min_i64() {
let inputs = [
(
vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
-1,
vec![Some(-1), Some(-1), Some(-1), Some(0), Some(1), Some(2)],
),
(
vec![Some(-3), None, Some(-1), None, None, Some(2)],
-2,
vec![Some(-2), None, Some(-1), None, None, Some(2)],
),
];
let func = ClampMinFunction;
for (in_data, min, expected) in inputs {
let args = [
Arc::new(Int64Vector::from(in_data)) as _,
Arc::new(Int64Vector::from_vec(vec![min])) as _,
];
let result = func
.eval(&FunctionContext::default(), args.as_slice())
.unwrap();
let expected: VectorRef = Arc::new(Int64Vector::from(expected));
assert_eq!(expected, result);
}
}
#[test]
fn clamp_max_i64() {
let inputs = [
(
vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
1,
vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(1)],
),
(
vec![Some(-3), None, Some(-1), None, None, Some(2)],
0,
vec![Some(-3), None, Some(-1), None, None, Some(0)],
),
];
let func = ClampMaxFunction;
for (in_data, max, expected) in inputs {
let args = [
Arc::new(Int64Vector::from(in_data)) as _,
Arc::new(Int64Vector::from_vec(vec![max])) as _,
];
let result = func
.eval(&FunctionContext::default(), args.as_slice())
.unwrap();
let expected: VectorRef = Arc::new(Int64Vector::from(expected));
assert_eq!(expected, result);
}
}
#[test]
fn clamp_min_f64() {
let inputs = [(
vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
-1.0,
vec![Some(-1.0), Some(-1.0), Some(-1.0), Some(0.0), Some(1.0)],
)];
let func = ClampMinFunction;
for (in_data, min, expected) in inputs {
let args = [
Arc::new(Float64Vector::from(in_data)) as _,
Arc::new(Float64Vector::from_vec(vec![min])) as _,
];
let result = func
.eval(&FunctionContext::default(), args.as_slice())
.unwrap();
let expected: VectorRef = Arc::new(Float64Vector::from(expected));
assert_eq!(expected, result);
}
}
#[test]
fn clamp_max_f64() {
let inputs = [(
vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
0.0,
vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(0.0)],
)];
let func = ClampMaxFunction;
for (in_data, max, expected) in inputs {
let args = [
Arc::new(Float64Vector::from(in_data)) as _,
Arc::new(Float64Vector::from_vec(vec![max])) as _,
];
let result = func
.eval(&FunctionContext::default(), args.as_slice())
.unwrap();
let expected: VectorRef = Arc::new(Float64Vector::from(expected));
assert_eq!(expected, result);
}
}
#[test]
fn clamp_min_type_not_match() {
let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
let min = -1;
let func = ClampMinFunction;
let args = [
Arc::new(Float64Vector::from(input)) as _,
Arc::new(Int64Vector::from_vec(vec![min])) as _,
];
let result = func.eval(&FunctionContext::default(), args.as_slice());
assert!(result.is_err());
}
#[test]
fn clamp_max_type_not_match() {
let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
let max = 1;
let func = ClampMaxFunction;
let args = [
Arc::new(Float64Vector::from(input)) as _,
Arc::new(Int64Vector::from_vec(vec![max])) as _,
];
let result = func.eval(&FunctionContext::default(), args.as_slice());
assert!(result.is_err());
}
}
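The new functions mirror `clamp` but bound only one side. A minimal sketch of the intended semantics on scalar values, consistent with the tests above (the real functions operate on whole vectors and are registered as `clamp_min` / `clamp_max`):

fn clamp_min(v: Option<i64>, min: i64) -> Option<i64> {
    // Nulls pass through unchanged; other values are raised to at least `min`.
    v.map(|x| x.max(min))
}

fn clamp_max(v: Option<i64>, max: i64) -> Option<i64> {
    // Nulls pass through unchanged; other values are lowered to at most `max`.
    v.map(|x| x.min(max))
}

fn main() {
    let input = [Some(-3), None, Some(2)];
    assert_eq!(input.map(|v| clamp_min(v, -1)), [Some(-1), None, Some(2)]);
    assert_eq!(input.map(|v| clamp_max(v, 1)), [Some(-3), None, Some(1)]);
}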

View File

@@ -24,21 +24,39 @@ use crate::cache::{CacheContainer, Initializer};
use crate::error::Result;
use crate::instruction::{CacheIdent, CreateFlow, DropFlow};
use crate::key::flow::{TableFlowManager, TableFlowManagerRef};
use crate::key::{FlowId, FlowPartitionId};
use crate::kv_backend::KvBackendRef;
use crate::peer::Peer;
use crate::FlownodeId;
type FlownodeSet = Arc<HashMap<FlownodeId, Peer>>;
/// Flow id&flow partition key
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FlowIdent {
pub flow_id: FlowId,
pub partition_id: FlowPartitionId,
}
impl FlowIdent {
pub fn new(flow_id: FlowId, partition_id: FlowPartitionId) -> Self {
Self {
flow_id,
partition_id,
}
}
}
/// cache for TableFlowManager, the table_id part is in the outer cache
/// include flownode_id, flow_id, partition_id mapping to Peer
type FlownodeFlowSet = Arc<HashMap<FlowIdent, Peer>>;
pub type TableFlownodeSetCacheRef = Arc<TableFlownodeSetCache>;
/// [TableFlownodeSetCache] caches the [TableId] to [FlownodeSet] mapping.
pub type TableFlownodeSetCache = CacheContainer<TableId, FlownodeSet, CacheIdent>;
pub type TableFlownodeSetCache = CacheContainer<TableId, FlownodeFlowSet, CacheIdent>;
/// Constructs a [TableFlownodeSetCache].
pub fn new_table_flownode_set_cache(
name: String,
cache: Cache<TableId, FlownodeSet>,
cache: Cache<TableId, FlownodeFlowSet>,
kv_backend: KvBackendRef,
) -> TableFlownodeSetCache {
let table_flow_manager = Arc::new(TableFlowManager::new(kv_backend));
@@ -47,7 +65,7 @@ pub fn new_table_flownode_set_cache(
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
}
fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId, FlownodeSet> {
fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId, FlownodeFlowSet> {
Arc::new(move |&table_id| {
let table_flow_manager = table_flow_manager.clone();
Box::pin(async move {
@@ -57,7 +75,12 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
.map(|flows| {
flows
.into_iter()
.map(|(key, value)| (key.flownode_id(), value.peer))
.map(|(key, value)| {
(
FlowIdent::new(key.flow_id(), key.partition_id()),
value.peer,
)
})
.collect::<HashMap<_, _>>()
})
// We must cache the `HashSet` even if it's empty,
@@ -71,26 +94,33 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
}
async fn handle_create_flow(
cache: &Cache<TableId, FlownodeSet>,
cache: &Cache<TableId, FlownodeFlowSet>,
CreateFlow {
flow_id,
source_table_ids,
flownodes: flownode_peers,
partition_to_peer_mapping: flow_part2nodes,
}: &CreateFlow,
) {
for table_id in source_table_ids {
let entry = cache.entry(*table_id);
entry
.and_compute_with(
async |entry: Option<moka::Entry<u32, Arc<HashMap<u64, _>>>>| match entry {
async |entry: Option<moka::Entry<u32, FlownodeFlowSet>>| match entry {
Some(entry) => {
let mut map = entry.into_value().as_ref().clone();
map.extend(flownode_peers.iter().map(|peer| (peer.id, peer.clone())));
map.extend(
flow_part2nodes.iter().map(|(part, peer)| {
(FlowIdent::new(*flow_id, *part), peer.clone())
}),
);
Op::Put(Arc::new(map))
}
None => Op::Put(Arc::new(HashMap::from_iter(
flownode_peers.iter().map(|peer| (peer.id, peer.clone())),
))),
None => {
Op::Put(Arc::new(HashMap::from_iter(flow_part2nodes.iter().map(
|(part, peer)| (FlowIdent::new(*flow_id, *part), peer.clone()),
))))
}
},
)
.await;
@@ -98,21 +128,23 @@ async fn handle_create_flow(
}
async fn handle_drop_flow(
cache: &Cache<TableId, FlownodeSet>,
cache: &Cache<TableId, FlownodeFlowSet>,
DropFlow {
flow_id,
source_table_ids,
flownode_ids,
flow_part2node_id,
}: &DropFlow,
) {
for table_id in source_table_ids {
let entry = cache.entry(*table_id);
entry
.and_compute_with(
async |entry: Option<moka::Entry<u32, Arc<HashMap<u64, _>>>>| match entry {
async |entry: Option<moka::Entry<u32, FlownodeFlowSet>>| match entry {
Some(entry) => {
let mut set = entry.into_value().as_ref().clone();
for flownode_id in flownode_ids {
set.remove(flownode_id);
for (part, _node) in flow_part2node_id {
let key = FlowIdent::new(*flow_id, *part);
set.remove(&key);
}
Op::Put(Arc::new(set))
@@ -128,7 +160,7 @@ async fn handle_drop_flow(
}
fn invalidator<'a>(
cache: &'a Cache<TableId, FlownodeSet>,
cache: &'a Cache<TableId, FlownodeFlowSet>,
ident: &'a CacheIdent,
) -> BoxFuture<'a, Result<()>> {
Box::pin(async move {
@@ -154,7 +186,7 @@ mod tests {
use moka::future::CacheBuilder;
use table::table_name::TableName;
use crate::cache::flow::table_flownode::new_table_flownode_set_cache;
use crate::cache::flow::table_flownode::{new_table_flownode_set_cache, FlowIdent};
use crate::instruction::{CacheIdent, CreateFlow, DropFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
@@ -214,12 +246,16 @@ mod tests {
let set = cache.get(1024).await.unwrap().unwrap();
assert_eq!(
set.as_ref().clone(),
HashMap::from_iter((1..=3).map(|i| { (i, Peer::empty(i),) }))
HashMap::from_iter(
(1..=3).map(|i| { (FlowIdent::new(1024, (i - 1) as u32), Peer::empty(i),) })
)
);
let set = cache.get(1025).await.unwrap().unwrap();
assert_eq!(
set.as_ref().clone(),
HashMap::from_iter((1..=3).map(|i| { (i, Peer::empty(i),) }))
HashMap::from_iter(
(1..=3).map(|i| { (FlowIdent::new(1024, (i - 1) as u32), Peer::empty(i),) })
)
);
let result = cache.get(1026).await.unwrap().unwrap();
assert_eq!(result.len(), 0);
@@ -231,8 +267,9 @@ mod tests {
let cache = CacheBuilder::new(128).build();
let cache = new_table_flownode_set_cache("test".to_string(), cache, mem_kv);
let ident = vec![CacheIdent::CreateFlow(CreateFlow {
flow_id: 2001,
source_table_ids: vec![1024, 1025],
flownodes: (1..=5).map(Peer::empty).collect(),
partition_to_peer_mapping: (1..=5).map(|i| (i as u32, Peer::empty(i + 1))).collect(),
})];
cache.invalidate(&ident).await.unwrap();
let set = cache.get(1024).await.unwrap().unwrap();
@@ -241,6 +278,54 @@ mod tests {
assert_eq!(set.len(), 5);
}
#[tokio::test]
async fn test_replace_flow() {
let mem_kv = Arc::new(MemoryKvBackend::default());
let cache = CacheBuilder::new(128).build();
let cache = new_table_flownode_set_cache("test".to_string(), cache, mem_kv);
let ident = vec![CacheIdent::CreateFlow(CreateFlow {
flow_id: 2001,
source_table_ids: vec![1024, 1025],
partition_to_peer_mapping: (1..=5).map(|i| (i as u32, Peer::empty(i + 1))).collect(),
})];
cache.invalidate(&ident).await.unwrap();
let set = cache.get(1024).await.unwrap().unwrap();
assert_eq!(set.len(), 5);
let set = cache.get(1025).await.unwrap().unwrap();
assert_eq!(set.len(), 5);
let drop_then_create_flow = vec![
CacheIdent::DropFlow(DropFlow {
flow_id: 2001,
source_table_ids: vec![1024, 1025],
flow_part2node_id: (1..=5).map(|i| (i as u32, i + 1)).collect(),
}),
CacheIdent::CreateFlow(CreateFlow {
flow_id: 2001,
source_table_ids: vec![1026, 1027],
partition_to_peer_mapping: (11..=15)
.map(|i| (i as u32, Peer::empty(i + 1)))
.collect(),
}),
CacheIdent::FlowId(2001),
];
cache.invalidate(&drop_then_create_flow).await.unwrap();
let set = cache.get(1024).await.unwrap().unwrap();
assert!(set.is_empty());
let expected = HashMap::from_iter(
(11..=15).map(|i| (FlowIdent::new(2001, i as u32), Peer::empty(i + 1))),
);
let set = cache.get(1026).await.unwrap().unwrap();
assert_eq!(set.as_ref().clone(), expected);
let set = cache.get(1027).await.unwrap().unwrap();
assert_eq!(set.as_ref().clone(), expected);
}
#[tokio::test]
async fn test_drop_flow() {
let mem_kv = Arc::new(MemoryKvBackend::default());
@@ -248,34 +333,57 @@ mod tests {
let cache = new_table_flownode_set_cache("test".to_string(), cache, mem_kv);
let ident = vec![
CacheIdent::CreateFlow(CreateFlow {
flow_id: 2001,
source_table_ids: vec![1024, 1025],
flownodes: (1..=5).map(Peer::empty).collect(),
partition_to_peer_mapping: (1..=5)
.map(|i| (i as u32, Peer::empty(i + 1)))
.collect(),
}),
CacheIdent::CreateFlow(CreateFlow {
flow_id: 2002,
source_table_ids: vec![1024, 1025],
flownodes: (11..=12).map(Peer::empty).collect(),
partition_to_peer_mapping: (11..=12)
.map(|i| (i as u32, Peer::empty(i + 1)))
.collect(),
}),
// same flownode that hold multiple flows
CacheIdent::CreateFlow(CreateFlow {
flow_id: 2003,
source_table_ids: vec![1024, 1025],
partition_to_peer_mapping: (1..=5)
.map(|i| (i as u32, Peer::empty(i + 1)))
.collect(),
}),
];
cache.invalidate(&ident).await.unwrap();
let set = cache.get(1024).await.unwrap().unwrap();
assert_eq!(set.len(), 7);
assert_eq!(set.len(), 12);
let set = cache.get(1025).await.unwrap().unwrap();
assert_eq!(set.len(), 7);
assert_eq!(set.len(), 12);
let ident = vec![CacheIdent::DropFlow(DropFlow {
flow_id: 2001,
source_table_ids: vec![1024, 1025],
flownode_ids: vec![1, 2, 3, 4, 5],
flow_part2node_id: (1..=5).map(|i| (i as u32, i + 1)).collect(),
})];
cache.invalidate(&ident).await.unwrap();
let set = cache.get(1024).await.unwrap().unwrap();
assert_eq!(
set.as_ref().clone(),
HashMap::from_iter((11..=12).map(|i| { (i, Peer::empty(i),) }))
HashMap::from_iter(
(11..=12)
.map(|i| (FlowIdent::new(2002, i as u32), Peer::empty(i + 1)))
.chain((1..=5).map(|i| (FlowIdent::new(2003, i as u32), Peer::empty(i + 1))))
)
);
let set = cache.get(1025).await.unwrap().unwrap();
assert_eq!(
set.as_ref().clone(),
HashMap::from_iter((11..=12).map(|i| { (i, Peer::empty(i),) }))
HashMap::from_iter(
(11..=12)
.map(|i| (FlowIdent::new(2002, i as u32), Peer::empty(i + 1)))
.chain((1..=5).map(|i| (FlowIdent::new(2003, i as u32), Peer::empty(i + 1))))
)
);
}
}
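The net effect of this diff is that the per-table cache is now keyed by (flow id, partition id) instead of by flownode id, so one flownode can host several flows over the same source table without their entries colliding, and dropping a flow removes only its own partitions. A minimal sketch of that keying (types assumed here, not the real cache):

use std::collections::HashMap;

// Assumed shape of the key; mirrors the `FlowIdent` introduced above.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct FlowIdent {
    flow_id: u32,
    partition_id: u32,
}

fn main() {
    let mut set: HashMap<FlowIdent, &str> = HashMap::new();
    // Two different flows hosted on the same peer no longer overwrite each other.
    set.insert(FlowIdent { flow_id: 2001, partition_id: 0 }, "peer-1");
    set.insert(FlowIdent { flow_id: 2003, partition_id: 0 }, "peer-1");
    assert_eq!(set.len(), 2);
    // Dropping flow 2001 removes exactly its partitions and nothing else.
    set.retain(|ident, _| ident.flow_id != 2001);
    assert_eq!(set.len(), 1);
}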

View File

@@ -39,7 +39,7 @@ use crate::cache_invalidator::Context;
use crate::ddl::utils::{add_peer_context_if_needed, handle_retry_error};
use crate::ddl::DdlContext;
use crate::error::{self, Result, UnexpectedSnafu};
use crate::instruction::{CacheIdent, CreateFlow};
use crate::instruction::{CacheIdent, CreateFlow, DropFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
use crate::key::table_name::TableNameKey;
@@ -70,6 +70,7 @@ impl CreateFlowProcedure {
query_context,
state: CreateFlowState::Prepare,
prev_flow_info_value: None,
did_replace: false,
flow_type: None,
},
}
@@ -224,6 +225,7 @@ impl CreateFlowProcedure {
.update_flow_metadata(flow_id, prev_flow_value, &flow_info, flow_routes)
.await?;
info!("Replaced flow metadata for flow {flow_id}");
self.data.did_replace = true;
} else {
self.context
.flow_metadata_manager
@@ -240,22 +242,43 @@ impl CreateFlowProcedure {
debug_assert!(self.data.state == CreateFlowState::InvalidateFlowCache);
// Safety: The flow id must be allocated.
let flow_id = self.data.flow_id.unwrap();
let did_replace = self.data.did_replace;
let ctx = Context {
subject: Some("Invalidate flow cache by creating flow".to_string()),
};
let mut caches = vec![];
// if did replaced, invalidate the flow cache with drop the old flow
if did_replace {
let old_flow_info = self.data.prev_flow_info_value.as_ref().unwrap();
// only drop flow is needed, since flow name haven't changed, and flow id already invalidated below
caches.extend([CacheIdent::DropFlow(DropFlow {
flow_id,
source_table_ids: old_flow_info.source_table_ids.clone(),
flow_part2node_id: old_flow_info.flownode_ids().clone().into_iter().collect(),
})]);
}
let (_flow_info, flow_routes) = (&self.data).into();
let flow_part2peers = flow_routes
.into_iter()
.map(|(part_id, route)| (part_id, route.peer))
.collect();
caches.extend([
CacheIdent::CreateFlow(CreateFlow {
flow_id,
source_table_ids: self.data.source_table_ids.clone(),
partition_to_peer_mapping: flow_part2peers,
}),
CacheIdent::FlowId(flow_id),
]);
self.context
.cache_invalidator
.invalidate(
&ctx,
&[
CacheIdent::CreateFlow(CreateFlow {
source_table_ids: self.data.source_table_ids.clone(),
flownodes: self.data.peers.clone(),
}),
CacheIdent::FlowId(flow_id),
],
)
.invalidate(&ctx, &caches)
.await?;
Ok(Status::done_with_output(flow_id))
@@ -377,6 +400,10 @@ pub struct CreateFlowData {
/// For verify if prev value is consistent when need to update flow metadata.
/// only set when `or_replace` is true.
pub(crate) prev_flow_info_value: Option<DeserializedValueWithBytes<FlowInfoValue>>,
/// Only set to true when replace actually happened.
/// This is used to determine whether to invalidate the cache.
#[serde(default)]
pub(crate) did_replace: bool,
pub(crate) flow_type: Option<FlowType>,
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
mod metadata;
use api::v1::flow::{flow_request, DropRequest, FlowRequest};
use async_trait::async_trait;
use common_catalog::format_full_flow_name;
@@ -153,6 +154,12 @@ impl DropFlowProcedure {
};
let flow_info_value = self.data.flow_info_value.as_ref().unwrap();
let flow_part2nodes = flow_info_value
.flownode_ids()
.clone()
.into_iter()
.collect::<Vec<_>>();
self.context
.cache_invalidator
.invalidate(
@@ -164,8 +171,9 @@ impl DropFlowProcedure {
flow_name: flow_info_value.flow_name.to_string(),
}),
CacheIdent::DropFlow(DropFlow {
flow_id,
source_table_ids: flow_info_value.source_table_ids.clone(),
flownode_ids: flow_info_value.flownode_ids.values().cloned().collect(),
flow_part2node_id: flow_part2nodes,
}),
],
)

View File

@@ -24,7 +24,7 @@ use table::table_name::TableName;
use crate::flow_name::FlowName;
use crate::key::schema_name::SchemaName;
use crate::key::FlowId;
use crate::key::{FlowId, FlowPartitionId};
use crate::peer::Peer;
use crate::{DatanodeId, FlownodeId};
@@ -184,14 +184,19 @@ pub enum CacheIdent {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CreateFlow {
/// The unique identifier for the flow.
pub flow_id: FlowId,
pub source_table_ids: Vec<TableId>,
pub flownodes: Vec<Peer>,
/// Mapping of flow partition to peer information
pub partition_to_peer_mapping: Vec<(FlowPartitionId, Peer)>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DropFlow {
pub flow_id: FlowId,
pub source_table_ids: Vec<TableId>,
pub flownode_ids: Vec<FlownodeId>,
/// Mapping of flow partition to flownode id
pub flow_part2node_id: Vec<(FlowPartitionId, FlownodeId)>,
}
/// Flushes a batch of regions.

View File

@@ -246,27 +246,32 @@ impl FlowMetadataManager {
new_flow_info: &FlowInfoValue,
flow_routes: Vec<(FlowPartitionId, FlowRouteValue)>,
) -> Result<()> {
let (create_flow_flow_name_txn, on_create_flow_flow_name_failure) =
let (update_flow_flow_name_txn, on_create_flow_flow_name_failure) =
self.flow_name_manager.build_update_txn(
&new_flow_info.catalog_name,
&new_flow_info.flow_name,
flow_id,
)?;
let (create_flow_txn, on_create_flow_failure) =
let (update_flow_txn, on_create_flow_failure) =
self.flow_info_manager
.build_update_txn(flow_id, current_flow_info, new_flow_info)?;
let create_flow_routes_txn = self
.flow_route_manager
.build_create_txn(flow_id, flow_routes.clone())?;
let create_flownode_flow_txn = self
.flownode_flow_manager
.build_create_txn(flow_id, new_flow_info.flownode_ids().clone());
let create_table_flow_txn = self.table_flow_manager.build_create_txn(
let update_flow_routes_txn = self.flow_route_manager.build_update_txn(
flow_id,
current_flow_info,
flow_routes.clone(),
)?;
let update_flownode_flow_txn = self.flownode_flow_manager.build_update_txn(
flow_id,
current_flow_info,
new_flow_info.flownode_ids().clone(),
);
let update_table_flow_txn = self.table_flow_manager.build_update_txn(
flow_id,
current_flow_info,
flow_routes
.into_iter()
.map(|(partition_id, route)| (partition_id, TableFlowValue { peer: route.peer }))
@@ -275,11 +280,11 @@ impl FlowMetadataManager {
)?;
let txn = Txn::merge_all(vec![
create_flow_flow_name_txn,
create_flow_txn,
create_flow_routes_txn,
create_flownode_flow_txn,
create_table_flow_txn,
update_flow_flow_name_txn,
update_flow_txn,
update_flow_routes_txn,
update_flownode_flow_txn,
update_table_flow_txn,
]);
info!(
"Creating flow {}.{}({}), with {} txn operations",
@@ -783,6 +788,141 @@ mod tests {
}
}
#[tokio::test]
async fn test_update_flow_metadata_diff_flownode() {
let mem_kv = Arc::new(MemoryKvBackend::default());
let flow_metadata_manager = FlowMetadataManager::new(mem_kv.clone());
let flow_id = 10;
let flow_value = test_flow_info_value(
"flow",
[(0u32, 1u64), (1u32, 2u64)].into(),
vec![1024, 1025, 1026],
);
let flow_routes = vec![
(
0u32,
FlowRouteValue {
peer: Peer::empty(1),
},
),
(
1,
FlowRouteValue {
peer: Peer::empty(2),
},
),
];
flow_metadata_manager
.create_flow_metadata(flow_id, flow_value.clone(), flow_routes.clone())
.await
.unwrap();
let new_flow_value = {
let mut tmp = flow_value.clone();
tmp.raw_sql = "new".to_string();
// move to different flownodes
tmp.flownode_ids = [(0, 3u64), (1, 4u64)].into();
tmp
};
let new_flow_routes = vec![
(
0u32,
FlowRouteValue {
peer: Peer::empty(3),
},
),
(
1,
FlowRouteValue {
peer: Peer::empty(4),
},
),
];
// Update flow instead
flow_metadata_manager
.update_flow_metadata(
flow_id,
&DeserializedValueWithBytes::from_inner(flow_value.clone()),
&new_flow_value,
new_flow_routes.clone(),
)
.await
.unwrap();
let got = flow_metadata_manager
.flow_info_manager()
.get(flow_id)
.await
.unwrap()
.unwrap();
let routes = flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.await
.unwrap();
assert_eq!(
routes,
vec![
(
FlowRouteKey::new(flow_id, 0),
FlowRouteValue {
peer: Peer::empty(3),
},
),
(
FlowRouteKey::new(flow_id, 1),
FlowRouteValue {
peer: Peer::empty(4),
},
),
]
);
assert_eq!(got, new_flow_value);
let flows = flow_metadata_manager
.flownode_flow_manager()
.flows(1)
.try_collect::<Vec<_>>()
.await
.unwrap();
// should moved to different flownode
assert_eq!(flows, vec![]);
let flows = flow_metadata_manager
.flownode_flow_manager()
.flows(3)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(flows, vec![(flow_id, 0)]);
for table_id in [1024, 1025, 1026] {
let nodes = flow_metadata_manager
.table_flow_manager()
.flows(table_id)
.await
.unwrap();
assert_eq!(
nodes,
vec![
(
TableFlowKey::new(table_id, 3, flow_id, 0),
TableFlowValue {
peer: Peer::empty(3)
}
),
(
TableFlowKey::new(table_id, 4, flow_id, 1),
TableFlowValue {
peer: Peer::empty(4)
}
)
]
);
}
}
#[tokio::test]
async fn test_update_flow_metadata_flow_replace_diff_id_err() {
let mem_kv = Arc::new(MemoryKvBackend::default());

View File

@@ -272,10 +272,11 @@ impl FlowInfoManager {
let raw_value = new_flow_value.try_as_raw_value()?;
let prev_value = current_flow_value.get_raw_bytes();
let txn = Txn::new()
.when(vec![
Compare::new(key.clone(), CompareOp::NotEqual, None),
Compare::new(key.clone(), CompareOp::Equal, Some(prev_value)),
])
.when(vec![Compare::new(
key.clone(),
CompareOp::Equal,
Some(prev_value),
)])
.and_then(vec![TxnOp::Put(key.clone(), raw_value)])
.or_else(vec![TxnOp::Get(key.clone())]);

View File

@@ -19,9 +19,12 @@ use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::error::{self, Result};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::key::{
BytesAdapter, DeserializedValueWithBytes, FlowId, FlowPartitionId, MetadataKey, MetadataValue,
};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::peer::Peer;
@@ -204,6 +207,30 @@ impl FlowRouteManager {
Ok(Txn::new().and_then(txns))
}
/// Builds a update flow routes transaction.
///
/// Puts `__flow/route/{flow_id}/{partition_id}` keys.
/// Also removes `__flow/route/{flow_id}/{old_partition_id}` keys.
pub(crate) fn build_update_txn<I: IntoIterator<Item = (FlowPartitionId, FlowRouteValue)>>(
&self,
flow_id: FlowId,
current_flow_info: &DeserializedValueWithBytes<FlowInfoValue>,
flow_routes: I,
) -> Result<Txn> {
let del_txns = current_flow_info.flownode_ids().keys().map(|partition_id| {
let key = FlowRouteKey::new(flow_id, *partition_id).to_bytes();
Ok(TxnOp::Delete(key))
});
let put_txns = flow_routes.into_iter().map(|(partition_id, route)| {
let key = FlowRouteKey::new(flow_id, partition_id).to_bytes();
Ok(TxnOp::Put(key, route.try_as_raw_value()?))
});
let txns = del_txns.chain(put_txns).collect::<Result<Vec<_>>>()?;
Ok(Txn::new().and_then(txns))
}
async fn remap_flow_route_addresses(
&self,
flow_routes: &mut [(FlowRouteKey, FlowRouteValue)],

View File

@@ -19,8 +19,9 @@ use regex::Regex;
use snafu::OptionExt;
use crate::error::{self, Result};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::FlowScoped;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey};
use crate::key::{BytesAdapter, DeserializedValueWithBytes, FlowId, FlowPartitionId, MetadataKey};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
@@ -202,6 +203,33 @@ impl FlownodeFlowManager {
Txn::new().and_then(txns)
}
/// Builds a update flownode flow transaction.
///
/// Puts `__flownode_flow/{flownode_id}/{flow_id}/{partition_id}` keys.
/// Remove the old `__flownode_flow/{old_flownode_id}/{flow_id}/{old_partition_id}` keys.
pub(crate) fn build_update_txn<I: IntoIterator<Item = (FlowPartitionId, FlownodeId)>>(
&self,
flow_id: FlowId,
current_flow_info: &DeserializedValueWithBytes<FlowInfoValue>,
flownode_ids: I,
) -> Txn {
let del_txns =
current_flow_info
.flownode_ids()
.iter()
.map(|(partition_id, flownode_id)| {
let key = FlownodeFlowKey::new(*flownode_id, flow_id, *partition_id).to_bytes();
TxnOp::Delete(key)
});
let put_txns = flownode_ids.into_iter().map(|(partition_id, flownode_id)| {
let key = FlownodeFlowKey::new(flownode_id, flow_id, partition_id).to_bytes();
TxnOp::Put(key, vec![])
});
let txns = del_txns.chain(put_txns).collect::<Vec<_>>();
Txn::new().and_then(txns)
}
}
#[cfg(test)]

View File

@@ -22,9 +22,12 @@ use snafu::OptionExt;
use table::metadata::TableId;
use crate::error::{self, Result};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::key::{
BytesAdapter, DeserializedValueWithBytes, FlowId, FlowPartitionId, MetadataKey, MetadataValue,
};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::peer::Peer;
@@ -215,7 +218,7 @@ impl TableFlowManager {
/// Builds a create table flow transaction.
///
/// Puts `__flow/source_table/{table_id}/{node_id}/{partition_id}` keys.
/// Puts `__flow/source_table/{table_id}/{node_id}/{flow_id}/{partition_id}` keys.
pub fn build_create_txn(
&self,
flow_id: FlowId,
@@ -239,6 +242,44 @@ impl TableFlowManager {
Ok(Txn::new().and_then(txns))
}
/// Builds a update table flow transaction.
///
/// Puts `__flow/source_table/{table_id}/{node_id}/{flow_id}/{partition_id}` keys,
/// Also remove previous
/// `__flow/source_table/{table_id}/{old_node_id}/{flow_id}/{partition_id}` keys.
pub fn build_update_txn(
&self,
flow_id: FlowId,
current_flow_info: &DeserializedValueWithBytes<FlowInfoValue>,
table_flow_values: Vec<(FlowPartitionId, TableFlowValue)>,
source_table_ids: &[TableId],
) -> Result<Txn> {
let mut txns = Vec::with_capacity(2 * source_table_ids.len() * table_flow_values.len());
// first remove the old keys
for (part_id, node_id) in current_flow_info.flownode_ids() {
for source_table_id in current_flow_info.source_table_ids() {
txns.push(TxnOp::Delete(
TableFlowKey::new(*source_table_id, *node_id, flow_id, *part_id).to_bytes(),
));
}
}
for (partition_id, table_flow_value) in table_flow_values {
let flownode_id = table_flow_value.peer.id;
let value = table_flow_value.try_as_raw_value()?;
for source_table_id in source_table_ids {
txns.push(TxnOp::Put(
TableFlowKey::new(*source_table_id, flownode_id, flow_id, partition_id)
.to_bytes(),
value.clone(),
));
}
}
Ok(Txn::new().and_then(txns))
}
async fn remap_table_flow_addresses(
&self,
table_flows: &mut [(TableFlowKey, TableFlowValue)],

View File

@@ -65,6 +65,7 @@ impl DataflowState {
/// schedule all subgraph that need to run with time <= `as_of` and run_available()
///
/// return true if any subgraph actually executed
#[allow(clippy::swap_with_temporary)]
pub fn run_available_with_schedule(&mut self, df: &mut Dfir) -> bool {
// first split keys <= as_of into another map
let mut before = self

View File

@@ -102,6 +102,7 @@ where
builder = builder
.with_prom_handler(
self.instance.clone(),
Some(self.instance.clone()),
opts.prom_store.with_metric_engine,
opts.http.is_strict_mode,
)

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::ops::Range;
use fastbloom::BloomFilter;
@@ -25,10 +25,10 @@ use crate::Bytes;
/// `InListPredicate` contains a list of acceptable values. A value needs to match at least
/// one of the elements (logical OR semantic) for the predicate to be satisfied.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InListPredicate {
/// List of acceptable values.
pub list: HashSet<Bytes>,
pub list: BTreeSet<Bytes>,
}
pub struct BloomFilterApplier {
@@ -277,21 +277,21 @@ mod tests {
// Single value predicates
(
vec![InListPredicate {
list: HashSet::from_iter([b"row00".to_vec()]),
list: BTreeSet::from_iter([b"row00".to_vec()]),
}],
0..28,
vec![0..4],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"row05".to_vec()]),
list: BTreeSet::from_iter([b"row05".to_vec()]),
}],
4..8,
vec![4..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"row03".to_vec()]),
list: BTreeSet::from_iter([b"row03".to_vec()]),
}],
4..8,
vec![],
@@ -299,14 +299,14 @@ mod tests {
// Multiple values in a single predicate (OR logic)
(
vec![InListPredicate {
list: HashSet::from_iter([b"overl".to_vec(), b"row06".to_vec()]),
list: BTreeSet::from_iter([b"overl".to_vec(), b"row06".to_vec()]),
}],
0..28,
vec![0..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"seg01".to_vec(), b"overp".to_vec()]),
list: BTreeSet::from_iter([b"seg01".to_vec(), b"overp".to_vec()]),
}],
0..28,
vec![4..12],
@@ -314,7 +314,7 @@ mod tests {
// Non-existent values
(
vec![InListPredicate {
list: HashSet::from_iter([b"row99".to_vec()]),
list: BTreeSet::from_iter([b"row99".to_vec()]),
}],
0..28,
vec![],
@@ -322,7 +322,7 @@ mod tests {
// Empty range
(
vec![InListPredicate {
list: HashSet::from_iter([b"row00".to_vec()]),
list: BTreeSet::from_iter([b"row00".to_vec()]),
}],
12..12,
vec![],
@@ -330,21 +330,21 @@ mod tests {
// Multiple values in a single predicate within specific ranges
(
vec![InListPredicate {
list: HashSet::from_iter([b"row04".to_vec(), b"row05".to_vec()]),
list: BTreeSet::from_iter([b"row04".to_vec(), b"row05".to_vec()]),
}],
0..12,
vec![4..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"seg01".to_vec()]),
list: BTreeSet::from_iter([b"seg01".to_vec()]),
}],
0..28,
vec![4..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"seg01".to_vec()]),
list: BTreeSet::from_iter([b"seg01".to_vec()]),
}],
6..28,
vec![6..8],
@@ -352,21 +352,21 @@ mod tests {
// Values spanning multiple segments
(
vec![InListPredicate {
list: HashSet::from_iter([b"overl".to_vec()]),
list: BTreeSet::from_iter([b"overl".to_vec()]),
}],
0..28,
vec![0..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"overl".to_vec()]),
list: BTreeSet::from_iter([b"overl".to_vec()]),
}],
2..28,
vec![2..8],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"overp".to_vec()]),
list: BTreeSet::from_iter([b"overp".to_vec()]),
}],
0..10,
vec![4..10],
@@ -374,21 +374,21 @@ mod tests {
// Duplicate values
(
vec![InListPredicate {
list: HashSet::from_iter([b"dup".to_vec()]),
list: BTreeSet::from_iter([b"dup".to_vec()]),
}],
0..12,
vec![],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"dup".to_vec()]),
list: BTreeSet::from_iter([b"dup".to_vec()]),
}],
0..16,
vec![12..16],
),
(
vec![InListPredicate {
list: HashSet::from_iter([b"dup".to_vec()]),
list: BTreeSet::from_iter([b"dup".to_vec()]),
}],
0..28,
vec![12..28],
@@ -397,10 +397,10 @@ mod tests {
(
vec![
InListPredicate {
list: HashSet::from_iter([b"row00".to_vec(), b"row01".to_vec()]),
list: BTreeSet::from_iter([b"row00".to_vec(), b"row01".to_vec()]),
},
InListPredicate {
list: HashSet::from_iter([b"seg00".to_vec()]),
list: BTreeSet::from_iter([b"seg00".to_vec()]),
},
],
0..28,
@@ -409,10 +409,10 @@ mod tests {
(
vec![
InListPredicate {
list: HashSet::from_iter([b"overl".to_vec()]),
list: BTreeSet::from_iter([b"overl".to_vec()]),
},
InListPredicate {
list: HashSet::from_iter([b"seg01".to_vec()]),
list: BTreeSet::from_iter([b"seg01".to_vec()]),
},
],
0..28,

View File

@@ -183,7 +183,7 @@ impl TryFrom<Vec<Predicate>> for IntersectionFstApplier {
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use std::collections::BTreeSet;
use super::*;
use crate::inverted_index::error::Error;
@@ -405,7 +405,7 @@ mod tests {
#[test]
fn test_intersection_fst_applier_with_in_list_predicate() {
let result = IntersectionFstApplier::try_from(vec![Predicate::InList(InListPredicate {
list: HashSet::from_iter([b"one".to_vec(), b"two".to_vec()]),
list: BTreeSet::from_iter([b"one".to_vec(), b"two".to_vec()]),
})]);
assert!(matches!(
result,

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::mem::size_of;
use snafu::{ensure, ResultExt};
@@ -93,7 +93,7 @@ impl KeysFstApplier {
fn intersect_with_lists(in_lists: &mut [Predicate]) -> Vec<Bytes> {
#[inline]
fn get_list(p: &Predicate) -> &HashSet<Bytes> {
fn get_list(p: &Predicate) -> &BTreeSet<Bytes> {
match p {
Predicate::InList(i) => &i.list,
_ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists`
@@ -229,7 +229,7 @@ mod tests {
fn test_keys_fst_applier_try_from() {
let predicates = vec![
Predicate::InList(InListPredicate {
list: HashSet::from_iter(vec![b("foo"), b("bar")]),
list: BTreeSet::from_iter(vec![b("foo"), b("bar")]),
}),
Predicate::Range(RangePredicate {
range: Range {
@@ -252,7 +252,7 @@ mod tests {
fn test_keys_fst_applier_try_from_filter_out_unmatched_keys() {
let predicates = vec![
Predicate::InList(InListPredicate {
list: HashSet::from_iter(vec![b("foo"), b("bar")]),
list: BTreeSet::from_iter(vec![b("foo"), b("bar")]),
}),
Predicate::Range(RangePredicate {
range: Range {
@@ -300,7 +300,7 @@ mod tests {
fn test_keys_fst_applier_try_from_with_invalid_regex() {
let predicates = vec![
Predicate::InList(InListPredicate {
list: HashSet::from_iter(vec![b("foo"), b("bar")]),
list: BTreeSet::from_iter(vec![b("foo"), b("bar")]),
}),
Predicate::RegexMatch(RegexMatchPredicate {
pattern: "*invalid regex".to_string(),

View File

@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::BTreeSet;
use crate::Bytes;
/// Enumerates types of predicates for value filtering.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Predicate {
/// Predicate for matching values in a list.
InList(InListPredicate),
@@ -31,14 +31,14 @@ pub enum Predicate {
/// `InListPredicate` contains a list of acceptable values. A value needs to match at least
/// one of the elements (logical OR semantic) for the predicate to be satisfied.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InListPredicate {
/// List of acceptable values.
pub list: HashSet<Bytes>,
pub list: BTreeSet<Bytes>,
}
/// `Bound` is a sub-component of a range, representing a single-sided limit that could be inclusive or exclusive.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Bound {
/// Whether the bound is inclusive or exclusive.
pub inclusive: bool,
@@ -48,7 +48,7 @@ pub struct Bound {
/// `Range` defines a single continuous range which can optionally have a lower and/or upper limit.
/// Both the lower and upper bounds must be satisfied for the range condition to be true.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Range {
/// The lower bound of the range.
pub lower: Option<Bound>,
@@ -58,7 +58,7 @@ pub struct Range {
/// `RangePredicate` encapsulates a range condition that must be satisfied
/// for the predicate to hold true (logical AND semantic between the bounds).
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RangePredicate {
/// The range condition.
pub range: Range,
@@ -66,7 +66,7 @@ pub struct RangePredicate {
/// `RegexMatchPredicate` encapsulates a single regex pattern. A value must match
/// the pattern for the predicate to be satisfied.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RegexMatchPredicate {
/// The regex pattern.
pub pattern: String,
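These predicate types switch from `HashSet` to `BTreeSet` and gain a `Hash` derive. A likely reason (not stated in the diff) is that `HashSet` does not implement `Hash`, while `BTreeSet` does, so a predicate containing a `BTreeSet` can itself be hashed and used as a key in the new index result cache. A small standalone check of that Rust property:

use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;
use std::hash::{Hash, Hasher};

#[derive(Hash)]
struct InList {
    // With `HashSet<Vec<u8>>` here, `#[derive(Hash)]` would not compile.
    list: BTreeSet<Vec<u8>>,
}

fn main() {
    let p = InList {
        list: BTreeSet::from([b"row00".to_vec(), b"row05".to_vec()]),
    };
    let mut hasher = DefaultHasher::new();
    p.hash(&mut hasher);
    println!("predicate hash = {}", hasher.finish());
}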

View File

@@ -34,7 +34,7 @@ use crate::key::{DatanodeLeaseKey, FlownodeLeaseKey, LeaseValue};
fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
move |v: &LeaseValue| {
((time_util::current_time_millis() - v.timestamp_millis) as u64)
< lease_secs.checked_mul(1000).unwrap_or(u64::MAX)
< lease_secs.saturating_mul(1000)
}
}
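A minimal sketch of why this change is behavior-preserving: both forms clamp to `u64::MAX` on overflow, the saturating form just says so directly.

fn main() {
    for lease_secs in [0u64, 30, u64::MAX / 2, u64::MAX] {
        assert_eq!(
            lease_secs.checked_mul(1000).unwrap_or(u64::MAX),
            lease_secs.saturating_mul(1000)
        );
    }
}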

View File

@@ -29,6 +29,7 @@ use bytes::Bytes;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use index::bloom_filter_index::{BloomFilterIndexCache, BloomFilterIndexCacheRef};
use index::result_cache::IndexResultCache;
use moka::notification::RemovalCause;
use moka::sync::Cache;
use parquet::column::page::Page;
@@ -242,6 +243,15 @@ impl CacheStrategy {
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::index_result_cache()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn index_result_cache(&self) -> Option<&IndexResultCache> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.index_result_cache(),
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
}
/// Manages cached data for the engine.
@@ -258,13 +268,15 @@ pub struct CacheManager {
/// A Cache for writing files to object stores.
write_cache: Option<WriteCacheRef>,
/// Cache for inverted index.
index_cache: Option<InvertedIndexCacheRef>,
inverted_index_cache: Option<InvertedIndexCacheRef>,
/// Cache for bloom filter index.
bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
/// Puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
/// Cache for time series selectors.
selector_result_cache: Option<SelectorResultCache>,
/// Cache for index result.
index_result_cache: Option<IndexResultCache>,
}
pub type CacheManagerRef = Arc<CacheManager>;
@@ -410,7 +422,7 @@ impl CacheManager {
}
pub(crate) fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
self.index_cache.as_ref()
self.inverted_index_cache.as_ref()
}
pub(crate) fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
@@ -420,6 +432,10 @@ impl CacheManager {
pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
self.puffin_metadata_cache.as_ref()
}
pub(crate) fn index_result_cache(&self) -> Option<&IndexResultCache> {
self.index_result_cache.as_ref()
}
}
/// Increases selector cache miss metrics.
@@ -441,6 +457,7 @@ pub struct CacheManagerBuilder {
index_metadata_size: u64,
index_content_size: u64,
index_content_page_size: u64,
index_result_cache_size: u64,
puffin_metadata_size: u64,
write_cache: Option<WriteCacheRef>,
selector_result_cache_size: u64,
@@ -489,6 +506,12 @@ impl CacheManagerBuilder {
self
}
/// Sets cache size for index result.
pub fn index_result_cache_size(mut self, bytes: u64) -> Self {
self.index_result_cache_size = bytes;
self
}
/// Sets cache size for puffin metadata.
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
self.puffin_metadata_size = bytes;
@@ -566,6 +589,8 @@ impl CacheManagerBuilder {
self.index_content_size,
self.index_content_page_size,
);
let index_result_cache = (self.index_result_cache_size != 0)
.then(|| IndexResultCache::new(self.index_result_cache_size));
let puffin_metadata_cache =
PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
@@ -588,10 +613,11 @@ impl CacheManagerBuilder {
vector_cache,
page_cache,
write_cache: self.write_cache,
index_cache: Some(Arc::new(inverted_index_cache)),
inverted_index_cache: Some(Arc::new(inverted_index_cache)),
bloom_filter_index_cache: Some(Arc::new(bloom_filter_index_cache)),
puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
selector_result_cache,
index_result_cache,
}
}
}
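
A hedged sketch of how the new knob threads through the builder and strategy shown above; CacheManagerBuilder::default() and the Arc wrapping are assumptions, while the method names come from the hunks:

// Sketch only, not a verbatim call site.
let cache_manager = std::sync::Arc::new(
    CacheManagerBuilder::default()
        .index_result_cache_size(128 * 1024 * 1024) // passing 0 disables the result cache
        .build(),
);
let strategy = CacheStrategy::EnableAll(cache_manager);
// Query paths consult the cache through the strategy; Compaction/Disabled return None.
let _maybe_cache: Option<&IndexResultCache> = strategy.index_result_cache();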


@@ -14,6 +14,7 @@
pub mod bloom_filter_index;
pub mod inverted_index;
pub mod result_cache;
use std::future::Future;
use std::hash::Hash;


@@ -0,0 +1,423 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::sync::Arc;
use index::bloom_filter::applier::InListPredicate;
use index::inverted_index::search::predicate::{Predicate, RangePredicate};
use moka::notification::RemovalCause;
use moka::sync::Cache;
use store_api::storage::ColumnId;
use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
use crate::sst::file::FileId;
use crate::sst::index::fulltext_index::applier::builder::{
FulltextQuery, FulltextRequest, FulltextTerm,
};
use crate::sst::parquet::row_selection::RowGroupSelection;
const INDEX_RESULT_TYPE: &str = "index_result";
/// Cache for storing index query results.
pub struct IndexResultCache {
cache: Cache<(PredicateKey, FileId), Arc<RowGroupSelection>>,
}
impl IndexResultCache {
/// Creates a new cache with the given capacity.
pub fn new(capacity: u64) -> Self {
fn to_str(cause: RemovalCause) -> &'static str {
match cause {
RemovalCause::Expired => "expired",
RemovalCause::Explicit => "explicit",
RemovalCause::Replaced => "replaced",
RemovalCause::Size => "size",
}
}
let cache = Cache::builder()
.max_capacity(capacity)
.weigher(Self::index_result_cache_weight)
.eviction_listener(|k, v, cause| {
let size = Self::index_result_cache_weight(&k, &v);
CACHE_BYTES
.with_label_values(&[INDEX_RESULT_TYPE])
.sub(size.into());
CACHE_EVICTION
.with_label_values(&[INDEX_RESULT_TYPE, to_str(cause)])
.inc();
})
.build();
Self { cache }
}
/// Puts a query result into the cache.
pub fn put(&self, key: PredicateKey, file_id: FileId, result: Arc<RowGroupSelection>) {
let key = (key, file_id);
let size = Self::index_result_cache_weight(&key, &result);
CACHE_BYTES
.with_label_values(&[INDEX_RESULT_TYPE])
.add(size.into());
self.cache.insert(key, result);
}
/// Gets a query result from the cache.
pub fn get(&self, key: &PredicateKey, file_id: FileId) -> Option<Arc<RowGroupSelection>> {
let res = self.cache.get(&(key.clone(), file_id));
if res.is_some() {
CACHE_HIT.with_label_values(&[INDEX_RESULT_TYPE]).inc();
} else {
CACHE_MISS.with_label_values(&[INDEX_RESULT_TYPE]).inc()
}
res
}
/// Calculates the memory usage of a cache entry.
fn index_result_cache_weight(k: &(PredicateKey, FileId), v: &Arc<RowGroupSelection>) -> u32 {
k.0.mem_usage() as u32 + v.mem_usage() as u32
}
}
/// Key for different types of index predicates.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PredicateKey {
/// Fulltext index predicate.
Fulltext(FulltextIndexKey),
/// Bloom filter predicate.
Bloom(BloomFilterKey),
/// Inverted index predicate.
Inverted(InvertedIndexKey),
}
impl PredicateKey {
/// Creates a new fulltext index key.
pub fn new_fulltext(predicates: Arc<BTreeMap<ColumnId, FulltextRequest>>) -> Self {
Self::Fulltext(FulltextIndexKey::new(predicates))
}
/// Creates a new bloom filter key.
pub fn new_bloom(predicates: Arc<BTreeMap<ColumnId, Vec<InListPredicate>>>) -> Self {
Self::Bloom(BloomFilterKey::new(predicates))
}
/// Creates a new inverted index key.
pub fn new_inverted(predicates: Arc<BTreeMap<ColumnId, Vec<Predicate>>>) -> Self {
Self::Inverted(InvertedIndexKey::new(predicates))
}
/// Returns the memory usage of this key.
pub fn mem_usage(&self) -> usize {
match self {
Self::Fulltext(key) => key.mem_usage,
Self::Bloom(key) => key.mem_usage,
Self::Inverted(key) => key.mem_usage,
}
}
}
/// Key for fulltext index queries.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct FulltextIndexKey {
predicates: Arc<BTreeMap<ColumnId, FulltextRequest>>,
mem_usage: usize,
}
impl FulltextIndexKey {
/// Creates a new fulltext index key with the given predicates.
/// Calculates memory usage based on the size of queries and terms.
pub fn new(predicates: Arc<BTreeMap<ColumnId, FulltextRequest>>) -> Self {
let mem_usage = predicates
.values()
.map(|request| {
let query_size = request
.queries
.iter()
.map(|query| query.0.len() + size_of::<FulltextQuery>())
.sum::<usize>();
let term_size = request
.terms
.iter()
.map(|term| term.term.len() + size_of::<FulltextTerm>())
.sum::<usize>();
query_size + term_size
})
.sum();
Self {
predicates,
mem_usage,
}
}
}
/// Key for bloom filter queries.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct BloomFilterKey {
predicates: Arc<BTreeMap<ColumnId, Vec<InListPredicate>>>,
mem_usage: usize,
}
impl BloomFilterKey {
/// Creates a new bloom filter key with the given predicates.
/// Calculates memory usage based on the size of predicate lists.
pub fn new(predicates: Arc<BTreeMap<ColumnId, Vec<InListPredicate>>>) -> Self {
let mem_usage = predicates
.values()
.map(|predicates| {
predicates
.iter()
.map(|predicate| predicate.list.iter().map(|list| list.len()).sum::<usize>())
.sum::<usize>()
})
.sum();
Self {
predicates,
mem_usage,
}
}
}
/// Key for inverted index queries.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct InvertedIndexKey {
predicates: Arc<BTreeMap<ColumnId, Vec<Predicate>>>,
mem_usage: usize,
}
impl InvertedIndexKey {
/// Creates a new inverted index key with the given predicates.
/// Calculates memory usage based on the type and size of predicates.
pub fn new(predicates: Arc<BTreeMap<ColumnId, Vec<Predicate>>>) -> Self {
let mem_usage = predicates
.values()
.map(|predicates| {
predicates
.iter()
.map(|predicate| match predicate {
Predicate::InList(predicate) => {
predicate.list.iter().map(|list| list.len()).sum::<usize>()
}
Predicate::Range(_) => size_of::<RangePredicate>(),
Predicate::RegexMatch(predicate) => predicate.pattern.len(),
})
.sum::<usize>()
})
.sum();
Self {
predicates,
mem_usage,
}
}
}
#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
use index::bloom_filter::applier::InListPredicate as BloomInListPredicate;
use index::inverted_index::search::predicate::{Predicate, Range, RangePredicate};
use super::*;
use crate::sst::index::fulltext_index::applier::builder::{
FulltextQuery, FulltextRequest, FulltextTerm,
};
use crate::sst::parquet::row_selection::RowGroupSelection;
#[test]
fn test_cache_basic_operations() {
let cache = IndexResultCache::new(1000);
let file_id = FileId::random();
// Create a test key and value
let predicates = BTreeMap::new();
let key = PredicateKey::new_fulltext(Arc::new(predicates));
let selection = Arc::new(RowGroupSelection::from_row_ids(
[1, 2, 3].into_iter().collect(),
1,
10,
));
// Test put and get
cache.put(key.clone(), file_id, selection.clone());
let retrieved = cache.get(&key, file_id);
assert!(retrieved.is_some());
assert_eq!(
retrieved.unwrap().as_ref().row_count(),
selection.as_ref().row_count()
);
// Test get non-existent key
let non_existent_file_id = FileId::random();
assert!(cache.get(&key, non_existent_file_id).is_none());
}
#[test]
fn test_cache_capacity_limit() {
// Create a cache with small capacity (100 bytes)
let cache = IndexResultCache::new(100);
let file_id1 = FileId::random();
let file_id2 = FileId::random();
// Create two large keys that will exceed capacity
let mut predicates1 = BTreeMap::new();
let request1 = FulltextRequest {
queries: vec![
FulltextQuery(
"test query 1 with a very long string to ensure large weight".to_string(),
),
FulltextQuery("another long query string".to_string()),
],
terms: vec![],
};
predicates1.insert(1, request1);
let key1 = PredicateKey::new_fulltext(Arc::new(predicates1));
let selection1 = Arc::new(RowGroupSelection::default());
let mut predicates2 = BTreeMap::new();
let request2 = FulltextRequest {
queries: vec![
FulltextQuery(
"test query 2 with a very long string to ensure large weight".to_string(),
),
FulltextQuery("another long query string".to_string()),
],
terms: vec![],
};
predicates2.insert(1, request2);
let key2 = PredicateKey::new_fulltext(Arc::new(predicates2));
let selection2 = Arc::new(RowGroupSelection::default());
// Calculate weights
let weight1 =
IndexResultCache::index_result_cache_weight(&(key1.clone(), file_id1), &selection1);
let weight2 =
IndexResultCache::index_result_cache_weight(&(key2.clone(), file_id2), &selection2);
assert!(weight1 > 100);
assert!(weight2 > 100);
// Put first key-value pair
cache.put(key1.clone(), file_id1, selection1.clone());
// Verify first key is in cache
let retrieved1 = cache.get(&key1, file_id1);
assert!(retrieved1.is_some());
assert_eq!(
retrieved1.unwrap().as_ref().row_count(),
selection1.as_ref().row_count()
);
// Put second key-value pair, which should trigger eviction
cache.put(key2.clone(), file_id2, selection2.clone());
// Verify second key is in cache
let retrieved2 = cache.get(&key2, file_id2);
assert!(retrieved2.is_some());
assert_eq!(
retrieved2.unwrap().as_ref().row_count(),
selection2.as_ref().row_count()
);
// Verify first key was evicted
cache.cache.run_pending_tasks();
let retrieved1_after_eviction = cache.get(&key1, file_id1);
assert!(
retrieved1_after_eviction.is_none(),
"First key should have been evicted"
);
}
#[test]
fn test_index_result_cache_weight() {
let file_id = FileId::random();
// Test empty values
let empty_predicates = BTreeMap::new();
let empty_key = PredicateKey::new_fulltext(Arc::new(empty_predicates));
let empty_selection = Arc::new(RowGroupSelection::default());
let empty_weight = IndexResultCache::index_result_cache_weight(
&(empty_key.clone(), file_id),
&empty_selection,
);
assert_eq!(empty_weight, 0);
assert_eq!(
empty_weight,
empty_key.mem_usage() as u32 + empty_selection.mem_usage() as u32
);
// Test 1: FulltextIndexKey
let mut predicates1 = BTreeMap::new();
let request1 = FulltextRequest {
queries: vec![FulltextQuery("test query".to_string())],
terms: vec![FulltextTerm {
col_lowered: false,
term: "test term".to_string(),
}],
};
predicates1.insert(1, request1);
let key1 = PredicateKey::new_fulltext(Arc::new(predicates1));
let selection1 = Arc::new(RowGroupSelection::new(100, 250));
let weight1 =
IndexResultCache::index_result_cache_weight(&(key1.clone(), file_id), &selection1);
assert!(weight1 > 0);
assert_eq!(
weight1,
key1.mem_usage() as u32 + selection1.mem_usage() as u32
);
// Test 2: BloomFilterKey
let mut predicates2 = BTreeMap::new();
let predicate2 = BloomInListPredicate {
list: BTreeSet::from([b"test1".to_vec(), b"test2".to_vec()]),
};
predicates2.insert(1, vec![predicate2]);
let key2 = PredicateKey::new_bloom(Arc::new(predicates2));
let selection2 = Arc::new(RowGroupSelection::from_row_ids(
[1, 2, 3].into_iter().collect(),
100,
1,
));
let weight2 =
IndexResultCache::index_result_cache_weight(&(key2.clone(), file_id), &selection2);
assert!(weight2 > 0);
assert_eq!(
weight2,
key2.mem_usage() as u32 + selection2.mem_usage() as u32
);
// Test 3: InvertedIndexKey
let mut predicates3 = BTreeMap::new();
let predicate3 = Predicate::Range(RangePredicate {
range: Range {
lower: None,
upper: None,
},
});
predicates3.insert(1, vec![predicate3]);
let key3 = PredicateKey::new_inverted(Arc::new(predicates3));
let selection3 = Arc::new(RowGroupSelection::from_row_ranges(
vec![(0, vec![5..15])],
20,
));
let weight3 =
IndexResultCache::index_result_cache_weight(&(key3.clone(), file_id), &selection3);
assert!(weight3 > 0);
assert_eq!(
weight3,
key3.mem_usage() as u32 + selection3.mem_usage() as u32
);
}
}
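
The weigher above charges an entry for both halves of the pair: the predicate key's byte estimate and the cached selection's selector memory, so heavier predicates and more fragmented selections consume more of max_capacity. An illustrative calculation (numbers made up):

// index_result_cache_weight(&(key, file_id), &selection)
//     == key.mem_usage() as u32 + selection.mem_usage() as u32
// e.g. predicates totalling 64 bytes plus a selection whose selectors account
// for 96 bytes contribute 160 units toward `max_capacity`.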


@@ -265,6 +265,8 @@ impl MitoConfig {
self.vector_cache_size = mem_cache_size;
self.page_cache_size = page_cache_size;
self.selector_result_cache_size = mem_cache_size;
self.index.adjust_buffer_and_cache_size(sys_memory);
}
/// Enable write cache.
@@ -315,6 +317,8 @@ pub struct IndexConfig {
pub content_cache_size: ReadableSize,
/// Page size for inverted index content.
pub content_cache_page_size: ReadableSize,
/// Cache size for index results. Set it to 0 to disable the cache.
pub result_cache_size: ReadableSize,
}
impl Default for IndexConfig {
@@ -327,6 +331,7 @@ impl Default for IndexConfig {
metadata_cache_size: ReadableSize::mb(64),
content_cache_size: ReadableSize::mb(128),
content_cache_page_size: ReadableSize::kb(64),
result_cache_size: ReadableSize::mb(128),
}
}
}
@@ -365,6 +370,18 @@ impl IndexConfig {
Ok(())
}
pub fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
let cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(128));
self.result_cache_size = cmp::min(self.result_cache_size, cache_size);
self.content_cache_size = cmp::min(self.content_cache_size, cache_size);
let metadata_cache_size = cmp::min(
sys_memory / SST_META_CACHE_SIZE_FACTOR,
ReadableSize::mb(64),
);
self.metadata_cache_size = cmp::min(self.metadata_cache_size, metadata_cache_size);
}
}
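
A worked example of the clamping above, assuming MEM_CACHE_SIZE_FACTOR is 16 (the real value lives elsewhere in the crate and is only an assumption here):

// sys_memory = 1 GiB:
//   cache_size          = min(1 GiB / 16, 128 MiB)      = 64 MiB
//   result_cache_size   = min(128 MiB default, 64 MiB)  = 64 MiB
//   content_cache_size  = min(128 MiB default, 64 MiB)  = 64 MiB
// metadata_cache_size is clamped the same way against its own 64 MiB ceiling.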
/// Operational mode for certain actions.


@@ -14,7 +14,7 @@
mod builder;
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::ops::Range;
use std::sync::Arc;
@@ -33,6 +33,7 @@ use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::bloom_filter_index::{
BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader, Tag,
};
use crate::cache::index::result_cache::PredicateKey;
use crate::error::{
ApplyBloomFilterIndexSnafu, Error, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu,
Result,
@@ -71,7 +72,10 @@ pub struct BloomFilterIndexApplier {
/// Bloom filter predicates.
/// For each column, the value will be retained only if it contains __all__ predicates.
predicates: HashMap<ColumnId, Vec<InListPredicate>>,
predicates: Arc<BTreeMap<ColumnId, Vec<InListPredicate>>>,
/// Predicate key. Used to identify the predicate and fetch result from cache.
predicate_key: PredicateKey,
}
impl BloomFilterIndexApplier {
@@ -83,8 +87,9 @@ impl BloomFilterIndexApplier {
region_id: RegionId,
object_store: ObjectStore,
puffin_manager_factory: PuffinManagerFactory,
predicates: HashMap<ColumnId, Vec<InListPredicate>>,
predicates: BTreeMap<ColumnId, Vec<InListPredicate>>,
) -> Self {
let predicates = Arc::new(predicates);
Self {
region_dir,
region_id,
@@ -93,6 +98,7 @@ impl BloomFilterIndexApplier {
puffin_manager_factory,
puffin_metadata_cache: None,
bloom_filter_index_cache: None,
predicate_key: PredicateKey::new_bloom(predicates.clone()),
predicates,
}
}
@@ -150,7 +156,7 @@ impl BloomFilterIndexApplier {
.map(|(i, range)| (*i, vec![range.clone()]))
.collect::<Vec<_>>();
for (column_id, predicates) in &self.predicates {
for (column_id, predicates) in self.predicates.iter() {
let blob = match self
.blob_reader(file_id, *column_id, file_size_hint)
.await?
@@ -320,6 +326,11 @@ impl BloomFilterIndexApplier {
Ok(())
}
/// Returns the predicate key.
pub fn predicate_key(&self) -> &PredicateKey {
&self.predicate_key
}
}
fn is_blob_not_found(err: &Error) -> bool {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet};
use common_telemetry::warn;
use datafusion_common::ScalarValue;
@@ -44,7 +44,7 @@ pub struct BloomFilterIndexApplierBuilder<'a> {
file_cache: Option<FileCacheRef>,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
predicates: HashMap<ColumnId, Vec<InListPredicate>>,
predicates: BTreeMap<ColumnId, Vec<InListPredicate>>,
}
impl<'a> BloomFilterIndexApplierBuilder<'a> {
@@ -62,7 +62,7 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
file_cache: None,
puffin_metadata_cache: None,
bloom_filter_index_cache: None,
predicates: HashMap::default(),
predicates: BTreeMap::default(),
}
}
@@ -168,7 +168,7 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
.entry(column_id)
.or_default()
.push(InListPredicate {
list: HashSet::from([value]),
list: BTreeSet::from([value]),
});
Ok(())
@@ -196,7 +196,7 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
.map(|lit| encode_lit(lit, data_type.clone()));
// Collect successful conversions
let mut valid_predicates = HashSet::new();
let mut valid_predicates = BTreeSet::new();
for predicate in predicates {
match predicate {
Ok(p) => {
@@ -323,7 +323,7 @@ mod tests {
ConcreteDataType::string_datatype(),
)
.unwrap();
assert_eq!(column_predicates[0].list, HashSet::from([expected]));
assert_eq!(column_predicates[0].list, BTreeSet::from([expected]));
}
fn int64_lit(i: i64) -> Expr {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::iter;
use std::ops::Range;
use std::sync::Arc;
@@ -34,6 +34,7 @@ use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::bloom_filter_index::{
BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader, Tag,
};
use crate::cache::index::result_cache::PredicateKey;
use crate::error::{
ApplyBloomFilterIndexSnafu, ApplyFulltextIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu,
PuffinReadBlobSnafu, Result,
@@ -52,13 +53,16 @@ pub mod builder;
/// `FulltextIndexApplier` is responsible for applying fulltext index to the provided SST files
pub struct FulltextIndexApplier {
/// Requests to be applied.
requests: HashMap<ColumnId, FulltextRequest>,
requests: Arc<BTreeMap<ColumnId, FulltextRequest>>,
/// The source of the index.
index_source: IndexSource,
/// Cache for bloom filter index.
bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
/// Predicate key. Used to identify the predicate and fetch result from cache.
predicate_key: PredicateKey,
}
pub type FulltextIndexApplierRef = Arc<FulltextIndexApplier>;
@@ -69,12 +73,14 @@ impl FulltextIndexApplier {
region_dir: String,
region_id: RegionId,
store: ObjectStore,
requests: HashMap<ColumnId, FulltextRequest>,
requests: BTreeMap<ColumnId, FulltextRequest>,
puffin_manager_factory: PuffinManagerFactory,
) -> Self {
let requests = Arc::new(requests);
let index_source = IndexSource::new(region_dir, region_id, puffin_manager_factory, store);
Self {
predicate_key: PredicateKey::new_fulltext(requests.clone()),
requests,
index_source,
bloom_filter_index_cache: None,
@@ -105,6 +111,11 @@ impl FulltextIndexApplier {
self.bloom_filter_index_cache = bloom_filter_index_cache;
self
}
/// Returns the predicate key.
pub fn predicate_key(&self) -> &PredicateKey {
&self.predicate_key
}
}
impl FulltextIndexApplier {
@@ -120,7 +131,7 @@ impl FulltextIndexApplier {
.start_timer();
let mut row_ids: Option<BTreeSet<RowId>> = None;
for (column_id, request) in &self.requests {
for (column_id, request) in self.requests.iter() {
if request.queries.is_empty() && request.terms.is_empty() {
continue;
}
@@ -233,7 +244,7 @@ impl FulltextIndexApplier {
let (input, mut output) = Self::init_coarse_output(row_groups);
let mut applied = false;
for (column_id, request) in &self.requests {
for (column_id, request) in self.requests.iter() {
if request.terms.is_empty() {
// only apply terms
continue;


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::BTreeMap;
use datafusion_common::ScalarValue;
use datafusion_expr::expr::ScalarFunction;
@@ -31,7 +31,7 @@ use crate::sst::index::puffin_manager::PuffinManagerFactory;
/// A request for fulltext index.
///
/// It contains all the queries and terms for a column.
#[derive(Default, Debug)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)]
pub struct FulltextRequest {
pub queries: Vec<FulltextQuery>,
pub terms: Vec<FulltextTerm>,
@@ -65,14 +65,14 @@ impl FulltextRequest {
/// A query to be matched in fulltext index.
///
/// `query` is the query to be matched, e.g. "+foo -bar" in `SELECT * FROM t WHERE matches(text, "+foo -bar")`.
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FulltextQuery(pub String);
/// A term to be matched in fulltext index.
///
/// `term` is the term to be matched, e.g. "foo" in `SELECT * FROM t WHERE matches_term(text, "foo")`.
/// `col_lowered` indicates whether the column is lowercased, e.g. `col_lowered = true` when `matches_term(lower(text), "foo")`.
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FulltextTerm {
pub col_lowered: bool,
pub term: String,
@@ -137,7 +137,7 @@ impl<'a> FulltextIndexApplierBuilder<'a> {
/// Builds `SstIndexApplier` from the given expressions.
pub fn build(self, exprs: &[Expr]) -> Result<Option<FulltextIndexApplier>> {
let mut requests = HashMap::new();
let mut requests = BTreeMap::new();
for expr in exprs {
Self::extract_requests(expr, self.metadata, &mut requests);
}
@@ -164,7 +164,7 @@ impl<'a> FulltextIndexApplierBuilder<'a> {
fn extract_requests(
expr: &Expr,
metadata: &'a RegionMetadata,
requests: &mut HashMap<ColumnId, FulltextRequest>,
requests: &mut BTreeMap<ColumnId, FulltextRequest>,
) {
match expr {
Expr::BinaryExpr(BinaryExpr {
@@ -526,7 +526,7 @@ mod tests {
func: matches_func(),
});
let mut requests = HashMap::new();
let mut requests = BTreeMap::new();
FulltextIndexApplierBuilder::extract_requests(&matches_expr, &metadata, &mut requests);
assert_eq!(requests.len(), 1);
@@ -565,7 +565,7 @@ mod tests {
right: Box::new(matches_term_expr),
});
let mut requests = HashMap::new();
let mut requests = BTreeMap::new();
FulltextIndexApplierBuilder::extract_requests(&binary_expr, &metadata, &mut requests);
assert_eq!(requests.len(), 1);


@@ -356,7 +356,7 @@ impl AltFulltextCreator {
#[cfg(test)]
mod tests {
use std::collections::BTreeSet;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
use api::v1::SemanticType;
@@ -573,7 +573,7 @@ mod tests {
let object_store = object_store.clone();
let factory = factory.clone();
let mut requests: HashMap<ColumnId, FulltextRequest> = HashMap::new();
let mut requests: BTreeMap<ColumnId, FulltextRequest> = BTreeMap::new();
// Add queries
for (column_id, query) in queries {


@@ -14,6 +14,7 @@
pub mod builder;
use std::collections::BTreeMap;
use std::sync::Arc;
use common_base::range_read::RangeReader;
@@ -22,15 +23,17 @@ use index::inverted_index::format::reader::InvertedIndexBlobReader;
use index::inverted_index::search::index_apply::{
ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext,
};
use index::inverted_index::search::predicate::Predicate;
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
use puffin::puffin_manager::{PuffinManager, PuffinReader};
use snafu::ResultExt;
use store_api::storage::RegionId;
use store_api::storage::{ColumnId, RegionId};
use crate::access_layer::{RegionFilePathFactory, WriteCachePathProvider};
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::inverted_index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::cache::index::result_cache::PredicateKey;
use crate::error::{
ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result,
};
@@ -67,6 +70,9 @@ pub(crate) struct InvertedIndexApplier {
/// Puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
/// Predicate key. Used to identify the predicate and fetch result from cache.
predicate_key: PredicateKey,
}
pub(crate) type InvertedIndexApplierRef = Arc<InvertedIndexApplier>;
@@ -79,6 +85,7 @@ impl InvertedIndexApplier {
store: ObjectStore,
index_applier: Box<dyn IndexApplier>,
puffin_manager_factory: PuffinManagerFactory,
predicates: BTreeMap<ColumnId, Vec<Predicate>>,
) -> Self {
INDEX_APPLY_MEMORY_USAGE.add(index_applier.memory_usage() as i64);
@@ -91,6 +98,7 @@ impl InvertedIndexApplier {
puffin_manager_factory,
inverted_index_cache: None,
puffin_metadata_cache: None,
predicate_key: PredicateKey::new_inverted(Arc::new(predicates)),
}
}
@@ -218,6 +226,11 @@ impl InvertedIndexApplier {
.await
.context(PuffinBuildReaderSnafu)
}
/// Returns the predicate key.
pub fn predicate_key(&self) -> &PredicateKey {
&self.predicate_key
}
}
impl Drop for InvertedIndexApplier {
@@ -276,6 +289,7 @@ mod tests {
object_store,
Box::new(mock_index_applier),
puffin_manager_factory,
Default::default(),
);
let output = sst_index_applier.apply(file_id, None).await.unwrap();
assert_eq!(
@@ -323,6 +337,7 @@ mod tests {
object_store,
Box::new(mock_index_applier),
puffin_manager_factory,
Default::default(),
);
let res = sst_index_applier.apply(file_id, None).await;
assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found"));


@@ -18,7 +18,7 @@ mod eq_list;
mod in_list;
mod regex_match;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, HashSet};
use common_telemetry::warn;
use datafusion_common::ScalarValue;
@@ -59,7 +59,7 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> {
indexed_column_ids: HashSet<ColumnId>,
/// Stores predicates during traversal on the Expr tree.
output: HashMap<ColumnId, Vec<Predicate>>,
output: BTreeMap<ColumnId, Vec<Predicate>>,
/// The puffin manager factory.
puffin_manager_factory: PuffinManagerFactory,
@@ -85,7 +85,7 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
object_store,
metadata,
indexed_column_ids,
output: HashMap::default(),
output: BTreeMap::default(),
puffin_manager_factory,
file_cache: None,
inverted_index_cache: None,
@@ -130,8 +130,8 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
let predicates = self
.output
.into_iter()
.map(|(column_id, predicates)| (column_id.to_string(), predicates))
.iter()
.map(|(column_id, predicates)| (column_id.to_string(), predicates.clone()))
.collect();
let applier = PredicatesIndexApplier::try_from(predicates);
@@ -142,6 +142,7 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
self.object_store,
Box::new(applier.context(BuildIndexApplierSnafu)?),
self.puffin_manager_factory,
self.output,
)
.with_file_cache(self.file_cache)
.with_puffin_metadata_cache(self.puffin_metadata_cache)


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::BTreeSet;
use datafusion_expr::{BinaryExpr, Expr as DfExpr, Operator};
use datatypes::data_type::ConcreteDataType;
@@ -36,7 +36,7 @@ impl InvertedIndexApplierBuilder<'_> {
};
let predicate = Predicate::InList(InListPredicate {
list: HashSet::from_iter([Self::encode_lit(lit, data_type)?]),
list: BTreeSet::from_iter([Self::encode_lit(lit, data_type)?]),
});
self.add_predicate(column_id, predicate);
Ok(())
@@ -64,7 +64,7 @@ impl InvertedIndexApplierBuilder<'_> {
};
let bytes = Self::encode_lit(lit, data_type.clone())?;
let mut inlist = HashSet::from_iter([bytes]);
let mut inlist = BTreeSet::from_iter([bytes]);
if Self::collect_eq_list_inner(column_name, &data_type, or_list, &mut inlist)? {
let predicate = Predicate::InList(InListPredicate { list: inlist });
@@ -82,7 +82,7 @@ impl InvertedIndexApplierBuilder<'_> {
column_name: &str,
data_type: &ConcreteDataType,
expr: &DfExpr,
inlist: &mut HashSet<Bytes>,
inlist: &mut BTreeSet<Bytes>,
) -> Result<bool> {
let DfExpr::BinaryExpr(BinaryExpr {
left,
@@ -122,6 +122,8 @@ impl InvertedIndexApplierBuilder<'_> {
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use crate::error::Error;
use crate::sst::index::inverted_index::applier::builder::tests::{
@@ -154,13 +156,13 @@ mod tests {
assert_eq!(
predicates[0],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([encoded_string("foo")])
list: BTreeSet::from_iter([encoded_string("foo")])
})
);
assert_eq!(
predicates[1],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([encoded_string("bar")])
list: BTreeSet::from_iter([encoded_string("bar")])
})
);
}
@@ -187,7 +189,7 @@ mod tests {
assert_eq!(
predicates[0],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([encoded_string("abc")])
list: BTreeSet::from_iter([encoded_string("abc")])
})
);
}
@@ -275,7 +277,7 @@ mod tests {
assert_eq!(
predicates[0],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([
list: BTreeSet::from_iter([
encoded_string("abc"),
encoded_string("foo"),
encoded_string("bar"),


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::BTreeSet;
use datafusion_expr::expr::InList;
use index::inverted_index::search::predicate::{InListPredicate, Predicate};
@@ -34,7 +34,7 @@ impl InvertedIndexApplierBuilder<'_> {
};
let mut predicate = InListPredicate {
list: HashSet::with_capacity(inlist.list.len()),
list: BTreeSet::new(),
};
for lit in &inlist.list {
let Some(lit) = Self::nonnull_lit(lit) else {
@@ -53,6 +53,8 @@ impl InvertedIndexApplierBuilder<'_> {
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use crate::error::Error;
use crate::sst::index::inverted_index::applier::builder::tests::{
@@ -86,7 +88,7 @@ mod tests {
assert_eq!(
predicates[0],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([encoded_string("foo"), encoded_string("bar")])
list: BTreeSet::from_iter([encoded_string("foo"), encoded_string("bar")])
})
);
}
@@ -140,7 +142,7 @@ mod tests {
assert_eq!(
predicates[0],
Predicate::InList(InListPredicate {
list: HashSet::from_iter([encoded_string("foo"), encoded_string("bar")])
list: BTreeSet::from_iter([encoded_string("foo"), encoded_string("bar")])
})
);
}


@@ -36,6 +36,7 @@ use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
use store_api::storage::ColumnId;
use table::predicate::Predicate;
use crate::cache::index::result_cache::PredicateKey;
use crate::cache::CacheStrategy;
use crate::error::{
ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadDataPartSnafu,
@@ -48,7 +49,7 @@ use crate::metrics::{
use crate::read::prune::{PruneReader, Source};
use crate::read::{Batch, BatchReader};
use crate::row_converter::build_primary_key_codec;
use crate::sst::file::FileHandle;
use crate::sst::file::{FileHandle, FileId};
use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierRef;
use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef;
use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef;
@@ -60,6 +61,31 @@ use crate::sst::parquet::row_selection::RowGroupSelection;
use crate::sst::parquet::stats::RowGroupPruningStats;
use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, PARQUET_METADATA_KEY};
const INDEX_TYPE_FULLTEXT: &str = "fulltext";
const INDEX_TYPE_INVERTED: &str = "inverted";
const INDEX_TYPE_BLOOM: &str = "bloom filter";
macro_rules! handle_index_error {
($err:expr, $file_handle:expr, $index_type:expr) => {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to apply {} index, region_id: {}, file_id: {}, err: {:?}",
$index_type,
$file_handle.region_id(),
$file_handle.file_id(),
$err
);
} else {
warn!(
$err; "Failed to apply {} index, region_id: {}, file_id: {}",
$index_type,
$file_handle.region_id(),
$file_handle.file_id()
);
}
};
}
/// Parquet SST reader builder.
pub struct ParquetReaderBuilder {
/// SST directory.
@@ -346,34 +372,39 @@ impl ParquetReaderBuilder {
let mut output = RowGroupSelection::new(row_group_size, num_rows as _);
self.prune_row_groups_by_fulltext_index(row_group_size, parquet_meta, &mut output, metrics)
self.prune_row_groups_by_minmax(read_format, parquet_meta, &mut output, metrics);
if output.is_empty() {
return output;
}
let fulltext_filtered = self
.prune_row_groups_by_fulltext_index(row_group_size, parquet_meta, &mut output, metrics)
.await;
if output.is_empty() {
return output;
}
let inverted_filtered = self
.prune_row_groups_by_inverted_index(row_group_size, &mut output, metrics)
self.prune_row_groups_by_inverted_index(row_group_size, &mut output, metrics)
.await;
if output.is_empty() {
return output;
}
if !inverted_filtered {
self.prune_row_groups_by_minmax(read_format, parquet_meta, &mut output, metrics);
if output.is_empty() {
return output;
}
}
self.prune_row_groups_by_bloom_filter(row_group_size, parquet_meta, &mut output, metrics)
.await;
if output.is_empty() {
return output;
}
self.prune_row_groups_by_fulltext_bloom(row_group_size, parquet_meta, &mut output, metrics)
if !fulltext_filtered {
self.prune_row_groups_by_fulltext_bloom(
row_group_size,
parquet_meta,
&mut output,
metrics,
)
.await;
}
output
}
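
As far as the reordered hunk reads, the pruning pipeline now runs cheapest-first and short-circuits as soon as nothing remains; a condensed reading (an interpretation of the diff, not authoritative):

// 1. min/max statistics              - always first, needs no index I/O
// 2. fulltext index, fine-grained    - records whether it actually applied
// 3. inverted index
// 4. bloom filter index
// 5. fulltext bloom, coarse-grained  - skipped when step 2 already applied
// After every step: if `output` is empty, return it immediately.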
@@ -392,46 +423,42 @@ impl ParquetReaderBuilder {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_res = match index_applier
.apply_fine(self.file_handle.file_id(), Some(file_size_hint))
.await
{
Ok(Some(res)) => res,
Ok(None) => {
return false;
}
Err(err) => {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to apply full-text index, region_id: {}, file_id: {}, err: {:?}",
self.file_handle.region_id(),
self.file_handle.file_id(),
err
);
} else {
warn!(
err; "Failed to apply full-text index, region_id: {}, file_id: {}",
self.file_handle.region_id(), self.file_handle.file_id()
);
}
let predicate_key = index_applier.predicate_key();
// Fast path: return early if the result is in the cache.
if self.index_result_cache_get(
predicate_key,
self.file_handle.file_id(),
output,
metrics,
INDEX_TYPE_FULLTEXT,
) {
return true;
}
// Slow path: apply the index from the file.
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_res = index_applier
.apply_fine(self.file_handle.file_id(), Some(file_size_hint))
.await;
let selection = match apply_res {
Ok(Some(res)) => {
RowGroupSelection::from_row_ids(res, row_group_size, parquet_meta.num_row_groups())
}
Ok(None) => return false,
Err(err) => {
handle_index_error!(err, self.file_handle, INDEX_TYPE_FULLTEXT);
return false;
}
};
let selection = RowGroupSelection::from_row_ids(
apply_res,
row_group_size,
parquet_meta.num_row_groups(),
self.apply_index_result_and_update_cache(
predicate_key,
self.file_handle.file_id(),
selection,
output,
metrics,
INDEX_TYPE_FULLTEXT,
);
let intersection = output.intersect(&selection);
metrics.rg_fulltext_filtered += output.row_group_count() - intersection.row_group_count();
metrics.rows_fulltext_filtered += output.row_count() - intersection.row_count();
*output = intersection;
true
}
@@ -449,44 +476,158 @@ impl ParquetReaderBuilder {
let Some(index_applier) = &self.inverted_index_applier else {
return false;
};
if !self.file_handle.meta_ref().inverted_index_available() {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_output = match index_applier
.apply(self.file_handle.file_id(), Some(file_size_hint))
.await
{
Ok(output) => output,
Err(err) => {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to apply inverted index, region_id: {}, file_id: {}, err: {:?}",
self.file_handle.region_id(),
self.file_handle.file_id(),
err
);
} else {
warn!(
err; "Failed to apply inverted index, region_id: {}, file_id: {}",
self.file_handle.region_id(), self.file_handle.file_id()
);
}
let predicate_key = index_applier.predicate_key();
// Fast path: return early if the result is in the cache.
if self.index_result_cache_get(
predicate_key,
self.file_handle.file_id(),
output,
metrics,
INDEX_TYPE_INVERTED,
) {
return true;
}
// Slow path: apply the index from the file.
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_res = index_applier
.apply(self.file_handle.file_id(), Some(file_size_hint))
.await;
let selection = match apply_res {
Ok(output) => {
RowGroupSelection::from_inverted_index_apply_output(row_group_size, output)
}
Err(err) => {
handle_index_error!(err, self.file_handle, INDEX_TYPE_INVERTED);
return false;
}
};
let selection =
RowGroupSelection::from_inverted_index_apply_output(row_group_size, apply_output);
let intersection = output.intersect(&selection);
self.apply_index_result_and_update_cache(
predicate_key,
self.file_handle.file_id(),
selection,
output,
metrics,
INDEX_TYPE_INVERTED,
);
true
}
metrics.rg_inverted_filtered += output.row_group_count() - intersection.row_group_count();
metrics.rows_inverted_filtered += output.row_count() - intersection.row_count();
async fn prune_row_groups_by_bloom_filter(
&self,
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut RowGroupSelection,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.bloom_filter_index_applier else {
return false;
};
if !self.file_handle.meta_ref().bloom_filter_index_available() {
return false;
}
*output = intersection;
let predicate_key = index_applier.predicate_key();
// Fast path: return early if the result is in the cache.
if self.index_result_cache_get(
predicate_key,
self.file_handle.file_id(),
output,
metrics,
INDEX_TYPE_BLOOM,
) {
return true;
}
// Slow path: apply the index from the file.
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let rgs = parquet_meta
.row_groups()
.iter()
.enumerate()
.map(|(i, rg)| (rg.num_rows() as usize, output.contains_row_group(i)));
let apply_res = index_applier
.apply(self.file_handle.file_id(), Some(file_size_hint), rgs)
.await;
let selection = match apply_res {
Ok(apply_output) => RowGroupSelection::from_row_ranges(apply_output, row_group_size),
Err(err) => {
handle_index_error!(err, self.file_handle, INDEX_TYPE_BLOOM);
return false;
}
};
self.apply_index_result_and_update_cache(
predicate_key,
self.file_handle.file_id(),
selection,
output,
metrics,
INDEX_TYPE_BLOOM,
);
true
}
async fn prune_row_groups_by_fulltext_bloom(
&self,
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut RowGroupSelection,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.fulltext_index_applier else {
return false;
};
if !self.file_handle.meta_ref().fulltext_index_available() {
return false;
}
let predicate_key = index_applier.predicate_key();
// Fast path: return early if the result is in the cache.
if self.index_result_cache_get(
predicate_key,
self.file_handle.file_id(),
output,
metrics,
INDEX_TYPE_FULLTEXT,
) {
return true;
}
// Slow path: apply the index from the file.
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let rgs = parquet_meta
.row_groups()
.iter()
.enumerate()
.map(|(i, rg)| (rg.num_rows() as usize, output.contains_row_group(i)));
let apply_res = index_applier
.apply_coarse(self.file_handle.file_id(), Some(file_size_hint), rgs)
.await;
let selection = match apply_res {
Ok(Some(apply_output)) => {
RowGroupSelection::from_row_ranges(apply_output, row_group_size)
}
Ok(None) => return false,
Err(err) => {
handle_index_error!(err, self.file_handle, INDEX_TYPE_FULLTEXT);
return false;
}
};
self.apply_index_result_and_update_cache(
predicate_key,
self.file_handle.file_id(),
selection,
output,
metrics,
INDEX_TYPE_FULLTEXT,
);
true
}
@@ -533,126 +674,57 @@ impl ParquetReaderBuilder {
true
}
async fn prune_row_groups_by_bloom_filter(
fn index_result_cache_get(
&self,
row_group_size: usize,
parquet_meta: &ParquetMetaData,
predicate_key: &PredicateKey,
file_id: FileId,
output: &mut RowGroupSelection,
metrics: &mut ReaderFilterMetrics,
index_type: &str,
) -> bool {
let Some(index_applier) = &self.bloom_filter_index_applier else {
return false;
};
if !self.file_handle.meta_ref().bloom_filter_index_available() {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_output = match index_applier
.apply(
self.file_handle.file_id(),
Some(file_size_hint),
parquet_meta
.row_groups()
.iter()
.enumerate()
.map(|(i, rg)| (rg.num_rows() as usize, output.contains_row_group(i))),
)
.await
{
Ok(apply_output) => apply_output,
Err(err) => {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to apply bloom filter index, region_id: {}, file_id: {}, err: {:?}",
self.file_handle.region_id(),
self.file_handle.file_id(),
err
);
} else {
warn!(
err; "Failed to apply bloom filter index, region_id: {}, file_id: {}",
self.file_handle.region_id(), self.file_handle.file_id()
);
}
return false;
if let Some(index_result_cache) = &self.cache_strategy.index_result_cache() {
let result = index_result_cache.get(predicate_key, file_id);
if let Some(result) = result {
apply_selection_and_update_metrics(output, &result, metrics, index_type);
return true;
}
};
let selection = RowGroupSelection::from_row_ranges(apply_output, row_group_size);
let intersection = output.intersect(&selection);
metrics.rg_bloom_filtered += output.row_group_count() - intersection.row_group_count();
metrics.rows_bloom_filtered += output.row_count() - intersection.row_count();
*output = intersection;
true
}
false
}
async fn prune_row_groups_by_fulltext_bloom(
fn apply_index_result_and_update_cache(
&self,
row_group_size: usize,
parquet_meta: &ParquetMetaData,
predicate_key: &PredicateKey,
file_id: FileId,
result: RowGroupSelection,
output: &mut RowGroupSelection,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.fulltext_index_applier else {
return false;
};
index_type: &str,
) {
apply_selection_and_update_metrics(output, &result, metrics, index_type);
if !self.file_handle.meta_ref().fulltext_index_available() {
return false;
if let Some(index_result_cache) = &self.cache_strategy.index_result_cache() {
index_result_cache.put(predicate_key.clone(), file_id, Arc::new(result));
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_output = match index_applier
.apply_coarse(
self.file_handle.file_id(),
Some(file_size_hint),
parquet_meta
.row_groups()
.iter()
.enumerate()
.map(|(i, rg)| (rg.num_rows() as usize, output.contains_row_group(i))),
)
.await
{
Ok(Some(apply_output)) => apply_output,
Ok(None) => return false,
Err(err) => {
if cfg!(any(test, feature = "test")) {
panic!(
"Failed to apply fulltext index, region_id: {}, file_id: {}, err: {:?}",
self.file_handle.region_id(),
self.file_handle.file_id(),
err
);
} else {
warn!(
err; "Failed to apply fulltext index, region_id: {}, file_id: {}",
self.file_handle.region_id(), self.file_handle.file_id()
);
}
return false;
}
};
let selection = RowGroupSelection::from_row_ranges(apply_output, row_group_size);
let intersection = output.intersect(&selection);
metrics.rg_fulltext_filtered += output.row_group_count() - intersection.row_group_count();
metrics.rows_fulltext_filtered += output.row_count() - intersection.row_count();
*output = intersection;
true
}
}
fn apply_selection_and_update_metrics(
output: &mut RowGroupSelection,
result: &RowGroupSelection,
metrics: &mut ReaderFilterMetrics,
index_type: &str,
) {
let intersection = output.intersect(result);
let row_group_count = output.row_group_count() - intersection.row_group_count();
let row_count = output.row_count() - intersection.row_count();
metrics.update_index_metrics(index_type, row_group_count, row_count);
*output = intersection;
}
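// Note: the three pruners above share one get-or-compute shape around the result
// cache (summary of the control flow, not new code):
//   1. index_result_cache_get(predicate_key, file_id, ..) returns early on a hit;
//   2. otherwise the index applier runs and yields a RowGroupSelection;
//   3. apply_index_result_and_update_cache(..) intersects `output`, bumps the
//      per-index metrics, and stores the fresh selection under (key, file_id).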
/// Metrics of filtering row groups and rows.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct ReaderFilterMetrics {
@@ -729,6 +801,24 @@ impl ReaderFilterMetrics {
.with_label_values(&["bloom_filter_index_filtered"])
.inc_by(self.rows_bloom_filtered as u64);
}
fn update_index_metrics(&mut self, index_type: &str, row_group_count: usize, row_count: usize) {
match index_type {
INDEX_TYPE_FULLTEXT => {
self.rg_fulltext_filtered += row_group_count;
self.rows_fulltext_filtered += row_count;
}
INDEX_TYPE_INVERTED => {
self.rg_inverted_filtered += row_group_count;
self.rows_inverted_filtered += row_count;
}
INDEX_TYPE_BLOOM => {
self.rg_bloom_filtered += row_group_count;
self.rows_bloom_filtered += row_count;
}
_ => {}
}
}
}
/// Parquet reader metrics.


@@ -26,6 +26,8 @@ pub struct RowGroupSelection {
selection_in_rg: BTreeMap<usize, RowSelectionWithCount>,
/// Total number of rows in the selection.
row_count: usize,
/// Total length of the selectors.
selector_len: usize,
}
/// A row selection with its count.
@@ -35,6 +37,8 @@ struct RowSelectionWithCount {
selection: RowSelection,
/// Number of rows in the selection.
row_count: usize,
/// Length of the selectors.
selector_len: usize,
}
impl RowGroupSelection {
@@ -61,6 +65,7 @@ impl RowGroupSelection {
RowSelectionWithCount {
selection,
row_count: row_group_size,
selector_len: 1,
},
);
}
@@ -68,6 +73,7 @@ impl RowGroupSelection {
Self {
selection_in_rg,
row_count: total_row_count,
selector_len: row_group_count,
}
}
@@ -109,6 +115,7 @@ impl RowGroupSelection {
// Step 2: Group ranges by row group ID and create row selections
let mut total_row_count = 0;
let mut total_selector_len = 0;
let selection_in_rg = row_group_ranges
.chunk_by(|(row_group_id, _)| *row_group_id)
.into_iter()
@@ -122,12 +129,15 @@ impl RowGroupSelection {
// by the min() operation above
let selection = row_selection_from_row_ranges(ranges, row_group_size);
let row_count = selection.row_count();
let selector_len = selector_len(&selection);
total_row_count += row_count;
total_selector_len += selector_len;
(
row_group_id,
RowSelectionWithCount {
selection,
row_count,
selector_len,
},
)
})
@@ -136,6 +146,7 @@ impl RowGroupSelection {
Self {
selection_in_rg,
row_count: total_row_count,
selector_len: total_selector_len,
}
}
@@ -161,18 +172,22 @@ impl RowGroupSelection {
// Step 2: Create row selections for each row group
let mut total_row_count = 0;
let mut total_selector_len = 0;
let selection_in_rg = row_group_to_row_ids
.into_iter()
.map(|(row_group_id, row_ids)| {
let selection =
row_selection_from_sorted_row_ids(row_ids.into_iter(), row_group_size);
let row_count = selection.row_count();
let selector_len = selector_len(&selection);
total_row_count += row_count;
total_selector_len += selector_len;
(
row_group_id,
RowSelectionWithCount {
selection,
row_count,
selector_len,
},
)
})
@@ -181,6 +196,7 @@ impl RowGroupSelection {
Self {
selection_in_rg,
row_count: total_row_count,
selector_len: total_selector_len,
}
}
@@ -201,17 +217,21 @@ impl RowGroupSelection {
row_group_size: usize,
) -> Self {
let mut total_row_count = 0;
let mut total_selector_len = 0;
let selection_in_rg = row_ranges
.into_iter()
.map(|(row_group_id, ranges)| {
let selection = row_selection_from_row_ranges(ranges.into_iter(), row_group_size);
let row_count = selection.row_count();
let selector_len = selector_len(&selection);
total_row_count += row_count;
total_selector_len += selector_len;
(
row_group_id,
RowSelectionWithCount {
selection,
row_count,
selector_len,
},
)
})
@@ -220,6 +240,7 @@ impl RowGroupSelection {
Self {
selection_in_rg,
row_count: total_row_count,
selector_len: total_selector_len,
}
}
@@ -262,6 +283,7 @@ impl RowGroupSelection {
pub fn intersect(&self, other: &Self) -> Self {
let mut res = BTreeMap::new();
let mut total_row_count = 0;
let mut total_selector_len = 0;
for (rg_id, x) in other.selection_in_rg.iter() {
let Some(y) = self.selection_in_rg.get(rg_id) else {
@@ -269,13 +291,16 @@ impl RowGroupSelection {
};
let selection = x.selection.intersection(&y.selection);
let row_count = selection.row_count();
let selector_len = selector_len(&selection);
if row_count > 0 {
total_row_count += row_count;
total_selector_len += selector_len;
res.insert(
*rg_id,
RowSelectionWithCount {
selection,
row_count,
selector_len,
},
);
}
@@ -284,6 +309,7 @@ impl RowGroupSelection {
Self {
selection_in_rg: res,
row_count: total_row_count,
selector_len: total_selector_len,
}
}
@@ -304,21 +330,27 @@ impl RowGroupSelection {
RowSelectionWithCount {
selection,
row_count,
selector_len,
},
) = self.selection_in_rg.pop_first()?;
self.row_count -= row_count;
self.selector_len -= selector_len;
Some((row_group_id, selection))
}
/// Removes a row group from the selection.
pub fn remove_row_group(&mut self, row_group_id: usize) {
let Some(RowSelectionWithCount { row_count, .. }) =
self.selection_in_rg.remove(&row_group_id)
let Some(RowSelectionWithCount {
row_count,
selector_len,
..
}) = self.selection_in_rg.remove(&row_group_id)
else {
return;
};
self.row_count -= row_count;
self.selector_len -= selector_len;
}
/// Returns true if the selection is empty.
@@ -337,6 +369,12 @@ impl RowGroupSelection {
.iter()
.map(|(row_group_id, x)| (row_group_id, &x.selection))
}
/// Returns the memory usage of the selection.
pub fn mem_usage(&self) -> usize {
self.selector_len * size_of::<RowSelector>()
+ self.selection_in_rg.len() * size_of::<RowSelectionWithCount>()
}
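// Worked example (the concrete sizes are target-dependent and assumed here,
// e.g. 16 bytes per RowSelector on a 64-bit target): one row group whose
// selectors are [skip(5), select(5)] gives selector_len == 2 and
// selection_in_rg.len() == 1, so mem_usage() == 2 * size_of::<RowSelector>()
// + 1 * size_of::<RowSelectionWithCount>().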
}
/// Converts an iterator of row ranges into a `RowSelection` by creating a sequence of `RowSelector`s.
@@ -420,11 +458,32 @@ fn add_or_merge_selector(selectors: &mut Vec<RowSelector>, count: usize, is_skip
selectors.push(new_selector);
}
/// Returns the length of the selectors in the selection.
fn selector_len(selection: &RowSelection) -> usize {
selection.iter().size_hint().0
}
#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
use super::*;
#[test]
fn test_selector_len() {
let selection = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(5)]);
assert_eq!(selector_len(&selection), 2);
let selection = RowSelection::from(vec![
RowSelector::select(5),
RowSelector::skip(5),
RowSelector::select(5),
]);
assert_eq!(selector_len(&selection), 3);
let selection = RowSelection::from(vec![]);
assert_eq!(selector_len(&selection), 0);
}
#[test]
fn test_single_contiguous_range() {
let selection = row_selection_from_row_ranges(Some(5..10).into_iter(), 10);


@@ -177,6 +177,7 @@ impl WorkerGroup {
.index_metadata_size(config.index.metadata_cache_size.as_bytes())
.index_content_size(config.index.content_cache_size.as_bytes())
.index_content_page_size(config.index.content_cache_page_size.as_bytes())
.index_result_cache_size(config.index.result_cache_size.as_bytes())
.puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
.write_cache(write_cache)
.build(),


@@ -950,6 +950,7 @@ impl FlowMirrorTask {
// already know this is not source table
Some(None) => continue,
_ => {
// dedup peers
let peers = cache
.get(table_id)
.await
@@ -957,6 +958,8 @@ impl FlowMirrorTask {
.unwrap_or_default()
.values()
.cloned()
.collect::<HashSet<_>>()
.into_iter()
.collect::<Vec<_>>();
if !peers.is_empty() {


@@ -319,8 +319,8 @@ pub fn column_schema(
columns: &HashMap<String, VectorRef>,
) -> Result<Vec<ColumnSchema>> {
columns
.iter()
.map(|(column_name, _vector)| {
.keys()
.map(|column_name| {
let column_schema = table_info
.meta
.schema


@@ -185,7 +185,7 @@ impl StatementExecutor {
.contains_key(LOGICAL_TABLE_METADATA_KEY)
{
return self
.create_logical_tables(&[create_table.clone()], query_ctx)
.create_logical_tables(std::slice::from_ref(create_table), query_ctx)
.await?
.into_iter()
.next()


@@ -561,8 +561,8 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Prepare value must be an object"))]
PrepareValueMustBeObject {
#[snafu(display("Input value must be an object"))]
InputValueMustBeObject {
#[snafu(implicit)]
location: Location,
},
@@ -833,7 +833,7 @@ impl ErrorExt for Error {
| ValueYamlKeyMustBeString { .. }
| YamlLoad { .. }
| YamlParse { .. }
| PrepareValueMustBeObject { .. }
| InputValueMustBeObject { .. }
| ColumnOptions { .. }
| UnsupportedIndexType { .. }
| UnsupportedNumberType { .. }


@@ -29,7 +29,7 @@ use yaml_rust::YamlLoader;
use crate::dispatcher::{Dispatcher, Rule};
use crate::error::{
IntermediateKeyIndexSnafu, PrepareValueMustBeObjectSnafu, Result,
InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu, Result,
TransformNoTimestampProcessorSnafu, YamlLoadSnafu, YamlParseSnafu,
};
use crate::etl::processor::ProcessorKind;
@@ -186,7 +186,7 @@ pub fn json_to_map(val: serde_json::Value) -> Result<PipelineMap> {
}
Ok(intermediate_state)
}
_ => PrepareValueMustBeObjectSnafu.fail(),
_ => InputValueMustBeObjectSnafu.fail(),
}
}
@@ -203,7 +203,7 @@ pub fn simd_json_to_map(val: simd_json::OwnedValue) -> Result<PipelineMap> {
}
Ok(intermediate_state)
}
_ => PrepareValueMustBeObjectSnafu.fail(),
_ => InputValueMustBeObjectSnafu.fail(),
}
}


@@ -23,10 +23,12 @@ use api::v1::column_data_type_extension::TypeExt;
use api::v1::value::ValueData;
use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, SemanticType};
use coerce::{coerce_columns, coerce_value};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
use itertools::Itertools;
use once_cell::sync::OnceCell;
use serde_json::Number;
use session::context::Channel;
use crate::error::{
IdentifyPipelineColumnTypeMismatchSnafu, ReachedMaxNestedLevelsSnafu, Result,
@@ -38,7 +40,7 @@ use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transforms};
use crate::etl::value::{Timestamp, Value};
use crate::etl::PipelineMap;
use crate::{IdentityTimeIndex, PipelineContext};
use crate::PipelineContext;
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
@@ -332,61 +334,87 @@ fn resolve_number_schema(
)
}
fn calc_ts(p_ctx: &PipelineContext, values: &PipelineMap) -> Result<Option<ValueData>> {
match p_ctx.channel {
Channel::Prometheus => Ok(Some(ValueData::TimestampMillisecondValue(
values
.get(GREPTIME_TIMESTAMP)
.and_then(|v| v.as_i64())
.unwrap_or_default(),
))),
_ => {
let custom_ts = p_ctx.pipeline_definition.get_custom_ts();
match custom_ts {
Some(ts) => {
let ts_field = values.get(ts.get_column_name());
Some(ts.get_timestamp(ts_field)).transpose()
}
None => Ok(Some(ValueData::TimestampNanosecondValue(
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
))),
}
}
}
}
fn values_to_row(
schema_info: &mut SchemaInfo,
values: PipelineMap,
custom_ts: Option<&IdentityTimeIndex>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<Row> {
let mut row: Vec<GreptimeValue> = Vec::with_capacity(schema_info.schema.len());
let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();
// set time index value
let value_data = match custom_ts {
Some(ts) => {
let ts_field = values.get(ts.get_column_name());
Some(ts.get_timestamp(ts_field)?)
}
None => Some(ValueData::TimestampNanosecondValue(
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
)),
};
// calculate timestamp value based on the channel
let ts = calc_ts(pipeline_ctx, &values)?;
row.push(GreptimeValue { value_data });
row.push(GreptimeValue { value_data: ts });
for _ in 1..schema_info.schema.len() {
row.push(GreptimeValue { value_data: None });
}
// skip ts column
let ts_column_name = custom_ts
.as_ref()
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
for (column_name, value) in values {
// skip ts column
let ts_column = custom_ts
.as_ref()
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
if column_name == ts_column {
if column_name == ts_column_name {
continue;
}
let index = schema_info.index.get(&column_name).copied();
resolve_value(index, value, column_name, &mut row, schema_info)?;
resolve_value(value, column_name, &mut row, schema_info, pipeline_ctx)?;
}
Ok(Row { values: row })
}
fn decide_semantic(p_ctx: &PipelineContext, column_name: &str) -> i32 {
if p_ctx.channel == Channel::Prometheus && column_name != GREPTIME_VALUE {
SemanticType::Tag as i32
} else {
SemanticType::Field as i32
}
}
fn resolve_value(
index: Option<usize>,
value: Value,
column_name: String,
row: &mut Vec<GreptimeValue>,
schema_info: &mut SchemaInfo,
p_ctx: &PipelineContext,
) -> Result<()> {
let index = schema_info.index.get(&column_name).copied();
let mut resolve_simple_type =
|value_data: ValueData, column_name: String, data_type: ColumnDataType| {
let semantic_type = decide_semantic(p_ctx, &column_name);
resolve_schema(
index,
value_data,
ColumnSchema {
column_name,
datatype: data_type as i32,
semantic_type: SemanticType::Field as i32,
semantic_type,
datatype_extension: None,
options: None,
},
@@ -499,16 +527,20 @@ fn identity_pipeline_inner(
column_name: custom_ts
.map(|ts| ts.get_column_name().clone())
.unwrap_or_else(|| DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()),
datatype: custom_ts
.map(|c| c.get_datatype())
.unwrap_or(ColumnDataType::TimestampNanosecond) as i32,
datatype: custom_ts.map(|c| c.get_datatype()).unwrap_or_else(|| {
if pipeline_ctx.channel == Channel::Prometheus {
ColumnDataType::TimestampMillisecond
} else {
ColumnDataType::TimestampNanosecond
}
}) as i32,
semantic_type: SemanticType::Timestamp as i32,
datatype_extension: None,
options: None,
});
for values in pipeline_maps {
let row = values_to_row(&mut schema_info, values, custom_ts)?;
let row = values_to_row(&mut schema_info, values, pipeline_ctx)?;
rows.push(row);
}
@@ -622,8 +654,11 @@ mod tests {
#[test]
fn test_identify_pipeline() {
let params = GreptimePipelineParams::default();
let pipeline_ctx =
PipelineContext::new(&PipelineDefinition::GreptimeIdentityPipeline(None), &params);
let pipeline_ctx = PipelineContext::new(
&PipelineDefinition::GreptimeIdentityPipeline(None),
&params,
Channel::Unknown,
);
{
let array = vec![
serde_json::json!({

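The new `calc_ts` and `decide_semantic` helpers above make the identity pipeline channel-aware: for `Channel::Prometheus` the timestamp is read from `greptime_timestamp` as milliseconds, and every column except `greptime_value` is treated as a tag. A rough sketch of just the semantic-type decision, using local stand-ins for the real `Channel` and `SemanticType` types:

```rust
// Local stand-ins for session::context::Channel and api::v1::SemanticType,
// used only to illustrate the branching added in this change.
#[derive(PartialEq)]
enum Channel { Prometheus, Unknown }

#[derive(Debug, PartialEq)]
enum Semantic { Tag, Field }

const GREPTIME_VALUE: &str = "greptime_value";

// Prometheus remote-write labels become tags; the sample value stays a field.
fn decide_semantic(channel: &Channel, column_name: &str) -> Semantic {
    if *channel == Channel::Prometheus && column_name != GREPTIME_VALUE {
        Semantic::Tag
    } else {
        Semantic::Field
    }
}

fn main() {
    assert_eq!(decide_semantic(&Channel::Prometheus, "job"), Semantic::Tag);
    assert_eq!(decide_semantic(&Channel::Prometheus, GREPTIME_VALUE), Semantic::Field);
    assert_eq!(decide_semantic(&Channel::Unknown, "job"), Semantic::Field);
}
```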
View File

@@ -20,6 +20,7 @@ use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::timestamp::TimestampNanosecond;
use itertools::Itertools;
use session::context::Channel;
use snafu::ensure;
use util::to_pipeline_version;
@@ -119,16 +120,19 @@ impl PipelineDefinition {
pub struct PipelineContext<'a> {
pub pipeline_definition: &'a PipelineDefinition,
pub pipeline_param: &'a GreptimePipelineParams,
pub channel: Channel,
}
impl<'a> PipelineContext<'a> {
pub fn new(
pipeline_definition: &'a PipelineDefinition,
pipeline_param: &'a GreptimePipelineParams,
channel: Channel,
) -> Self {
Self {
pipeline_definition,
pipeline_param,
channel,
}
}
}

View File

@@ -243,7 +243,7 @@ mod test {
// From prometheus `promql/functions_test.go` case `TestKahanSum`
#[test]
fn test_kahan_sum() {
let inputs = vec![1.0, 10.0f64.powf(100.0), 1.0, -1.0 * 10.0f64.powf(100.0)];
let inputs = vec![1.0, 10.0f64.powf(100.0), 1.0, -10.0f64.powf(100.0)];
let mut sum = 0.0;
let mut c = 0f64;

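The test above is ported from Prometheus's `TestKahanSum` and exercises compensated (Kahan-Babuska/Neumaier) summation, where a compensation term `c` captures the low-order bits lost in each addition and is added back at the end. A standalone sketch of that scheme, separate from the actual implementation in this crate:

```rust
/// Neumaier-improved Kahan summation: `c` accumulates the rounding error of
/// each addition, so 1.0 + 1e100 + 1.0 - 1e100 sums to 2.0 instead of 0.0.
fn kahan_sum(values: &[f64]) -> f64 {
    let mut sum = 0.0f64;
    let mut c = 0.0f64;
    for &v in values {
        let t = sum + v;
        if sum.abs() >= v.abs() {
            c += (sum - t) + v;
        } else {
            c += (v - t) + sum;
        }
        sum = t;
    }
    sum + c
}

fn main() {
    let inputs = [1.0, 1e100, 1.0, -1e100];
    assert_eq!(kahan_sum(&inputs), 2.0);
    // Naive summation loses both 1.0 contributions entirely.
    assert_eq!(inputs.iter().sum::<f64>(), 0.0);
}
```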
View File

@@ -18,6 +18,7 @@
use std::any::Any;
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::pin::Pin;
use std::slice::from_ref;
use std::sync::Arc;
use std::task::{Context, Poll};
@@ -801,18 +802,18 @@ fn find_slice_from_range(
// note that `data < max_val`
// i,e, for max_val = 4, array = [5,3,2] should be start=1
// max_val = 4, array = [5, 4, 3, 2] should be start= 2
let start = bisect::<false>(&[array.clone()], &[max_val.clone()], &[*opt])?;
let start = bisect::<false>(from_ref(array), from_ref(&max_val), &[*opt])?;
// min_val = 1, array = [3, 2, 1, 0], end = 3
// min_val = 1, array = [3, 2, 0], end = 2
let end = bisect::<false>(&[array.clone()], &[min_val.clone()], &[*opt])?;
let end = bisect::<false>(from_ref(array), from_ref(&min_val), &[*opt])?;
(start, end)
} else {
// min_val = 1, array = [1, 2, 3], start = 0
// min_val = 1, array = [0, 2, 3], start = 1
let start = bisect::<true>(&[array.clone()], &[min_val.clone()], &[*opt])?;
let start = bisect::<true>(from_ref(array), from_ref(&min_val), &[*opt])?;
// max_val = 3, array = [1, 3, 4], end = 1
// max_val = 3, array = [1, 2, 4], end = 2
let end = bisect::<true>(&[array.clone()], &[max_val.clone()], &[*opt])?;
let end = bisect::<true>(from_ref(array), from_ref(&max_val), &[*opt])?;
(start, end)
};

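The call sites above swap `&[array.clone()]` for `std::slice::from_ref(array)`, which produces a one-element slice that borrows the value instead of cloning it. A tiny illustration of the idiom with a hypothetical helper that, like `bisect`, takes its input as a slice:

```rust
use std::slice::from_ref;

// Hypothetical helper standing in for a function that expects a slice argument.
fn first_len(arrays: &[Vec<i32>]) -> usize {
    arrays.first().map(|a| a.len()).unwrap_or(0)
}

fn main() {
    let array = vec![1, 2, 3];

    // Before: clone the whole Vec just to wrap it in a one-element slice.
    let cloned_len = first_len(&[array.clone()]);

    // After: borrow it as a one-element slice, with no allocation or copy.
    let borrowed_len = first_len(from_ref(&array));

    assert_eq!(cloned_len, borrowed_len);
}
```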
View File

@@ -19,7 +19,7 @@ use bytes::Bytes;
use criterion::{criterion_group, criterion_main, Criterion};
use prost::Message;
use servers::prom_store::to_grpc_row_insert_requests;
use servers::proto::PromWriteRequest;
use servers::proto::{PromSeriesProcessor, PromWriteRequest};
fn bench_decode_prom_request_without_strict_mode(c: &mut Criterion) {
let mut d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
@@ -31,6 +31,8 @@ fn bench_decode_prom_request_without_strict_mode(c: &mut Criterion) {
let mut request = WriteRequest::default();
let mut prom_request = PromWriteRequest::default();
let is_strict_mode = false;
let mut p = PromSeriesProcessor::default_processor();
c.benchmark_group("decode")
.measurement_time(Duration::from_secs(3))
.bench_function("write_request", |b| {
@@ -44,7 +46,7 @@ fn bench_decode_prom_request_without_strict_mode(c: &mut Criterion) {
.bench_function("prom_write_request", |b| {
b.iter(|| {
let data = data.clone();
prom_request.merge(data, is_strict_mode).unwrap();
prom_request.merge(data, is_strict_mode, &mut p).unwrap();
prom_request.as_row_insert_requests();
});
});
@@ -60,6 +62,8 @@ fn bench_decode_prom_request_with_strict_mode(c: &mut Criterion) {
let mut request = WriteRequest::default();
let mut prom_request = PromWriteRequest::default();
let is_strict_mode = true;
let mut p = PromSeriesProcessor::default_processor();
c.benchmark_group("decode")
.measurement_time(Duration::from_secs(3))
.bench_function("write_request", |b| {
@@ -73,7 +77,7 @@ fn bench_decode_prom_request_with_strict_mode(c: &mut Criterion) {
.bench_function("prom_write_request", |b| {
b.iter(|| {
let data = data.clone();
prom_request.merge(data, is_strict_mode).unwrap();
prom_request.merge(data, is_strict_mode, &mut p).unwrap();
prom_request.as_row_insert_requests();
});
});

View File

@@ -554,11 +554,13 @@ impl HttpServerBuilder {
pub fn with_prom_handler(
self,
handler: PromStoreProtocolHandlerRef,
pipeline_handler: Option<PipelineHandlerRef>,
prom_store_with_metric_engine: bool,
is_strict_mode: bool,
) -> Self {
let state = PromStoreState {
prom_store_handler: handler,
pipeline_handler,
prom_store_with_metric_engine,
is_strict_mode,
};

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use std::io::BufRead;
use std::str::FromStr;
use std::sync::Arc;
@@ -38,10 +39,10 @@ use serde::{Deserialize, Serialize};
use serde_json::{json, Deserializer, Map, Value};
use session::context::{Channel, QueryContext, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
use strum::{EnumIter, IntoEnumIterator};
use crate::error::{
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu,
Result, UnsupportedContentTypeSnafu,
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result,
};
use crate::http::header::constants::GREPTIME_PIPELINE_PARAMS_HEADER;
use crate::http::header::{CONTENT_TYPE_NDJSON_STR, CONTENT_TYPE_PROTOBUF_STR};
@@ -300,7 +301,7 @@ fn transform_ndjson_array_factory(
if !ignore_error {
warn!("invalid item in array: {:?}", item_value);
return InvalidParameterSnafu {
reason: format!("invalid item:{} in array", item_value),
reason: format!("invalid item: {} in array", item_value),
}
.fail();
}
@@ -326,7 +327,7 @@ async fn dryrun_pipeline_inner(
let params = GreptimePipelineParams::default();
let pipeline_def = PipelineDefinition::Resolved(pipeline);
let pipeline_ctx = PipelineContext::new(&pipeline_def, &params);
let pipeline_ctx = PipelineContext::new(&pipeline_def, &params, query_ctx.channel());
let results = run_pipeline(
&pipeline_handler,
&pipeline_ctx,
@@ -431,7 +432,8 @@ pub struct PipelineDryrunParams {
pub pipeline_name: Option<String>,
pub pipeline_version: Option<String>,
pub pipeline: Option<String>,
pub data: Vec<Value>,
pub data_type: Option<String>,
pub data: String,
}
/// Check if the payload is valid json
@@ -474,6 +476,24 @@ fn add_step_info_for_pipeline_dryrun_error(step_msg: &str, e: Error) -> Response
(status_code_to_http_status(&e.status_code()), body).into_response()
}
/// Parse the data with the given content type.
/// Returns an error if the content type is invalid.
/// The content type must be one of application/json, text/plain or application/x-ndjson.
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<PipelineMap>> {
if let Ok(content_type) = ContentType::from_str(&data_type) {
extract_pipeline_value_by_content_type(content_type, Bytes::from(data), false)
} else {
InvalidParameterSnafu {
reason: format!(
"invalid content type: {}, expected: one of {}",
data_type,
EventPayloadResolver::support_content_type_list().join(", ")
),
}
.fail()
}
}
#[axum_macros::debug_handler]
pub async fn pipeline_dryrun(
State(log_state): State<LogState>,
@@ -489,7 +509,10 @@ pub async fn pipeline_dryrun(
match check_pipeline_dryrun_params_valid(&payload) {
Some(params) => {
let data = pipeline::json_array_to_map(params.data).context(PipelineSnafu)?;
let data = parse_dryrun_data(
params.data_type.unwrap_or("application/json".to_string()),
params.data,
)?;
check_data_valid(data.len())?;
@@ -616,62 +639,152 @@ pub async fn log_ingester(
.await
}
#[derive(Debug, EnumIter)]
enum EventPayloadResolverInner {
Json,
Ndjson,
Text,
}
impl Display for EventPayloadResolverInner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EventPayloadResolverInner::Json => write!(f, "{}", *JSON_CONTENT_TYPE),
EventPayloadResolverInner::Ndjson => write!(f, "{}", *NDJSON_CONTENT_TYPE),
EventPayloadResolverInner::Text => write!(f, "{}", *TEXT_CONTENT_TYPE),
}
}
}
impl TryFrom<&ContentType> for EventPayloadResolverInner {
type Error = Error;
fn try_from(content_type: &ContentType) -> Result<Self> {
match content_type {
x if *x == *JSON_CONTENT_TYPE => Ok(EventPayloadResolverInner::Json),
x if *x == *NDJSON_CONTENT_TYPE => Ok(EventPayloadResolverInner::Ndjson),
x if *x == *TEXT_CONTENT_TYPE || *x == *TEXT_UTF8_CONTENT_TYPE => {
Ok(EventPayloadResolverInner::Text)
}
_ => InvalidParameterSnafu {
reason: format!(
"invalid content type: {}, expected: one of {}",
content_type,
EventPayloadResolver::support_content_type_list().join(", ")
),
}
.fail(),
}
}
}
#[derive(Debug)]
struct EventPayloadResolver<'a> {
inner: EventPayloadResolverInner,
/// The content type of the payload.
/// Kept only so the original content type can be logged.
#[allow(dead_code)]
content_type: &'a ContentType,
}
impl EventPayloadResolver<'_> {
pub(super) fn support_content_type_list() -> Vec<String> {
EventPayloadResolverInner::iter()
.map(|x| x.to_string())
.collect()
}
}
impl<'a> TryFrom<&'a ContentType> for EventPayloadResolver<'a> {
type Error = Error;
fn try_from(content_type: &'a ContentType) -> Result<Self> {
let inner = EventPayloadResolverInner::try_from(content_type)?;
Ok(EventPayloadResolver {
inner,
content_type,
})
}
}
impl EventPayloadResolver<'_> {
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<PipelineMap>> {
match self.inner {
EventPayloadResolverInner::Json => {
pipeline::json_array_to_map(transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
)?)
.context(PipelineSnafu)
}
EventPayloadResolverInner::Ndjson => {
let mut result = Vec::with_capacity(1000);
for (index, line) in payload.lines().enumerate() {
let mut line = match line {
Ok(line) if !line.is_empty() => line,
Ok(_) => continue, // Skip empty lines
Err(_) if ignore_errors => continue,
Err(e) => {
warn!(e; "invalid string at index: {}", index);
return InvalidParameterSnafu {
reason: format!("invalid line at index: {}", index),
}
.fail();
}
};
// According to its documentation, simd_json only de-escapes strings at the character level,
// like any other JSON parser, so mutating the line buffer in place is safe here.
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
result.push(v);
} else if !ignore_errors {
warn!("invalid JSON at index: {}, content: {:?}", index, line);
return InvalidParameterSnafu {
reason: format!("invalid JSON at index: {}", index),
}
.fail();
}
}
Ok(result)
}
EventPayloadResolverInner::Text => {
let result = payload
.lines()
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
.map(|line| {
let mut map = PipelineMap::new();
map.insert("message".to_string(), pipeline::Value::String(line));
map
})
.collect::<Vec<_>>();
Ok(result)
}
}
}
}
fn extract_pipeline_value_by_content_type(
content_type: ContentType,
payload: Bytes,
ignore_errors: bool,
) -> Result<Vec<PipelineMap>> {
Ok(match content_type {
ct if ct == *JSON_CONTENT_TYPE => {
// `simd_json` have not support stream and ndjson, see https://github.com/simd-lite/simd-json/issues/349
pipeline::json_array_to_map(transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
)?)
.context(PipelineSnafu)?
}
ct if ct == *NDJSON_CONTENT_TYPE => {
let mut result = Vec::with_capacity(1000);
for (index, line) in payload.lines().enumerate() {
let mut line = match line {
Ok(line) if !line.is_empty() => line,
Ok(_) => continue, // Skip empty lines
Err(_) if ignore_errors => continue,
Err(e) => {
warn!(e; "invalid string at index: {}", index);
return InvalidParameterSnafu {
reason: format!("invalid line at index: {}", index),
EventPayloadResolver::try_from(&content_type).and_then(|resolver| {
resolver
.parse_payload(payload, ignore_errors)
.map_err(|e| match &e {
Error::InvalidParameter { reason, .. } if content_type == *JSON_CONTENT_TYPE => {
if reason.contains("invalid item:") {
InvalidParameterSnafu {
reason: "json format error, please check the data is valid JSON.",
}
.fail();
.build()
} else {
e
}
};
// simd_json, according to description, only de-escapes string at character level,
// like any other json parser. So it should be safe here.
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
result.push(v);
} else if !ignore_errors {
warn!("invalid JSON at index: {}, content: {:?}", index, line);
return InvalidParameterSnafu {
reason: format!("invalid JSON at index: {}", index),
}
.fail();
}
}
result
}
ct if ct == *TEXT_CONTENT_TYPE || ct == *TEXT_UTF8_CONTENT_TYPE => payload
.lines()
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
.map(|line| {
let mut map = PipelineMap::new();
map.insert("message".to_string(), pipeline::Value::String(line));
map
_ => e,
})
.collect::<Vec<_>>(),
_ => UnsupportedContentTypeSnafu { content_type }.fail()?,
})
}
@@ -693,7 +806,7 @@ pub(crate) async fn ingest_logs_inner(
.and_then(|v| v.to_str().ok()),
);
let pipeline_ctx = PipelineContext::new(&pipeline, &pipeline_params);
let pipeline_ctx = PipelineContext::new(&pipeline, &pipeline_params, query_ctx.channel());
for pipeline_req in log_ingest_requests {
let requests =
run_pipeline(&handler, &pipeline_ctx, pipeline_req, &query_ctx, true).await?;

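With the changes above, the dryrun API's `data` field is now a raw string and the new optional `data_type` field chooses how it is parsed, defaulting to `application/json`. A hedged sketch of how a client might assemble such a request body with `serde_json`; the endpoint and field names come from the handler and tests in this diff, while the pipeline content itself is a placeholder:

```rust
use serde_json::json;

fn main() {
    // NDJSON payload: one JSON object per line, handled by the application/x-ndjson branch.
    let ndjson_data = r#"{"message":"hello","level":"info"}
{"message":"world","level":"warn"}"#;

    // Body for POST /v1/pipelines/_dryrun; `pipeline_name` could be used instead of
    // inlining the pipeline definition.
    let body = json!({
        "pipeline": "<pipeline yaml goes here>",
        "data_type": "application/x-ndjson",
        "data": ndjson_data,
    });

    // Omitting `data_type` falls back to application/json, which expects `data`
    // to hold serialized JSON.
    println!("{}", body);
}
```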
View File

@@ -28,16 +28,19 @@ use common_telemetry::tracing;
use hyper::HeaderMap;
use lazy_static::lazy_static;
use object_pool::Pool;
use pipeline::util::to_pipeline_version;
use pipeline::PipelineDefinition;
use prost::Message;
use serde::{Deserialize, Serialize};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use crate::error::{self, Result};
use crate::error::{self, InternalSnafu, PipelineSnafu, Result};
use crate::http::extractor::PipelineInfo;
use crate::http::header::{write_cost_header_map, GREPTIME_DB_HEADER_METRICS};
use crate::prom_store::{snappy_decompress, zstd_decompress};
use crate::proto::PromWriteRequest;
use crate::query_handler::{PromStoreProtocolHandlerRef, PromStoreResponse};
use crate::proto::{PromSeriesProcessor, PromWriteRequest};
use crate::query_handler::{PipelineHandlerRef, PromStoreProtocolHandlerRef, PromStoreResponse};
pub const PHYSICAL_TABLE_PARAM: &str = "physical_table";
lazy_static! {
@@ -52,6 +55,7 @@ pub const VM_PROTO_VERSION: &str = "1";
#[derive(Clone)]
pub struct PromStoreState {
pub prom_store_handler: PromStoreProtocolHandlerRef,
pub pipeline_handler: Option<PipelineHandlerRef>,
pub prom_store_with_metric_engine: bool,
pub is_strict_mode: bool,
}
@@ -85,11 +89,13 @@ pub async fn remote_write(
State(state): State<PromStoreState>,
Query(params): Query<RemoteWriteQuery>,
Extension(mut query_ctx): Extension<QueryContext>,
pipeline_info: PipelineInfo,
content_encoding: TypedHeader<headers::ContentEncoding>,
body: Bytes,
) -> Result<impl IntoResponse> {
let PromStoreState {
prom_store_handler,
pipeline_handler,
prom_store_with_metric_engine,
is_strict_mode,
} = state;
@@ -100,17 +106,34 @@ pub async fn remote_write(
let db = params.db.clone().unwrap_or_default();
query_ctx.set_channel(Channel::Prometheus);
if let Some(physical_table) = params.physical_table {
query_ctx.set_extension(PHYSICAL_TABLE_PARAM, physical_table);
}
let query_ctx = Arc::new(query_ctx);
let _timer = crate::metrics::METRIC_HTTP_PROM_STORE_WRITE_ELAPSED
.with_label_values(&[db.as_str()])
.start_timer();
let is_zstd = content_encoding.contains(VM_ENCODING);
let (request, samples) = decode_remote_write_request(is_zstd, body, is_strict_mode).await?;
if let Some(physical_table) = params.physical_table {
query_ctx.set_extension(PHYSICAL_TABLE_PARAM, physical_table);
let mut processor = PromSeriesProcessor::default_processor();
if let Some(pipeline_name) = pipeline_info.pipeline_name {
let pipeline_def = PipelineDefinition::from_name(
&pipeline_name,
to_pipeline_version(pipeline_info.pipeline_version.as_deref())
.context(PipelineSnafu)?,
None,
)
.context(PipelineSnafu)?;
let pipeline_handler = pipeline_handler.context(InternalSnafu {
err_msg: "pipeline handler is not set".to_string(),
})?;
processor.set_pipeline(pipeline_handler, query_ctx.clone(), pipeline_def);
}
let query_ctx = Arc::new(query_ctx);
let (request, samples) =
decode_remote_write_request(is_zstd, body, is_strict_mode, &mut processor).await?;
let output = prom_store_handler
.write(request, query_ctx, prom_store_with_metric_engine)
@@ -177,6 +200,7 @@ async fn decode_remote_write_request(
is_zstd: bool,
body: Bytes,
is_strict_mode: bool,
processor: &mut PromSeriesProcessor,
) -> Result<(RowInsertRequests, usize)> {
let _timer = crate::metrics::METRIC_HTTP_PROM_STORE_DECODE_ELAPSED.start_timer();
@@ -194,10 +218,16 @@ async fn decode_remote_write_request(
};
let mut request = PROM_WRITE_REQUEST_POOL.pull(PromWriteRequest::default);
request
.merge(buf, is_strict_mode)
.merge(buf, is_strict_mode, processor)
.context(error::DecodePromRemoteRequestSnafu)?;
Ok(request.as_row_insert_requests())
if processor.use_pipeline {
processor.exec_pipeline().await
} else {
Ok(request.as_row_insert_requests())
}
}
async fn decode_remote_read_request(body: Bytes) -> Result<ReadRequest> {

View File

@@ -75,7 +75,8 @@ pub async fn to_grpc_insert_requests(
let data = parse_export_logs_service_request(request);
let array = pipeline::json_array_to_map(data).context(PipelineSnafu)?;
let pipeline_ctx = PipelineContext::new(&pipeline_def, &pipeline_params);
let pipeline_ctx =
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
let inserts = run_pipeline(
&pipeline_handler,
&pipeline_ctx,

View File

@@ -23,7 +23,7 @@ use pipeline::{
DispatchedTo, IdentityTimeIndex, Pipeline, PipelineContext, PipelineDefinition,
PipelineExecOutput, PipelineMap, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
};
use session::context::QueryContextRef;
use session::context::{Channel, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use crate::error::{CatalogSnafu, PipelineSnafu, Result};
@@ -84,10 +84,14 @@ async fn run_identity_pipeline(
table: table_name,
values: data_array,
} = pipeline_req;
let table = handler
.get_table(&table_name, query_ctx)
.await
.context(CatalogSnafu)?;
let table = if pipeline_ctx.channel == Channel::Prometheus {
None
} else {
handler
.get_table(&table_name, query_ctx)
.await
.context(CatalogSnafu)?
};
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
.map(|rows| {
vec![RowInsertRequest {
@@ -187,7 +191,8 @@ async fn run_custom_pipeline(
let ident_ts_index = IdentityTimeIndex::Epoch(ts_key.to_string(), unit, false);
let new_def = PipelineDefinition::GreptimeIdentityPipeline(Some(ident_ts_index));
let next_pipeline_ctx = PipelineContext::new(&new_def, pipeline_ctx.pipeline_param);
let next_pipeline_ctx =
PipelineContext::new(&new_def, pipeline_ctx.pipeline_param, pipeline_ctx.channel);
let reqs = run_identity_pipeline(
handler,
@@ -218,8 +223,11 @@ async fn run_custom_pipeline(
// run pipeline recursively.
let next_pipeline_def =
PipelineDefinition::from_name(next_pipeline_name, None, None).context(PipelineSnafu)?;
let next_pipeline_ctx =
PipelineContext::new(&next_pipeline_def, pipeline_ctx.pipeline_param);
let next_pipeline_ctx = PipelineContext::new(
&next_pipeline_def,
pipeline_ctx.pipeline_param,
pipeline_ctx.channel,
);
let requests = Box::pin(run_pipeline(
handler,
&next_pipeline_ctx,

View File

@@ -12,18 +12,28 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::ops::Deref;
use std::slice;
use api::prom_store::remote::Sample;
use api::v1::RowInsertRequests;
use bytes::{Buf, Bytes};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use pipeline::{GreptimePipelineParams, PipelineContext, PipelineDefinition, PipelineMap, Value};
use prost::encoding::message::merge;
use prost::encoding::{decode_key, decode_varint, WireType};
use prost::DecodeError;
use session::context::QueryContextRef;
use snafu::OptionExt;
use crate::error::InternalSnafu;
use crate::http::event::PipelineIngestRequest;
use crate::pipeline::run_pipeline;
use crate::prom_row_builder::TablesBuilder;
use crate::prom_store::METRIC_NAME_LABEL_BYTES;
use crate::query_handler::PipelineHandlerRef;
use crate::repeated_field::{Clear, RepeatedField};
impl Clear for Sample {
@@ -222,8 +232,6 @@ impl PromTimeSeries {
self.samples.as_slice(),
is_strict_mode,
)?;
self.labels.clear();
self.samples.clear();
Ok(())
}
@@ -247,7 +255,12 @@ impl PromWriteRequest {
}
// todo(hl): maybe use &[u8] can reduce the overhead introduced with Bytes.
pub fn merge(&mut self, mut buf: Bytes, is_strict_mode: bool) -> Result<(), DecodeError> {
pub fn merge(
&mut self,
mut buf: Bytes,
is_strict_mode: bool,
processor: &mut PromSeriesProcessor,
) -> Result<(), DecodeError> {
const STRUCT_NAME: &str = "PromWriteRequest";
while buf.has_remaining() {
let (tag, wire_type) = decode_key(&mut buf)?;
@@ -273,8 +286,17 @@ impl PromWriteRequest {
if buf.remaining() != limit {
return Err(DecodeError::new("delimited length exceeded"));
}
self.series
.add_to_table_data(&mut self.table_data, is_strict_mode)?;
if processor.use_pipeline {
processor.consume_series_to_pipeline_map(&mut self.series)?;
} else {
self.series
.add_to_table_data(&mut self.table_data, is_strict_mode)?;
}
// clear state
self.series.labels.clear();
self.series.samples.clear();
}
3u32 => {
// todo(hl): metadata are skipped.
@@ -283,10 +305,133 @@ impl PromWriteRequest {
_ => prost::encoding::skip_field(wire_type, tag, &mut buf, Default::default())?,
}
}
Ok(())
}
}
/// A hook injected into the PromWriteRequest decoding process.
/// It was originally designed with two usages:
/// 1. consume one series into the desired type, in this case a pipeline map
/// 2. convert itself into RowInsertRequests
///
/// Since the original conversion is coupled with PromWriteRequest,
/// let's keep it that way for now.
pub struct PromSeriesProcessor {
pub(crate) use_pipeline: bool,
pub(crate) table_values: BTreeMap<String, Vec<PipelineMap>>,
// optional fields for pipeline
pub(crate) pipeline_handler: Option<PipelineHandlerRef>,
pub(crate) query_ctx: Option<QueryContextRef>,
pub(crate) pipeline_def: Option<PipelineDefinition>,
}
impl PromSeriesProcessor {
pub fn default_processor() -> Self {
Self {
use_pipeline: false,
table_values: BTreeMap::new(),
pipeline_handler: None,
query_ctx: None,
pipeline_def: None,
}
}
pub fn set_pipeline(
&mut self,
handler: PipelineHandlerRef,
query_ctx: QueryContextRef,
pipeline_def: PipelineDefinition,
) {
self.use_pipeline = true;
self.pipeline_handler = Some(handler);
self.query_ctx = Some(query_ctx);
self.pipeline_def = Some(pipeline_def);
}
// Convert one series into pipeline maps, one map per sample.
pub(crate) fn consume_series_to_pipeline_map(
&mut self,
series: &mut PromTimeSeries,
) -> Result<(), DecodeError> {
let mut vec_pipeline_map: Vec<PipelineMap> = Vec::new();
let mut pipeline_map = PipelineMap::new();
for l in series.labels.iter() {
let name = String::from_utf8(l.name.to_vec())
.map_err(|_| DecodeError::new("invalid utf-8"))?;
let value = String::from_utf8(l.value.to_vec())
.map_err(|_| DecodeError::new("invalid utf-8"))?;
pipeline_map.insert(name, Value::String(value));
}
let one_sample = series.samples.len() == 1;
for s in series.samples.iter() {
let timestamp = s.timestamp;
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));
if one_sample {
vec_pipeline_map.push(pipeline_map);
break;
} else {
vec_pipeline_map.push(pipeline_map.clone());
}
}
let table_name = std::mem::take(&mut series.table_name);
match self.table_values.entry(table_name) {
Entry::Occupied(mut occupied_entry) => {
occupied_entry.get_mut().append(&mut vec_pipeline_map);
}
Entry::Vacant(vacant_entry) => {
vacant_entry.insert(vec_pipeline_map);
}
}
Ok(())
}
pub(crate) async fn exec_pipeline(
&mut self,
) -> crate::error::Result<(RowInsertRequests, usize)> {
// prepare params
let handler = self.pipeline_handler.as_ref().context(InternalSnafu {
err_msg: "pipeline handler is not set",
})?;
let pipeline_def = self.pipeline_def.as_ref().context(InternalSnafu {
err_msg: "pipeline definition is not set",
})?;
let pipeline_param = GreptimePipelineParams::default();
let query_ctx = self.query_ctx.as_ref().context(InternalSnafu {
err_msg: "query context is not set",
})?;
let pipeline_ctx = PipelineContext::new(pipeline_def, &pipeline_param, query_ctx.channel());
let mut size = 0;
// run pipeline
let mut inserts = Vec::with_capacity(self.table_values.len());
for (table_name, pipeline_maps) in self.table_values.iter_mut() {
let pipeline_req = PipelineIngestRequest {
table: table_name.clone(),
values: pipeline_maps.clone(),
};
let row_req =
run_pipeline(handler, &pipeline_ctx, pipeline_req, query_ctx, true).await?;
size += row_req
.iter()
.map(|rq| rq.rows.as_ref().map(|r| r.rows.len()).unwrap_or(0))
.sum::<usize>();
inserts.extend(row_req);
}
let row_insert_requests = RowInsertRequests { inserts };
Ok((row_insert_requests, size))
}
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
@@ -297,7 +442,7 @@ mod tests {
use prost::Message;
use crate::prom_store::to_grpc_row_insert_requests;
use crate::proto::PromWriteRequest;
use crate::proto::{PromSeriesProcessor, PromWriteRequest};
use crate::repeated_field::Clear;
fn sort_rows(rows: Rows) -> Rows {
@@ -321,8 +466,11 @@ mod tests {
expected_samples: usize,
expected_rows: &RowInsertRequests,
) {
let mut p = PromSeriesProcessor::default_processor();
prom_write_request.clear();
prom_write_request.merge(data.clone(), true).unwrap();
prom_write_request
.merge(data.clone(), true, &mut p)
.unwrap();
let (prom_rows, samples) = prom_write_request.as_row_insert_requests();
assert_eq!(expected_samples, samples);

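Putting the decoding pieces together: `PromWriteRequest::merge` now takes a `PromSeriesProcessor`, which is inert by default (series go straight into the internal table builders) and only collects per-series pipeline maps for `exec_pipeline` after `set_pipeline` has been called. A rough sketch of the default, non-pipeline call sequence, mirroring the benchmark and test code in this diff; `body` is assumed to be an already snappy-decompressed remote-write payload:

```rust
// Sketch only: the types come from the servers crate as shown above
// (servers::proto::{PromSeriesProcessor, PromWriteRequest}).
use bytes::Bytes;
use servers::proto::{PromSeriesProcessor, PromWriteRequest};

fn decode(body: Bytes, is_strict_mode: bool) -> Result<(), prost::DecodeError> {
    let mut processor = PromSeriesProcessor::default_processor();
    let mut request = PromWriteRequest::default();

    // Without a pipeline configured, merge() routes every series into the
    // table builders, exactly as before this change.
    request.merge(body, is_strict_mode, &mut processor)?;
    let (_row_inserts, _sample_count) = request.as_row_insert_requests();
    Ok(())
}
```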
View File

@@ -124,7 +124,7 @@ fn make_test_app(tx: mpsc::Sender<(String, Vec<u8>)>) -> Router {
let instance = Arc::new(DummyInstance { tx });
let server = HttpServerBuilder::new(http_opts)
.with_sql_handler(instance.clone())
.with_prom_handler(instance, true, is_strict_mode)
.with_prom_handler(instance, None, true, is_strict_mode)
.build();
server.build(server.make_app()).unwrap()
}

View File

@@ -537,7 +537,12 @@ pub async fn setup_test_prom_app_with_frontend(
let http_server = HttpServerBuilder::new(http_opts)
.with_sql_handler(ServerSqlQueryHandlerAdapter::arc(frontend_ref.clone()))
.with_logs_handler(instance.fe_instance().clone())
.with_prom_handler(frontend_ref.clone(), true, is_strict_mode)
.with_prom_handler(
frontend_ref.clone(),
Some(frontend_ref.clone()),
true,
is_strict_mode,
)
.with_prometheus_handler(frontend_ref)
.with_greptime_config_options(instance.opts.datanode_options().to_toml().unwrap())
.build();

View File

@@ -91,6 +91,7 @@ macro_rules! http_tests {
test_config_api,
test_dashboard_path,
test_prometheus_remote_write,
test_prometheus_remote_write_with_pipeline,
test_vm_proto_remote_write,
test_pipeline_api,
@@ -1186,6 +1187,7 @@ fn drop_lines_with_inconsistent_results(input: String) -> String {
"selector_result_cache_size =",
"metadata_cache_size =",
"content_cache_size =",
"result_cache_size =",
"name =",
"recovery_parallelism =",
"max_background_flushes =",
@@ -1256,6 +1258,64 @@ pub async fn test_prometheus_remote_write(store_type: StorageType) {
.await;
assert_eq!(res.status(), StatusCode::NO_CONTENT);
let expected = "[[\"demo\"],[\"demo_metrics\"],[\"demo_metrics_with_nanos\"],[\"greptime_physical_table\"],[\"metric1\"],[\"metric2\"],[\"metric3\"],[\"mito\"],[\"multi_labels\"],[\"numbers\"],[\"phy\"],[\"phy2\"],[\"phy_ns\"]]";
validate_data("prometheus_remote_write", &client, "show tables;", expected).await;
let table_val = "[[1000,3.0,\"z001\",\"test_host1\"],[2000,4.0,\"z001\",\"test_host1\"]]";
validate_data(
"prometheus_remote_write",
&client,
"select * from metric2",
table_val,
)
.await;
guard.remove_all().await;
}
pub async fn test_prometheus_remote_write_with_pipeline(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_prom_app_with_frontend(store_type, "prometheus_remote_write_with_pipeline")
.await;
let client = TestClient::new(app).await;
// write snappy encoded data
let write_request = WriteRequest {
timeseries: prom_store::mock_timeseries(),
..Default::default()
};
let serialized_request = write_request.encode_to_vec();
let compressed_request =
prom_store::snappy_compress(&serialized_request).expect("failed to encode snappy");
let res = client
.post("/v1/prometheus/write")
.header("Content-Encoding", "snappy")
.header("x-greptime-log-pipeline-name", "greptime_identity")
.body(compressed_request)
.send()
.await;
assert_eq!(res.status(), StatusCode::NO_CONTENT);
let expected = "[[\"demo\"],[\"demo_metrics\"],[\"demo_metrics_with_nanos\"],[\"greptime_physical_table\"],[\"metric1\"],[\"metric2\"],[\"metric3\"],[\"mito\"],[\"multi_labels\"],[\"numbers\"],[\"phy\"],[\"phy2\"],[\"phy_ns\"]]";
validate_data(
"prometheus_remote_write_pipeline",
&client,
"show tables;",
expected,
)
.await;
let table_val = "[[1000,3.0,\"z001\",\"test_host1\"],[2000,4.0,\"z001\",\"test_host1\"]]";
validate_data(
"prometheus_remote_write_pipeline",
&client,
"select * from metric2",
table_val,
)
.await;
guard.remove_all().await;
}
@@ -2146,7 +2206,7 @@ transform:
"data_type": "STRING",
"key": "log",
"semantic_type": "FIELD",
"value": "ClusterAdapter:enter sendTextDataToCluster\\n"
"value": "ClusterAdapter:enter sendTextDataToCluster"
},
{
"data_type": "STRING",
@@ -2160,6 +2220,44 @@ transform:
"semantic_type": "TIMESTAMP",
"value": "2024-05-25 20:16:37.217+0000"
}
],
[
{
"data_type": "INT32",
"key": "id1",
"semantic_type": "FIELD",
"value": 1111
},
{
"data_type": "INT32",
"key": "id2",
"semantic_type": "FIELD",
"value": 2222
},
{
"data_type": "STRING",
"key": "type",
"semantic_type": "FIELD",
"value": "D"
},
{
"data_type": "STRING",
"key": "log",
"semantic_type": "FIELD",
"value": "ClusterAdapter:enter sendTextDataToCluster ggg"
},
{
"data_type": "STRING",
"key": "logger",
"semantic_type": "FIELD",
"value": "INTERACT.MANAGER"
},
{
"data_type": "TIMESTAMP_NANOSECOND",
"key": "time",
"semantic_type": "TIMESTAMP",
"value": "2024-05-25 20:16:38.217+0000"
}
]
]);
{
@@ -2172,7 +2270,15 @@ transform:
"logger": "INTERACT.MANAGER",
"type": "I",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
"log": "ClusterAdapter:enter sendTextDataToCluster"
},
{
"id1": "1111",
"id2": "2222",
"logger": "INTERACT.MANAGER",
"type": "D",
"time": "2024-05-25 20:16:38.217",
"log": "ClusterAdapter:enter sendTextDataToCluster ggg"
}
]
"#;
@@ -2191,25 +2297,29 @@ transform:
}
{
// test new api specify pipeline via pipeline_name
let body = r#"
{
"pipeline_name": "test",
"data": [
let data = r#"[
{
"id1": "2436",
"id2": "2528",
"logger": "INTERACT.MANAGER",
"type": "I",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
"log": "ClusterAdapter:enter sendTextDataToCluster"
},
{
"id1": "1111",
"id2": "2222",
"logger": "INTERACT.MANAGER",
"type": "D",
"time": "2024-05-25 20:16:38.217",
"log": "ClusterAdapter:enter sendTextDataToCluster ggg"
}
]
}
"#;
]"#;
let body = json!({"pipeline_name":"test","data":data});
let res = client
.post("/v1/pipelines/_dryrun")
.header("Content-Type", "application/json")
.body(body)
.body(body.to_string())
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
@@ -2220,18 +2330,55 @@ transform:
assert_eq!(rows, &dryrun_rows);
}
{
let pipeline_content_for_text = r#"
processors:
- dissect:
fields:
- message
patterns:
- "%{id1} %{id2} %{logger} %{type} \"%{time}\" \"%{log}\""
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
ignore_missing: true
transform:
- fields:
- id1
- id2
type: int32
- fields:
- type
- log
- logger
type: string
- field: time
type: time
index: timestamp
"#;
// test new api specify pipeline via pipeline raw data
let mut body = json!({
"data": [
let data = r#"[
{
"id1": "2436",
"id2": "2528",
"logger": "INTERACT.MANAGER",
"type": "I",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
"log": "ClusterAdapter:enter sendTextDataToCluster"
},
{
"id1": "1111",
"id2": "2222",
"logger": "INTERACT.MANAGER",
"type": "D",
"time": "2024-05-25 20:16:38.217",
"log": "ClusterAdapter:enter sendTextDataToCluster ggg"
}
]
]"#;
let mut body = json!({
"data": data
});
body["pipeline"] = json!(pipeline_content);
let res = client
@@ -2246,6 +2393,73 @@ transform:
let rows = &body[0]["rows"];
assert_eq!(schema, &dryrun_schema);
assert_eq!(rows, &dryrun_rows);
let mut body_for_text = json!({
"data": r#"2436 2528 INTERACT.MANAGER I "2024-05-25 20:16:37.217" "ClusterAdapter:enter sendTextDataToCluster"
1111 2222 INTERACT.MANAGER D "2024-05-25 20:16:38.217" "ClusterAdapter:enter sendTextDataToCluster ggg"
"#,
});
body_for_text["pipeline"] = json!(pipeline_content_for_text);
body_for_text["data_type"] = json!("text/plain");
let ndjson_content = r#"{"id1":"2436","id2":"2528","logger":"INTERACT.MANAGER","type":"I","time":"2024-05-25 20:16:37.217","log":"ClusterAdapter:enter sendTextDataToCluster"}
{"id1":"1111","id2":"2222","logger":"INTERACT.MANAGER","type":"D","time":"2024-05-25 20:16:38.217","log":"ClusterAdapter:enter sendTextDataToCluster ggg"}
"#;
let body_for_ndjson = json!({
"pipeline":pipeline_content,
"data_type": "application/x-ndjson",
"data": ndjson_content,
});
let res = client
.post("/v1/pipelines/_dryrun")
.header("Content-Type", "application/json")
.body(body_for_ndjson.to_string())
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body: Value = res.json().await;
let schema = &body[0]["schema"];
let rows = &body[0]["rows"];
assert_eq!(schema, &dryrun_schema);
assert_eq!(rows, &dryrun_rows);
body_for_text["data_type"] = json!("application/yaml");
let res = client
.post("/v1/pipelines/_dryrun")
.header("Content-Type", "application/json")
.body(body_for_text.to_string())
.send()
.await;
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
let body: Value = res.json().await;
assert_eq!(body["error"], json!("Invalid request parameter: invalid content type: application/yaml, expected: one of application/json, application/x-ndjson, text/plain"));
body_for_text["data_type"] = json!("application/json");
let res = client
.post("/v1/pipelines/_dryrun")
.header("Content-Type", "application/json")
.body(body_for_text.to_string())
.send()
.await;
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
let body: Value = res.json().await;
assert_eq!(
body["error"],
json!("Invalid request parameter: json format error, please check the data is valid JSON.")
);
body_for_text["data_type"] = json!("text/plain");
let res = client
.post("/v1/pipelines/_dryrun")
.header("Content-Type", "application/json")
.body(body_for_text.to_string())
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body: Value = res.json().await;
let schema = &body[0]["schema"];
let rows = &body[0]["rows"];
assert_eq!(schema, &dryrun_schema);
assert_eq!(rows, &dryrun_rows);
}
{
// failback to old version api
@@ -2259,6 +2473,14 @@ transform:
"type": "I",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
},
{
"id1": "1111",
"id2": "2222",
"logger": "INTERACT.MANAGER",
"type": "D",
"time": "2024-05-25 20:16:38.217",
"log": "ClusterAdapter:enter sendTextDataToCluster ggg"
}
]
});

View File

@@ -78,3 +78,83 @@ SELECT CLAMP(10, 1, 0);
Error: 3001(EngineExecuteQuery), Invalid function args: The second arg should be less than or equal to the third arg, have: ConstantVector([Int64(1); 1]), ConstantVector([Int64(0); 1])
SELECT CLAMP_MIN(10, 12);
+--------------------------------+
| clamp_min(Int64(10),Int64(12)) |
+--------------------------------+
| 12 |
+--------------------------------+
SELECT CLAMP_MIN(10, 8);
+-------------------------------+
| clamp_min(Int64(10),Int64(8)) |
+-------------------------------+
| 10 |
+-------------------------------+
SELECT CLAMP_MIN(10.5, 10.6);
+----------------------------------------+
| clamp_min(Float64(10.5),Float64(10.6)) |
+----------------------------------------+
| 10.6 |
+----------------------------------------+
SELECT CLAMP_MIN(10.5, 10.4);
+----------------------------------------+
| clamp_min(Float64(10.5),Float64(10.4)) |
+----------------------------------------+
| 10.5 |
+----------------------------------------+
SELECT CLAMP_MIN(-5, -3);
+--------------------------------+
| clamp_min(Int64(-5),Int64(-3)) |
+--------------------------------+
| -3 |
+--------------------------------+
SELECT CLAMP_MAX(10, 12);
+--------------------------------+
| clamp_max(Int64(10),Int64(12)) |
+--------------------------------+
| 10 |
+--------------------------------+
SELECT CLAMP_MAX(10, 8);
+-------------------------------+
| clamp_max(Int64(10),Int64(8)) |
+-------------------------------+
| 8 |
+-------------------------------+
SELECT CLAMP_MAX(10.5, 10.6);
+----------------------------------------+
| clamp_max(Float64(10.5),Float64(10.6)) |
+----------------------------------------+
| 10.5 |
+----------------------------------------+
SELECT CLAMP_MAX(10.5, 10.4);
+----------------------------------------+
| clamp_max(Float64(10.5),Float64(10.4)) |
+----------------------------------------+
| 10.4 |
+----------------------------------------+
SELECT CLAMP_MAX(-5, -7);
+--------------------------------+
| clamp_max(Int64(-5),Int64(-7)) |
+--------------------------------+
| -7 |
+--------------------------------+

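The results above pin down the scalar semantics of the new functions: `CLAMP_MIN(v, min)` raises `v` to at least `min`, and `CLAMP_MAX(v, max)` lowers it to at most `max`. A minimal Rust sketch of the same behavior, checked against the values in the result table (an illustration only, not the SQL function implementation):

```rust
fn clamp_min(v: f64, min: f64) -> f64 {
    if v < min { min } else { v }
}

fn clamp_max(v: f64, max: f64) -> f64 {
    if v > max { max } else { v }
}

fn main() {
    // Mirrors the SQL results above.
    assert_eq!(clamp_min(10.0, 12.0), 12.0);
    assert_eq!(clamp_min(10.0, 8.0), 10.0);
    assert_eq!(clamp_min(10.5, 10.6), 10.6);
    assert_eq!(clamp_min(-5.0, -3.0), -3.0);
    assert_eq!(clamp_max(10.0, 12.0), 10.0);
    assert_eq!(clamp_max(10.0, 8.0), 8.0);
    assert_eq!(clamp_max(10.5, 10.4), 10.4);
    assert_eq!(clamp_max(-5.0, -7.0), -7.0);
}
```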
View File

@@ -1,4 +1,3 @@
SELECT MOD(18, 4);
SELECT MOD(-18, 4);
@@ -23,3 +22,23 @@ SELECT CLAMP(-10, 0, 1);
SELECT CLAMP(0.5, 0, 1);
SELECT CLAMP(10, 1, 0);
SELECT CLAMP_MIN(10, 12);
SELECT CLAMP_MIN(10, 8);
SELECT CLAMP_MIN(10.5, 10.6);
SELECT CLAMP_MIN(10.5, 10.4);
SELECT CLAMP_MIN(-5, -3);
SELECT CLAMP_MAX(10, 12);
SELECT CLAMP_MAX(10, 8);
SELECT CLAMP_MAX(10.5, 10.6);
SELECT CLAMP_MAX(10.5, 10.4);
SELECT CLAMP_MAX(-5, -7);