feat: admin gc table/regions (#7619)

* feat: gc table

Signed-off-by: discord9 <discord9@163.com>

* test: admin gc

Signed-off-by: discord9 <discord9@163.com>

* chore: after rebase fix

Signed-off-by: discord9 <discord9@163.com>

* refactor: GcStats

Signed-off-by: discord9 <discord9@163.com>

* refactor: use gc ticker for admin gc

Signed-off-by: discord9 <discord9@163.com>

* fix: region routes override

Signed-off-by: discord9 <discord9@163.com>

* test: non happy path

Signed-off-by: discord9 <discord9@163.com>

* refactor: gc job report enum

Signed-off-by: discord9 <discord9@163.com>

* test: process 0 regions

Signed-off-by: discord9 <discord9@163.com>

* after rebase

Signed-off-by: discord9 <discord9@163.com>

* feat: allow manual gc to return error

Signed-off-by: discord9 <discord9@163.com>

* chore: update proto

Signed-off-by: discord9 <discord9@163.com>

* per review

Signed-off-by: discord9 <discord9@163.com>

* chore: timeout and update proto

Signed-off-by: discord9 <discord9@163.com>

* chore: update proto

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
This commit is contained in:
discord9
2026-03-06 16:25:44 +08:00
committed by GitHub
parent 5e6d2b221e
commit 56ee8baa3f
29 changed files with 1687 additions and 132 deletions

View File

@@ -15,6 +15,7 @@
mod build_index_table;
mod flush_compact_region;
mod flush_compact_table;
mod gc;
mod migrate_region;
mod reconcile_catalog;
mod reconcile_database;
@@ -22,6 +23,7 @@ mod reconcile_table;
use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
use flush_compact_table::{CompactTableFunction, FlushTableFunction};
use gc::{GcRegionsFunction, GcTableFunction};
use migrate_region::MigrateRegionFunction;
use reconcile_catalog::ReconcileCatalogFunction;
use reconcile_database::ReconcileDatabaseFunction;
@@ -42,6 +44,8 @@ impl AdminFunction {
registry.register(CompactRegionFunction::factory());
registry.register(FlushTableFunction::factory());
registry.register(CompactTableFunction::factory());
registry.register(GcRegionsFunction::factory());
registry.register(GcTableFunction::factory());
registry.register(BuildIndexFunction::factory());
registry.register(FlushFlowFunction::factory());
registry.register(ReconcileCatalogFunction::factory());

View File

@@ -0,0 +1,220 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use common_error::ext::BoxedError;
use common_macro::admin_fn;
use common_meta::rpc::procedure::{GcRegionsRequest, GcTableRequest};
use common_query::error::{
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result, TableMutationSnafu,
UnsupportedInputDataTypeSnafu,
};
use datafusion_expr::{Signature, TypeSignature, Volatility};
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::*;
use session::context::QueryContextRef;
use snafu::{ResultExt, ensure};
use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64;
/// Timeout attached to every GC request built by the admin functions in this file.
const DEFAULT_GC_TIMEOUT: Duration = Duration::from_secs(60);
/// Value used for `full_file_listing` when the caller does not pass the optional boolean argument.
const DEFAULT_FULL_FILE_LISTING: bool = false;
/// Admin function `gc_regions`: manually triggers GC for the given regions.
///
/// `params` holds one or more region ids, optionally followed by a boolean
/// `full_file_listing` flag (see [`parse_gc_regions_params`]). The request is
/// sent through the procedure service handler with [`DEFAULT_GC_TIMEOUT`].
///
/// Returns the `processed_regions` count reported by the handler.
///
/// # Errors
///
/// Propagates argument-parsing errors and any error returned by the handler.
#[admin_fn(
    name = GcRegionsFunction,
    display_name = gc_regions,
    sig_fn = gc_regions_signature,
    ret = uint64
)]
pub(crate) async fn gc_regions(
    procedure_service_handler: &ProcedureServiceHandlerRef,
    _ctx: &QueryContextRef,
    params: &[ValueRef<'_>],
) -> Result<Value> {
    let (region_ids, full_file_listing) = parse_gc_regions_params(params)?;

    let request = GcRegionsRequest {
        region_ids,
        full_file_listing,
        timeout: DEFAULT_GC_TIMEOUT,
    };
    let processed = procedure_service_handler
        .gc_regions(request)
        .await?
        .processed_regions;

    Ok(Value::from(processed))
}
/// Admin function `gc_table`: manually triggers GC for a single table.
///
/// `params` holds the table name and, optionally, a boolean
/// `full_file_listing` flag (see [`parse_gc_table_params`]). The table name is
/// resolved to catalog/schema/table using `query_ctx`, and the request is sent
/// through the procedure service handler with [`DEFAULT_GC_TIMEOUT`].
///
/// Returns the `processed_regions` count reported by the handler.
///
/// # Errors
///
/// Propagates argument-parsing errors and any error returned by the handler.
#[admin_fn(
    name = GcTableFunction,
    display_name = gc_table,
    sig_fn = gc_table_signature,
    ret = uint64
)]
pub(crate) async fn gc_table(
    procedure_service_handler: &ProcedureServiceHandlerRef,
    query_ctx: &QueryContextRef,
    params: &[ValueRef<'_>],
) -> Result<Value> {
    let (catalog_name, schema_name, table_name, full_file_listing) =
        parse_gc_table_params(params, query_ctx)?;

    let request = GcTableRequest {
        catalog_name,
        schema_name,
        table_name,
        full_file_listing,
        timeout: DEFAULT_GC_TIMEOUT,
    };
    let processed = procedure_service_handler
        .gc_table(request)
        .await?
        .processed_regions;

    Ok(Value::from(processed))
}
/// Splits the raw `gc_regions` arguments into region ids and the
/// `full_file_listing` flag.
///
/// If the last argument is a boolean it is taken as the flag; otherwise the
/// flag is [`DEFAULT_FULL_FILE_LISTING`] and every argument is a region id.
/// Each region id argument is converted with `cast_u64`.
///
/// # Errors
///
/// * `InvalidFuncArgs` when no argument is given, or when nothing is left
///   after removing the trailing boolean.
/// * `UnsupportedInputDataType` when `cast_u64` yields `None` for an argument.
/// * Any error returned by `cast_u64` itself.
fn parse_gc_regions_params(params: &[ValueRef<'_>]) -> Result<(Vec<u64>, bool)> {
    ensure!(
        !params.is_empty(),
        InvalidFuncArgsSnafu {
            err_msg: "The length of the args is not correct, expect at least 1 region id, have 0"
                .to_string(),
        }
    );

    // Peel off an optional trailing boolean; everything before it is a region id.
    let (full_file_listing, id_args) = match params.split_last() {
        Some((ValueRef::Boolean(flag), rest)) => (*flag, rest),
        _ => (DEFAULT_FULL_FILE_LISTING, params),
    };

    ensure!(
        !id_args.is_empty(),
        InvalidFuncArgsSnafu {
            err_msg: "The length of the args is not correct, expect at least 1 region id"
                .to_string(),
        }
    );

    // Stops at the first argument that cannot be turned into a `u64`.
    let region_ids = id_args
        .iter()
        .map(|arg| -> Result<u64> {
            match cast_u64(arg)? {
                Some(region_id) => Ok(region_id),
                None => UnsupportedInputDataTypeSnafu {
                    function: "gc_regions",
                    datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
                }
                .fail(),
            }
        })
        .collect::<Result<Vec<u64>>>()?;

    Ok((region_ids, full_file_listing))
}
/// Parses the raw `gc_table` arguments into
/// `(catalog_name, schema_name, table_name, full_file_listing)`.
///
/// Accepts one or two arguments: a string table name and an optional boolean
/// flag. When the flag is omitted, [`DEFAULT_FULL_FILE_LISTING`] is used. The
/// table name is resolved to its full name with
/// `session::table_name::table_name_to_full_name` and `query_ctx`.
///
/// # Errors
///
/// * `InvalidFuncArgs` when the argument count is not 1 or 2.
/// * `UnsupportedInputDataType` when the first argument is not a string or
///   the second is not a boolean.
/// * `TableMutation` when the table name cannot be resolved.
fn parse_gc_table_params(
    params: &[ValueRef<'_>],
    query_ctx: &QueryContextRef,
) -> Result<(String, String, String, bool)> {
    let arg_count = params.len();
    ensure!(
        arg_count == 1 || arg_count == 2,
        InvalidFuncArgsSnafu {
            err_msg: format!(
                "The length of the args is not correct, expect 1 or 2, have: {}",
                arg_count
            ),
        }
    );

    // Indexing is safe: the length check above guarantees at least one element.
    let raw_table_name = match params[0] {
        ValueRef::String(name) => name,
        _ => {
            return UnsupportedInputDataTypeSnafu {
                function: "gc_table",
                datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
            }
            .fail();
        }
    };

    let full_file_listing = match params.get(1) {
        None => DEFAULT_FULL_FILE_LISTING,
        Some(ValueRef::Boolean(flag)) => *flag,
        Some(_) => {
            return UnsupportedInputDataTypeSnafu {
                function: "gc_table",
                datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
            }
            .fail();
        }
    };

    let (catalog_name, schema_name, table_name) =
        session::table_name::table_name_to_full_name(raw_table_name, query_ctx)
            .map_err(BoxedError::new)
            .context(TableMutationSnafu)?;

    Ok((catalog_name, schema_name, table_name, full_file_listing))
}
fn gc_regions_signature() -> Signature {
Signature::variadic_any(Volatility::Immutable)
}
/// Signature of `gc_table`: either a single `Utf8` argument, or a `Utf8`
/// argument followed by a `Boolean` argument.
fn gc_table_signature() -> Signature {
    let table_only = TypeSignature::Uniform(1, vec![ArrowDataType::Utf8]);
    let table_with_flag =
        TypeSignature::Exact(vec![ArrowDataType::Utf8, ArrowDataType::Boolean]);

    Signature::one_of(vec![table_only, table_with_flag], Volatility::Immutable)
}
#[cfg(test)]
mod tests {
    use session::context::QueryContext;

    use super::*;

    /// A trailing boolean is consumed as the `full_file_listing` flag.
    #[test]
    fn test_parse_gc_regions_params_with_full_file_listing() {
        let args = [
            ValueRef::UInt64(1),
            ValueRef::UInt64(2),
            ValueRef::Boolean(true),
        ];

        let (ids, listing) = parse_gc_regions_params(&args).unwrap();

        assert_eq!(ids, vec![1, 2]);
        assert!(listing);
    }

    /// Without a trailing boolean the flag falls back to its default, and
    /// region ids of mixed unsigned widths are accepted.
    #[test]
    fn test_parse_gc_regions_params_default_full_file_listing() {
        let args = [ValueRef::UInt64(1), ValueRef::UInt32(2)];

        let (ids, listing) = parse_gc_regions_params(&args).unwrap();

        assert_eq!(ids, vec![1, 2]);
        assert!(!listing);
    }

    /// A schema-qualified table name is resolved against the query context.
    #[test]
    fn test_parse_gc_table_params_with_full_file_listing() {
        let args = [ValueRef::String("public.t"), ValueRef::Boolean(true)];
        let ctx = QueryContext::arc();

        let (catalog, schema, table, listing) = parse_gc_table_params(&args, &ctx).unwrap();

        assert_eq!(catalog, "greptime");
        assert_eq!(schema, "public");
        assert_eq!(table, "t");
        assert!(listing);
    }
}

View File

@@ -19,7 +19,9 @@ use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_base::AffectedRows;
use common_meta::rpc::procedure::{
ManageRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
GcRegionsRequest as MetaGcRegionsRequest, GcResponse as MetaGcResponse,
GcTableRequest as MetaGcTableRequest, ManageRegionFollowerRequest, MigrateRegionRequest,
ProcedureStateResponse,
};
use common_query::Output;
use common_query::error::Result;
@@ -85,6 +87,12 @@ pub trait ProcedureServiceHandler: Send + Sync {
/// Returns a reference to the catalog manager.
fn catalog_manager(&self) -> &CatalogManagerRef;
/// Manually trigger GC for specific regions.
///
/// `request` carries the target region ids, the `full_file_listing` flag and a timeout.
/// On success the response reports the number of processed regions, the regions that
/// need a retry, and the counts of deleted files and deleted indexes.
async fn gc_regions(&self, request: MetaGcRegionsRequest) -> Result<MetaGcResponse>;
/// Manually trigger GC for a table.
///
/// `request` identifies the table by catalog, schema and table name, and carries the
/// `full_file_listing` flag and a timeout. On success the response reports the number of
/// processed regions, the regions that need a retry, and the counts of deleted files and
/// deleted indexes.
async fn gc_table(&self, request: MetaGcTableRequest) -> Result<MetaGcResponse>;
}
/// This flow service handler is only used for flushing flows for now.

View File

@@ -37,7 +37,8 @@ impl FunctionState {
use catalog::CatalogManagerRef;
use common_base::AffectedRows;
use common_meta::rpc::procedure::{
ManageRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
GcRegionsRequest, GcResponse, GcTableRequest, ManageRegionFollowerRequest,
MigrateRegionRequest, ProcedureStateResponse,
};
use common_query::Output;
use common_query::error::Result;
@@ -82,6 +83,24 @@ impl FunctionState {
Ok(())
}
/// Stub implementation: ignores the request and always reports one processed
/// region, nothing to retry and nothing deleted.
async fn gc_regions(&self, _request: GcRegionsRequest) -> Result<GcResponse> {
    let canned = GcResponse {
        processed_regions: 1,
        need_retry_regions: Vec::new(),
        deleted_files: 0,
        deleted_indexes: 0,
    };
    Ok(canned)
}
/// Stub implementation: ignores the request and always reports one processed
/// region, nothing to retry and nothing deleted.
async fn gc_table(&self, _request: GcTableRequest) -> Result<GcResponse> {
    let canned = GcResponse {
        processed_regions: 1,
        need_retry_regions: Vec::new(),
        deleted_files: 0,
        deleted_indexes: 0,
    };
    Ok(canned)
}
/// Not provided by this implementation: calling it panics via `unimplemented!()`.
fn catalog_manager(&self) -> &CatalogManagerRef {
unimplemented!()
}