feat: add query engine options (#5895)

* feat: add query engine options

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update example

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-04-14 21:12:37 +08:00
committed by GitHub
parent c522893552
commit 747b71bf74
24 changed files with 195 additions and 14 deletions

View File

@@ -567,6 +567,7 @@ mod tests {
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use super::*;
use crate::options::QueryOptions;
use crate::parser::QueryLanguageParser;
use crate::query_engine::{QueryEngineFactory, QueryEngineRef};
@@ -581,7 +582,16 @@ mod tests {
};
catalog_manager.register_table_sync(req).unwrap();
QueryEngineFactory::new(catalog_manager, None, None, None, None, false).query_engine()
QueryEngineFactory::new(
catalog_manager,
None,
None,
None,
None,
false,
QueryOptions::default(),
)
.query_engine()
}
#[tokio::test]

View File

@@ -29,6 +29,7 @@ pub mod executor;
pub mod log_query;
pub mod metrics;
mod optimizer;
pub mod options;
pub mod parser;
mod part_sort;
pub mod physical_wrapper;

30
src/query/src/options.rs Normal file
View File

@@ -0,0 +1,30 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
/// Query engine config.
///
/// `#[serde(default)]` makes any field missing from the deserialized input
/// fall back to the value produced by this type's [`Default`] implementation.
/// `Default` is derived, so every field starts from its own type's default.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct QueryOptions {
    /// Parallelism of the query engine. Defaults to 0, which implies the number of logical CPUs.
    pub parallelism: usize,
}

View File

@@ -38,6 +38,7 @@ use table::TableRef;
use crate::dataframe::DataFrame;
use crate::datafusion::DatafusionQueryEngine;
use crate::error::Result;
use crate::options::QueryOptions;
use crate::planner::LogicalPlanner;
pub use crate::query_engine::context::QueryEngineContext;
pub use crate::query_engine::state::QueryEngineState;
@@ -106,6 +107,7 @@ impl QueryEngineFactory {
procedure_service_handler: Option<ProcedureServiceHandlerRef>,
flow_service_handler: Option<FlowServiceHandlerRef>,
with_dist_planner: bool,
options: QueryOptions,
) -> Self {
Self::new_with_plugins(
catalog_manager,
@@ -115,9 +117,11 @@ impl QueryEngineFactory {
flow_service_handler,
with_dist_planner,
Default::default(),
options,
)
}
#[allow(clippy::too_many_arguments)]
pub fn new_with_plugins(
catalog_manager: CatalogManagerRef,
region_query_handler: Option<RegionQueryHandlerRef>,
@@ -126,6 +130,7 @@ impl QueryEngineFactory {
flow_service_handler: Option<FlowServiceHandlerRef>,
with_dist_planner: bool,
plugins: Plugins,
options: QueryOptions,
) -> Self {
let state = Arc::new(QueryEngineState::new(
catalog_manager,
@@ -135,6 +140,7 @@ impl QueryEngineFactory {
flow_service_handler,
with_dist_planner,
plugins.clone(),
options,
));
let query_engine = Arc::new(DatafusionQueryEngine::new(state, plugins));
register_functions(&query_engine);
@@ -166,7 +172,15 @@ mod tests {
#[test]
fn test_query_engine_factory() {
let catalog_list = catalog::memory::new_memory_catalog_manager().unwrap();
let factory = QueryEngineFactory::new(catalog_list, None, None, None, None, false);
let factory = QueryEngineFactory::new(
catalog_list,
None,
None,
None,
None,
false,
QueryOptions::default(),
);
let engine = factory.query_engine();

View File

@@ -75,6 +75,7 @@ impl QueryEngineContext {
use common_base::Plugins;
use session::context::QueryContext;
use crate::options::QueryOptions;
use crate::query_engine::QueryEngineState;
let state = Arc::new(QueryEngineState::new(
@@ -85,6 +86,7 @@ impl QueryEngineContext {
None,
false,
Plugins::default(),
QueryOptions::default(),
));
QueryEngineContext::new(state.session_state(), QueryContext::arc())

View File

@@ -159,6 +159,7 @@ mod tests {
use super::*;
use crate::dummy_catalog::DummyCatalogList;
use crate::optimizer::test_util::mock_table_provider;
use crate::options::QueryOptions;
use crate::QueryEngineFactory;
fn mock_plan(schema: SchemaRef) -> LogicalPlan {
@@ -177,7 +178,15 @@ mod tests {
#[tokio::test]
async fn test_serializer_decode_plan() {
let catalog_list = catalog::memory::new_memory_catalog_manager().unwrap();
let factory = QueryEngineFactory::new(catalog_list, None, None, None, None, false);
let factory = QueryEngineFactory::new(
catalog_list,
None,
None,
None,
None,
false,
QueryOptions::default(),
);
let engine = factory.query_engine();

View File

@@ -54,6 +54,7 @@ use crate::optimizer::string_normalization::StringNormalizationRule;
use crate::optimizer::type_conversion::TypeConversionRule;
use crate::optimizer::windowed_sort::WindowedSortPhysicalRule;
use crate::optimizer::ExtensionAnalyzerRule;
use crate::options::QueryOptions as QueryOptionsNew;
use crate::query_engine::options::QueryOptions;
use crate::query_engine::DefaultSerializer;
use crate::range_select::planner::RangeSelectPlanner;
@@ -81,6 +82,7 @@ impl fmt::Debug for QueryEngineState {
}
impl QueryEngineState {
#[allow(clippy::too_many_arguments)]
pub fn new(
catalog_list: CatalogManagerRef,
region_query_handler: Option<RegionQueryHandlerRef>,
@@ -89,9 +91,13 @@ impl QueryEngineState {
flow_service_handler: Option<FlowServiceHandlerRef>,
with_dist_planner: bool,
plugins: Plugins,
options: QueryOptionsNew,
) -> Self {
let runtime_env = Arc::new(RuntimeEnv::default());
let mut session_config = SessionConfig::new().with_create_default_catalog_and_schema(false);
if options.parallelism > 0 {
session_config = session_config.with_target_partitions(options.parallelism);
}
// todo(hl): This serves as a workaround for https://github.com/GreptimeTeam/greptimedb/issues/5659
// and we can add that check back once we upgrade datafusion.

View File

@@ -611,6 +611,7 @@ mod test {
use table::test_util::EmptyTable;
use super::*;
use crate::options::QueryOptions;
use crate::parser::QueryLanguageParser;
use crate::{QueryEngineFactory, QueryEngineRef};
@@ -663,7 +664,16 @@ mod test {
table,
})
.is_ok());
QueryEngineFactory::new(catalog_list, None, None, None, None, false).query_engine()
QueryEngineFactory::new(
catalog_list,
None,
None,
None,
None,
false,
QueryOptions::default(),
)
.query_engine()
}
async fn do_query(sql: &str) -> Result<LogicalPlan> {

View File

@@ -18,6 +18,7 @@ use common_recordbatch::{util, RecordBatch};
use session::context::QueryContext;
use table::TableRef;
use crate::options::QueryOptions;
use crate::parser::QueryLanguageParser;
use crate::{QueryEngineFactory, QueryEngineRef};
@@ -46,5 +47,14 @@ async fn exec_selection(engine: QueryEngineRef, sql: &str) -> Vec<RecordBatch> {
pub fn new_query_engine_with_table(table: TableRef) -> QueryEngineRef {
let catalog_manager = MemoryCatalogManager::new_with_table(table);
QueryEngineFactory::new(catalog_manager, None, None, None, None, false).query_engine()
QueryEngineFactory::new(
catalog_manager,
None,
None,
None,
None,
false,
QueryOptions::default(),
)
.query_engine()
}

View File

@@ -33,6 +33,7 @@ use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::test_util::MemTable;
use crate::error::{QueryExecutionSnafu, Result};
use crate::options::QueryOptions as QueryOptionsNew;
use crate::parser::QueryLanguageParser;
use crate::query_engine::options::QueryOptions;
use crate::query_engine::QueryEngineFactory;
@@ -43,7 +44,15 @@ async fn test_datafusion_query_engine() -> Result<()> {
let catalog_list = catalog::memory::new_memory_catalog_manager()
.map_err(BoxedError::new)
.context(QueryExecutionSnafu)?;
let factory = QueryEngineFactory::new(catalog_list, None, None, None, None, false);
let factory = QueryEngineFactory::new(
catalog_list,
None,
None,
None,
None,
false,
QueryOptionsNew::default(),
);
let engine = factory.query_engine();
let column_schemas = vec![ColumnSchema::new(
@@ -122,8 +131,16 @@ async fn test_query_validate() -> Result<()> {
disallow_cross_catalog_query: true,
});
let factory =
QueryEngineFactory::new_with_plugins(catalog_list, None, None, None, None, false, plugins);
let factory = QueryEngineFactory::new_with_plugins(
catalog_list,
None,
None,
None,
None,
false,
plugins,
QueryOptionsNew::default(),
);
let engine = factory.query_engine();
let stmt =

View File

@@ -33,6 +33,7 @@ use table::predicate::build_time_range_predicate;
use table::test_util::MemTable;
use table::{Table, TableRef};
use crate::options::QueryOptions;
use crate::tests::exec_selection;
use crate::{QueryEngineFactory, QueryEngineRef};
@@ -102,8 +103,16 @@ fn create_test_engine() -> TimeRangeTester {
};
let _ = catalog_manager.register_table_sync(req).unwrap();
let engine =
QueryEngineFactory::new(catalog_manager, None, None, None, None, false).query_engine();
let engine = QueryEngineFactory::new(
catalog_manager,
None,
None,
None,
None,
false,
QueryOptions::default(),
)
.query_engine();
TimeRangeTester { engine, filter }
}