diff --git a/src/query/src/optimizer/count_wildcard.rs b/src/query/src/optimizer/count_wildcard.rs index 32090af30d..affdaa18f7 100644 --- a/src/query/src/optimizer/count_wildcard.rs +++ b/src/query/src/optimizer/count_wildcard.rs @@ -162,7 +162,7 @@ impl TreeNodeVisitor<'_> for TimeIndexFinder { { let table_info = adapter.table().table_info(); self.table_alias - .get_or_insert(TableReference::bare(table_info.name.clone())); + .get_or_insert(table_scan.table_name.clone()); self.time_index_col = table_info .meta .schema @@ -196,9 +196,21 @@ impl TimeIndexFinder { mod test { use std::sync::Arc; + use common_catalog::consts::DEFAULT_CATALOG_NAME; + use common_error::ext::{BoxedError, ErrorExt, StackError}; + use common_error::status_code::StatusCode; + use common_recordbatch::SendableRecordBatchStream; use datafusion::functions_aggregate::count::count_all; + use datafusion_common::Column; use datafusion_expr::LogicalPlanBuilder; + use datafusion_sql::TableReference; + use datatypes::data_type::ConcreteDataType; + use datatypes::schema::{ColumnSchema, SchemaBuilder}; + use store_api::data_source::DataSource; + use store_api::storage::ScanRequest; + use table::metadata::{FilterPushDownType, TableInfoBuilder, TableMetaBuilder, TableType}; use table::table::numbers::NumbersTable; + use table::{Table, TableRef}; use super::*; @@ -224,4 +236,160 @@ mod test { assert_eq!(finder.table_alias, Some(TableReference::bare("FgHiJ"))); assert!(finder.time_index_col.is_none()); } + + #[test] + fn bare_table_name_time_index() { + let table_ref = TableReference::bare("multi_partitioned_test_1"); + let table = + build_time_index_table("multi_partitioned_test_1", "public", DEFAULT_CATALOG_NAME); + let table_source = Arc::new(DefaultTableSource::new(Arc::new( + DfTableProviderAdapter::new(table), + ))); + + let plan = + LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![]) + .unwrap() + .aggregate(Vec::::new(), vec![count_all()]) + .unwrap() + .build() + .unwrap(); + + let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan); + assert_eq!( + time_index, + Some(Column::new(Some(table_ref), "greptime_timestamp")) + ); + } + + #[test] + fn schema_qualified_table_name_time_index() { + let table_ref = TableReference::partial("telemetry_events", "multi_partitioned_test_1"); + let table = build_time_index_table( + "multi_partitioned_test_1", + "telemetry_events", + DEFAULT_CATALOG_NAME, + ); + let table_source = Arc::new(DefaultTableSource::new(Arc::new( + DfTableProviderAdapter::new(table), + ))); + + let plan = + LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![]) + .unwrap() + .aggregate(Vec::::new(), vec![count_all()]) + .unwrap() + .build() + .unwrap(); + + let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan); + assert_eq!( + time_index, + Some(Column::new(Some(table_ref), "greptime_timestamp")) + ); + } + + #[test] + fn fully_qualified_table_name_time_index() { + let table_ref = TableReference::full( + "telemetry_catalog", + "telemetry_events", + "multi_partitioned_test_1", + ); + let table = build_time_index_table( + "multi_partitioned_test_1", + "telemetry_events", + "telemetry_catalog", + ); + let table_source = Arc::new(DefaultTableSource::new(Arc::new( + DfTableProviderAdapter::new(table), + ))); + + let plan = + LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![]) + .unwrap() + .aggregate(Vec::::new(), vec![count_all()]) + .unwrap() + .build() + .unwrap(); + + let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan); + assert_eq!( + time_index, + Some(Column::new(Some(table_ref), "greptime_timestamp")) + ); + } + + fn build_time_index_table(table_name: &str, schema_name: &str, catalog_name: &str) -> TableRef { + let column_schemas = vec![ + ColumnSchema::new( + "greptime_timestamp", + ConcreteDataType::timestamp_nanosecond_datatype(), + false, + ) + .with_time_index(true), + ]; + let schema = SchemaBuilder::try_from_columns(column_schemas) + .unwrap() + .build() + .unwrap(); + let meta = TableMetaBuilder::new_external_table() + .schema(Arc::new(schema)) + .next_column_id(1) + .build() + .unwrap(); + let info = TableInfoBuilder::new(table_name.to_string(), meta) + .table_id(1) + .table_version(0) + .catalog_name(catalog_name) + .schema_name(schema_name) + .table_type(TableType::Base) + .build() + .unwrap(); + let data_source = Arc::new(DummyDataSource); + Arc::new(Table::new( + Arc::new(info), + FilterPushDownType::Unsupported, + data_source, + )) + } + + struct DummyDataSource; + + impl DataSource for DummyDataSource { + fn get_stream( + &self, + _request: ScanRequest, + ) -> Result { + Err(BoxedError::new(DummyDataSourceError)) + } + } + + #[derive(Debug)] + struct DummyDataSourceError; + + impl std::fmt::Display for DummyDataSourceError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "dummy data source error") + } + } + + impl std::error::Error for DummyDataSourceError {} + + impl StackError for DummyDataSourceError { + fn debug_fmt(&self, _: usize, _: &mut Vec) {} + + fn next(&self) -> Option<&dyn StackError> { + None + } + } + + impl ErrorExt for DummyDataSourceError { + fn status_code(&self) -> StatusCode { + StatusCode::Internal + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + } } diff --git a/tests/cases/distributed/optimizer/time_index_filter_pushdown.result b/tests/cases/distributed/optimizer/time_index_filter_pushdown.result new file mode 100644 index 0000000000..22e9679374 --- /dev/null +++ b/tests/cases/distributed/optimizer/time_index_filter_pushdown.result @@ -0,0 +1,90 @@ +-- Corresponding to issue TBD +-- An append-only mito table without primary key, and partitioned by a field column. +CREATE TABLE + IF NOT EXISTS `cpu` ( + `rack` STRING NULL, + `os` STRING NULL, + `usage_user` BIGINT NULL, + `greptime_timestamp` TIMESTAMP(9) NOT NULL, + TIME INDEX (`greptime_timestamp`), + ) PARTITION ON COLUMNS (`rack`) ( + rack < '2', + rack >= '2' + AND rack < '4', + rack >= '4' + AND rack < '6', + rack >= '6' + AND rack < '8', + rack >= '8' + ) ENGINE = mito +WITH + (append_mode = 'true', sst_format = 'flat'); + +Affected Rows: 0 + +INSERT INTO + cpu +VALUES + ("1", "linux", 10, "2023-06-12 01:04:49"), + ("1", "linux", 15, "2023-06-12 01:04:50"), + ("3", "windows", 25, "2023-06-12 01:05:00"), + ("5", "mac", 30, "2023-06-12 01:03:00"), + ("7", "linux", 45, "2023-06-12 02:00:00"); + +Affected Rows: 5 + +ADMIN FLUSH_TABLE ('cpu'); + ++--------------------------+ +| ADMIN FLUSH_TABLE('cpu') | ++--------------------------+ +| 0 | ++--------------------------+ + +INSERT INTO + cpu +VALUES + ("2", "linux", 20, "2023-06-12 01:04:51"), + ("2", "windows", 22, "2023-06-12 01:06:00"), + ("4", "mac", 12, "2023-06-12 00:59:00"), + ("6", "linux", 35, "2023-06-12 01:04:55"), + ("8", "windows", 50, "2023-06-12 02:10:00"); + +Affected Rows: 5 + +-- SQLNESS SORT_RESULT 3 1 +select + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00'; + ++----------+ +| count(*) | ++----------+ +| 3 | ++----------+ + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + ++---------+----------+ +| os | count(*) | ++---------+----------+ +| linux | 1 | +| windows | 2 | ++---------+----------+ + +drop table cpu; + +Affected Rows: 0 + diff --git a/tests/cases/distributed/optimizer/time_index_filter_pushdown.sql b/tests/cases/distributed/optimizer/time_index_filter_pushdown.sql new file mode 100644 index 0000000000..f5fa24efe7 --- /dev/null +++ b/tests/cases/distributed/optimizer/time_index_filter_pushdown.sql @@ -0,0 +1,62 @@ +-- Corresponding to issue TBD +-- An append-only mito table without primary key, and partitioned by a field column. +CREATE TABLE + IF NOT EXISTS `cpu` ( + `rack` STRING NULL, + `os` STRING NULL, + `usage_user` BIGINT NULL, + `greptime_timestamp` TIMESTAMP(9) NOT NULL, + TIME INDEX (`greptime_timestamp`), + ) PARTITION ON COLUMNS (`rack`) ( + rack < '2', + rack >= '2' + AND rack < '4', + rack >= '4' + AND rack < '6', + rack >= '6' + AND rack < '8', + rack >= '8' + ) ENGINE = mito +WITH + (append_mode = 'true', sst_format = 'flat'); + +INSERT INTO + cpu +VALUES + ("1", "linux", 10, "2023-06-12 01:04:49"), + ("1", "linux", 15, "2023-06-12 01:04:50"), + ("3", "windows", 25, "2023-06-12 01:05:00"), + ("5", "mac", 30, "2023-06-12 01:03:00"), + ("7", "linux", 45, "2023-06-12 02:00:00"); + +ADMIN FLUSH_TABLE ('cpu'); + +INSERT INTO + cpu +VALUES + ("2", "linux", 20, "2023-06-12 01:04:51"), + ("2", "windows", 22, "2023-06-12 01:06:00"), + ("4", "mac", 12, "2023-06-12 00:59:00"), + ("6", "linux", 35, "2023-06-12 01:04:55"), + ("8", "windows", 50, "2023-06-12 02:10:00"); + +-- SQLNESS SORT_RESULT 3 1 +select + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00'; + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + +drop table cpu; \ No newline at end of file diff --git a/tests/cases/standalone/optimizer/time_index_filter_pushdown.result b/tests/cases/standalone/optimizer/time_index_filter_pushdown.result new file mode 100644 index 0000000000..ea10cfc416 --- /dev/null +++ b/tests/cases/standalone/optimizer/time_index_filter_pushdown.result @@ -0,0 +1,108 @@ +-- Corresponding to issue TBD +-- An append-only mito table without primary key, and partitioned by a field column. +CREATE TABLE + IF NOT EXISTS `cpu` ( + `rack` STRING NULL, + `os` STRING NULL, + `usage_user` BIGINT NULL, + `greptime_timestamp` TIMESTAMP(9) NOT NULL, + TIME INDEX (`greptime_timestamp`), + ) PARTITION ON COLUMNS (`rack`) ( + rack < '2', + rack >= '2' + AND rack < '4', + rack >= '4' + AND rack < '6', + rack >= '6' + AND rack < '8', + rack >= '8' + ) ENGINE = mito +WITH + (append_mode = 'true', sst_format = 'flat'); + +Affected Rows: 0 + +INSERT INTO + cpu +VALUES + ("1", "linux", 10, "2023-06-12 01:04:49"), + ("1", "linux", 15, "2023-06-12 01:04:50"), + ("3", "windows", 25, "2023-06-12 01:05:00"), + ("5", "mac", 30, "2023-06-12 01:03:00"), + ("7", "linux", 45, "2023-06-12 02:00:00"); + +Affected Rows: 5 + +ADMIN FLUSH_TABLE ('cpu'); + ++--------------------------+ +| ADMIN FLUSH_TABLE('cpu') | ++--------------------------+ +| 0 | ++--------------------------+ + +INSERT INTO + cpu +VALUES + ("2", "linux", 20, "2023-06-12 01:04:51"), + ("2", "windows", 22, "2023-06-12 01:06:00"), + ("4", "mac", 12, "2023-06-12 00:59:00"), + ("6", "linux", 35, "2023-06-12 01:04:55"), + ("8", "windows", 50, "2023-06-12 02:10:00"); + +Affected Rows: 5 + +-- SQLNESS SORT_RESULT 3 1 +select + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00'; + ++----------+ +| count(*) | ++----------+ +| 3 | ++----------+ + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + ++---------+----------+ +| os | count(*) | ++---------+----------+ +| linux | 1 | +| windows | 2 | ++---------+----------+ + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + ++---------+----------+ +| os | count(*) | ++---------+----------+ +| linux | 1 | +| windows | 2 | ++---------+----------+ + +drop table cpu; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/optimizer/time_index_filter_pushdown.sql b/tests/cases/standalone/optimizer/time_index_filter_pushdown.sql new file mode 100644 index 0000000000..28f4180a2c --- /dev/null +++ b/tests/cases/standalone/optimizer/time_index_filter_pushdown.sql @@ -0,0 +1,73 @@ +-- Corresponding to issue TBD +-- An append-only mito table without primary key, and partitioned by a field column. +CREATE TABLE + IF NOT EXISTS `cpu` ( + `rack` STRING NULL, + `os` STRING NULL, + `usage_user` BIGINT NULL, + `greptime_timestamp` TIMESTAMP(9) NOT NULL, + TIME INDEX (`greptime_timestamp`), + ) PARTITION ON COLUMNS (`rack`) ( + rack < '2', + rack >= '2' + AND rack < '4', + rack >= '4' + AND rack < '6', + rack >= '6' + AND rack < '8', + rack >= '8' + ) ENGINE = mito +WITH + (append_mode = 'true', sst_format = 'flat'); + +INSERT INTO + cpu +VALUES + ("1", "linux", 10, "2023-06-12 01:04:49"), + ("1", "linux", 15, "2023-06-12 01:04:50"), + ("3", "windows", 25, "2023-06-12 01:05:00"), + ("5", "mac", 30, "2023-06-12 01:03:00"), + ("7", "linux", 45, "2023-06-12 02:00:00"); + +ADMIN FLUSH_TABLE ('cpu'); + +INSERT INTO + cpu +VALUES + ("2", "linux", 20, "2023-06-12 01:04:51"), + ("2", "windows", 22, "2023-06-12 01:06:00"), + ("4", "mac", 12, "2023-06-12 00:59:00"), + ("6", "linux", 35, "2023-06-12 01:04:55"), + ("8", "windows", 50, "2023-06-12 02:10:00"); + +-- SQLNESS SORT_RESULT 3 1 +select + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00'; + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + public.cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + +-- SQLNESS SORT_RESULT 3 1 +select + os, + count(*) +from + cpu +where + greptime_timestamp > '2023-06-12 01:05:00' +group by + os; + +drop table cpu; \ No newline at end of file