fix: incorrect timestamp index inference (#7530)

* add sqlness case, but can't reproduce Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * reproduction Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix wildcard rule Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * sort result Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2026-01-08 14:22:58 +00:00 · 2026-01-07 19:18:25 +08:00
parent ef6dd5b99f
commit fce1687fa7
5 changed files with 502 additions and 1 deletions
--- a/src/query/src/optimizer/count_wildcard.rs
+++ b/src/query/src/optimizer/count_wildcard.rs
@@ -162,7 +162,7 @@ impl TreeNodeVisitor<'_> for TimeIndexFinder {
        {
            let table_info = adapter.table().table_info();
            self.table_alias
-                .get_or_insert(TableReference::bare(table_info.name.clone()));
+                .get_or_insert(table_scan.table_name.clone());
            self.time_index_col = table_info
                .meta
                .schema
@@ -196,9 +196,21 @@ impl TimeIndexFinder {
 mod test {
    use std::sync::Arc;

+    use common_catalog::consts::DEFAULT_CATALOG_NAME;
+    use common_error::ext::{BoxedError, ErrorExt, StackError};
+    use common_error::status_code::StatusCode;
+    use common_recordbatch::SendableRecordBatchStream;
    use datafusion::functions_aggregate::count::count_all;
+    use datafusion_common::Column;
    use datafusion_expr::LogicalPlanBuilder;
+    use datafusion_sql::TableReference;
+    use datatypes::data_type::ConcreteDataType;
+    use datatypes::schema::{ColumnSchema, SchemaBuilder};
+    use store_api::data_source::DataSource;
+    use store_api::storage::ScanRequest;
+    use table::metadata::{FilterPushDownType, TableInfoBuilder, TableMetaBuilder, TableType};
    use table::table::numbers::NumbersTable;
+    use table::{Table, TableRef};

    use super::*;

@@ -224,4 +236,160 @@ mod test {
        assert_eq!(finder.table_alias, Some(TableReference::bare("FgHiJ")));
        assert!(finder.time_index_col.is_none());
    }
+
+    #[test]
+    fn bare_table_name_time_index() {
+        let table_ref = TableReference::bare("multi_partitioned_test_1");
+        let table =
+            build_time_index_table("multi_partitioned_test_1", "public", DEFAULT_CATALOG_NAME);
+        let table_source = Arc::new(DefaultTableSource::new(Arc::new(
+            DfTableProviderAdapter::new(table),
+        )));
+
+        let plan =
+            LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
+                .unwrap()
+                .aggregate(Vec::<Expr>::new(), vec![count_all()])
+                .unwrap()
+                .build()
+                .unwrap();
+
+        let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
+        assert_eq!(
+            time_index,
+            Some(Column::new(Some(table_ref), "greptime_timestamp"))
+        );
+    }
+
+    #[test]
+    fn schema_qualified_table_name_time_index() {
+        let table_ref = TableReference::partial("telemetry_events", "multi_partitioned_test_1");
+        let table = build_time_index_table(
+            "multi_partitioned_test_1",
+            "telemetry_events",
+            DEFAULT_CATALOG_NAME,
+        );
+        let table_source = Arc::new(DefaultTableSource::new(Arc::new(
+            DfTableProviderAdapter::new(table),
+        )));
+
+        let plan =
+            LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
+                .unwrap()
+                .aggregate(Vec::<Expr>::new(), vec![count_all()])
+                .unwrap()
+                .build()
+                .unwrap();
+
+        let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
+        assert_eq!(
+            time_index,
+            Some(Column::new(Some(table_ref), "greptime_timestamp"))
+        );
+    }
+
+    #[test]
+    fn fully_qualified_table_name_time_index() {
+        let table_ref = TableReference::full(
+            "telemetry_catalog",
+            "telemetry_events",
+            "multi_partitioned_test_1",
+        );
+        let table = build_time_index_table(
+            "multi_partitioned_test_1",
+            "telemetry_events",
+            "telemetry_catalog",
+        );
+        let table_source = Arc::new(DefaultTableSource::new(Arc::new(
+            DfTableProviderAdapter::new(table),
+        )));
+
+        let plan =
+            LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
+                .unwrap()
+                .aggregate(Vec::<Expr>::new(), vec![count_all()])
+                .unwrap()
+                .build()
+                .unwrap();
+
+        let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
+        assert_eq!(
+            time_index,
+            Some(Column::new(Some(table_ref), "greptime_timestamp"))
+        );
+    }
+
+    fn build_time_index_table(table_name: &str, schema_name: &str, catalog_name: &str) -> TableRef {
+        let column_schemas = vec![
+            ColumnSchema::new(
+                "greptime_timestamp",
+                ConcreteDataType::timestamp_nanosecond_datatype(),
+                false,
+            )
+            .with_time_index(true),
+        ];
+        let schema = SchemaBuilder::try_from_columns(column_schemas)
+            .unwrap()
+            .build()
+            .unwrap();
+        let meta = TableMetaBuilder::new_external_table()
+            .schema(Arc::new(schema))
+            .next_column_id(1)
+            .build()
+            .unwrap();
+        let info = TableInfoBuilder::new(table_name.to_string(), meta)
+            .table_id(1)
+            .table_version(0)
+            .catalog_name(catalog_name)
+            .schema_name(schema_name)
+            .table_type(TableType::Base)
+            .build()
+            .unwrap();
+        let data_source = Arc::new(DummyDataSource);
+        Arc::new(Table::new(
+            Arc::new(info),
+            FilterPushDownType::Unsupported,
+            data_source,
+        ))
+    }
+
+    struct DummyDataSource;
+
+    impl DataSource for DummyDataSource {
+        fn get_stream(
+            &self,
+            _request: ScanRequest,
+        ) -> Result<SendableRecordBatchStream, BoxedError> {
+            Err(BoxedError::new(DummyDataSourceError))
+        }
+    }
+
+    #[derive(Debug)]
+    struct DummyDataSourceError;
+
+    impl std::fmt::Display for DummyDataSourceError {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            write!(f, "dummy data source error")
+        }
+    }
+
+    impl std::error::Error for DummyDataSourceError {}
+
+    impl StackError for DummyDataSourceError {
+        fn debug_fmt(&self, _: usize, _: &mut Vec<String>) {}
+
+        fn next(&self) -> Option<&dyn StackError> {
+            None
+        }
+    }
+
+    impl ErrorExt for DummyDataSourceError {
+        fn status_code(&self) -> StatusCode {
+            StatusCode::Internal
+        }
+
+        fn as_any(&self) -> &dyn std::any::Any {
+            self
+        }
+    }
 }
--- a/tests/cases/distributed/optimizer/time_index_filter_pushdown.result
+++ b/tests/cases/distributed/optimizer/time_index_filter_pushdown.result
@@ -0,0 +1,90 @@
+-- Corresponding to issue TBD
+-- An append-only mito table without primary key, and partitioned by a field column.
+CREATE TABLE
+    IF NOT EXISTS `cpu` (
+        `rack` STRING NULL,
+        `os` STRING NULL,
+        `usage_user` BIGINT NULL,
+        `greptime_timestamp` TIMESTAMP(9) NOT NULL,
+        TIME INDEX (`greptime_timestamp`),
+    ) PARTITION ON COLUMNS (`rack`) (
+        rack < '2',
+        rack >= '2'
+        AND rack < '4',
+        rack >= '4'
+        AND rack < '6',
+        rack >= '6'
+        AND rack < '8',
+        rack >= '8'
+    ) ENGINE = mito
+WITH
+    (append_mode = 'true', sst_format = 'flat');
+
+Affected Rows: 0
+
+INSERT INTO
+    cpu
+VALUES
+    ("1", "linux", 10, "2023-06-12 01:04:49"),
+    ("1", "linux", 15, "2023-06-12 01:04:50"),
+    ("3", "windows", 25, "2023-06-12 01:05:00"),
+    ("5", "mac", 30, "2023-06-12 01:03:00"),
+    ("7", "linux", 45, "2023-06-12 02:00:00");
+
+Affected Rows: 5
+
+ADMIN FLUSH_TABLE ('cpu');
+
+--------------------------+
+| ADMIN FLUSH_TABLE('cpu') |
+--------------------------+
+| 0                        |
+--------------------------+
+
+INSERT INTO
+    cpu
+VALUES
+    ("2", "linux", 20, "2023-06-12 01:04:51"),
+    ("2", "windows", 22, "2023-06-12 01:06:00"),
+    ("4", "mac", 12, "2023-06-12 00:59:00"),
+    ("6", "linux", 35, "2023-06-12 01:04:55"),
+    ("8", "windows", 50, "2023-06-12 02:10:00");
+
+Affected Rows: 5
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00';
+
+----------+
+| count(*) |
+----------+
+| 3        |
+----------+
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+---------+----------+
+| os      | count(*) |
+---------+----------+
+| linux   | 1        |
+| windows | 2        |
+---------+----------+
+
+drop table cpu;
+
+Affected Rows: 0
+
--- a/tests/cases/distributed/optimizer/time_index_filter_pushdown.sql
+++ b/tests/cases/distributed/optimizer/time_index_filter_pushdown.sql
@@ -0,0 +1,62 @@
+-- Corresponding to issue TBD
+-- An append-only mito table without primary key, and partitioned by a field column.
+CREATE TABLE
+    IF NOT EXISTS `cpu` (
+        `rack` STRING NULL,
+        `os` STRING NULL,
+        `usage_user` BIGINT NULL,
+        `greptime_timestamp` TIMESTAMP(9) NOT NULL,
+        TIME INDEX (`greptime_timestamp`),
+    ) PARTITION ON COLUMNS (`rack`) (
+        rack < '2',
+        rack >= '2'
+        AND rack < '4',
+        rack >= '4'
+        AND rack < '6',
+        rack >= '6'
+        AND rack < '8',
+        rack >= '8'
+    ) ENGINE = mito
+WITH
+    (append_mode = 'true', sst_format = 'flat');
+
+INSERT INTO
+    cpu
+VALUES
+    ("1", "linux", 10, "2023-06-12 01:04:49"),
+    ("1", "linux", 15, "2023-06-12 01:04:50"),
+    ("3", "windows", 25, "2023-06-12 01:05:00"),
+    ("5", "mac", 30, "2023-06-12 01:03:00"),
+    ("7", "linux", 45, "2023-06-12 02:00:00");
+
+ADMIN FLUSH_TABLE ('cpu');
+
+INSERT INTO
+    cpu
+VALUES
+    ("2", "linux", 20, "2023-06-12 01:04:51"),
+    ("2", "windows", 22, "2023-06-12 01:06:00"),
+    ("4", "mac", 12, "2023-06-12 00:59:00"),
+    ("6", "linux", 35, "2023-06-12 01:04:55"),
+    ("8", "windows", 50, "2023-06-12 02:10:00");
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00';
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+drop table cpu;
--- a/tests/cases/standalone/optimizer/time_index_filter_pushdown.result
+++ b/tests/cases/standalone/optimizer/time_index_filter_pushdown.result
@@ -0,0 +1,108 @@
+-- Corresponding to issue TBD
+-- An append-only mito table without primary key, and partitioned by a field column.
+CREATE TABLE
+    IF NOT EXISTS `cpu` (
+        `rack` STRING NULL,
+        `os` STRING NULL,
+        `usage_user` BIGINT NULL,
+        `greptime_timestamp` TIMESTAMP(9) NOT NULL,
+        TIME INDEX (`greptime_timestamp`),
+    ) PARTITION ON COLUMNS (`rack`) (
+        rack < '2',
+        rack >= '2'
+        AND rack < '4',
+        rack >= '4'
+        AND rack < '6',
+        rack >= '6'
+        AND rack < '8',
+        rack >= '8'
+    ) ENGINE = mito
+WITH
+    (append_mode = 'true', sst_format = 'flat');
+
+Affected Rows: 0
+
+INSERT INTO
+    cpu
+VALUES
+    ("1", "linux", 10, "2023-06-12 01:04:49"),
+    ("1", "linux", 15, "2023-06-12 01:04:50"),
+    ("3", "windows", 25, "2023-06-12 01:05:00"),
+    ("5", "mac", 30, "2023-06-12 01:03:00"),
+    ("7", "linux", 45, "2023-06-12 02:00:00");
+
+Affected Rows: 5
+
+ADMIN FLUSH_TABLE ('cpu');
+
+--------------------------+
+| ADMIN FLUSH_TABLE('cpu') |
+--------------------------+
+| 0                        |
+--------------------------+
+
+INSERT INTO
+    cpu
+VALUES
+    ("2", "linux", 20, "2023-06-12 01:04:51"),
+    ("2", "windows", 22, "2023-06-12 01:06:00"),
+    ("4", "mac", 12, "2023-06-12 00:59:00"),
+    ("6", "linux", 35, "2023-06-12 01:04:55"),
+    ("8", "windows", 50, "2023-06-12 02:10:00");
+
+Affected Rows: 5
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00';
+
+----------+
+| count(*) |
+----------+
+| 3        |
+----------+
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+---------+----------+
+| os      | count(*) |
+---------+----------+
+| linux   | 1        |
+| windows | 2        |
+---------+----------+
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+---------+----------+
+| os      | count(*) |
+---------+----------+
+| linux   | 1        |
+| windows | 2        |
+---------+----------+
+
+drop table cpu;
+
+Affected Rows: 0
+
--- a/tests/cases/standalone/optimizer/time_index_filter_pushdown.sql
+++ b/tests/cases/standalone/optimizer/time_index_filter_pushdown.sql
@@ -0,0 +1,73 @@
+-- Corresponding to issue TBD
+-- An append-only mito table without primary key, and partitioned by a field column.
+CREATE TABLE
+    IF NOT EXISTS `cpu` (
+        `rack` STRING NULL,
+        `os` STRING NULL,
+        `usage_user` BIGINT NULL,
+        `greptime_timestamp` TIMESTAMP(9) NOT NULL,
+        TIME INDEX (`greptime_timestamp`),
+    ) PARTITION ON COLUMNS (`rack`) (
+        rack < '2',
+        rack >= '2'
+        AND rack < '4',
+        rack >= '4'
+        AND rack < '6',
+        rack >= '6'
+        AND rack < '8',
+        rack >= '8'
+    ) ENGINE = mito
+WITH
+    (append_mode = 'true', sst_format = 'flat');
+
+INSERT INTO
+    cpu
+VALUES
+    ("1", "linux", 10, "2023-06-12 01:04:49"),
+    ("1", "linux", 15, "2023-06-12 01:04:50"),
+    ("3", "windows", 25, "2023-06-12 01:05:00"),
+    ("5", "mac", 30, "2023-06-12 01:03:00"),
+    ("7", "linux", 45, "2023-06-12 02:00:00");
+
+ADMIN FLUSH_TABLE ('cpu');
+
+INSERT INTO
+    cpu
+VALUES
+    ("2", "linux", 20, "2023-06-12 01:04:51"),
+    ("2", "windows", 22, "2023-06-12 01:06:00"),
+    ("4", "mac", 12, "2023-06-12 00:59:00"),
+    ("6", "linux", 35, "2023-06-12 01:04:55"),
+    ("8", "windows", 50, "2023-06-12 02:10:00");
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00';
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    public.cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+-- SQLNESS SORT_RESULT 3 1
+select
+    os,
+    count(*)
+from
+    cpu
+where
+    greptime_timestamp > '2023-06-12 01:05:00'
+group by
+    os;
+
+drop table cpu;