mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-08 14:22:58 +00:00
fix: incorrect timestamp index inference (#7530)
* add sqlness case, but can't reproduce Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * reproduction Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix wildcard rule Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * sort result Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -162,7 +162,7 @@ impl TreeNodeVisitor<'_> for TimeIndexFinder {
|
||||
{
|
||||
let table_info = adapter.table().table_info();
|
||||
self.table_alias
|
||||
.get_or_insert(TableReference::bare(table_info.name.clone()));
|
||||
.get_or_insert(table_scan.table_name.clone());
|
||||
self.time_index_col = table_info
|
||||
.meta
|
||||
.schema
|
||||
@@ -196,9 +196,21 @@ impl TimeIndexFinder {
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::DEFAULT_CATALOG_NAME;
|
||||
use common_error::ext::{BoxedError, ErrorExt, StackError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datafusion::functions_aggregate::count::count_all;
|
||||
use datafusion_common::Column;
|
||||
use datafusion_expr::LogicalPlanBuilder;
|
||||
use datafusion_sql::TableReference;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder};
|
||||
use store_api::data_source::DataSource;
|
||||
use store_api::storage::ScanRequest;
|
||||
use table::metadata::{FilterPushDownType, TableInfoBuilder, TableMetaBuilder, TableType};
|
||||
use table::table::numbers::NumbersTable;
|
||||
use table::{Table, TableRef};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -224,4 +236,160 @@ mod test {
|
||||
assert_eq!(finder.table_alias, Some(TableReference::bare("FgHiJ")));
|
||||
assert!(finder.time_index_col.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bare_table_name_time_index() {
|
||||
let table_ref = TableReference::bare("multi_partitioned_test_1");
|
||||
let table =
|
||||
build_time_index_table("multi_partitioned_test_1", "public", DEFAULT_CATALOG_NAME);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(table),
|
||||
)));
|
||||
|
||||
let plan =
|
||||
LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(Vec::<Expr>::new(), vec![count_all()])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
|
||||
assert_eq!(
|
||||
time_index,
|
||||
Some(Column::new(Some(table_ref), "greptime_timestamp"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn schema_qualified_table_name_time_index() {
|
||||
let table_ref = TableReference::partial("telemetry_events", "multi_partitioned_test_1");
|
||||
let table = build_time_index_table(
|
||||
"multi_partitioned_test_1",
|
||||
"telemetry_events",
|
||||
DEFAULT_CATALOG_NAME,
|
||||
);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(table),
|
||||
)));
|
||||
|
||||
let plan =
|
||||
LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(Vec::<Expr>::new(), vec![count_all()])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
|
||||
assert_eq!(
|
||||
time_index,
|
||||
Some(Column::new(Some(table_ref), "greptime_timestamp"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fully_qualified_table_name_time_index() {
|
||||
let table_ref = TableReference::full(
|
||||
"telemetry_catalog",
|
||||
"telemetry_events",
|
||||
"multi_partitioned_test_1",
|
||||
);
|
||||
let table = build_time_index_table(
|
||||
"multi_partitioned_test_1",
|
||||
"telemetry_events",
|
||||
"telemetry_catalog",
|
||||
);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(table),
|
||||
)));
|
||||
|
||||
let plan =
|
||||
LogicalPlanBuilder::scan_with_filters(table_ref.clone(), table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(Vec::<Expr>::new(), vec![count_all()])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let time_index = CountWildcardToTimeIndexRule::try_find_time_index_col(&plan);
|
||||
assert_eq!(
|
||||
time_index,
|
||||
Some(Column::new(Some(table_ref), "greptime_timestamp"))
|
||||
);
|
||||
}
|
||||
|
||||
fn build_time_index_table(table_name: &str, schema_name: &str, catalog_name: &str) -> TableRef {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new(
|
||||
"greptime_timestamp",
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from_columns(column_schemas)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let meta = TableMetaBuilder::new_external_table()
|
||||
.schema(Arc::new(schema))
|
||||
.next_column_id(1)
|
||||
.build()
|
||||
.unwrap();
|
||||
let info = TableInfoBuilder::new(table_name.to_string(), meta)
|
||||
.table_id(1)
|
||||
.table_version(0)
|
||||
.catalog_name(catalog_name)
|
||||
.schema_name(schema_name)
|
||||
.table_type(TableType::Base)
|
||||
.build()
|
||||
.unwrap();
|
||||
let data_source = Arc::new(DummyDataSource);
|
||||
Arc::new(Table::new(
|
||||
Arc::new(info),
|
||||
FilterPushDownType::Unsupported,
|
||||
data_source,
|
||||
))
|
||||
}
|
||||
|
||||
struct DummyDataSource;
|
||||
|
||||
impl DataSource for DummyDataSource {
|
||||
fn get_stream(
|
||||
&self,
|
||||
_request: ScanRequest,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
Err(BoxedError::new(DummyDataSourceError))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DummyDataSourceError;
|
||||
|
||||
impl std::fmt::Display for DummyDataSourceError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "dummy data source error")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for DummyDataSourceError {}
|
||||
|
||||
impl StackError for DummyDataSourceError {
|
||||
fn debug_fmt(&self, _: usize, _: &mut Vec<String>) {}
|
||||
|
||||
fn next(&self) -> Option<&dyn StackError> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorExt for DummyDataSourceError {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
StatusCode::Internal
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
-- Corresponding to issue TBD
|
||||
-- An append-only mito table without primary key, and partitioned by a field column.
|
||||
CREATE TABLE
|
||||
IF NOT EXISTS `cpu` (
|
||||
`rack` STRING NULL,
|
||||
`os` STRING NULL,
|
||||
`usage_user` BIGINT NULL,
|
||||
`greptime_timestamp` TIMESTAMP(9) NOT NULL,
|
||||
TIME INDEX (`greptime_timestamp`),
|
||||
) PARTITION ON COLUMNS (`rack`) (
|
||||
rack < '2',
|
||||
rack >= '2'
|
||||
AND rack < '4',
|
||||
rack >= '4'
|
||||
AND rack < '6',
|
||||
rack >= '6'
|
||||
AND rack < '8',
|
||||
rack >= '8'
|
||||
) ENGINE = mito
|
||||
WITH
|
||||
(append_mode = 'true', sst_format = 'flat');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("1", "linux", 10, "2023-06-12 01:04:49"),
|
||||
("1", "linux", 15, "2023-06-12 01:04:50"),
|
||||
("3", "windows", 25, "2023-06-12 01:05:00"),
|
||||
("5", "mac", 30, "2023-06-12 01:03:00"),
|
||||
("7", "linux", 45, "2023-06-12 02:00:00");
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
ADMIN FLUSH_TABLE ('cpu');
|
||||
|
||||
+--------------------------+
|
||||
| ADMIN FLUSH_TABLE('cpu') |
|
||||
+--------------------------+
|
||||
| 0 |
|
||||
+--------------------------+
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("2", "linux", 20, "2023-06-12 01:04:51"),
|
||||
("2", "windows", 22, "2023-06-12 01:06:00"),
|
||||
("4", "mac", 12, "2023-06-12 00:59:00"),
|
||||
("6", "linux", 35, "2023-06-12 01:04:55"),
|
||||
("8", "windows", 50, "2023-06-12 02:10:00");
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00';
|
||||
|
||||
+----------+
|
||||
| count(*) |
|
||||
+----------+
|
||||
| 3 |
|
||||
+----------+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
+---------+----------+
|
||||
| os | count(*) |
|
||||
+---------+----------+
|
||||
| linux | 1 |
|
||||
| windows | 2 |
|
||||
+---------+----------+
|
||||
|
||||
drop table cpu;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
-- Corresponding to issue TBD
|
||||
-- An append-only mito table without primary key, and partitioned by a field column.
|
||||
CREATE TABLE
|
||||
IF NOT EXISTS `cpu` (
|
||||
`rack` STRING NULL,
|
||||
`os` STRING NULL,
|
||||
`usage_user` BIGINT NULL,
|
||||
`greptime_timestamp` TIMESTAMP(9) NOT NULL,
|
||||
TIME INDEX (`greptime_timestamp`),
|
||||
) PARTITION ON COLUMNS (`rack`) (
|
||||
rack < '2',
|
||||
rack >= '2'
|
||||
AND rack < '4',
|
||||
rack >= '4'
|
||||
AND rack < '6',
|
||||
rack >= '6'
|
||||
AND rack < '8',
|
||||
rack >= '8'
|
||||
) ENGINE = mito
|
||||
WITH
|
||||
(append_mode = 'true', sst_format = 'flat');
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("1", "linux", 10, "2023-06-12 01:04:49"),
|
||||
("1", "linux", 15, "2023-06-12 01:04:50"),
|
||||
("3", "windows", 25, "2023-06-12 01:05:00"),
|
||||
("5", "mac", 30, "2023-06-12 01:03:00"),
|
||||
("7", "linux", 45, "2023-06-12 02:00:00");
|
||||
|
||||
ADMIN FLUSH_TABLE ('cpu');
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("2", "linux", 20, "2023-06-12 01:04:51"),
|
||||
("2", "windows", 22, "2023-06-12 01:06:00"),
|
||||
("4", "mac", 12, "2023-06-12 00:59:00"),
|
||||
("6", "linux", 35, "2023-06-12 01:04:55"),
|
||||
("8", "windows", 50, "2023-06-12 02:10:00");
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00';
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
drop table cpu;
|
||||
@@ -0,0 +1,108 @@
|
||||
-- Corresponding to issue TBD
|
||||
-- An append-only mito table without primary key, and partitioned by a field column.
|
||||
CREATE TABLE
|
||||
IF NOT EXISTS `cpu` (
|
||||
`rack` STRING NULL,
|
||||
`os` STRING NULL,
|
||||
`usage_user` BIGINT NULL,
|
||||
`greptime_timestamp` TIMESTAMP(9) NOT NULL,
|
||||
TIME INDEX (`greptime_timestamp`),
|
||||
) PARTITION ON COLUMNS (`rack`) (
|
||||
rack < '2',
|
||||
rack >= '2'
|
||||
AND rack < '4',
|
||||
rack >= '4'
|
||||
AND rack < '6',
|
||||
rack >= '6'
|
||||
AND rack < '8',
|
||||
rack >= '8'
|
||||
) ENGINE = mito
|
||||
WITH
|
||||
(append_mode = 'true', sst_format = 'flat');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("1", "linux", 10, "2023-06-12 01:04:49"),
|
||||
("1", "linux", 15, "2023-06-12 01:04:50"),
|
||||
("3", "windows", 25, "2023-06-12 01:05:00"),
|
||||
("5", "mac", 30, "2023-06-12 01:03:00"),
|
||||
("7", "linux", 45, "2023-06-12 02:00:00");
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
ADMIN FLUSH_TABLE ('cpu');
|
||||
|
||||
+--------------------------+
|
||||
| ADMIN FLUSH_TABLE('cpu') |
|
||||
+--------------------------+
|
||||
| 0 |
|
||||
+--------------------------+
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("2", "linux", 20, "2023-06-12 01:04:51"),
|
||||
("2", "windows", 22, "2023-06-12 01:06:00"),
|
||||
("4", "mac", 12, "2023-06-12 00:59:00"),
|
||||
("6", "linux", 35, "2023-06-12 01:04:55"),
|
||||
("8", "windows", 50, "2023-06-12 02:10:00");
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00';
|
||||
|
||||
+----------+
|
||||
| count(*) |
|
||||
+----------+
|
||||
| 3 |
|
||||
+----------+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
+---------+----------+
|
||||
| os | count(*) |
|
||||
+---------+----------+
|
||||
| linux | 1 |
|
||||
| windows | 2 |
|
||||
+---------+----------+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
+---------+----------+
|
||||
| os | count(*) |
|
||||
+---------+----------+
|
||||
| linux | 1 |
|
||||
| windows | 2 |
|
||||
+---------+----------+
|
||||
|
||||
drop table cpu;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
-- Corresponding to issue TBD
|
||||
-- An append-only mito table without primary key, and partitioned by a field column.
|
||||
CREATE TABLE
|
||||
IF NOT EXISTS `cpu` (
|
||||
`rack` STRING NULL,
|
||||
`os` STRING NULL,
|
||||
`usage_user` BIGINT NULL,
|
||||
`greptime_timestamp` TIMESTAMP(9) NOT NULL,
|
||||
TIME INDEX (`greptime_timestamp`),
|
||||
) PARTITION ON COLUMNS (`rack`) (
|
||||
rack < '2',
|
||||
rack >= '2'
|
||||
AND rack < '4',
|
||||
rack >= '4'
|
||||
AND rack < '6',
|
||||
rack >= '6'
|
||||
AND rack < '8',
|
||||
rack >= '8'
|
||||
) ENGINE = mito
|
||||
WITH
|
||||
(append_mode = 'true', sst_format = 'flat');
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("1", "linux", 10, "2023-06-12 01:04:49"),
|
||||
("1", "linux", 15, "2023-06-12 01:04:50"),
|
||||
("3", "windows", 25, "2023-06-12 01:05:00"),
|
||||
("5", "mac", 30, "2023-06-12 01:03:00"),
|
||||
("7", "linux", 45, "2023-06-12 02:00:00");
|
||||
|
||||
ADMIN FLUSH_TABLE ('cpu');
|
||||
|
||||
INSERT INTO
|
||||
cpu
|
||||
VALUES
|
||||
("2", "linux", 20, "2023-06-12 01:04:51"),
|
||||
("2", "windows", 22, "2023-06-12 01:06:00"),
|
||||
("4", "mac", 12, "2023-06-12 00:59:00"),
|
||||
("6", "linux", 35, "2023-06-12 01:04:55"),
|
||||
("8", "windows", 50, "2023-06-12 02:10:00");
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00';
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
public.cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
select
|
||||
os,
|
||||
count(*)
|
||||
from
|
||||
cpu
|
||||
where
|
||||
greptime_timestamp > '2023-06-12 01:05:00'
|
||||
group by
|
||||
os;
|
||||
|
||||
drop table cpu;
|
||||
Reference in New Issue
Block a user