Files
greptimedb/tests/cases/standalone/common/alter/alter_table.sql
Yvan Wang d1873ca31d fix(metric-engine): validate column types and require time index in verify_rows (#8018)
* fix(metric-engine): validate column types and require time index in verify_rows

The remote-write path into the metric engine previously bypassed schema
validation. When a row's time index column carried a non-timestamp
datatype (e.g. a string), the request reached mito's ValueBuilder::push
for the timestamp builder and panicked instead of surfacing a typed
error.

Cache the (column_id, data_type, semantic_type) tuple for each physical
column on PhysicalRegionState and use it in verify_rows to:

- reject columns whose datatype or semantic type disagrees with the
  physical region's schema (mirrors mito's WriteRequest::check_schema)
- reject requests that omit the time index column entirely

Field columns stay optional; tag completeness needs per-logical-region
metadata that verify_rows doesn't have and is left to a follow-up.

Fixes #7990.

Signed-off-by: BootstrapperSBL <yvanwww01@gmail.com>

* refactor(metric-engine): simplify PhysicalColumnInfo construction

- Add From<ColumnMetadata> and From<&ColumnMetadata> for PhysicalColumnInfo
  so call sites can use metadata.into() instead of repeating the field list.
- Replace the four struct-literal constructions in create.rs, open.rs and
  alter.rs with the conversion.
- In verify_rows, pass &col.column_name to ColumnNotFoundSnafu instead of
  cloning it explicitly (snafu's context handles the conversion).

Signed-off-by: BootstrapperSBL <yvanwww01@gmail.com>

* perf(metric-engine): cache time index column name in PhysicalRegionState

verify_rows previously scanned every physical column on each row batch to
find the timestamp column. Since the time index is fixed at region
creation and never changes, stash its name on PhysicalRegionState when
the region is first registered and read it directly from there.

add_physical_columns carries a debug_assert to document the invariant
that alter never introduces a new time index.

Signed-off-by: BootstrapperSBL <yvanwww01@gmail.com>

* perf(metric-engine): borrow physical column names when building name_to_id

On the row-write path we built a HashMap<String, ColumnId> by cloning
every column name out of the physical region's cached state. The map is
scoped to the block that holds the state's read guard, so there's no
need to own the keys.

Switch the map to HashMap<&str, ColumnId> and widen RowsIter::new /
IterIndex::new to accept any key type that borrows as str. Existing
test helpers that pass HashMap<String, ColumnId> keep working through
the Borrow<str> bound.

Signed-off-by: BootstrapperSBL <yvanwww01@gmail.com>

* fix: validate metric rows against physical schema

Cache physical column metadata in the metric engine state so row validation and row modification can use the same source of truth for column IDs, data types, and semantic types.

Validate incoming metric rows against the physical schema before writes. Put requests now require the time index and the expected field column, while delete requests keep accepting primary-key-plus-timestamp payloads by skipping the field completeness check.

Pass physical column metadata directly into RowsIter instead of rebuilding a name-to-column-id map at each call site, and cover the new validation paths with tests for missing time indexes, missing fields, and duplicate field columns.

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: do not allow adding a new field

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: fill default value for fields

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: fill default for nullable fields

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: BootstrapperSBL <yvanwww01@gmail.com>
Signed-off-by: evenyag <realevenyag@gmail.com>
Co-authored-by: BootstrapperSBL <yvanwww01@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2026-05-07 12:41:07 +00:00

245 lines
6.1 KiB
SQL

-- Case: add columns to, and then repeatedly re-type a column of, a table whose primary key is (h, i).
CREATE TABLE test_alt_table(h INTEGER, i INTEGER, j TIMESTAMP TIME INDEX, PRIMARY KEY (h, i));
DESC TABLE test_alt_table;
-- Seed two rows before any schema change so the later SELECTs read rows written under the old schema.
INSERT INTO test_alt_table VALUES (1, 1, 0), (2, 2, 1);
-- TODO: Adding `k` with type INTEGER instead of STRING may result in an error such as:
-- Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Int32 but found Utf8 at column index 3
-- Add a STRING column that is also declared PRIMARY KEY, then read the pre-existing rows back (unfiltered and filtered on i).
ALTER TABLE test_alt_table ADD COLUMN k STRING PRIMARY KEY;
DESC TABLE test_alt_table;
SELECT * FROM test_alt_table;
SELECT * FROM test_alt_table WHERE i = 1;
-- NOTE(review): the directive below presumably makes the test runner restart the server before the next statement, so the rest of this case runs against a reopened table; confirm against the sqlness documentation.
-- SQLNESS ARG restart=true
ALTER TABLE test_alt_table ADD COLUMN m INTEGER;
ALTER TABLE test_alt_table ADD COLUMN dt DATETIME;
-- Should fail issue #5422
ALTER TABLE test_alt_table ADD COLUMN n interval;
-- Should fail issue #5422
ALTER TABLE test_alt_table MODIFY COLUMN m interval;
-- Write one row through the newly added m and dt columns.
INSERT INTO test_alt_table (h, i, j, m, dt) VALUES (42, 42, 0, 11, 0);
-- Re-type m three times (Float64, INTEGER, BOOLEAN), selecting every row after each change.
ALTER TABLE test_alt_table MODIFY COLUMN m Float64;
SELECT * FROM test_alt_table;
ALTER TABLE test_alt_table MODIFY COLUMN m INTEGER;
SELECT * FROM test_alt_table;
ALTER TABLE test_alt_table MODIFY COLUMN m BOOLEAN;
SELECT * FROM test_alt_table;
-- Show the final schema, then clean up.
DESC TABLE test_alt_table;
DROP TABLE test_alt_table;
-- Case: change the type of a column that declares a default value (0.0).
-- Before the first and after each MODIFY COLUMN, one row is written without `i`
-- and one row with an explicit `i`; the table is then read back ordered by `h`.
CREATE TABLE test_alt_table_default (
    h INTEGER,
    i Float64 DEFAULT 0.0,
    j TIMESTAMP TIME INDEX,
    PRIMARY KEY (h)
);

INSERT INTO test_alt_table_default (h, j)
VALUES (0, 0);

INSERT INTO test_alt_table_default (h, i, j)
VALUES (1, 0.1, 0);

SELECT *
FROM test_alt_table_default
ORDER BY h;

-- Float64 -> BOOLEAN
ALTER TABLE test_alt_table_default
MODIFY COLUMN i BOOLEAN;

INSERT INTO test_alt_table_default (h, j)
VALUES (2, 0);

INSERT INTO test_alt_table_default (h, i, j)
VALUES (3, TRUE, 0);

SELECT *
FROM test_alt_table_default
ORDER BY h;

-- BOOLEAN -> INTEGER; the schema is described right after this change.
ALTER TABLE test_alt_table_default
MODIFY COLUMN i INTEGER;

DESC TABLE test_alt_table_default;

INSERT INTO test_alt_table_default (h, j)
VALUES (4, 0);

INSERT INTO test_alt_table_default (h, i, j)
VALUES (5, 42, 0);

SELECT *
FROM test_alt_table_default
ORDER BY h;

-- INTEGER -> STRING
ALTER TABLE test_alt_table_default
MODIFY COLUMN i STRING;

INSERT INTO test_alt_table_default (h, j)
VALUES (6, 0);

INSERT INTO test_alt_table_default (h, i, j)
VALUES (7, "word", 1);

SELECT *
FROM test_alt_table_default
ORDER BY h;

DROP TABLE test_alt_table_default;
-- Case: same type-change sequence as the zero-default case, but the column's
-- default is non-zero (0.1) and the table is flushed before the first read.
CREATE TABLE test_alt_table_default_nz (
    h INTEGER,
    i Float64 DEFAULT 0.1,
    j TIMESTAMP TIME INDEX,
    PRIMARY KEY (h)
);

INSERT INTO test_alt_table_default_nz (h, j)
VALUES (0, 0);

INSERT INTO test_alt_table_default_nz (h, i, j)
VALUES (1, 0.0, 0);

ADMIN FLUSH_TABLE('test_alt_table_default_nz');

SELECT *
FROM test_alt_table_default_nz
ORDER BY h;

-- Float64 -> BOOLEAN
ALTER TABLE test_alt_table_default_nz
MODIFY COLUMN i BOOLEAN;

INSERT INTO test_alt_table_default_nz (h, j)
VALUES (2, 0);

INSERT INTO test_alt_table_default_nz (h, i, j)
VALUES (3, FALSE, 0);

SELECT *
FROM test_alt_table_default_nz
ORDER BY h;

-- BOOLEAN -> INTEGER; the schema is described right after this change.
ALTER TABLE test_alt_table_default_nz
MODIFY COLUMN i INTEGER;

DESC TABLE test_alt_table_default_nz;

INSERT INTO test_alt_table_default_nz (h, j)
VALUES (4, 0);

INSERT INTO test_alt_table_default_nz (h, i, j)
VALUES (5, 42, 0);

SELECT *
FROM test_alt_table_default_nz
ORDER BY h;

-- INTEGER -> STRING
ALTER TABLE test_alt_table_default_nz
MODIFY COLUMN i STRING;

INSERT INTO test_alt_table_default_nz (h, j)
VALUES (6, 0);

INSERT INTO test_alt_table_default_nz (h, i, j)
VALUES (7, "word", 1);

SELECT *
FROM test_alt_table_default_nz
ORDER BY h;

DROP TABLE test_alt_table_default_nz;
-- Case: altering a column's type can produce weird-looking results, because the
-- underlying column data is left unchanged by the ALTER.
CREATE TABLE test_alt_table_col_ty (
    h INTEGER,
    i Float64 DEFAULT 0.1,
    j TIMESTAMP TIME INDEX,
    PRIMARY KEY (h)
);

INSERT INTO test_alt_table_col_ty (h, j)
VALUES (0, 0);

INSERT INTO test_alt_table_col_ty (h, i, j)
VALUES (1, 0.2, 0);

SELECT *
FROM test_alt_table_col_ty
ORDER BY h;

-- Float64 -> BOOLEAN
ALTER TABLE test_alt_table_col_ty
MODIFY COLUMN i BOOLEAN;

INSERT INTO test_alt_table_col_ty (h, j)
VALUES (2, 0);

INSERT INTO test_alt_table_col_ty (h, i, j)
VALUES (3, TRUE, 0);

SELECT *
FROM test_alt_table_col_ty
ORDER BY h;

-- BOOLEAN -> INTEGER
ALTER TABLE test_alt_table_col_ty
MODIFY COLUMN i INTEGER;

INSERT INTO test_alt_table_col_ty (h, j)
VALUES (4, 0);

INSERT INTO test_alt_table_col_ty (h, i, j)
VALUES (5, 42, 0);

SELECT *
FROM test_alt_table_col_ty
ORDER BY h;

-- INTEGER -> STRING
ALTER TABLE test_alt_table_col_ty
MODIFY COLUMN i STRING;

INSERT INTO test_alt_table_col_ty (h, j)
VALUES (6, 0);

INSERT INTO test_alt_table_col_ty (h, i, j)
VALUES (7, "how many roads must a man walk down before they call him a man", 0);

-- In this result the original 0.1 shows up as "0.1": the stored column data was
-- never rewritten, only its declared type changed.
SELECT *
FROM test_alt_table_col_ty
ORDER BY h;

DROP TABLE test_alt_table_col_ty;
-- Case (metric engine): two logical tables that share one physical table can
-- each add a column with the same name (`at`).
CREATE TABLE phy (ts timestamp time index, val double) engine = metric with ("physical_metric_table" = "");

-- First logical table on top of `phy`.
CREATE TABLE t1 (
    ts timestamp time index,
    val double,
    host string primary key
) engine = metric with ("on_physical_table" = "phy");

-- The VALUES tuples are positional.
-- NOTE(review): the tuple shape ('host1', 0, 1) suggests the logical table
-- exposes its columns as (host, ts, val) rather than in declaration order;
-- confirm against the expected result file.
INSERT INTO
    t1
VALUES
    ('host1', 0, 1),
    ('host2', 1, 0);

SELECT
    *
FROM
    t1;

-- Second logical table on the same physical table.
CREATE TABLE t2 (
    ts timestamp time index,
    job string primary key,
    val double
) engine = metric with ("on_physical_table" = "phy");

-- Add `at` to t1 first ...
ALTER TABLE
    t1
ADD
    COLUMN `at` STRING PRIMARY KEY;

-- ... then add several primary-key columns to t2, including the same name `at`.
-- The columns are deliberately added out of name order (at3, at, at2, at4).
ALTER TABLE
    t2
ADD
    COLUMN at3 STRING PRIMARY KEY;

ALTER TABLE
    t2
ADD
    COLUMN `at` STRING PRIMARY KEY;

ALTER TABLE
    t2
ADD
    COLUMN at2 STRING PRIMARY KEY;

ALTER TABLE
    t2
ADD
    COLUMN at4 STRING PRIMARY KEY;

-- Positional insert covering all seven columns of t2.
-- NOTE(review): presumably the values map to (at, at2, at3, at4, job, ts, val);
-- confirm against the expected result file.
INSERT INTO
    t2
VALUES
    ("loc_1", "loc_2", "loc_3", "loc_4", 'job1', 0, 1);

SELECT
    *
FROM
    t2;

-- Drop the logical tables before the physical table they are built on.
DROP TABLE t1;

DROP TABLE t2;

DROP TABLE phy;
-- Case: alter a table created with the 'append_mode' option: set a TTL, then
-- add a column at the FIRST position, reading the single row before and after.
CREATE TABLE grpc_latencies (
    ts TIMESTAMP TIME INDEX,
    host STRING,
    method_name STRING,
    latency DOUBLE,
    PRIMARY KEY (host, method_name)
) WITH ('append_mode' = 'true');

INSERT INTO grpc_latencies (ts, host, method_name, latency)
VALUES ('2024-07-11 20:00:06', 'host1', 'GetUser', 103.0);

SELECT *
FROM grpc_latencies;

ALTER TABLE grpc_latencies
SET ttl = '10000d';

ALTER TABLE grpc_latencies
ADD COLUMN home INTEGER FIRST;

SELECT *
FROM grpc_latencies;

DROP TABLE grpc_latencies;