diff --git a/Cargo.lock b/Cargo.lock index d483ec7088..c5b8fc016a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1812,10 +1812,12 @@ name = "common-base" version = "0.9.3" dependencies = [ "anymap", + "async-trait", "bitvec", "bytes", "common-error", "common-macro", + "futures", "paste", "serde", "snafu 0.8.4", @@ -1952,6 +1954,7 @@ dependencies = [ "datatypes", "geohash", "h3o", + "jsonb", "num", "num-traits", "once_cell", @@ -2293,6 +2296,7 @@ dependencies = [ "common-telemetry", "futures-util", "humantime-serde", + "num_cpus", "rskafka", "rustls 0.23.10", "rustls-native-certs", @@ -3166,6 +3170,7 @@ dependencies = [ "datafusion-common", "enum_dispatch", "greptime-proto", + "jsonb", "num", "num-traits", "ordered-float 3.9.2", @@ -3698,6 +3703,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + [[package]] name = "fastdivide" version = "0.4.1" @@ -4302,7 +4313,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=157cfdb52709e489cf1f3ce8e3042ed4ee8a524a#157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=973f49cde88a582fb65755cc572ebcf6fb93ccf7#973f49cde88a582fb65755cc572ebcf6fb93ccf7" dependencies = [ "prost 0.12.6", "serde", @@ -5409,6 +5420,21 @@ dependencies = [ "serde", ] +[[package]] +name = "jsonb" +version = "0.4.1" +source = "git+https://github.com/CookiePieWw/jsonb.git?rev=d0166c130fce903bf6c58643417a3173a6172d31#d0166c130fce903bf6c58643417a3173a6172d31" +dependencies = [ + "byteorder", + "fast-float", + "itoa", + "nom", + "ordered-float 
4.2.0", + "rand", + "ryu", + "serde_json", +] + [[package]] name = "jsonpath-rust" version = "0.5.1" @@ -8062,6 +8088,8 @@ dependencies = [ "chrono", "fallible-iterator", "postgres-protocol", + "serde", + "serde_json", ] [[package]] @@ -10400,6 +10428,7 @@ dependencies = [ "hyper 0.14.29", "influxdb_line_protocol", "itertools 0.10.5", + "jsonb", "lazy_static", "mime_guess", "mysql_async", @@ -10779,6 +10808,7 @@ dependencies = [ "hex", "iso8601", "itertools 0.10.5", + "jsonb", "lazy_static", "regex", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 93ea8db134..d412bf7e97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,10 +120,11 @@ etcd-client = { version = "0.13" } fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" +jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false } lazy_static = "1.4" meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" } mockall = "0.11.4" diff --git a/config/config.md b/config/config.md index f0ee9e54f8..a792be5de5 100644 --- a/config/config.md +++ b/config/config.md @@ -68,6 +68,7 @@ | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | | `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_period` | String | `10s` | Duration for fsyncing log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. | | `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.
**It's only used when the provider is `kafka`**. | | `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.
Set to `true` to automatically create topics for WAL.
Otherwise, use topics named `topic_name_prefix_[0..num_topics)` | | `wal.num_topics` | Integer | `64` | Number of topics.
**It's only used when the provider is `kafka`**. | @@ -381,6 +382,7 @@ | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | | `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_period` | String | `10s` | Duration for fsyncing log files.
**It's only used when the provider is `raft_engine`**. | +| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. | | `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.
**It's only used when the provider is `kafka`**. | | `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.
Warning: Kafka has a default limit of 1MB per message in a topic.
**It's only used when the provider is `kafka`**. | | `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.
**It's only used when the provider is `kafka`**. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 07c1df3e2a..14fbf914e7 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -170,6 +170,9 @@ prefill_log_files = false ## **It's only used when the provider is `raft_engine`**. sync_period = "10s" +## Parallelism during WAL recovery. +recovery_parallelism = 2 + ## The Kafka broker endpoints. ## **It's only used when the provider is `kafka`**. broker_endpoints = ["127.0.0.1:9092"] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index f36c0e2904..f7c7b2af29 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -174,6 +174,9 @@ prefill_log_files = false ## **It's only used when the provider is `raft_engine`**. sync_period = "10s" +## Parallelism during WAL recovery. +recovery_parallelism = 2 + ## The Kafka broker endpoints. ## **It's only used when the provider is `kafka`**. broker_endpoints = ["127.0.0.1:9092"] diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index d8e9c524d8..101cae8802 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -42,7 +42,8 @@ use greptime_proto::v1::greptime_request::Request; use greptime_proto::v1::query_request::Query; use greptime_proto::v1::value::ValueData; use greptime_proto::v1::{ - ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType, + ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest, + Row, SemanticType, }; use paste::paste; use snafu::prelude::*; @@ -103,7 +104,17 @@ impl From for ConcreteDataType { ColumnDataType::Uint64 => ConcreteDataType::uint64_datatype(), ColumnDataType::Float32 => ConcreteDataType::float32_datatype(), ColumnDataType::Float64 => ConcreteDataType::float64_datatype(), - ColumnDataType::Binary => ConcreteDataType::binary_datatype(), + ColumnDataType::Binary => { + if let 
Some(TypeExt::JsonType(_)) = datatype_wrapper + .datatype_ext + .as_ref() + .and_then(|datatype_ext| datatype_ext.type_ext.as_ref()) + { + ConcreteDataType::json_datatype() + } else { + ConcreteDataType::binary_datatype() + } + } ColumnDataType::String => ConcreteDataType::string_datatype(), ColumnDataType::Date => ConcreteDataType::date_datatype(), ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(), @@ -236,7 +247,7 @@ impl TryFrom for ColumnDataTypeWrapper { ConcreteDataType::UInt64(_) => ColumnDataType::Uint64, ConcreteDataType::Float32(_) => ColumnDataType::Float32, ConcreteDataType::Float64(_) => ColumnDataType::Float64, - ConcreteDataType::Binary(_) => ColumnDataType::Binary, + ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary, ConcreteDataType::String(_) => ColumnDataType::String, ConcreteDataType::Date(_) => ColumnDataType::Date, ConcreteDataType::DateTime(_) => ColumnDataType::Datetime, @@ -276,6 +287,16 @@ impl TryFrom for ColumnDataTypeWrapper { })), }) } + ColumnDataType::Binary => { + if datatype == ConcreteDataType::json_datatype() { + // Json is the same as binary in proto. The extension marks that the binary in proto is actually a json. 
+ Some(ColumnDataTypeExtension { + type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())), + }) + } else { + None + } + } _ => None, }; Ok(Self { @@ -649,7 +670,8 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) -> ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) - | ConcreteDataType::Duration(_) => { + | ConcreteDataType::Duration(_) + | ConcreteDataType::Json(_) => { unreachable!() } } @@ -813,7 +835,8 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec< ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) - | ConcreteDataType::Duration(_) => { + | ConcreteDataType::Duration(_) + | ConcreteDataType::Json(_) => { unreachable!() } } @@ -831,7 +854,13 @@ pub fn is_column_type_value_eq( expect_type: &ConcreteDataType, ) -> bool { ColumnDataTypeWrapper::try_new(type_value, type_extension) - .map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type) + .map(|wrapper| { + let datatype = ConcreteDataType::from(wrapper); + (datatype == *expect_type) + // Json type leverages binary type in pb, so this is valid. 
+ || (datatype == ConcreteDataType::binary_datatype() + && *expect_type == ConcreteDataType::json_datatype()) + }) .unwrap_or(false) } diff --git a/src/catalog/src/kvbackend/manager.rs b/src/catalog/src/kvbackend/manager.rs index d39e1abdb9..feb5e31d09 100644 --- a/src/catalog/src/kvbackend/manager.rs +++ b/src/catalog/src/kvbackend/manager.rs @@ -36,6 +36,7 @@ use futures_util::{StreamExt, TryStreamExt}; use meta_client::client::MetaClient; use moka::sync::Cache; use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef}; +use session::context::{Channel, QueryContext}; use snafu::prelude::*; use table::dist_table::DistTable; use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME}; @@ -152,7 +153,11 @@ impl CatalogManager for KvBackendCatalogManager { Ok(keys) } - async fn schema_names(&self, catalog: &str) -> Result> { + async fn schema_names( + &self, + catalog: &str, + query_ctx: Option<&QueryContext>, + ) -> Result> { let stream = self .table_metadata_manager .schema_manager() @@ -163,12 +168,17 @@ impl CatalogManager for KvBackendCatalogManager { .map_err(BoxedError::new) .context(ListSchemasSnafu { catalog })?; - keys.extend(self.system_catalog.schema_names()); + keys.extend(self.system_catalog.schema_names(query_ctx)); Ok(keys.into_iter().collect()) } - async fn table_names(&self, catalog: &str, schema: &str) -> Result> { + async fn table_names( + &self, + catalog: &str, + schema: &str, + query_ctx: Option<&QueryContext>, + ) -> Result> { let stream = self .table_metadata_manager .table_name_manager() @@ -181,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager { .into_iter() .map(|(k, _)| k) .collect::>(); - tables.extend_from_slice(&self.system_catalog.table_names(schema)); + tables.extend_from_slice(&self.system_catalog.table_names(schema, query_ctx)); Ok(tables.into_iter().collect()) } @@ -194,8 +204,13 @@ impl CatalogManager for KvBackendCatalogManager { .context(TableMetadataManagerSnafu) } - async fn schema_exists(&self, 
catalog: &str, schema: &str) -> Result { - if self.system_catalog.schema_exists(schema) { + async fn schema_exists( + &self, + catalog: &str, + schema: &str, + query_ctx: Option<&QueryContext>, + ) -> Result { + if self.system_catalog.schema_exists(schema, query_ctx) { return Ok(true); } @@ -206,8 +221,14 @@ impl CatalogManager for KvBackendCatalogManager { .context(TableMetadataManagerSnafu) } - async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result { - if self.system_catalog.table_exists(schema, table) { + async fn table_exists( + &self, + catalog: &str, + schema: &str, + table: &str, + query_ctx: Option<&QueryContext>, + ) -> Result { + if self.system_catalog.table_exists(schema, table, query_ctx) { return Ok(true); } @@ -225,10 +246,12 @@ impl CatalogManager for KvBackendCatalogManager { catalog_name: &str, schema_name: &str, table_name: &str, + query_ctx: Option<&QueryContext>, ) -> Result> { - if let Some(table) = self - .system_catalog - .table(catalog_name, schema_name, table_name) + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); + if let Some(table) = + self.system_catalog + .table(catalog_name, schema_name, table_name, query_ctx) { return Ok(Some(table)); } @@ -236,23 +259,45 @@ impl CatalogManager for KvBackendCatalogManager { let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu { name: "table_cache", })?; - - table_cache + if let Some(table) = table_cache .get_by_ref(&TableName { catalog_name: catalog_name.to_string(), schema_name: schema_name.to_string(), table_name: table_name.to_string(), }) .await - .context(GetTableCacheSnafu) + .context(GetTableCacheSnafu)? 
+ { + return Ok(Some(table)); + } + + if channel == Channel::Postgres { + // fall back to pg_catalog + if let Some(table) = + self.system_catalog + .table(catalog_name, PG_CATALOG_NAME, table_name, query_ctx) + { + return Ok(Some(table)); + } + } + + return Ok(None); } - fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result> { + fn tables<'a>( + &'a self, + catalog: &'a str, + schema: &'a str, + query_ctx: Option<&'a QueryContext>, + ) -> BoxStream<'a, Result> { let sys_tables = try_stream!({ // System tables - let sys_table_names = self.system_catalog.table_names(schema); + let sys_table_names = self.system_catalog.table_names(schema, query_ctx); for table_name in sys_table_names { - if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) { + if let Some(table) = + self.system_catalog + .table(catalog, schema, &table_name, query_ctx) + { yield table; } } @@ -320,18 +365,27 @@ struct SystemCatalog { } impl SystemCatalog { - // TODO(j0hn50n133): remove the duplicated hard-coded table names logic - fn schema_names(&self) -> Vec { - vec![ - INFORMATION_SCHEMA_NAME.to_string(), - PG_CATALOG_NAME.to_string(), - ] + fn schema_names(&self, query_ctx: Option<&QueryContext>) -> Vec { + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); + match channel { + // pg_catalog only visible under postgres protocol + Channel::Postgres => vec![ + INFORMATION_SCHEMA_NAME.to_string(), + PG_CATALOG_NAME.to_string(), + ], + _ => { + vec![INFORMATION_SCHEMA_NAME.to_string()] + } + } } - fn table_names(&self, schema: &str) -> Vec { + fn table_names(&self, schema: &str, query_ctx: Option<&QueryContext>) -> Vec { + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); match schema { INFORMATION_SCHEMA_NAME => self.information_schema_provider.table_names(), - PG_CATALOG_NAME => self.pg_catalog_provider.table_names(), + PG_CATALOG_NAME if channel == Channel::Postgres => { + 
self.pg_catalog_provider.table_names() + } DEFAULT_SCHEMA_NAME => { vec![NUMBERS_TABLE_NAME.to_string()] } @@ -339,23 +393,35 @@ impl SystemCatalog { } } - fn schema_exists(&self, schema: &str) -> bool { - schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME + fn schema_exists(&self, schema: &str, query_ctx: Option<&QueryContext>) -> bool { + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); + match channel { + Channel::Postgres => schema == PG_CATALOG_NAME || schema == INFORMATION_SCHEMA_NAME, + _ => schema == INFORMATION_SCHEMA_NAME, + } } - fn table_exists(&self, schema: &str, table: &str) -> bool { + fn table_exists(&self, schema: &str, table: &str, query_ctx: Option<&QueryContext>) -> bool { + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); if schema == INFORMATION_SCHEMA_NAME { self.information_schema_provider.table(table).is_some() } else if schema == DEFAULT_SCHEMA_NAME { table == NUMBERS_TABLE_NAME - } else if schema == PG_CATALOG_NAME { + } else if schema == PG_CATALOG_NAME && channel == Channel::Postgres { self.pg_catalog_provider.table(table).is_some() } else { false } } - fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option { + fn table( + &self, + catalog: &str, + schema: &str, + table_name: &str, + query_ctx: Option<&QueryContext>, + ) -> Option { + let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel()); if schema == INFORMATION_SCHEMA_NAME { let information_schema_provider = self.catalog_cache.get_with_by_ref(catalog, move || { @@ -366,7 +432,7 @@ impl SystemCatalog { )) }); information_schema_provider.table(table_name) - } else if schema == PG_CATALOG_NAME { + } else if schema == PG_CATALOG_NAME && channel == Channel::Postgres { if catalog == DEFAULT_CATALOG_NAME { self.pg_catalog_provider.table(table_name) } else { diff --git a/src/catalog/src/lib.rs b/src/catalog/src/lib.rs index 394500bb75..3444c0e089 100644 --- a/src/catalog/src/lib.rs +++ 
b/src/catalog/src/lib.rs @@ -20,8 +20,10 @@ use std::fmt::{Debug, Formatter}; use std::sync::Arc; use api::v1::CreateTableExpr; +use common_catalog::consts::{INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME}; use futures::future::BoxFuture; use futures_util::stream::BoxStream; +use session::context::QueryContext; use table::metadata::TableId; use table::TableRef; @@ -44,15 +46,35 @@ pub trait CatalogManager: Send + Sync { async fn catalog_names(&self) -> Result>; - async fn schema_names(&self, catalog: &str) -> Result>; + async fn schema_names( + &self, + catalog: &str, + query_ctx: Option<&QueryContext>, + ) -> Result>; - async fn table_names(&self, catalog: &str, schema: &str) -> Result>; + async fn table_names( + &self, + catalog: &str, + schema: &str, + query_ctx: Option<&QueryContext>, + ) -> Result>; async fn catalog_exists(&self, catalog: &str) -> Result; - async fn schema_exists(&self, catalog: &str, schema: &str) -> Result; + async fn schema_exists( + &self, + catalog: &str, + schema: &str, + query_ctx: Option<&QueryContext>, + ) -> Result; - async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result; + async fn table_exists( + &self, + catalog: &str, + schema: &str, + table: &str, + query_ctx: Option<&QueryContext>, + ) -> Result; /// Returns the table by catalog, schema and table name. async fn table( @@ -60,10 +82,25 @@ pub trait CatalogManager: Send + Sync { catalog: &str, schema: &str, table_name: &str, + query_ctx: Option<&QueryContext>, ) -> Result>; /// Returns all tables with a stream by catalog and schema. - fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result>; + fn tables<'a>( + &'a self, + catalog: &'a str, + schema: &'a str, + query_ctx: Option<&'a QueryContext>, + ) -> BoxStream<'a, Result>; + + /// Check if `schema` is a reserved schema name + fn is_reserved_schema_name(&self, schema: &str) -> bool { + // We have to check whether a schema name is reserved before create schema. 
+ // We need this rather than use schema_exists directly because `pg_catalog` is + // only visible via postgres protocol. So if we don't check, a mysql client may + // create a schema named `pg_catalog` which is somehow malformed. + schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME + } } pub type CatalogManagerRef = Arc; diff --git a/src/catalog/src/memory/manager.rs b/src/catalog/src/memory/manager.rs index 3c27d4736b..62ff863c46 100644 --- a/src/catalog/src/memory/manager.rs +++ b/src/catalog/src/memory/manager.rs @@ -26,6 +26,7 @@ use common_catalog::consts::{ use common_meta::key::flow::FlowMetadataManager; use common_meta::kv_backend::memory::MemoryKvBackend; use futures_util::stream::BoxStream; +use session::context::QueryContext; use snafu::OptionExt; use table::TableRef; @@ -53,7 +54,11 @@ impl CatalogManager for MemoryCatalogManager { Ok(self.catalogs.read().unwrap().keys().cloned().collect()) } - async fn schema_names(&self, catalog: &str) -> Result> { + async fn schema_names( + &self, + catalog: &str, + _query_ctx: Option<&QueryContext>, + ) -> Result> { Ok(self .catalogs .read() @@ -67,7 +72,12 @@ impl CatalogManager for MemoryCatalogManager { .collect()) } - async fn table_names(&self, catalog: &str, schema: &str) -> Result> { + async fn table_names( + &self, + catalog: &str, + schema: &str, + _query_ctx: Option<&QueryContext>, + ) -> Result> { Ok(self .catalogs .read() @@ -87,11 +97,22 @@ impl CatalogManager for MemoryCatalogManager { self.catalog_exist_sync(catalog) } - async fn schema_exists(&self, catalog: &str, schema: &str) -> Result { + async fn schema_exists( + &self, + catalog: &str, + schema: &str, + _query_ctx: Option<&QueryContext>, + ) -> Result { self.schema_exist_sync(catalog, schema) } - async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result { + async fn table_exists( + &self, + catalog: &str, + schema: &str, + table: &str, + _query_ctx: Option<&QueryContext>, + ) -> Result { let catalogs = 
self.catalogs.read().unwrap(); Ok(catalogs .get(catalog) @@ -108,6 +129,7 @@ impl CatalogManager for MemoryCatalogManager { catalog: &str, schema: &str, table_name: &str, + _query_ctx: Option<&QueryContext>, ) -> Result> { let result = try { self.catalogs @@ -121,7 +143,12 @@ impl CatalogManager for MemoryCatalogManager { Ok(result) } - fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result> { + fn tables<'a>( + &'a self, + catalog: &'a str, + schema: &'a str, + _query_ctx: Option<&QueryContext>, + ) -> BoxStream<'a, Result> { let catalogs = self.catalogs.read().unwrap(); let Some(schemas) = catalogs.get(catalog) else { @@ -371,11 +398,12 @@ mod tests { DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_NAME, + None, ) .await .unwrap() .unwrap(); - let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME); + let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, None); let tables = stream.try_collect::>().await.unwrap(); assert_eq!(tables.len(), 1); assert_eq!( @@ -384,7 +412,12 @@ mod tests { ); assert!(catalog_list - .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists") + .table( + DEFAULT_CATALOG_NAME, + DEFAULT_SCHEMA_NAME, + "not_exists", + None + ) .await .unwrap() .is_none()); @@ -411,7 +444,7 @@ mod tests { }; catalog.register_table_sync(register_table_req).unwrap(); assert!(catalog - .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name) + .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None) .await .unwrap() .is_some()); @@ -423,7 +456,7 @@ mod tests { }; catalog.deregister_table_sync(deregister_table_req).unwrap(); assert!(catalog - .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name) + .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None) .await .unwrap() .is_none()); diff --git a/src/catalog/src/system_schema/information_schema/columns.rs b/src/catalog/src/system_schema/information_schema/columns.rs index b291e02341..152fc33a04 
100644 --- a/src/catalog/src/system_schema/information_schema/columns.rs +++ b/src/catalog/src/system_schema/information_schema/columns.rs @@ -257,8 +257,8 @@ impl InformationSchemaColumnsBuilder { .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? { let keys = &table.table_info().meta.primary_key_indices; diff --git a/src/catalog/src/system_schema/information_schema/key_column_usage.rs b/src/catalog/src/system_schema/information_schema/key_column_usage.rs index f7cedfee2a..56713dabba 100644 --- a/src/catalog/src/system_schema/information_schema/key_column_usage.rs +++ b/src/catalog/src/system_schema/information_schema/key_column_usage.rs @@ -212,8 +212,8 @@ impl InformationSchemaKeyColumnUsageBuilder { .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? 
{ let mut primary_constraints = vec![]; diff --git a/src/catalog/src/system_schema/information_schema/partitions.rs b/src/catalog/src/system_schema/information_schema/partitions.rs index 3e49a2ddbd..93d6067990 100644 --- a/src/catalog/src/system_schema/information_schema/partitions.rs +++ b/src/catalog/src/system_schema/information_schema/partitions.rs @@ -240,9 +240,9 @@ impl InformationSchemaPartitionsBuilder { let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { let table_info_stream = catalog_manager - .tables(&catalog_name, &schema_name) + .tables(&catalog_name, &schema_name, None) .try_filter_map(|t| async move { let table_info = t.table_info(); if table_info.table_type == TableType::Temporary { diff --git a/src/catalog/src/system_schema/information_schema/region_peers.rs b/src/catalog/src/system_schema/information_schema/region_peers.rs index 4bcc281447..5496879af0 100644 --- a/src/catalog/src/system_schema/information_schema/region_peers.rs +++ b/src/catalog/src/system_schema/information_schema/region_peers.rs @@ -176,9 +176,9 @@ impl InformationSchemaRegionPeersBuilder { let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? 
{ let table_id_stream = catalog_manager - .tables(&catalog_name, &schema_name) + .tables(&catalog_name, &schema_name, None) .try_filter_map(|t| async move { let table_info = t.table_info(); if table_info.table_type == TableType::Temporary { diff --git a/src/catalog/src/system_schema/information_schema/schemata.rs b/src/catalog/src/system_schema/information_schema/schemata.rs index ca594b61a6..02d6e606e7 100644 --- a/src/catalog/src/system_schema/information_schema/schemata.rs +++ b/src/catalog/src/system_schema/information_schema/schemata.rs @@ -171,7 +171,7 @@ impl InformationSchemaSchemataBuilder { let table_metadata_manager = utils::table_meta_manager(&self.catalog_manager)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { let opts = if let Some(table_metadata_manager) = &table_metadata_manager { table_metadata_manager .schema_manager() diff --git a/src/catalog/src/system_schema/information_schema/table_constraints.rs b/src/catalog/src/system_schema/information_schema/table_constraints.rs index ac3d468c36..50e2469946 100644 --- a/src/catalog/src/system_schema/information_schema/table_constraints.rs +++ b/src/catalog/src/system_schema/information_schema/table_constraints.rs @@ -176,8 +176,8 @@ impl InformationSchemaTableConstraintsBuilder { .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? 
{ let keys = &table.table_info().meta.primary_key_indices; diff --git a/src/catalog/src/system_schema/information_schema/tables.rs b/src/catalog/src/system_schema/information_schema/tables.rs index 638ff073ef..976c920b9a 100644 --- a/src/catalog/src/system_schema/information_schema/tables.rs +++ b/src/catalog/src/system_schema/information_schema/tables.rs @@ -234,8 +234,8 @@ impl InformationSchemaTablesBuilder { .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? { let table_info = table.table_info(); diff --git a/src/catalog/src/system_schema/information_schema/views.rs b/src/catalog/src/system_schema/information_schema/views.rs index daf41f5e59..082e6c2ff5 100644 --- a/src/catalog/src/system_schema/information_schema/views.rs +++ b/src/catalog/src/system_schema/information_schema/views.rs @@ -192,8 +192,8 @@ impl InformationSchemaViewsBuilder { .context(CastManagerSnafu)? .view_info_cache()?; - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager.schema_names(&catalog_name, None).await? { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? 
{ let table_info = table.table_info(); diff --git a/src/catalog/src/system_schema/pg_catalog.rs b/src/catalog/src/system_schema/pg_catalog.rs index 36b7d7119e..bc9c246e25 100644 --- a/src/catalog/src/system_schema/pg_catalog.rs +++ b/src/catalog/src/system_schema/pg_catalog.rs @@ -18,15 +18,16 @@ mod pg_namespace; mod table_names; use std::collections::HashMap; -use std::sync::{Arc, Weak}; +use std::sync::{Arc, LazyLock, Weak}; -use common_catalog::consts::{self, PG_CATALOG_NAME}; +use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, PG_CATALOG_NAME}; use datatypes::schema::ColumnSchema; use lazy_static::lazy_static; use paste::paste; use pg_catalog_memory_table::get_schema_columns; use pg_class::PGClass; use pg_namespace::PGNamespace; +use session::context::{Channel, QueryContext}; use table::TableRef; pub use table_names::*; @@ -142,3 +143,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider { &self.catalog_name } } + +/// Provide query context to call the [`CatalogManager`]'s method. 
+static PG_QUERY_CTX: LazyLock = LazyLock::new(|| { + QueryContext::with_channel(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, Channel::Postgres) +}); + +fn query_ctx() -> Option<&'static QueryContext> { + Some(&PG_QUERY_CTX) +} diff --git a/src/catalog/src/system_schema/pg_catalog/pg_class.rs b/src/catalog/src/system_schema/pg_catalog/pg_class.rs index d32d56d315..30476cc253 100644 --- a/src/catalog/src/system_schema/pg_catalog/pg_class.rs +++ b/src/catalog/src/system_schema/pg_catalog/pg_class.rs @@ -32,7 +32,7 @@ use store_api::storage::ScanRequest; use table::metadata::TableType; use super::pg_namespace::oid_map::PGNamespaceOidMapRef; -use super::{OID_COLUMN_NAME, PG_CLASS}; +use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS}; use crate::error::{ CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, }; @@ -202,8 +202,11 @@ impl PGClassBuilder { .upgrade() .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name); + for schema_name in catalog_manager + .schema_names(&catalog_name, query_ctx()) + .await? + { + let mut stream = catalog_manager.tables(&catalog_name, &schema_name, query_ctx()); while let Some(table) = stream.try_next().await? 
{ let table_info = table.table_info(); self.add_class( diff --git a/src/catalog/src/system_schema/pg_catalog/pg_namespace.rs b/src/catalog/src/system_schema/pg_catalog/pg_namespace.rs index e78534b37d..c6db980de7 100644 --- a/src/catalog/src/system_schema/pg_catalog/pg_namespace.rs +++ b/src/catalog/src/system_schema/pg_catalog/pg_namespace.rs @@ -31,7 +31,7 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef}; use snafu::{OptionExt, ResultExt}; use store_api::storage::ScanRequest; -use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE}; +use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE}; use crate::error::{ CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, }; @@ -180,7 +180,10 @@ impl PGNamespaceBuilder { .upgrade() .context(UpgradeWeakCatalogManagerRefSnafu)?; let predicates = Predicates::from_scan_request(&request); - for schema_name in catalog_manager.schema_names(&catalog_name).await? { + for schema_name in catalog_manager + .schema_names(&catalog_name, query_ctx()) + .await? 
+ { self.add_namespace(&predicates, &schema_name); } self.finish() diff --git a/src/catalog/src/table_source.rs b/src/catalog/src/table_source.rs index 220cc3d5ec..d6d81fa134 100644 --- a/src/catalog/src/table_source.rs +++ b/src/catalog/src/table_source.rs @@ -23,7 +23,7 @@ use datafusion::datasource::view::ViewTable; use datafusion::datasource::{provider_as_source, TableProvider}; use datafusion::logical_expr::TableSource; use itertools::Itertools; -use session::context::QueryContext; +use session::context::QueryContextRef; use snafu::{ensure, OptionExt, ResultExt}; use table::metadata::TableType; use table::table::adapter::DfTableProviderAdapter; @@ -45,6 +45,7 @@ pub struct DfTableSourceProvider { disallow_cross_catalog_query: bool, default_catalog: String, default_schema: String, + query_ctx: QueryContextRef, plan_decoder: SubstraitPlanDecoderRef, enable_ident_normalization: bool, } @@ -53,7 +54,7 @@ impl DfTableSourceProvider { pub fn new( catalog_manager: CatalogManagerRef, disallow_cross_catalog_query: bool, - query_ctx: &QueryContext, + query_ctx: QueryContextRef, plan_decoder: SubstraitPlanDecoderRef, enable_ident_normalization: bool, ) -> Self { @@ -63,6 +64,7 @@ impl DfTableSourceProvider { resolved_tables: HashMap::new(), default_catalog: query_ctx.current_catalog().to_owned(), default_schema: query_ctx.current_schema(), + query_ctx, plan_decoder, enable_ident_normalization, } @@ -71,8 +73,7 @@ impl DfTableSourceProvider { pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result { if self.disallow_cross_catalog_query { match &table_ref { - TableReference::Bare { .. } => (), - TableReference::Partial { .. } => {} + TableReference::Bare { .. } | TableReference::Partial { .. } => {} TableReference::Full { catalog, schema, .. } => { @@ -107,7 +108,7 @@ impl DfTableSourceProvider { let table = self .catalog_manager - .table(catalog_name, schema_name, table_name) + .table(catalog_name, schema_name, table_name, Some(&self.query_ctx)) .await? 
.with_context(|| TableNotExistSnafu { table: format_full_table_name(catalog_name, schema_name, table_name), @@ -210,12 +211,12 @@ mod tests { #[test] fn test_validate_table_ref() { - let query_ctx = &QueryContext::with("greptime", "public"); + let query_ctx = Arc::new(QueryContext::with("greptime", "public")); let table_provider = DfTableSourceProvider::new( MemoryCatalogManager::with_default_setup(), true, - query_ctx, + query_ctx.clone(), DummyDecoder::arc(), true, ); @@ -308,7 +309,7 @@ mod tests { #[tokio::test] async fn test_resolve_view() { - let query_ctx = &QueryContext::with("greptime", "public"); + let query_ctx = Arc::new(QueryContext::with("greptime", "public")); let backend = Arc::new(MemoryKvBackend::default()); let layered_cache_builder = LayeredCacheRegistryBuilder::default() .add_cache_registry(CacheRegistryBuilder::default().build()); @@ -344,8 +345,13 @@ mod tests { .await .unwrap(); - let mut table_provider = - DfTableSourceProvider::new(catalog_manager, true, query_ctx, MockDecoder::arc(), true); + let mut table_provider = DfTableSourceProvider::new( + catalog_manager, + true, + query_ctx.clone(), + MockDecoder::arc(), + true, + ); // View not found let table_ref = TableReference::bare("not_exists_view"); diff --git a/src/catalog/src/table_source/dummy_catalog.rs b/src/catalog/src/table_source/dummy_catalog.rs index 602a5c9cbe..09a703e4dd 100644 --- a/src/catalog/src/table_source/dummy_catalog.rs +++ b/src/catalog/src/table_source/dummy_catalog.rs @@ -112,7 +112,7 @@ impl SchemaProvider for DummySchemaProvider { async fn table(&self, name: &str) -> datafusion::error::Result>> { let table = self .catalog_manager - .table(&self.catalog_name, &self.schema_name, name) + .table(&self.catalog_name, &self.schema_name, name, None) .await? 
.with_context(|| TableNotExistSnafu { table: format_full_table_name(&self.catalog_name, &self.schema_name, name), diff --git a/src/cmd/tests/load_config_test.rs b/src/cmd/tests/load_config_test.rs index 199e23717d..78d0786f7c 100644 --- a/src/cmd/tests/load_config_test.rs +++ b/src/cmd/tests/load_config_test.rs @@ -65,6 +65,7 @@ fn test_load_datanode_example_config() { wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig { dir: Some("/tmp/greptimedb/wal".to_string()), sync_period: Some(Duration::from_secs(10)), + recovery_parallelism: 2, ..Default::default() }), storage: StorageConfig { @@ -207,6 +208,7 @@ fn test_load_standalone_example_config() { wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig { dir: Some("/tmp/greptimedb/wal".to_string()), sync_period: Some(Duration::from_secs(10)), + recovery_parallelism: 2, ..Default::default() }), region_engine: vec![ diff --git a/src/common/base/Cargo.toml b/src/common/base/Cargo.toml index 38f677dd3f..5afbc3b88c 100644 --- a/src/common/base/Cargo.toml +++ b/src/common/base/Cargo.toml @@ -9,10 +9,12 @@ workspace = true [dependencies] anymap = "1.0.0-beta.2" +async-trait.workspace = true bitvec = "1.0" bytes.workspace = true common-error.workspace = true common-macro.workspace = true +futures.workspace = true paste = "1.0" serde = { version = "1.0", features = ["derive"] } snafu.workspace = true diff --git a/src/common/base/src/buffer.rs b/src/common/base/src/buffer.rs deleted file mode 100644 index bce39842e1..0000000000 --- a/src/common/base/src/buffer.rs +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::any::Any; -use std::io::{Read, Write}; - -use bytes::{Buf, BufMut, BytesMut}; -use common_error::ext::ErrorExt; -use common_macro::stack_trace_debug; -use paste::paste; -use snafu::{ensure, Location, ResultExt, Snafu}; - -#[derive(Snafu)] -#[snafu(visibility(pub))] -#[stack_trace_debug] -pub enum Error { - #[snafu(display( - "Destination buffer overflow, src_len: {}, dst_len: {}", - src_len, - dst_len - ))] - Overflow { - src_len: usize, - dst_len: usize, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Buffer underflow"))] - Underflow { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("IO operation reach EOF"))] - Eof { - #[snafu(source)] - error: std::io::Error, - #[snafu(implicit)] - location: Location, - }, -} - -pub type Result = std::result::Result; - -impl ErrorExt for Error { - fn as_any(&self) -> &dyn Any { - self - } -} - -macro_rules! impl_read_le { - ( $($num_ty: ty), *) => { - $( - paste!{ - // TODO(hl): default implementation requires allocating a - // temp buffer. maybe use more efficient impls in concrete buffers. - // see https://github.com/GrepTimeTeam/greptimedb/pull/97#discussion_r930798941 - fn [](&mut self) -> Result<$num_ty> { - let mut buf = [0u8; std::mem::size_of::<$num_ty>()]; - self.read_to_slice(&mut buf)?; - Ok($num_ty::from_le_bytes(buf)) - } - - fn [](&mut self) -> Result<$num_ty> { - let mut buf = [0u8; std::mem::size_of::<$num_ty>()]; - self.peek_to_slice(&mut buf)?; - Ok($num_ty::from_le_bytes(buf)) - } - } - )* - } -} - -macro_rules! 
impl_write_le { - ( $($num_ty: ty), *) => { - $( - paste!{ - fn [](&mut self, n: $num_ty) -> Result<()> { - self.write_from_slice(&n.to_le_bytes())?; - Ok(()) - } - } - )* - } -} - -pub trait Buffer { - /// Returns remaining data size for read. - fn remaining_size(&self) -> usize; - - /// Returns true if buffer has no data for read. - fn is_empty(&self) -> bool { - self.remaining_size() == 0 - } - - /// Peeks data into dst. This method should not change internal cursor, - /// invoke `advance_by` if needed. - /// # Panics - /// This method **may** panic if buffer does not have enough data to be copied to dst. - fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()>; - - /// Reads data into dst. This method will change internal cursor. - /// # Panics - /// This method **may** panic if buffer does not have enough data to be copied to dst. - fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> { - self.peek_to_slice(dst)?; - self.advance_by(dst.len()); - Ok(()) - } - - /// Advances internal cursor for next read. - /// # Panics - /// This method **may** panic if the offset after advancing exceeds the length of underlying buffer. - fn advance_by(&mut self, by: usize); - - impl_read_le![u8, i8, u16, i16, u32, i32, u64, i64, f32, f64]; -} - -macro_rules! 
impl_buffer_for_bytes { - ( $($buf_ty:ty), *) => { - $( - impl Buffer for $buf_ty { - fn remaining_size(&self) -> usize{ - self.len() - } - - fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> { - let dst_len = dst.len(); - ensure!(self.remaining() >= dst.len(), OverflowSnafu { - src_len: self.remaining_size(), - dst_len, - } - ); - dst.copy_from_slice(&self[0..dst_len]); - Ok(()) - } - - #[inline] - fn advance_by(&mut self, by: usize) { - self.advance(by); - } - } - )* - }; -} - -impl_buffer_for_bytes![bytes::Bytes, bytes::BytesMut]; - -impl Buffer for &[u8] { - fn remaining_size(&self) -> usize { - self.len() - } - - fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> { - let dst_len = dst.len(); - ensure!( - self.len() >= dst.len(), - OverflowSnafu { - src_len: self.remaining_size(), - dst_len, - } - ); - dst.copy_from_slice(&self[0..dst_len]); - Ok(()) - } - - fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> { - ensure!( - self.len() >= dst.len(), - OverflowSnafu { - src_len: self.remaining_size(), - dst_len: dst.len(), - } - ); - self.read_exact(dst).context(EofSnafu) - } - - fn advance_by(&mut self, by: usize) { - *self = &self[by..]; - } -} - -/// Mutable buffer. 
-pub trait BufferMut { - fn as_slice(&self) -> &[u8]; - - fn write_from_slice(&mut self, src: &[u8]) -> Result<()>; - - impl_write_le![i8, u8, i16, u16, i32, u32, i64, u64, f32, f64]; -} - -impl BufferMut for BytesMut { - fn as_slice(&self) -> &[u8] { - self - } - - fn write_from_slice(&mut self, src: &[u8]) -> Result<()> { - self.put_slice(src); - Ok(()) - } -} - -impl BufferMut for &mut [u8] { - fn as_slice(&self) -> &[u8] { - self - } - - fn write_from_slice(&mut self, src: &[u8]) -> Result<()> { - // see std::io::Write::write_all - // https://doc.rust-lang.org/src/std/io/impls.rs.html#363 - self.write_all(src).map_err(|_| { - OverflowSnafu { - src_len: src.len(), - dst_len: self.as_slice().len(), - } - .build() - }) - } -} - -impl BufferMut for Vec { - fn as_slice(&self) -> &[u8] { - self - } - - fn write_from_slice(&mut self, src: &[u8]) -> Result<()> { - self.extend_from_slice(src); - Ok(()) - } -} diff --git a/src/common/base/src/bytes.rs b/src/common/base/src/bytes.rs index aec2dfd9ed..ea08a9f0b0 100644 --- a/src/common/base/src/bytes.rs +++ b/src/common/base/src/bytes.rs @@ -44,6 +44,12 @@ impl From> for Bytes { } } +impl From for Vec { + fn from(bytes: Bytes) -> Vec { + bytes.0.into() + } +} + impl Deref for Bytes { type Target = [u8]; diff --git a/src/common/base/src/lib.rs b/src/common/base/src/lib.rs index 539da1ba8c..62a801d946 100644 --- a/src/common/base/src/lib.rs +++ b/src/common/base/src/lib.rs @@ -13,9 +13,9 @@ // limitations under the License. 
pub mod bit_vec; -pub mod buffer; pub mod bytes; pub mod plugins; +pub mod range_read; #[allow(clippy::all)] pub mod readable_size; pub mod secrets; diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs new file mode 100644 index 0000000000..920b2e1f8c --- /dev/null +++ b/src/common/base/src/range_read.rs @@ -0,0 +1,80 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io; +use std::ops::Range; + +use async_trait::async_trait; +use bytes::{BufMut, Bytes}; +use futures::{AsyncReadExt, AsyncSeekExt}; + +/// `Metadata` contains the metadata of a source. +pub struct Metadata { + /// The length of the source in bytes. + pub content_length: u64, +} + +/// `RangeReader` reads a range of bytes from a source. +#[async_trait] +pub trait RangeReader: Send + Unpin { + /// Returns the metadata of the source. + async fn metadata(&mut self) -> io::Result; + + /// Reads the bytes in the given range. + async fn read(&mut self, range: Range) -> io::Result; + + /// Reads the bytes in the given range into the buffer. 
+ /// + /// Handles the buffer based on its capacity: + /// - If the buffer is insufficient to hold the bytes, it will either: + /// - Allocate additional space (e.g., for `Vec`) + /// - Panic (e.g., for `&mut [u8]`) + async fn read_into( + &mut self, + range: Range, + buf: &mut (impl BufMut + Send), + ) -> io::Result<()> { + let bytes = self.read(range).await?; + buf.put_slice(&bytes); + Ok(()) + } + + /// Reads the bytes in the given ranges. + async fn read_vec(&mut self, ranges: &[Range]) -> io::Result> { + let mut result = Vec::with_capacity(ranges.len()); + for range in ranges { + result.push(self.read(range.clone()).await?); + } + Ok(result) + } +} + +/// Implement `RangeReader` for a type that implements `AsyncRead + AsyncSeek`. +/// +/// TODO(zhongzc): It's a temporary solution for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`. +/// Until the codebase is fully ported to `RangeReader`, remove this implementation. +#[async_trait] +impl RangeReader for R { + async fn metadata(&mut self) -> io::Result { + let content_length = self.seek(io::SeekFrom::End(0)).await?; + Ok(Metadata { content_length }) + } + + async fn read(&mut self, range: Range) -> io::Result { + let mut buf = vec![0; (range.end - range.start) as usize]; + self.seek(io::SeekFrom::Start(range.start)).await?; + self.read_exact(&mut buf).await?; + Ok(Bytes::from(buf)) + } +} diff --git a/src/common/base/tests/buffer_tests.rs b/src/common/base/tests/buffer_tests.rs deleted file mode 100644 index a59bde64b6..0000000000 --- a/src/common/base/tests/buffer_tests.rs +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#![feature(assert_matches)] - -#[cfg(test)] -mod tests { - use std::assert_matches::assert_matches; - - use bytes::{Buf, Bytes, BytesMut}; - use common_base::buffer::Error::Overflow; - use common_base::buffer::{Buffer, BufferMut}; - use paste::paste; - - #[test] - pub fn test_buffer_read_write() { - let mut buf = BytesMut::with_capacity(16); - buf.write_u64_le(1234u64).unwrap(); - let result = buf.peek_u64_le().unwrap(); - assert_eq!(1234u64, result); - buf.advance_by(8); - - buf.write_from_slice("hello, world".as_bytes()).unwrap(); - let mut content = vec![0u8; 5]; - buf.peek_to_slice(&mut content).unwrap(); - let read = String::from_utf8_lossy(&content); - assert_eq!("hello", read); - buf.advance_by(5); - // after read, buffer should still have 7 bytes to read. - assert_eq!(7, buf.remaining()); - - let mut content = vec![0u8; 6]; - buf.read_to_slice(&mut content).unwrap(); - let read = String::from_utf8_lossy(&content); - assert_eq!(", worl", read); - // after read, buffer should still have 1 byte to read. - assert_eq!(1, buf.remaining()); - } - - #[test] - pub fn test_buffer_read() { - let mut bytes = Bytes::from_static("hello".as_bytes()); - assert_eq!(5, bytes.remaining_size()); - assert_eq!(b'h', bytes.peek_u8_le().unwrap()); - bytes.advance_by(1); - assert_eq!(4, bytes.remaining_size()); - } - - macro_rules! 
test_primitive_read_write { - ( $($num_ty: ty), *) => { - $( - paste!{ - #[test] - fn []() { - assert_eq!($num_ty::MAX,(&mut $num_ty::MAX.to_le_bytes() as &[u8]).[]().unwrap()); - assert_eq!($num_ty::MIN,(&mut $num_ty::MIN.to_le_bytes() as &[u8]).[]().unwrap()); - } - } - )* - } - } - - test_primitive_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64]; - - #[test] - pub fn test_read_write_from_slice_buffer() { - let mut buf = "hello".as_bytes(); - assert_eq!(104, buf.peek_u8_le().unwrap()); - buf.advance_by(1); - assert_eq!(101, buf.peek_u8_le().unwrap()); - buf.advance_by(1); - assert_eq!(108, buf.peek_u8_le().unwrap()); - buf.advance_by(1); - assert_eq!(108, buf.peek_u8_le().unwrap()); - buf.advance_by(1); - assert_eq!(111, buf.peek_u8_le().unwrap()); - buf.advance_by(1); - assert_matches!(buf.peek_u8_le(), Err(Overflow { .. })); - } - - #[test] - pub fn test_read_u8_from_slice_buffer() { - let mut buf = "hello".as_bytes(); - assert_eq!(104, buf.read_u8_le().unwrap()); - assert_eq!(101, buf.read_u8_le().unwrap()); - assert_eq!(108, buf.read_u8_le().unwrap()); - assert_eq!(108, buf.read_u8_le().unwrap()); - assert_eq!(111, buf.read_u8_le().unwrap()); - assert_matches!(buf.read_u8_le(), Err(Overflow { .. })); - } - - #[test] - pub fn test_read_write_numbers() { - let mut buf: Vec = vec![]; - buf.write_u64_le(1234).unwrap(); - assert_eq!(1234, (&buf[..]).read_u64_le().unwrap()); - - buf.write_u32_le(4242).unwrap(); - let mut p = &buf[..]; - assert_eq!(1234, p.read_u64_le().unwrap()); - assert_eq!(4242, p.read_u32_le().unwrap()); - } - - macro_rules! 
test_primitive_vec_read_write { - ( $($num_ty: ty), *) => { - $( - paste!{ - #[test] - fn []() { - let mut buf = vec![]; - let _ = buf.[]($num_ty::MAX).unwrap(); - assert_eq!($num_ty::MAX, buf.as_slice().[]().unwrap()); - } - } - )* - } - } - - test_primitive_vec_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64]; - - #[test] - pub fn test_peek_write_from_vec_buffer() { - let mut buf: Vec = vec![]; - buf.write_from_slice("hello".as_bytes()).unwrap(); - let mut slice = buf.as_slice(); - assert_eq!(104, slice.peek_u8_le().unwrap()); - slice.advance_by(1); - assert_eq!(101, slice.peek_u8_le().unwrap()); - slice.advance_by(1); - assert_eq!(108, slice.peek_u8_le().unwrap()); - slice.advance_by(1); - assert_eq!(108, slice.peek_u8_le().unwrap()); - slice.advance_by(1); - assert_eq!(111, slice.peek_u8_le().unwrap()); - slice.advance_by(1); - assert_matches!(slice.read_u8_le(), Err(Overflow { .. })); - } - - macro_rules! test_primitive_bytes_read_write { - ( $($num_ty: ty), *) => { - $( - paste!{ - #[test] - fn []() { - let mut bytes = bytes::Bytes::from($num_ty::MAX.to_le_bytes().to_vec()); - assert_eq!($num_ty::MAX, bytes.[]().unwrap()); - - let mut bytes = bytes::Bytes::from($num_ty::MIN.to_le_bytes().to_vec()); - assert_eq!($num_ty::MIN, bytes.[]().unwrap()); - } - } - )* - } - } - - test_primitive_bytes_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64]; - - #[test] - pub fn test_write_overflow() { - let mut buf = [0u8; 4]; - assert_matches!( - (&mut buf[..]).write_from_slice("hell".as_bytes()), - Ok { .. } - ); - - assert_matches!( - (&mut buf[..]).write_from_slice("hello".as_bytes()), - Err(common_base::buffer::Error::Overflow { .. 
}) - ); - } -} diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 2451b2bcbd..b2e9c5a98b 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -29,6 +29,7 @@ datafusion.workspace = true datatypes.workspace = true geohash = { version = "0.13", optional = true } h3o = { version = "0.6", optional = true } +jsonb.workspace = true num = "0.4" num-traits = "0.2" once_cell.workspace = true diff --git a/src/common/function/src/function_registry.rs b/src/common/function/src/function_registry.rs index ed863c16aa..46af3b7610 100644 --- a/src/common/function/src/function_registry.rs +++ b/src/common/function/src/function_registry.rs @@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef}; use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions}; use crate::scalars::date::DateFunction; use crate::scalars::expression::ExpressionFunction; +use crate::scalars::json::JsonFunction; use crate::scalars::matches::MatchesFunction; use crate::scalars::math::MathFunction; use crate::scalars::numpy::NumpyFunction; @@ -116,6 +117,9 @@ pub static FUNCTION_REGISTRY: Lazy> = Lazy::new(|| { SystemFunction::register(&function_registry); TableFunction::register(&function_registry); + // Json related functions + JsonFunction::register(&function_registry); + // Geo functions #[cfg(feature = "geo")] crate::scalars::geo::GeoFunctions::register(&function_registry); diff --git a/src/common/function/src/scalars.rs b/src/common/function/src/scalars.rs index f8dc570d12..f60cf2b0d9 100644 --- a/src/common/function/src/scalars.rs +++ b/src/common/function/src/scalars.rs @@ -17,9 +17,11 @@ pub(crate) mod date; pub mod expression; #[cfg(feature = "geo")] pub mod geo; +pub mod json; pub mod matches; pub mod math; pub mod numpy; + #[cfg(test)] pub(crate) mod test; pub(crate) mod timestamp; diff --git a/src/common/function/src/scalars/json.rs b/src/common/function/src/scalars/json.rs new file mode 100644 index 
0000000000..3812b33f23 --- /dev/null +++ b/src/common/function/src/scalars/json.rs @@ -0,0 +1,31 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +mod json_to_string; +mod to_json; + +use json_to_string::JsonToStringFunction; +use to_json::ToJsonFunction; + +use crate::function_registry::FunctionRegistry; + +pub(crate) struct JsonFunction; + +impl JsonFunction { + pub fn register(registry: &FunctionRegistry) { + registry.register(Arc::new(JsonToStringFunction)); + registry.register(Arc::new(ToJsonFunction)); + } +} diff --git a/src/common/function/src/scalars/json/json_to_string.rs b/src/common/function/src/scalars/json/json_to_string.rs new file mode 100644 index 0000000000..8a5e569a14 --- /dev/null +++ b/src/common/function/src/scalars/json/json_to_string.rs @@ -0,0 +1,174 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::{self, Display}; + +use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; +use common_query::prelude::Signature; +use datafusion::logical_expr::Volatility; +use datatypes::data_type::ConcreteDataType; +use datatypes::prelude::VectorRef; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{MutableVector, StringVectorBuilder}; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; + +/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content. +#[derive(Clone, Debug, Default)] +pub struct JsonToStringFunction; + +const NAME: &str = "json_to_string"; + +impl Function for JsonToStringFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::string_datatype()) + } + + fn signature(&self) -> Signature { + Signature::exact( + vec![ConcreteDataType::json_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 1, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly one, have: {}", + columns.len() + ), + } + ); + let jsons = &columns[0]; + + let size = jsons.len(); + let datatype = jsons.data_type(); + let mut results = StringVectorBuilder::with_capacity(size); + + match datatype { + // JSON data type uses binary vector + ConcreteDataType::Binary(_) => { + for i in 0..size { + let json = jsons.get_ref(i); + + let json = json.as_binary(); + let result = match json { + Ok(Some(json)) => match jsonb::from_slice(json) { + Ok(json) => { + let json = json.to_string(); + Some(json) + } + Err(_) => { + return InvalidFuncArgsSnafu { + err_msg: format!("Illegal json binary: {:?}", json), + } + .fail() + } + }, + _ => None, + }; + + results.push(result.as_deref()); + } + } + _ => { + return UnsupportedInputDataTypeSnafu { + function: NAME, + 
datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(); + } + } + + Ok(results.to_vector()) + } +} + +impl Display for JsonToStringFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "JSON_TO_STRING") + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use common_query::prelude::TypeSignature; + use datatypes::scalars::ScalarVector; + use datatypes::vectors::BinaryVector; + + use super::*; + + #[test] + fn test_get_by_path_function() { + let json_to_string = JsonToStringFunction; + + assert_eq!("json_to_string", json_to_string.name()); + assert_eq!( + ConcreteDataType::string_datatype(), + json_to_string + .return_type(&[ConcreteDataType::json_datatype()]) + .unwrap() + ); + + assert!(matches!(json_to_string.signature(), + Signature { + type_signature: TypeSignature::Exact(valid_types), + volatility: Volatility::Immutable + } if valid_types == vec![ConcreteDataType::json_datatype()] + )); + + let json_strings = [ + r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, + r#"{"a": 4, "b": {"c": 6}, "c": 6}"#, + r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, + ]; + + let jsonbs = json_strings + .iter() + .map(|s| { + let value = jsonb::parse_value(s.as_bytes()).unwrap(); + value.to_vec() + }) + .collect::>(); + + let json_vector = BinaryVector::from_vec(jsonbs); + let args: Vec = vec![Arc::new(json_vector)]; + let vector = json_to_string + .eval(FunctionContext::default(), &args) + .unwrap(); + + assert_eq!(3, vector.len()); + for (i, gt) in json_strings.iter().enumerate() { + let result = vector.get_ref(i); + let result = result.as_string().unwrap().unwrap(); + // remove whitespaces + assert_eq!(gt.replace(" ", ""), result); + } + + let invalid_jsonb = vec![b"invalid json"]; + let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb); + let args: Vec = vec![Arc::new(invalid_json_vector)]; + let vector = json_to_string.eval(FunctionContext::default(), &args); + assert!(vector.is_err()); + } +} diff --git 
a/src/common/function/src/scalars/json/to_json.rs b/src/common/function/src/scalars/json/to_json.rs new file mode 100644 index 0000000000..9c3cc90b66 --- /dev/null +++ b/src/common/function/src/scalars/json/to_json.rs @@ -0,0 +1,165 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::{self, Display}; + +use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; +use common_query::prelude::Signature; +use datafusion::logical_expr::Volatility; +use datatypes::data_type::ConcreteDataType; +use datatypes::prelude::VectorRef; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector}; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; + +/// Parses the `String` into `JSONB`. 
+#[derive(Clone, Debug, Default)] +pub struct ToJsonFunction; + +const NAME: &str = "to_json"; + +impl Function for ToJsonFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::json_datatype()) + } + + fn signature(&self) -> Signature { + Signature::exact( + vec![ConcreteDataType::string_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 1, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly one, have: {}", + columns.len() + ), + } + ); + let json_strings = &columns[0]; + + let size = json_strings.len(); + let datatype = json_strings.data_type(); + let mut results = BinaryVectorBuilder::with_capacity(size); + + match datatype { + ConcreteDataType::String(_) => { + for i in 0..size { + let json_string = json_strings.get_ref(i); + + let json_string = json_string.as_string(); + let result = match json_string { + Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) { + Ok(json) => Some(json.to_vec()), + Err(_) => { + return InvalidFuncArgsSnafu { + err_msg: format!( + "Cannot convert the string to json, have: {}", + json_string + ), + } + .fail() + } + }, + _ => None, + }; + + results.push(result.as_deref()); + } + } + _ => { + return UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(); + } + } + + Ok(results.to_vector()) + } +} + +impl Display for ToJsonFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "TO_JSON") + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use common_query::prelude::TypeSignature; + use datatypes::scalars::ScalarVector; + use datatypes::vectors::StringVector; + + use super::*; + + #[test] + fn test_get_by_path_function() { + let to_json = ToJsonFunction; + + 
assert_eq!("to_json", to_json.name()); + assert_eq!( + ConcreteDataType::json_datatype(), + to_json + .return_type(&[ConcreteDataType::json_datatype()]) + .unwrap() + ); + + assert!(matches!(to_json.signature(), + Signature { + type_signature: TypeSignature::Exact(valid_types), + volatility: Volatility::Immutable + } if valid_types == vec![ConcreteDataType::string_datatype()] + )); + + let json_strings = [ + r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, + r#"{"a": 4, "b": {"c": 6}, "c": 6}"#, + r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, + ]; + + let jsonbs = json_strings + .iter() + .map(|s| { + let value = jsonb::parse_value(s.as_bytes()).unwrap(); + value.to_vec() + }) + .collect::>(); + + let json_string_vector = StringVector::from_vec(json_strings.to_vec()); + let args: Vec = vec![Arc::new(json_string_vector)]; + let vector = to_json.eval(FunctionContext::default(), &args).unwrap(); + + assert_eq!(3, vector.len()); + for (i, gt) in jsonbs.iter().enumerate() { + let result = vector.get_ref(i); + let result = result.as_binary().unwrap().unwrap(); + // remove whitespaces + assert_eq!(gt, result); + } + } +} diff --git a/src/common/function/src/system/version.rs b/src/common/function/src/system/version.rs index 1817583590..0843ec2363 100644 --- a/src/common/function/src/system/version.rs +++ b/src/common/function/src/system/version.rs @@ -19,6 +19,7 @@ use common_query::error::Result; use common_query::prelude::{Signature, Volatility}; use datatypes::data_type::ConcreteDataType; use datatypes::vectors::{StringVector, VectorRef}; +use session::context::Channel; use crate::function::{Function, FunctionContext}; @@ -44,11 +45,22 @@ impl Function for VersionFunction { Signature::exact(vec![], Volatility::Immutable) } - fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result { - let result = StringVector::from(vec![format!( - "5.7.20-greptimedb-{}", - env!("CARGO_PKG_VERSION") - )]); + fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result { 
+ let version = match func_ctx.query_ctx.channel() { + Channel::Mysql => { + format!( + "{}-greptimedb-{}", + std::env::var("GREPTIMEDB_MYSQL_SERVER_VERSION") + .unwrap_or_else(|_| "8.4.2".to_string()), + env!("CARGO_PKG_VERSION") + ) + } + Channel::Postgres => { + format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION")) + } + _ => env!("CARGO_PKG_VERSION").to_string(), + }; + let result = StringVector::from(vec![version]); Ok(Arc::new(result)) } } diff --git a/src/common/grpc-expr/src/insert.rs b/src/common/grpc-expr/src/insert.rs index 6170c53008..56ed7e5bf0 100644 --- a/src/common/grpc-expr/src/insert.rs +++ b/src/common/grpc-expr/src/insert.rs @@ -14,11 +14,10 @@ use api::helper; use api::v1::column::Values; -use api::v1::{AddColumns, Column, CreateTableExpr}; +use api::v1::{Column, CreateTableExpr}; use common_base::BitVec; use datatypes::data_type::{ConcreteDataType, DataType}; use datatypes::prelude::VectorRef; -use datatypes::schema::SchemaRef; use snafu::{ensure, ResultExt}; use table::metadata::TableId; use table::table_reference::TableReference; @@ -27,11 +26,6 @@ use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu}; use crate::util; use crate::util::ColumnExpr; -pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result> { - let column_exprs = ColumnExpr::from_columns(columns); - util::extract_new_columns(schema, column_exprs) -} - /// Try to build create table request from insert data. 
pub fn build_create_expr_from_insertion( catalog_name: &str, @@ -114,7 +108,6 @@ mod tests { use super::*; use crate::error; use crate::error::ColumnDataTypeSnafu; - use crate::insert::find_new_columns; #[inline] fn build_column_schema( @@ -281,11 +274,18 @@ mod tests { let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap()); - assert!(find_new_columns(&schema, &[]).unwrap().is_none()); + assert!( + util::extract_new_columns(&schema, ColumnExpr::from_columns(&[])) + .unwrap() + .is_none() + ); let insert_batch = mock_insert_batch(); - let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap(); + let add_columns = + util::extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0)) + .unwrap() + .unwrap(); assert_eq!(5, add_columns.add_columns.len()); let host_column = &add_columns.add_columns[0]; diff --git a/src/common/grpc-expr/src/lib.rs b/src/common/grpc-expr/src/lib.rs index 7a2fea237b..c8afaf98d9 100644 --- a/src/common/grpc-expr/src/lib.rs +++ b/src/common/grpc-expr/src/lib.rs @@ -19,4 +19,4 @@ pub mod insert; pub mod util; pub use alter::{alter_expr_to_request, create_table_schema}; -pub use insert::{build_create_expr_from_insertion, find_new_columns}; +pub use insert::build_create_expr_from_insertion; diff --git a/src/common/grpc/src/select.rs b/src/common/grpc/src/select.rs index df4131bde6..ba13acf3b7 100644 --- a/src/common/grpc/src/select.rs +++ b/src/common/grpc/src/select.rs @@ -70,7 +70,7 @@ macro_rules! 
convert_arrow_array_to_grpc_vals { return Ok(vals); }, )+ - ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) => unreachable!("Should not send {:?} in gRPC", $data_type), + ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) | ConcreteDataType::Json(_) => unreachable!("Should not send {:?} in gRPC", $data_type), } }}; } diff --git a/src/common/meta/src/ddl/alter_logical_tables.rs b/src/common/meta/src/ddl/alter_logical_tables.rs index 3af359ef6e..d9b318e9e9 100644 --- a/src/common/meta/src/ddl/alter_logical_tables.rs +++ b/src/common/meta/src/ddl/alter_logical_tables.rs @@ -39,7 +39,7 @@ use crate::key::DeserializedValueWithBytes; use crate::lock_key::{CatalogLock, SchemaLock, TableLock}; use crate::rpc::ddl::AlterTableTask; use crate::rpc::router::find_leaders; -use crate::{cache_invalidator, metrics, ClusterId}; +use crate::{metrics, ClusterId}; pub struct AlterLogicalTablesProcedure { pub context: DdlContext, @@ -170,12 +170,11 @@ impl AlterLogicalTablesProcedure { } pub(crate) async fn on_invalidate_table_cache(&mut self) -> Result { - let ctx = cache_invalidator::Context::default(); let to_invalidate = self.build_table_cache_keys_to_invalidate(); self.context .cache_invalidator - .invalidate(&ctx, &to_invalidate) + .invalidate(&Default::default(), &to_invalidate) .await?; Ok(Status::done()) } diff --git a/src/common/meta/src/ddl_manager.rs b/src/common/meta/src/ddl_manager.rs index 1b561b3043..152a4631e2 100644 --- a/src/common/meta/src/ddl_manager.rs +++ b/src/common/meta/src/ddl_manager.rs @@ -441,11 +441,9 @@ async fn handle_alter_table_task( .table_metadata_manager() .table_route_manager() .table_route_storage() - .get_raw(table_id) + .get(table_id) .await? - .context(TableRouteNotFoundSnafu { table_id })? 
- .into_inner(); - + .context(TableRouteNotFoundSnafu { table_id })?; ensure!( table_route_value.is_physical(), UnexpectedLogicalRouteTableSnafu { diff --git a/src/common/meta/src/key.rs b/src/common/meta/src/key.rs index df9c8161fa..3cf6f3a3b7 100644 --- a/src/common/meta/src/key.rs +++ b/src/common/meta/src/key.rs @@ -90,6 +90,7 @@ pub mod catalog_name; pub mod datanode_table; pub mod flow; +pub mod node_address; pub mod schema_name; pub mod table_info; pub mod table_name; @@ -102,7 +103,7 @@ pub mod view_info; use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; -use std::ops::Deref; +use std::ops::{Deref, DerefMut}; use std::sync::Arc; use bytes::Bytes; @@ -134,6 +135,7 @@ use self::table_route::{TableRouteManager, TableRouteValue}; use self::tombstone::TombstoneManager; use crate::ddl::utils::region_storage_path; use crate::error::{self, Result, SerdeJsonSnafu}; +use crate::key::node_address::NodeAddressValue; use crate::key::table_route::TableRouteKey; use crate::key::txn_helper::TxnOpGetResponseSet; use crate::kv_backend::txn::{Txn, TxnOp}; @@ -152,12 +154,15 @@ pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name"; pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name"; pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name"; pub const TABLE_ROUTE_PREFIX: &str = "__table_route"; +pub const NODE_ADDRESS_PREFIX: &str = "__node_address"; -pub const CACHE_KEY_PREFIXES: [&str; 4] = [ +/// The keys with these prefixes will be loaded into the cache when the leader starts. +pub const CACHE_KEY_PREFIXES: [&str; 5] = [ TABLE_NAME_KEY_PREFIX, CATALOG_NAME_KEY_PREFIX, SCHEMA_NAME_KEY_PREFIX, TABLE_ROUTE_PREFIX, + NODE_ADDRESS_PREFIX, ]; pub type RegionDistribution = BTreeMap>; @@ -210,6 +215,11 @@ lazy_static! { .unwrap(); } +lazy_static! { + static ref NODE_ADDRESS_PATTERN: Regex = + Regex::new(&format!("^{NODE_ADDRESS_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap(); +} + /// The key of metadata. 
pub trait MetadataKey<'a, T> { fn to_bytes(&self) -> Vec; @@ -306,6 +316,12 @@ impl Deref for DeserializedValueWithBytes { } } +impl DerefMut for DeserializedValueWithBytes { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + impl Debug for DeserializedValueWithBytes { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( @@ -1230,7 +1246,8 @@ impl_metadata_value! { FlowInfoValue, FlowNameValue, FlowRouteValue, - TableFlowValue + TableFlowValue, + NodeAddressValue } impl_optional_metadata_value! { @@ -1952,7 +1969,7 @@ mod tests { let table_route_value = table_metadata_manager .table_route_manager .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .unwrap() .unwrap(); @@ -2005,7 +2022,7 @@ mod tests { let table_route_value = table_metadata_manager .table_route_manager .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .unwrap() .unwrap(); diff --git a/src/common/meta/src/key/node_address.rs b/src/common/meta/src/key/node_address.rs new file mode 100644 index 0000000000..55085c928f --- /dev/null +++ b/src/common/meta/src/key/node_address.rs @@ -0,0 +1,114 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::Display; + +use api::v1::meta::Role; +use serde::{Deserialize, Serialize}; +use snafu::OptionExt; + +use crate::error::{InvalidMetadataSnafu, Result}; +use crate::key::{MetadataKey, NODE_ADDRESS_PATTERN, NODE_ADDRESS_PREFIX}; +use crate::peer::Peer; + +/// The key stores node address. +/// +/// The layout: `__node_address/{role}/{node_id}` +#[derive(Debug, PartialEq)] +pub struct NodeAddressKey { + pub role: Role, + pub node_id: u64, +} + +impl NodeAddressKey { + pub fn new(role: Role, node_id: u64) -> Self { + Self { role, node_id } + } + + pub fn with_datanode(node_id: u64) -> Self { + Self::new(Role::Datanode, node_id) + } +} + +#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] +pub struct NodeAddressValue { + pub peer: Peer, +} + +impl NodeAddressValue { + pub fn new(peer: Peer) -> Self { + Self { peer } + } +} + +impl<'a> MetadataKey<'a, NodeAddressKey> for NodeAddressKey { + fn to_bytes(&self) -> Vec { + self.to_string().into_bytes() + } + + fn from_bytes(bytes: &[u8]) -> Result { + let key = std::str::from_utf8(bytes).map_err(|e| { + InvalidMetadataSnafu { + err_msg: format!( + "NodeAddressKey '{}' is not a valid UTF8 string: {e}", + String::from_utf8_lossy(bytes) + ), + } + .build() + })?; + let captures = NODE_ADDRESS_PATTERN + .captures(key) + .context(InvalidMetadataSnafu { + err_msg: format!("Invalid NodeAddressKey '{key}'"), + })?; + // Safety: pass the regex check above + let role = captures[1].parse::().unwrap(); + let role = Role::try_from(role).map_err(|_| { + InvalidMetadataSnafu { + err_msg: format!("Invalid Role value: {role}"), + } + .build() + })?; + let node_id = captures[2].parse::().unwrap(); + Ok(NodeAddressKey::new(role, node_id)) + } +} + +impl Display for NodeAddressKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}/{}/{}", + NODE_ADDRESS_PREFIX, self.role as i32, self.node_id + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_node_address_key() { + let key = NodeAddressKey::new(Role::Datanode, 1); + let bytes = key.to_bytes(); + let key2 = NodeAddressKey::from_bytes(&bytes).unwrap(); + assert_eq!(key, key2); + + let key = NodeAddressKey::new(Role::Flownode, 3); + let bytes = key.to_bytes(); + let key2 = NodeAddressKey::from_bytes(&bytes).unwrap(); + assert_eq!(key, key2); + } +} diff --git a/src/common/meta/src/key/table_route.rs b/src/common/meta/src/key/table_route.rs index baaaeb75b9..0be0aab3aa 100644 --- a/src/common/meta/src/key/table_route.rs +++ b/src/common/meta/src/key/table_route.rs @@ -22,9 +22,10 @@ use store_api::storage::{RegionId, RegionNumber}; use table::metadata::TableId; use crate::error::{ - self, InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, - TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu, + InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, TableRouteNotFoundSnafu, + UnexpectedLogicalRouteTableSnafu, }; +use crate::key::node_address::{NodeAddressKey, NodeAddressValue}; use crate::key::txn_helper::TxnOpGetResponseSet; use crate::key::{ DeserializedValueWithBytes, MetadataKey, MetadataValue, RegionDistribution, @@ -85,7 +86,7 @@ impl TableRouteValue { debug_assert_eq!(region.region.id.table_id(), physical_table_id); RegionId::new(table_id, region.region.id.region_number()) }) - .collect::>(); + .collect(); TableRouteValue::logical(physical_table_id, region_routes) } } @@ -189,12 +190,12 @@ impl TableRouteValue { .region_routes .iter() .map(|region_route| region_route.region.id.region_number()) - .collect::>(), + .collect(), TableRouteValue::Logical(x) => x .region_ids() .iter() .map(|region_id| region_id.region_number()) - .collect::>(), + .collect(), } } } @@ -301,7 +302,7 @@ impl TableRouteManager { Some(route) => { ensure!( route.is_physical(), - error::UnexpectedLogicalRouteTableSnafu { + UnexpectedLogicalRouteTableSnafu { err_msg: format!("{route:?} is a non-physical TableRouteValue.") } ); @@ -321,7 
+322,7 @@ impl TableRouteManager { ) -> Result { let table_route = self .storage - .get(logical_or_physical_table_id) + .get_inner(logical_or_physical_table_id) .await? .context(TableRouteNotFoundSnafu { table_id: logical_or_physical_table_id, @@ -335,7 +336,7 @@ impl TableRouteManager { /// Returns the [TableRouteValue::Physical] recursively. /// - /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: + /// Returns a [TableRouteNotFound](error::Error::TableRouteNotFound) Error if: /// - the physical table(`logical_or_physical_table_id`) does not exist /// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist. pub async fn get_physical_table_route( @@ -528,6 +529,15 @@ impl TableRouteStorage { /// Returns the [`TableRouteValue`]. pub async fn get(&self, table_id: TableId) -> Result> { + let mut table_route = self.get_inner(table_id).await?; + if let Some(table_route) = &mut table_route { + self.remap_route_address(table_route).await?; + }; + + Ok(table_route) + } + + async fn get_inner(&self, table_id: TableId) -> Result> { let key = TableRouteKey::new(table_id); self.kv_backend .get(&key.to_bytes()) @@ -537,7 +547,19 @@ impl TableRouteStorage { } /// Returns the [`TableRouteValue`] wrapped with [`DeserializedValueWithBytes`]. 
- pub async fn get_raw( + pub async fn get_with_raw_bytes( + &self, + table_id: TableId, + ) -> Result>> { + let mut table_route = self.get_with_raw_bytes_inner(table_id).await?; + if let Some(table_route) = &mut table_route { + self.remap_route_address(table_route).await?; + }; + + Ok(table_route) + } + + async fn get_with_raw_bytes_inner( &self, table_id: TableId, ) -> Result>> { @@ -554,27 +576,27 @@ impl TableRouteStorage { /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: /// - the physical table(`logical_or_physical_table_id`) does not exist /// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist. - pub async fn get_raw_physical_table_route( + pub async fn get_physical_table_route_with_raw_bytes( &self, logical_or_physical_table_id: TableId, ) -> Result<(TableId, DeserializedValueWithBytes)> { - let table_route = - self.get_raw(logical_or_physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: logical_or_physical_table_id, - })?; + let table_route = self + .get_with_raw_bytes(logical_or_physical_table_id) + .await? + .context(TableRouteNotFoundSnafu { + table_id: logical_or_physical_table_id, + })?; match table_route.get_inner_ref() { TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)), TableRouteValue::Logical(x) => { let physical_table_id = x.physical_table_id(); - let physical_table_route = - self.get_raw(physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: physical_table_id, - })?; + let physical_table_route = self + .get_with_raw_bytes(physical_table_id) + .await? + .context(TableRouteNotFoundSnafu { + table_id: physical_table_id, + })?; Ok((physical_table_id, physical_table_route)) } } @@ -582,6 +604,13 @@ impl TableRouteStorage { /// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`. 
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result>> { + let mut table_routes = self.batch_get_inner(table_ids).await?; + self.remap_routes_addresses(&mut table_routes).await?; + + Ok(table_routes) + } + + async fn batch_get_inner(&self, table_ids: &[TableId]) -> Result>> { let keys = table_ids .iter() .map(|id| TableRouteKey::new(*id).to_bytes()) @@ -604,8 +633,107 @@ impl TableRouteStorage { Ok(None) } }) - .collect::>>() + .collect() } + + async fn remap_routes_addresses( + &self, + table_routes: &mut [Option], + ) -> Result<()> { + let keys = table_routes + .iter() + .flat_map(|table_route| { + table_route + .as_ref() + .map(extract_address_keys) + .unwrap_or_default() + }) + .collect::>() + .into_iter() + .collect(); + let node_addrs = self.get_node_addresses(keys).await?; + for table_route in table_routes.iter_mut().flatten() { + set_addresses(&node_addrs, table_route)?; + } + + Ok(()) + } + + async fn remap_route_address(&self, table_route: &mut TableRouteValue) -> Result<()> { + let keys = extract_address_keys(table_route).into_iter().collect(); + let node_addrs = self.get_node_addresses(keys).await?; + set_addresses(&node_addrs, table_route)?; + + Ok(()) + } + + async fn get_node_addresses( + &self, + keys: Vec>, + ) -> Result> { + if keys.is_empty() { + return Ok(HashMap::default()); + } + + self.kv_backend + .batch_get(BatchGetRequest { keys }) + .await? 
+ .kvs + .into_iter() + .map(|kv| { + let node_id = NodeAddressKey::from_bytes(&kv.key)?.node_id; + let node_addr = NodeAddressValue::try_from_raw_value(&kv.value)?; + Ok((node_id, node_addr)) + }) + .collect() + } +} + +fn set_addresses( + node_addrs: &HashMap, + table_route: &mut TableRouteValue, +) -> Result<()> { + let TableRouteValue::Physical(physical_table_route) = table_route else { + return Ok(()); + }; + + for region_route in &mut physical_table_route.region_routes { + if let Some(leader) = &mut region_route.leader_peer { + if let Some(node_addr) = node_addrs.get(&leader.id) { + leader.addr = node_addr.peer.addr.clone(); + } + } + for follower in &mut region_route.follower_peers { + if let Some(node_addr) = node_addrs.get(&follower.id) { + follower.addr = node_addr.peer.addr.clone(); + } + } + } + + Ok(()) +} + +fn extract_address_keys(table_route: &TableRouteValue) -> HashSet> { + let TableRouteValue::Physical(physical_table_route) = table_route else { + return HashSet::default(); + }; + + physical_table_route + .region_routes + .iter() + .flat_map(|region_route| { + region_route + .follower_peers + .iter() + .map(|peer| NodeAddressKey::with_datanode(peer.id).to_bytes()) + .chain( + region_route + .leader_peer + .as_ref() + .map(|leader| NodeAddressKey::with_datanode(leader.id).to_bytes()), + ) + }) + .collect() } #[cfg(test)] @@ -614,7 +742,9 @@ mod tests { use super::*; use crate::kv_backend::memory::MemoryKvBackend; - use crate::kv_backend::TxnService; + use crate::kv_backend::{KvBackend, TxnService}; + use crate::peer::Peer; + use crate::rpc::store::PutRequest; #[test] fn test_table_route_compatibility() { @@ -643,18 +773,18 @@ mod tests { } #[tokio::test] - async fn test_table_route_storage_get_raw_empty() { + async fn test_table_route_storage_get_with_raw_bytes_empty() { let kv = Arc::new(MemoryKvBackend::default()); let table_route_storage = TableRouteStorage::new(kv); - let table_route = table_route_storage.get_raw(1024).await.unwrap(); + let 
table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap(); assert!(table_route.is_none()); } #[tokio::test] - async fn test_table_route_storage_get_raw() { + async fn test_table_route_storage_get_with_raw_bytes() { let kv = Arc::new(MemoryKvBackend::default()); let table_route_storage = TableRouteStorage::new(kv.clone()); - let table_route = table_route_storage.get_raw(1024).await.unwrap(); + let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap(); assert!(table_route.is_none()); let table_route_manager = TableRouteManager::new(kv.clone()); let table_route_value = TableRouteValue::Logical(LogicalTableRouteValue { @@ -667,7 +797,7 @@ mod tests { .unwrap(); let r = kv.txn(txn).await.unwrap(); assert!(r.succeeded); - let table_route = table_route_storage.get_raw(1024).await.unwrap(); + let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap(); assert!(table_route.is_some()); let got = table_route.unwrap().inner; assert_eq!(got, table_route_value); @@ -718,4 +848,61 @@ mod tests { assert!(results[2].is_none()); assert_eq!(results[3].as_ref().unwrap(), &routes[0].1); } + + #[tokio::test] + async fn remap_route_address_updates_addresses() { + let kv = Arc::new(MemoryKvBackend::default()); + let table_route_storage = TableRouteStorage::new(kv.clone()); + let mut table_route = TableRouteValue::Physical(PhysicalTableRouteValue { + region_routes: vec![RegionRoute { + leader_peer: Some(Peer { + id: 1, + ..Default::default() + }), + follower_peers: vec![Peer { + id: 2, + ..Default::default() + }], + ..Default::default() + }], + version: 0, + }); + + kv.put(PutRequest { + key: NodeAddressKey::with_datanode(1).to_bytes(), + value: NodeAddressValue { + peer: Peer { + addr: "addr1".to_string(), + ..Default::default() + }, + } + .try_as_raw_value() + .unwrap(), + ..Default::default() + }) + .await + .unwrap(); + + table_route_storage + .remap_route_address(&mut table_route) + .await + .unwrap(); + + if let 
TableRouteValue::Physical(physical_table_route) = table_route { + assert_eq!( + physical_table_route.region_routes[0] + .leader_peer + .as_ref() + .unwrap() + .addr, + "addr1" + ); + assert_eq!( + physical_table_route.region_routes[0].follower_peers[0].addr, + "" + ); + } else { + panic!("Expected PhysicalTableRouteValue"); + } + } } diff --git a/src/common/wal/Cargo.toml b/src/common/wal/Cargo.toml index 0bced0dd38..202b2825e3 100644 --- a/src/common/wal/Cargo.toml +++ b/src/common/wal/Cargo.toml @@ -17,6 +17,7 @@ common-macro.workspace = true common-telemetry.workspace = true futures-util.workspace = true humantime-serde.workspace = true +num_cpus.workspace = true rskafka.workspace = true rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] } rustls-native-certs = "0.7" diff --git a/src/common/wal/src/config/raft_engine.rs b/src/common/wal/src/config/raft_engine.rs index f54e3f1ba5..af5daa9d38 100644 --- a/src/common/wal/src/config/raft_engine.rs +++ b/src/common/wal/src/config/raft_engine.rs @@ -41,6 +41,8 @@ pub struct RaftEngineConfig { /// Duration for fsyncing log files. #[serde(with = "humantime_serde")] pub sync_period: Option, + /// Parallelism during log recovery. 
+ pub recovery_parallelism: usize, } impl Default for RaftEngineConfig { @@ -55,6 +57,7 @@ impl Default for RaftEngineConfig { enable_log_recycle: true, prefill_log_files: false, sync_period: None, + recovery_parallelism: num_cpus::get(), } } } diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs index 09bf901d37..149aa44ebe 100644 --- a/src/datanode/src/datanode.rs +++ b/src/datanode/src/datanode.rs @@ -454,7 +454,7 @@ impl DatanodeBuilder { "Creating raft-engine logstore with config: {:?} and storage path: {}", config, &wal_dir ); - let logstore = RaftEngineLogStore::try_new(wal_dir, config.clone()) + let logstore = RaftEngineLogStore::try_new(wal_dir, config) .await .map_err(Box::new) .context(OpenLogStoreSnafu)?; diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 68b4637fce..6633d0ab62 100644 --- a/src/datanode/src/heartbeat.rs +++ b/src/datanode/src/heartbeat.rs @@ -192,7 +192,7 @@ impl HeartbeatTask { let (outgoing_tx, mut outgoing_rx) = mpsc::channel(16); let mailbox = Arc::new(HeartbeatMailbox::new(outgoing_tx)); - let quit_signal = Arc::new(tokio::sync::Notify::new()); + let quit_signal = Arc::new(Notify::new()); let mut tx = Self::create_streams( &meta_client, diff --git a/src/datatypes/Cargo.toml b/src/datatypes/Cargo.toml index b10ea682dd..23eac53a03 100644 --- a/src/datatypes/Cargo.toml +++ b/src/datatypes/Cargo.toml @@ -25,6 +25,7 @@ common-time.workspace = true datafusion-common.workspace = true enum_dispatch = "0.3" greptime-proto.workspace = true +jsonb.workspace = true num = "0.4" num-traits = "0.2" ordered-float = { version = "3.0", features = ["serde"] } diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 262110dbf5..495c47dc5f 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -33,8 +33,8 @@ use crate::types::{ BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType, 
DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, - IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType, - NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, + IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, + ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; @@ -81,6 +81,9 @@ pub enum ConcreteDataType { // Compound types: List(ListType), Dictionary(DictionaryType), + + // JSON type: + Json(JsonType), } impl fmt::Display for ConcreteDataType { @@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType { ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()), ConcreteDataType::List(v) => write!(f, "{}", v.name()), ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()), + ConcreteDataType::Json(v) => write!(f, "{}", v.name()), } } } @@ -162,6 +166,7 @@ impl ConcreteDataType { | ConcreteDataType::Duration(_) | ConcreteDataType::Decimal128(_) | ConcreteDataType::Binary(_) + | ConcreteDataType::Json(_) ) } @@ -216,6 +221,10 @@ impl ConcreteDataType { matches!(self, ConcreteDataType::Decimal128(_)) } + pub fn is_json(&self) -> bool { + matches!(self, ConcreteDataType::Json(_)) + } + pub fn numerics() -> Vec { vec![ ConcreteDataType::int8_datatype(), @@ -404,7 +413,7 @@ macro_rules! 
impl_new_concrete_type_functions { impl_new_concrete_type_functions!( Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, - Binary, Date, DateTime, String + Binary, Date, DateTime, String, Json ); impl ConcreteDataType { diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 3bb35a595f..2ca79ff490 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef; use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; +use crate::prelude::DataType; pub use crate::schema::column_schema::{ ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY, TIME_INDEX_KEY, @@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema; /// Key used to store version number of the schema in metadata. pub const VERSION_KEY: &str = "greptime:version"; +/// Key used to store actual column type in field metadata. +pub const TYPE_KEY: &str = "greptime:type"; /// A common schema, should be immutable. 
#[derive(Clone, PartialEq, Eq)] @@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result { if column_schema.is_time_index() && timestamp_index.is_none() { timestamp_index = Some(index); } - let field = Field::try_from(column_schema)?; + let mut field = Field::try_from(column_schema)?; + + // Json column performs the same as binary column in Arrow, so we need to mark it + if column_schema.data_type.is_json() { + let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]); + field = field.with_metadata(metadata); + } fields.push(field); ensure!( name_to_index diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index c3cd8b3453..861924df3b 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -22,6 +22,8 @@ use snafu::{ensure, ResultExt}; use crate::data_type::{ConcreteDataType, DataType}; use crate::error::{self, Error, Result}; use crate::schema::constraint::ColumnDefaultConstraint; +use crate::schema::TYPE_KEY; +use crate::types::JSON_TYPE_NAME; use crate::value::Value; use crate::vectors::VectorRef; @@ -268,7 +270,14 @@ impl TryFrom<&Field> for ColumnSchema { type Error = Error; fn try_from(field: &Field) -> Result { - let data_type = ConcreteDataType::try_from(field.data_type())?; + let mut data_type = ConcreteDataType::try_from(field.data_type())?; + // Override the data type if it is specified in the metadata. 
+ if field.metadata().contains_key(TYPE_KEY) { + data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() { + JSON_TYPE_NAME => ConcreteDataType::json_datatype(), + _ => data_type, + }; + } let mut metadata = field.metadata().clone(); let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) { Some(json) => { @@ -528,4 +537,32 @@ mod tests { assert_eq!(formatted_int8, "test_column_1 Int8 null"); assert_eq!(formatted_int32, "test_column_2 Int32 not null"); } + + #[test] + fn test_from_field_to_column_schema() { + let field = Field::new("test", ArrowDataType::Int32, true); + let column_schema = ColumnSchema::try_from(&field).unwrap(); + assert_eq!("test", column_schema.name); + assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type); + assert!(column_schema.is_nullable); + assert!(!column_schema.is_time_index); + assert!(column_schema.default_constraint.is_none()); + assert!(column_schema.metadata.is_empty()); + + let field = Field::new("test", ArrowDataType::Binary, true); + let field = field.with_metadata(Metadata::from([( + TYPE_KEY.to_string(), + ConcreteDataType::json_datatype().name(), + )])); + let column_schema = ColumnSchema::try_from(&field).unwrap(); + assert_eq!("test", column_schema.name); + assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type); + assert!(column_schema.is_nullable); + assert!(!column_schema.is_time_index); + assert!(column_schema.default_constraint.is_none()); + assert_eq!( + column_schema.metadata.get(TYPE_KEY).unwrap(), + &ConcreteDataType::json_datatype().name() + ); + } } diff --git a/src/datatypes/src/type_id.rs b/src/datatypes/src/type_id.rs index 29e3065abe..d7496a54e0 100644 --- a/src/datatypes/src/type_id.rs +++ b/src/datatypes/src/type_id.rs @@ -68,6 +68,8 @@ pub enum LogicalTypeId { List, Dictionary, + + Json, } impl LogicalTypeId { @@ -126,6 +128,7 @@ impl LogicalTypeId { LogicalTypeId::DurationMicrosecond => ConcreteDataType::duration_microsecond_datatype(), 
LogicalTypeId::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(), LogicalTypeId::Decimal128 => ConcreteDataType::decimal128_default_datatype(), + LogicalTypeId::Json => ConcreteDataType::json_datatype(), } } } diff --git a/src/datatypes/src/types.rs b/src/datatypes/src/types.rs index 686fd9c49f..0bedd2965c 100644 --- a/src/datatypes/src/types.rs +++ b/src/datatypes/src/types.rs @@ -21,6 +21,7 @@ mod decimal_type; mod dictionary_type; mod duration_type; mod interval_type; +mod json_type; mod list_type; mod null_type; mod primitive_type; @@ -42,6 +43,7 @@ pub use duration_type::{ pub use interval_type::{ IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, }; +pub use json_type::{JsonType, JSON_TYPE_NAME}; pub use list_type::ListType; pub use null_type::NullType; pub use primitive_type::{ diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs new file mode 100644 index 0000000000..416b59b5c0 --- /dev/null +++ b/src/datatypes/src/types/json_type.rs @@ -0,0 +1,67 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use arrow::datatypes::DataType as ArrowDataType; +use common_base::bytes::Bytes; +use serde::{Deserialize, Serialize}; + +use crate::data_type::{DataType, DataTypeRef}; +use crate::scalars::ScalarVectorBuilder; +use crate::type_id::LogicalTypeId; +use crate::value::Value; +use crate::vectors::{BinaryVectorBuilder, MutableVector}; + +pub const JSON_TYPE_NAME: &str = "Json"; + +/// JsonType is a data type for JSON data. It is stored as binary data of jsonb format. +/// It utilizes current binary value and vector implementation. +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +pub struct JsonType; + +impl JsonType { + pub fn arc() -> DataTypeRef { + Arc::new(Self) + } +} + +impl DataType for JsonType { + fn name(&self) -> String { + JSON_TYPE_NAME.to_string() + } + + fn logical_type_id(&self) -> LogicalTypeId { + LogicalTypeId::Json + } + + fn default_value(&self) -> Value { + Bytes::default().into() + } + + fn as_arrow_type(&self) -> ArrowDataType { + ArrowDataType::Binary + } + + fn create_mutable_vector(&self, capacity: usize) -> Box { + Box::new(BinaryVectorBuilder::with_capacity(capacity)) + } + + fn try_cast(&self, from: Value) -> Option { + match from { + Value::Binary(v) => Some(Value::Binary(v)), + _ => None, + } + } +} diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 6c49154e40..a8e59da513 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -342,7 +342,8 @@ impl Value { let value_type_id = self.logical_type_id(); let output_type_id = output_type.logical_type_id(); ensure!( - output_type_id == value_type_id || self.is_null(), + // Json type leverage Value(Binary) for storage. 
+ output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json && value_type_id == LogicalTypeId::Binary), error::ToScalarValueSnafu { reason: format!( "expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}", @@ -484,7 +485,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result ScalarValue::UInt64(None), ConcreteDataType::Float32(_) => ScalarValue::Float32(None), ConcreteDataType::Float64(_) => ScalarValue::Float64(None), - ConcreteDataType::Binary(_) => ScalarValue::Binary(None), + ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ScalarValue::Binary(None), ConcreteDataType::String(_) => ScalarValue::Utf8(None), ConcreteDataType::Date(_) => ScalarValue::Date32(None), ConcreteDataType::DateTime(_) => ScalarValue::Date64(None), @@ -1994,6 +1995,10 @@ mod tests { &ConcreteDataType::duration_nanosecond_datatype(), &Value::Duration(Duration::new_nanosecond(1)), ); + check_type_and_value( + &ConcreteDataType::decimal128_datatype(38, 10), + &Value::Decimal128(Decimal128::new(1, 38, 10)), + ); } #[test] @@ -2178,6 +2183,14 @@ mod tests { ValueRef::List(ListValueRef::Ref { val: &list }), Value::List(list.clone()).as_value_ref() ); + + let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes()) + .unwrap() + .to_vec(); + assert_eq!( + ValueRef::Binary(jsonb_value.clone().as_slice()), + Value::Binary(jsonb_value.into()).as_value_ref() + ); } #[test] @@ -2391,6 +2404,16 @@ mod tests { .try_to_scalar_value(&ConcreteDataType::binary_datatype()) .unwrap() ); + + let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes()) + .unwrap() + .to_vec(); + assert_eq!( + ScalarValue::Binary(Some(jsonb_value.clone())), + Value::Binary(jsonb_value.into()) + .try_to_scalar_value(&ConcreteDataType::json_datatype()) + .unwrap() + ); } #[test] @@ -2523,6 +2546,12 @@ mod tests { .try_to_scalar_value(&ConcreteDataType::duration_nanosecond_datatype()) .unwrap() ); + assert_eq!( + 
ScalarValue::Binary(None), + Value::Null + .try_to_scalar_value(&ConcreteDataType::json_datatype()) + .unwrap() + ); } #[test] diff --git a/src/datatypes/src/vectors/eq.rs b/src/datatypes/src/vectors/eq.rs index fcf97515ee..16b0adf6f6 100644 --- a/src/datatypes/src/vectors/eq.rs +++ b/src/datatypes/src/vectors/eq.rs @@ -80,7 +80,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool { match lhs.data_type() { Null(_) => true, Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs), - Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs), + Binary(_) | Json(_) => is_vector_eq!(BinaryVector, lhs, rhs), String(_) => is_vector_eq!(StringVector, lhs, rhs), Date(_) => is_vector_eq!(DateVector, lhs, rhs), DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs), diff --git a/src/frontend/src/instance.rs b/src/frontend/src/instance.rs index 798264b2a2..2c5544c51a 100644 --- a/src/frontend/src/instance.rs +++ b/src/frontend/src/instance.rs @@ -356,9 +356,10 @@ impl SqlQueryHandler for Instance { async fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result { self.catalog_manager - .schema_exists(catalog, schema) + .schema_exists(catalog, schema, None) .await .context(error::CatalogSnafu) + .map(|b| b && !self.catalog_manager.is_reserved_schema_name(schema)) } } diff --git a/src/frontend/src/instance/prom_store.rs b/src/frontend/src/instance/prom_store.rs index 20f66ae853..8f1098b058 100644 --- a/src/frontend/src/instance/prom_store.rs +++ b/src/frontend/src/instance/prom_store.rs @@ -102,7 +102,7 @@ impl Instance { ) -> Result { let table = self .catalog_manager - .table(catalog_name, schema_name, table_name) + .table(catalog_name, schema_name, table_name, Some(ctx)) .await .context(CatalogSnafu)? 
.with_context(|| TableNotFoundSnafu { diff --git a/src/frontend/src/script.rs b/src/frontend/src/script.rs index 91f793b697..43d28c012a 100644 --- a/src/frontend/src/script.rs +++ b/src/frontend/src/script.rs @@ -152,7 +152,12 @@ mod python { if let Some(table) = self .catalog_manager - .table(&expr.catalog_name, &expr.schema_name, &expr.table_name) + .table( + &expr.catalog_name, + &expr.schema_name, + &expr.table_name, + None, + ) .await .context(CatalogSnafu)? { @@ -185,6 +190,7 @@ mod python { &table_name.catalog_name, &table_name.schema_name, &table_name.table_name, + None, ) .await .context(CatalogSnafu)? diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index 3a6274f5f9..5da70e3748 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::io::SeekFrom; use std::sync::Arc; use async_trait::async_trait; -use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; +use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; -use crate::inverted_index::error::{ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu}; +use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu}; use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::format::MIN_BLOB_SIZE; @@ -49,28 +48,28 @@ impl InvertedIndexBlobReader { } #[async_trait] -impl InvertedIndexReader for InvertedIndexBlobReader { +impl InvertedIndexReader for InvertedIndexBlobReader { async fn read_all(&mut self, dest: &mut Vec) -> Result { + let metadata = self.source.metadata().await.context(CommonIoSnafu)?; self.source - .seek(SeekFrom::Start(0)) + .read_into(0..metadata.content_length, dest) .await - .context(SeekSnafu)?; - self.source.read_to_end(dest).await.context(ReadSnafu) + .context(CommonIoSnafu)?; + Ok(metadata.content_length as usize) } async fn seek_read(&mut self, offset: u64, size: u32) -> Result> { - self.source - .seek(SeekFrom::Start(offset)) + let buf = self + .source + .read(offset..offset + size as u64) .await - .context(SeekSnafu)?; - let mut buf = vec![0u8; size as usize]; - self.source.read(&mut buf).await.context(ReadSnafu)?; - Ok(buf) + .context(CommonIoSnafu)?; + Ok(buf.into()) } async fn metadata(&mut self) -> Result> { - let end = SeekFrom::End(0); - let blob_size = self.source.seek(end).await.context(SeekSnafu)?; + let metadata = self.source.metadata().await.context(CommonIoSnafu)?; + let blob_size = metadata.content_length; Self::validate_blob_size(blob_size)?; let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size); diff --git a/src/index/src/inverted_index/format/reader/footer.rs 
b/src/index/src/inverted_index/format/reader/footer.rs index 478352ee68..244973669b 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -12,32 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::SeekFrom; - -use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; +use common_base::range_read::RangeReader; use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas}; use prost::Message; use snafu::{ensure, ResultExt}; use crate::inverted_index::error::{ - DecodeProtoSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedFooterPayloadSizeSnafu, + CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu, }; use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE; /// InvertedIndeFooterReader is for reading the footer section of the blob. -pub struct InvertedIndeFooterReader { - source: R, +pub struct InvertedIndeFooterReader<'a, R> { + source: &'a mut R, blob_size: u64, } -impl InvertedIndeFooterReader { - pub fn new(source: R, blob_size: u64) -> Self { +impl<'a, R> InvertedIndeFooterReader<'a, R> { + pub fn new(source: &'a mut R, blob_size: u64) -> Self { Self { source, blob_size } } } -impl InvertedIndeFooterReader { +impl<'a, R: RangeReader> InvertedIndeFooterReader<'a, R> { pub async fn metadata(&mut self) -> Result { let payload_size = self.read_payload_size().await?; let metas = self.read_payload(payload_size).await?; @@ -45,26 +43,26 @@ impl InvertedIndeFooterReader { } async fn read_payload_size(&mut self) -> Result { - let size_offset = SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE); - self.source.seek(size_offset).await.context(SeekSnafu)?; - let size_buf = &mut [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize]; - self.source.read_exact(size_buf).await.context(ReadSnafu)?; + let mut size_buf = [0u8; 
FOOTER_PAYLOAD_SIZE_SIZE as usize]; + let end = self.blob_size; + let start = end - FOOTER_PAYLOAD_SIZE_SIZE; + self.source + .read_into(start..end, &mut &mut size_buf[..]) + .await + .context(CommonIoSnafu)?; - let payload_size = u32::from_le_bytes(*size_buf) as u64; + let payload_size = u32::from_le_bytes(size_buf) as u64; self.validate_payload_size(payload_size)?; Ok(payload_size) } async fn read_payload(&mut self, payload_size: u64) -> Result { - let payload_offset = - SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE - payload_size); - self.source.seek(payload_offset).await.context(SeekSnafu)?; + let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE; + let start = end - payload_size; + let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?; - let payload = &mut vec![0u8; payload_size as usize]; - self.source.read_exact(payload).await.context(ReadSnafu)?; - - let metas = InvertedIndexMetas::decode(&payload[..]).context(DecodeProtoSnafu)?; + let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?; self.validate_metas(&metas, payload_size)?; Ok(metas) @@ -144,7 +142,8 @@ mod tests { let payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size); + let mut cursor = Cursor::new(payload_buf); + let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size); let payload_size = reader.read_payload_size().await.unwrap(); let metas = reader.read_payload(payload_size).await.unwrap(); @@ -164,7 +163,8 @@ mod tests { let mut payload_buf = create_test_payload(meta); payload_buf.push(0xff); // Add an extra byte to corrupt the footer let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size); + let mut cursor = Cursor::new(payload_buf); + let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size); let payload_size_result = 
reader.read_payload_size().await; assert!(payload_size_result.is_err()); @@ -181,7 +181,8 @@ mod tests { let payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size); + let mut cursor = Cursor::new(payload_buf); + let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size); let payload_size = reader.read_payload_size().await.unwrap(); let payload_result = reader.read_payload(payload_size).await; diff --git a/src/log-store/src/raft_engine/log_store.rs b/src/log-store/src/raft_engine/log_store.rs index 8e9ceec710..a4db95cd57 100644 --- a/src/log-store/src/raft_engine/log_store.rs +++ b/src/log-store/src/raft_engine/log_store.rs @@ -16,6 +16,7 @@ use std::collections::{hash_map, HashMap}; use std::fmt::{Debug, Formatter}; use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::Arc; +use std::time::Duration; use async_stream::stream; use common_runtime::{RepeatedTask, TaskFunction}; @@ -40,7 +41,9 @@ use crate::raft_engine::protos::logstore::{EntryImpl, NamespaceImpl}; const NAMESPACE_PREFIX: &str = "$sys/"; pub struct RaftEngineLogStore { - config: RaftEngineConfig, + sync_write: bool, + sync_period: Option, + read_batch_size: usize, engine: Arc, gc_task: RepeatedTask, last_sync_time: AtomicI64, @@ -76,7 +79,7 @@ impl TaskFunction for PurgeExpiredFilesFunction { } impl RaftEngineLogStore { - pub async fn try_new(dir: String, config: RaftEngineConfig) -> Result { + pub async fn try_new(dir: String, config: &RaftEngineConfig) -> Result { let raft_engine_config = Config { dir, purge_threshold: ReadableSize(config.purge_threshold.0), @@ -85,6 +88,7 @@ impl RaftEngineLogStore { target_file_size: ReadableSize(config.file_size.0), enable_log_recycle: config.enable_log_recycle, prefill_for_recycle: config.prefill_log_files, + recovery_threads: config.recovery_parallelism, ..Default::default() }; let engine = 
Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?); @@ -96,7 +100,9 @@ impl RaftEngineLogStore { ); let log_store = Self { - config, + sync_write: config.sync_write, + sync_period: config.sync_period, + read_batch_size: config.read_batch_size, engine, gc_task, last_sync_time: AtomicI64::new(0), @@ -196,7 +202,9 @@ impl RaftEngineLogStore { impl Debug for RaftEngineLogStore { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("RaftEngineLogsStore") - .field("config", &self.config) + .field("sync_write", &self.sync_write) + .field("sync_period", &self.sync_period) + .field("read_batch_size", &self.read_batch_size) .field("started", &self.gc_task.started()) .finish() } @@ -228,9 +236,9 @@ impl LogStore for RaftEngineLogStore { let (mut batch, last_entry_ids) = self.entries_to_batch(entries)?; - let mut sync = self.config.sync_write; + let mut sync = self.sync_write; - if let Some(sync_period) = &self.config.sync_period { + if let Some(sync_period) = &self.sync_period { let now = common_time::util::current_time_millis(); if now - self.last_sync_time.load(Ordering::Relaxed) >= sync_period.as_millis() as i64 { self.last_sync_time.store(now, Ordering::Relaxed); @@ -276,7 +284,7 @@ impl LogStore for RaftEngineLogStore { entry_id, self.span(ns) ); - let max_batch_size = self.config.read_batch_size; + let max_batch_size = self.read_batch_size; let (tx, mut rx) = tokio::sync::mpsc::channel(max_batch_size); let _handle = common_runtime::spawn_global(async move { while start_index <= last_index { @@ -489,7 +497,7 @@ mod tests { let dir = create_temp_dir("raft-engine-logstore-test"); let logstore = RaftEngineLogStore::try_new( dir.path().to_str().unwrap().to_string(), - RaftEngineConfig::default(), + &RaftEngineConfig::default(), ) .await .unwrap(); @@ -502,7 +510,7 @@ mod tests { let dir = create_temp_dir("raft-engine-logstore-test"); let logstore = RaftEngineLogStore::try_new( dir.path().to_str().unwrap().to_string(), - 
RaftEngineConfig::default(), + &RaftEngineConfig::default(), ) .await .unwrap(); @@ -528,7 +536,7 @@ mod tests { let dir = create_temp_dir("raft-engine-logstore-test"); let logstore = RaftEngineLogStore::try_new( dir.path().to_str().unwrap().to_string(), - RaftEngineConfig::default(), + &RaftEngineConfig::default(), ) .await .unwrap(); @@ -570,7 +578,7 @@ mod tests { { let logstore = RaftEngineLogStore::try_new( dir.path().to_str().unwrap().to_string(), - RaftEngineConfig::default(), + &RaftEngineConfig::default(), ) .await .unwrap(); @@ -590,7 +598,7 @@ mod tests { let logstore = RaftEngineLogStore::try_new( dir.path().to_str().unwrap().to_string(), - RaftEngineConfig::default(), + &RaftEngineConfig::default(), ) .await .unwrap(); @@ -634,7 +642,7 @@ mod tests { ..Default::default() }; - RaftEngineLogStore::try_new(path, config).await.unwrap() + RaftEngineLogStore::try_new(path, &config).await.unwrap() } #[tokio::test] diff --git a/src/log-store/src/test_util/log_store_util.rs b/src/log-store/src/test_util/log_store_util.rs index b1fd183fba..98d419acc4 100644 --- a/src/log-store/src/test_util/log_store_util.rs +++ b/src/log-store/src/test_util/log_store_util.rs @@ -29,7 +29,7 @@ pub async fn create_tmp_local_file_log_store>(path: P) -> RaftEng file_size: ReadableSize::kb(128), ..Default::default() }; - RaftEngineLogStore::try_new(path, cfg).await.unwrap() + RaftEngineLogStore::try_new(path, &cfg).await.unwrap() } /// Create a [KafkaLogStore]. 
diff --git a/src/meta-srv/src/error.rs b/src/meta-srv/src/error.rs index a6f3721f30..d0a58d688e 100644 --- a/src/meta-srv/src/error.rs +++ b/src/meta-srv/src/error.rs @@ -655,13 +655,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid heartbeat request: {}", err_msg))] - InvalidHeartbeatRequest { - err_msg: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to publish message"))] PublishMessage { #[snafu(source)] @@ -809,7 +802,6 @@ impl ErrorExt for Error { | Error::UnsupportedSelectorType { .. } | Error::InvalidArguments { .. } | Error::InitExportMetricsTask { .. } - | Error::InvalidHeartbeatRequest { .. } | Error::ProcedureNotFound { .. } | Error::TooManyPartitions { .. } | Error::TomlFormat { .. } => StatusCode::InvalidArguments, diff --git a/src/meta-srv/src/handler/collect_stats_handler.rs b/src/meta-srv/src/handler/collect_stats_handler.rs index ca4df868d1..ec9fa231e1 100644 --- a/src/meta-srv/src/handler/collect_stats_handler.rs +++ b/src/meta-srv/src/handler/collect_stats_handler.rs @@ -15,8 +15,12 @@ use std::cmp::Ordering; use api::v1::meta::{HeartbeatRequest, Role}; +use common_meta::instruction::CacheIdent; +use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue}; +use common_meta::key::{MetadataKey, MetadataValue}; +use common_meta::peer::Peer; use common_meta::rpc::store::PutRequest; -use common_telemetry::warn; +use common_telemetry::{error, warn}; use dashmap::DashMap; use snafu::ResultExt; @@ -120,6 +124,13 @@ impl HeartbeatHandler for CollectStatsHandler { true }; + // Need to refresh the [datanode -> address] mapping + if refresh { + // Safety: `epoch_stats.stats` is not empty + let last = epoch_stats.stats.last().unwrap(); + rewrite_node_address(ctx, last).await; + } + if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY { return Ok(HandleControl::Continue); } @@ -131,7 +142,7 @@ impl HeartbeatHandler for CollectStatsHandler { let put = PutRequest { key, value, - 
..Default::default() + prev_kv: false, }; let _ = ctx @@ -144,6 +155,44 @@ impl HeartbeatHandler for CollectStatsHandler { } } +async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) { + let peer = Peer { + id: stat.id, + addr: stat.addr.clone(), + }; + let key = NodeAddressKey::with_datanode(peer.id).to_bytes(); + if let Ok(value) = NodeAddressValue::new(peer.clone()).try_as_raw_value() { + let put = PutRequest { + key, + value, + prev_kv: false, + }; + + match ctx.leader_cached_kv_backend.put(put).await { + Ok(_) => { + // broadcast invalidating cache + let cache_idents = stat + .table_ids() + .into_iter() + .map(CacheIdent::TableId) + .collect::>(); + if let Err(e) = ctx + .cache_invalidator + .invalidate(&Default::default(), &cache_idents) + .await + { + error!(e; "Failed to invalidate {} `NodeAddressKey` cache, peer: {:?}", cache_idents.len(), peer); + } + } + Err(e) => { + error!(e; "Failed to update NodeAddressValue: {:?}", peer); + } + } + } else { + warn!("Failed to serialize NodeAddressValue: {:?}", peer); + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/meta-srv/src/handler/extract_stat_handler.rs b/src/meta-srv/src/handler/extract_stat_handler.rs index 8124ec8336..c23e78314f 100644 --- a/src/meta-srv/src/handler/extract_stat_handler.rs +++ b/src/meta-srv/src/handler/extract_stat_handler.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use api::v1::meta::{HeartbeatRequest, Role}; -use common_telemetry::warn; +use common_telemetry::{info, warn}; use super::node_stat::Stat; use crate::error::Result; @@ -40,12 +40,15 @@ impl HeartbeatHandler for ExtractStatHandler { return Ok(HandleControl::Continue); } - match Stat::try_from(req.clone()) { + match Stat::try_from(req) { Ok(stat) => { let _ = acc.stat.insert(stat); } - Err(err) => { - warn!(err; "Incomplete heartbeat data: {:?}", req); + Err(Some(header)) => { + info!("New handshake request: {:?}", header); + } + Err(_) => { + warn!("Incomplete heartbeat data: {:?}", req); } }; diff --git a/src/meta-srv/src/handler/node_stat.rs b/src/meta-srv/src/handler/node_stat.rs index 5f1ec1cc2b..3a6c6355cd 100644 --- a/src/meta-srv/src/handler/node_stat.rs +++ b/src/meta-srv/src/handler/node_stat.rs @@ -14,14 +14,14 @@ use std::collections::{HashMap, HashSet}; -use api::v1::meta::HeartbeatRequest; +use api::v1::meta::{HeartbeatRequest, RequestHeader}; use common_meta::ClusterId; use common_time::util as time_util; use serde::{Deserialize, Serialize}; use store_api::region_engine::RegionRole; use store_api::storage::RegionId; +use table::metadata::TableId; -use crate::error::{Error, InvalidHeartbeatRequestSnafu}; use crate::key::DatanodeStatKey; #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -79,6 +79,11 @@ impl Stat { self.region_stats.iter().map(|s| (s.id, s.role)).collect() } + /// Returns all table ids in the region stats. 
+ pub fn table_ids(&self) -> HashSet { + self.region_stats.iter().map(|s| s.id.table_id()).collect() + } + pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet) { if inactive_region_ids.is_empty() { return; @@ -92,10 +97,10 @@ impl Stat { } } -impl TryFrom for Stat { - type Error = Error; +impl TryFrom<&HeartbeatRequest> for Stat { + type Error = Option; - fn try_from(value: HeartbeatRequest) -> Result { + fn try_from(value: &HeartbeatRequest) -> Result { let HeartbeatRequest { header, peer, @@ -107,9 +112,9 @@ impl TryFrom for Stat { match (header, peer) { (Some(header), Some(peer)) => { let region_stats = region_stats - .into_iter() - .map(RegionStat::try_from) - .collect::, _>>()?; + .iter() + .map(RegionStat::from) + .collect::>(); Ok(Self { timestamp_millis: time_util::current_time_millis(), @@ -117,35 +122,30 @@ impl TryFrom for Stat { // datanode id id: peer.id, // datanode address - addr: peer.addr, + addr: peer.addr.clone(), rcus: region_stats.iter().map(|s| s.rcus).sum(), wcus: region_stats.iter().map(|s| s.wcus).sum(), region_num: region_stats.len() as u64, region_stats, - node_epoch, + node_epoch: *node_epoch, }) } - _ => InvalidHeartbeatRequestSnafu { - err_msg: "missing header or peer", - } - .fail(), + (header, _) => Err(header.clone()), } } } -impl TryFrom for RegionStat { - type Error = Error; - - fn try_from(value: api::v1::meta::RegionStat) -> Result { - Ok(Self { +impl From<&api::v1::meta::RegionStat> for RegionStat { + fn from(value: &api::v1::meta::RegionStat) -> Self { + Self { id: RegionId::from_u64(value.region_id), rcus: value.rcus, wcus: value.wcus, approximate_bytes: value.approximate_bytes, engine: value.engine.to_string(), role: RegionRole::from(value.role()), - extensions: value.extensions, - }) + extensions: value.extensions.clone(), + } } } diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 123a489cd5..e56afdbf11 100644 --- 
a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -232,7 +232,7 @@ impl Context { .table_metadata_manager .table_route_manager() .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) diff --git a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs index 22b25492e2..17c5762906 100644 --- a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs @@ -160,6 +160,11 @@ impl DowngradeLeaderRegion { "Trying to downgrade the region {} on Datanode {}, but region doesn't exist!", region_id, leader ); + } else { + info!( + "Region {} leader is downgraded, last_entry_id: {:?}", + region_id, last_entry_id + ); } if let Some(last_entry_id) = last_entry_id { diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs index 11dbebb182..3b3f6a6c0c 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs @@ -146,7 +146,7 @@ mod tests { let original_table_route = table_metadata_manager .table_route_manager() .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .unwrap() .unwrap(); diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs index 8da1bbb0db..4e6f20ef19 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs +++ 
b/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs @@ -140,7 +140,7 @@ mod tests { let old_table_route = table_metadata_manager .table_route_manager() .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .unwrap() .unwrap(); diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs index 17cdabc6a0..75f93f760e 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs @@ -337,7 +337,7 @@ mod tests { let original_table_route = table_metadata_manager .table_route_manager() .table_route_storage() - .get_raw(table_id) + .get_with_raw_bytes(table_id) .await .unwrap() .unwrap(); diff --git a/src/metric-engine/src/engine/create.rs b/src/metric-engine/src/engine/create.rs index dad22c72f9..9ca89248dc 100644 --- a/src/metric-engine/src/engine/create.rs +++ b/src/metric-engine/src/engine/create.rs @@ -33,6 +33,7 @@ use store_api::metric_engine_consts::{ METADATA_SCHEMA_VALUE_COLUMN_INDEX, METADATA_SCHEMA_VALUE_COLUMN_NAME, PHYSICAL_TABLE_METADATA_KEY, }; +use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY}; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest}; use store_api::storage::consts::ReservedColumnId; @@ -426,9 +427,10 @@ impl MetricEngineInner { // concat region dir let metadata_region_dir = join_dir(&request.region_dir, METADATA_REGION_SUBDIR); - // remove TTL option + // remove TTL and APPEND_MODE option let mut options = request.options.clone(); - options.remove("ttl"); + options.remove(TTL_KEY); + options.remove(APPEND_MODE_KEY); RegionCreateRequest { engine: MITO_ENGINE_NAME.to_string(), diff --git a/src/mito2/src/cache.rs 
b/src/mito2/src/cache.rs index 2bd8c21e75..e607717842 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -25,6 +25,7 @@ pub(crate) mod write_cache; use std::mem; use std::sync::Arc; +use bytes::Bytes; use datatypes::value::Value; use datatypes::vectors::VectorRef; use moka::notification::RemovalCause; @@ -393,20 +394,59 @@ impl SstMetaKey { } } +/// Path to column pages in the SST file. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ColumnPagePath { + /// Region id of the SST file to cache. + region_id: RegionId, + /// Id of the SST file to cache. + file_id: FileId, + /// Index of the row group. + row_group_idx: usize, + /// Index of the column in the row group. + column_idx: usize, +} + /// Cache key for pages of a SST row group. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct PageKey { - /// Region id of the SST file to cache. - pub region_id: RegionId, - /// Id of the SST file to cache. - pub file_id: FileId, - /// Index of the row group. - pub row_group_idx: usize, - /// Index of the column in the row group. - pub column_idx: usize, +pub enum PageKey { + /// Cache key for a compressed page in a row group. + Compressed(ColumnPagePath), + /// Cache key for all uncompressed pages in a row group. + Uncompressed(ColumnPagePath), } impl PageKey { + /// Creates a key for a compressed page. + pub fn new_compressed( + region_id: RegionId, + file_id: FileId, + row_group_idx: usize, + column_idx: usize, + ) -> PageKey { + PageKey::Compressed(ColumnPagePath { + region_id, + file_id, + row_group_idx, + column_idx, + }) + } + + /// Creates a key for all uncompressed pages in a row group. + pub fn new_uncompressed( + region_id: RegionId, + file_id: FileId, + row_group_idx: usize, + column_idx: usize, + ) -> PageKey { + PageKey::Uncompressed(ColumnPagePath { + region_id, + file_id, + row_group_idx, + column_idx, + }) + } + /// Returns memory used by the key (estimated). 
fn estimated_size(&self) -> usize { mem::size_of::() @@ -414,21 +454,41 @@ impl PageKey { } /// Cached row group pages for a column. +// We don't use enum here to make it easier to mock and use the struct. +#[derive(Default)] pub struct PageValue { + /// Compressed page of the column in the row group. + pub compressed: Bytes, /// All pages of the column in the row group. - pub pages: Vec, + pub row_group: Vec, } impl PageValue { - /// Creates a new page value. - pub fn new(pages: Vec) -> PageValue { - PageValue { pages } + /// Creates a new value from a compressed page. + pub fn new_compressed(bytes: Bytes) -> PageValue { + PageValue { + compressed: bytes, + row_group: vec![], + } + } + + /// Creates a new value from all pages in a row group. + pub fn new_row_group(pages: Vec) -> PageValue { + PageValue { + compressed: Bytes::new(), + row_group: pages, + } } /// Returns memory used by the value (estimated). fn estimated_size(&self) -> usize { - // We only consider heap size of all pages. 
- self.pages.iter().map(|page| page.buffer().len()).sum() + mem::size_of::() + + self.compressed.len() + + self + .row_group + .iter() + .map(|page| page.buffer().len()) + .sum::() } } @@ -507,13 +567,8 @@ mod tests { .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value) .is_none()); - let key = PageKey { - region_id, - file_id, - row_group_idx: 0, - column_idx: 0, - }; - let pages = Arc::new(PageValue::new(Vec::new())); + let key = PageKey::new_uncompressed(region_id, file_id, 0, 0); + let pages = Arc::new(PageValue::default()); cache.put_pages(key.clone(), pages); assert!(cache.get_pages(&key).is_none()); @@ -562,14 +617,9 @@ mod tests { let cache = CacheManager::builder().page_cache_size(1000).build(); let region_id = RegionId::new(1, 1); let file_id = FileId::random(); - let key = PageKey { - region_id, - file_id, - row_group_idx: 0, - column_idx: 0, - }; + let key = PageKey::new_compressed(region_id, file_id, 0, 0); assert!(cache.get_pages(&key).is_none()); - let pages = Arc::new(PageValue::new(Vec::new())); + let pages = Arc::new(PageValue::default()); cache.put_pages(key.clone(), pages); assert!(cache.get_pages(&key).is_some()); } diff --git a/src/mito2/src/metrics.rs b/src/mito2/src/metrics.rs index 355c0fba47..c50bbbdc78 100644 --- a/src/mito2/src/metrics.rs +++ b/src/mito2/src/metrics.rs @@ -133,6 +133,7 @@ lazy_static! { vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0] ) .unwrap(); + pub static ref READ_STAGE_FETCH_PAGES: Histogram = READ_STAGE_ELAPSED.with_label_values(&["fetch_pages"]); /// Counter of rows read from different source. 
pub static ref READ_ROWS_TOTAL: IntCounterVec = register_int_counter_vec!("greptime_mito_read_rows_total", "mito read rows total", &[TYPE_LABEL]).unwrap(); diff --git a/src/mito2/src/read/merge.rs b/src/mito2/src/read/merge.rs index b827388080..8060c53405 100644 --- a/src/mito2/src/read/merge.rs +++ b/src/mito2/src/read/merge.rs @@ -89,6 +89,9 @@ impl Drop for MergeReader { READ_STAGE_ELAPSED .with_label_values(&["merge"]) .observe(self.metrics.scan_cost.as_secs_f64()); + READ_STAGE_ELAPSED + .with_label_values(&["merge_fetch"]) + .observe(self.metrics.fetch_cost.as_secs_f64()); } } diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index ec45c9b934..40f18c393b 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -731,14 +731,28 @@ impl ScanInput { } for file in &self.files { - let range = PartitionRange { - start: file.meta_ref().time_range.0, - end: file.meta_ref().time_range.1, - num_rows: file.meta_ref().num_rows as usize, - identifier: id, - }; - id += 1; - container.push(range); + if self.append_mode { + // For append mode, we can parallelize reading row groups. + for _ in 0..file.meta_ref().num_row_groups { + let range = PartitionRange { + start: file.time_range().0, + end: file.time_range().1, + num_rows: file.num_rows(), + identifier: id, + }; + id += 1; + container.push(range); + } + } else { + let range = PartitionRange { + start: file.meta_ref().time_range.0, + end: file.meta_ref().time_range.1, + num_rows: file.meta_ref().num_rows as usize, + identifier: id, + }; + id += 1; + container.push(range); + } } container @@ -887,10 +901,21 @@ impl ScanPartList { }) } + /// Returns the number of files. + pub(crate) fn num_files(&self) -> usize { + self.0.as_ref().map_or(0, |parts| { + parts.iter().map(|part| part.file_ranges.len()).sum() + }) + } + /// Returns the number of file ranges. 
pub(crate) fn num_file_ranges(&self) -> usize { self.0.as_ref().map_or(0, |parts| { - parts.iter().map(|part| part.file_ranges.len()).sum() + parts + .iter() + .flat_map(|part| part.file_ranges.iter()) + .map(|ranges| ranges.len()) + .sum() }) } } @@ -933,9 +958,10 @@ impl StreamContext { Ok(inner) => match t { DisplayFormatType::Default => write!( f, - "partition_count={} ({} memtable ranges, {} file ranges)", + "partition_count={} ({} memtable ranges, {} file {} ranges)", inner.0.len(), inner.0.num_mem_ranges(), + inner.0.num_files(), inner.0.num_file_ranges() )?, DisplayFormatType::Verbose => write!(f, "{:?}", inner.0)?, diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index ca232df834..296d55250b 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -166,8 +166,8 @@ impl SeqScan { reader_metrics.merge_from(reader.metrics()); } debug!( - "Seq scan region {}, file {}, {} ranges finished, metrics: {:?}", - region_id, file_id, range_num, reader_metrics + "Seq scan region {}, file {}, {} ranges finished, metrics: {:?}, compaction: {}", + region_id, file_id, range_num, reader_metrics, compaction ); // Reports metrics. 
reader_metrics.observe_rows(read_type); @@ -238,11 +238,12 @@ impl SeqScan { let maybe_reader = Self::build_reader_from_sources(stream_ctx, sources, semaphore).await; let build_reader_cost = build_start.elapsed(); metrics.build_reader_cost += build_reader_cost; - common_telemetry::debug!( - "Build reader region: {}, range_id: {}, from sources, build_reader_cost: {:?}", + debug!( + "Build reader region: {}, range_id: {}, from sources, build_reader_cost: {:?}, compaction: {}", stream_ctx.input.mapper.metadata().region_id, range_id, - build_reader_cost + build_reader_cost, + compaction, ); maybe_reader @@ -354,11 +355,12 @@ impl SeqScan { metrics.observe_metrics_on_finish(); debug!( - "Seq scan finished, region_id: {:?}, partition: {}, metrics: {:?}, first_poll: {:?}", + "Seq scan finished, region_id: {:?}, partition: {}, metrics: {:?}, first_poll: {:?}, compaction: {}", stream_ctx.input.mapper.metadata().region_id, partition, metrics, first_poll, + compaction, ); } }; @@ -450,13 +452,14 @@ impl SeqScan { metrics.total_cost = stream_ctx.query_start.elapsed(); metrics.observe_metrics_on_finish(); - common_telemetry::debug!( - "Seq scan finished, region_id: {}, partition: {}, id: {}, metrics: {:?}, first_poll: {:?}", + debug!( + "Seq scan finished, region_id: {}, partition: {}, id: {}, metrics: {:?}, first_poll: {:?}, compaction: {}", stream_ctx.input.mapper.metadata().region_id, partition, id, metrics, first_poll, + compaction, ); } }; diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 5dfcc519d6..67e87197a6 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -152,7 +152,6 @@ impl RegionScanner for UnorderedScan { let parallelism = self.properties.num_partitions(); let stream = try_stream! 
{ let first_poll = stream_ctx.query_start.elapsed(); - let part = { let mut parts = stream_ctx.parts.lock().await; maybe_init_parts(&stream_ctx.input, &mut parts, &mut metrics, parallelism) @@ -180,6 +179,7 @@ impl RegionScanner for UnorderedScan { .map_err(BoxedError::new) .context(ExternalSnafu)?; metrics.build_reader_cost = build_reader_start.elapsed(); + let query_start = stream_ctx.query_start; let cache = stream_ctx.input.cache_manager.as_deref(); // Scans memtables first. @@ -217,8 +217,8 @@ impl RegionScanner for UnorderedScan { metrics.total_cost = query_start.elapsed(); metrics.observe_metrics_on_finish(); debug!( - "Unordered scan partition {} finished, region_id: {}, metrics: {:?}, reader_metrics: {:?}, first_poll: {:?}", - partition, mapper.metadata().region_id, metrics, reader_metrics, first_poll, + "Unordered scan partition {} finished, region_id: {}, metrics: {:?}, reader_metrics: {:?}, first_poll: {:?}, ranges: {}", + partition, mapper.metadata().region_id, metrics, reader_metrics, first_poll, part.file_ranges[0].len(), ); }; let stream = Box::pin(RecordBatchStreamWrapper::new( @@ -343,14 +343,14 @@ impl UnorderedDistributor { let mems_per_part = ((self.mem_ranges.len() + parallelism - 1) / parallelism).max(1); let ranges_per_part = ((self.file_ranges.len() + parallelism - 1) / parallelism).max(1); - common_telemetry::debug!( - "Parallel scan is enabled, parallelism: {}, {} mem_ranges, {} file_ranges, mems_per_part: {}, ranges_per_part: {}", - parallelism, - self.mem_ranges.len(), - self.file_ranges.len(), - mems_per_part, - ranges_per_part - ); + debug!( + "Parallel scan is enabled, parallelism: {}, {} mem_ranges, {} file_ranges, mems_per_part: {}, ranges_per_part: {}", + parallelism, + self.mem_ranges.len(), + self.file_ranges.len(), + mems_per_part, + ranges_per_part + ); let mut scan_parts = self .mem_ranges .chunks(mems_per_part) diff --git a/src/mito2/src/row_converter.rs b/src/mito2/src/row_converter.rs index ae0ab1177d..a7f6f1644e 100644 
--- a/src/mito2/src/row_converter.rs +++ b/src/mito2/src/row_converter.rs @@ -68,7 +68,7 @@ impl SortField { ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9, ConcreteDataType::Float32(_) => 5, ConcreteDataType::Float64(_) => 9, - ConcreteDataType::Binary(_) => 11, + ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => 11, ConcreteDataType::String(_) => 11, // a non-empty string takes at least 11 bytes. ConcreteDataType::Date(_) => 5, ConcreteDataType::DateTime(_) => 9, @@ -146,7 +146,8 @@ impl SortField { Time, time, Interval, interval, Duration, duration, - Decimal128, decimal128 + Decimal128, decimal128, + Json, binary ); Ok(()) @@ -169,7 +170,7 @@ impl SortField { Ok(Value::from(Option::<$f>::deserialize(deserializer).context(error::DeserializeFieldSnafu)?)) } )* - ConcreteDataType::Binary(_) => Ok(Value::from( + ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => Ok(Value::from( Option::>::deserialize(deserializer) .context(error::DeserializeFieldSnafu)? .map(Bytes::from), @@ -237,7 +238,7 @@ impl SortField { ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9, ConcreteDataType::Float32(_) => 5, ConcreteDataType::Float64(_) => 9, - ConcreteDataType::Binary(_) => { + ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => { // Now the encoder encode binary as a list of bytes so we can't use // skip bytes. let pos_before = deserializer.position(); diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index 34819c0c71..cce9965539 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -216,22 +216,16 @@ mod tests { .await; } + // Doesn't have compressed page cached. + let page_key = PageKey::new_compressed(metadata.region_id, handle.file_id(), 0, 0); + assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none()); + // Cache 4 row groups. 
for i in 0..4 { - let page_key = PageKey { - region_id: metadata.region_id, - file_id: handle.file_id(), - row_group_idx: i, - column_idx: 0, - }; + let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), i, 0); assert!(cache.as_ref().unwrap().get_pages(&page_key).is_some()); } - let page_key = PageKey { - region_id: metadata.region_id, - file_id: handle.file_id(), - row_group_idx: 5, - column_idx: 0, - }; + let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), 5, 0); assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none()); } diff --git a/src/mito2/src/sst/parquet/page_reader.rs b/src/mito2/src/sst/parquet/page_reader.rs index 1416da448b..b2f69e2dd7 100644 --- a/src/mito2/src/sst/parquet/page_reader.rs +++ b/src/mito2/src/sst/parquet/page_reader.rs @@ -19,14 +19,14 @@ use std::collections::VecDeque; use parquet::column::page::{Page, PageMetadata, PageReader}; use parquet::errors::Result; -/// A reader that reads from cached pages. -pub(crate) struct CachedPageReader { +/// A reader that reads all pages from a cache. +pub(crate) struct RowGroupCachedReader { /// Cached pages. pages: VecDeque, } -impl CachedPageReader { - /// Returns a new reader from existing pages. +impl RowGroupCachedReader { + /// Returns a new reader from pages of a column in a row group. 
pub(crate) fn new(pages: &[Page]) -> Self { Self { pages: pages.iter().cloned().collect(), @@ -34,7 +34,7 @@ impl CachedPageReader { } } -impl PageReader for CachedPageReader { +impl PageReader for RowGroupCachedReader { fn get_next_page(&mut self) -> Result> { Ok(self.pages.pop_front()) } @@ -55,9 +55,8 @@ impl PageReader for CachedPageReader { } } -impl Iterator for CachedPageReader { +impl Iterator for RowGroupCachedReader { type Item = Result; - fn next(&mut self) -> Option { self.get_next_page().transpose() } diff --git a/src/mito2/src/sst/parquet/row_group.rs b/src/mito2/src/sst/parquet/row_group.rs index 68a91e55fe..991d56943d 100644 --- a/src/mito2/src/sst/parquet/row_group.rs +++ b/src/mito2/src/sst/parquet/row_group.rs @@ -23,33 +23,37 @@ use parquet::arrow::arrow_reader::{RowGroups, RowSelection}; use parquet::arrow::ProjectionMask; use parquet::column::page::{PageIterator, PageReader}; use parquet::errors::{ParquetError, Result}; -use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData}; +use parquet::file::metadata::{ColumnChunkMetaData, ParquetMetaData, RowGroupMetaData}; +use parquet::file::properties::DEFAULT_PAGE_SIZE; use parquet::file::reader::{ChunkReader, Length}; use parquet::file::serialized_reader::SerializedPageReader; use parquet::format::PageLocation; use store_api::storage::RegionId; +use tokio::task::yield_now; use crate::cache::file_cache::{FileType, IndexKey}; use crate::cache::{CacheManagerRef, PageKey, PageValue}; -use crate::metrics::READ_STAGE_ELAPSED; +use crate::metrics::{READ_STAGE_ELAPSED, READ_STAGE_FETCH_PAGES}; use crate::sst::file::FileId; use crate::sst::parquet::helper::fetch_byte_ranges; -use crate::sst::parquet::page_reader::CachedPageReader; +use crate::sst::parquet::page_reader::RowGroupCachedReader; /// An in-memory collection of column chunks pub struct InMemoryRowGroup<'a> { metadata: &'a RowGroupMetaData, page_locations: Option<&'a [Vec]>, + /// Compressed page of each column. 
column_chunks: Vec>>, row_count: usize, region_id: RegionId, file_id: FileId, row_group_idx: usize, cache_manager: Option, - /// Cached pages for each column. + /// Row group level cached pages for each column. /// - /// `column_cached_pages.len()` equals to `column_chunks.len()`. - column_cached_pages: Vec>>, + /// These pages are uncompressed pages of a row group. + /// `column_uncompressed_pages.len()` equals to `column_chunks.len()`. + column_uncompressed_pages: Vec>>, file_path: &'a str, /// Object store. object_store: ObjectStore, @@ -86,7 +90,7 @@ impl<'a> InMemoryRowGroup<'a> { file_id, row_group_idx, cache_manager, - column_cached_pages: vec![None; metadata.columns().len()], + column_uncompressed_pages: vec![None; metadata.columns().len()], file_path, object_store, } @@ -161,16 +165,20 @@ impl<'a> InMemoryRowGroup<'a> { // Now we only use cache in dense chunk data. self.fetch_pages_from_cache(projection); + // Release the CPU to avoid blocking the runtime. Since `fetch_pages_from_cache` + // is a synchronous, CPU-bound operation. + yield_now().await; + let fetch_ranges = self .column_chunks .iter() - .zip(&self.column_cached_pages) + .zip(&self.column_uncompressed_pages) .enumerate() - // Don't need to fetch column data if we already cache the column's pages. - .filter(|&(idx, (chunk, cached_pages))| { - chunk.is_none() && projection.leaf_included(idx) && cached_pages.is_none() + .filter(|&(idx, (chunk, uncompressed_pages))| { + // Don't need to fetch column data if we already cache the column's pages. 
+ chunk.is_none() && projection.leaf_included(idx) && uncompressed_pages.is_none() }) - .map(|(idx, (_chunk, _cached_pages))| { + .map(|(idx, (_chunk, _pages))| { let column = self.metadata.column(idx); let (start, length) = column.byte_range(); start..(start + length) @@ -184,22 +192,41 @@ impl<'a> InMemoryRowGroup<'a> { let mut chunk_data = self.fetch_bytes(&fetch_ranges).await?.into_iter(); - for (idx, (chunk, cached_pages)) in self + for (idx, (chunk, row_group_pages)) in self .column_chunks .iter_mut() - .zip(&self.column_cached_pages) + .zip(&self.column_uncompressed_pages) .enumerate() { - if chunk.is_some() || !projection.leaf_included(idx) || cached_pages.is_some() { + if chunk.is_some() || !projection.leaf_included(idx) || row_group_pages.is_some() { continue; } - if let Some(data) = chunk_data.next() { - *chunk = Some(Arc::new(ColumnChunkData::Dense { - offset: self.metadata.column(idx).byte_range().0 as usize, - data, - })); + // Get the fetched page. + let Some(data) = chunk_data.next() else { + continue; + }; + + let column = self.metadata.column(idx); + if let Some(cache) = &self.cache_manager { + if !cache_uncompressed_pages(column) { + // For columns that have multiple uncompressed pages, we only cache the compressed page + // to save memory. + let page_key = PageKey::new_compressed( + self.region_id, + self.file_id, + self.row_group_idx, + idx, + ); + cache + .put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone()))); + } } + + *chunk = Some(Arc::new(ColumnChunkData::Dense { + offset: column.byte_range().0 as usize, + data, + })); } } @@ -207,20 +234,42 @@ impl<'a> InMemoryRowGroup<'a> { } /// Fetches pages for columns if cache is enabled. + /// If the page is in the cache, sets the column chunk or `column_uncompressed_pages` for the column. 
fn fetch_pages_from_cache(&mut self, projection: &ProjectionMask) { + let _timer = READ_STAGE_FETCH_PAGES.start_timer(); self.column_chunks - .iter() + .iter_mut() .enumerate() - .filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx)) - .for_each(|(idx, _chunk)| { - if let Some(cache) = &self.cache_manager { - let page_key = PageKey { - region_id: self.region_id, - file_id: self.file_id, - row_group_idx: self.row_group_idx, - column_idx: idx, - }; - self.column_cached_pages[idx] = cache.get_pages(&page_key); + .filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx)) + .for_each(|(idx, chunk)| { + let Some(cache) = &self.cache_manager else { + return; + }; + let column = self.metadata.column(idx); + if cache_uncompressed_pages(column) { + // Fetches uncompressed pages for the row group. + let page_key = PageKey::new_uncompressed( + self.region_id, + self.file_id, + self.row_group_idx, + idx, + ); + self.column_uncompressed_pages[idx] = cache.get_pages(&page_key); + } else { + // Fetches the compressed page from the cache. + let page_key = PageKey::new_compressed( + self.region_id, + self.file_id, + self.row_group_idx, + idx, + ); + + *chunk = cache.get_pages(&page_key).map(|page_value| { + Arc::new(ColumnChunkData::Dense { + offset: column.byte_range().0 as usize, + data: page_value.compressed.clone(), + }) + }); } }); } @@ -259,12 +308,12 @@ impl<'a> InMemoryRowGroup<'a> { /// Creates a page reader to read column at `i`. fn column_page_reader(&self, i: usize) -> Result> { - if let Some(cached_pages) = &self.column_cached_pages[i] { - // Already in cache. - return Ok(Box::new(CachedPageReader::new(&cached_pages.pages))); + if let Some(cached_pages) = &self.column_uncompressed_pages[i] { + debug_assert!(!cached_pages.row_group.is_empty()); + // Hits the row group level page cache. + return Ok(Box::new(RowGroupCachedReader::new(&cached_pages.row_group))); } - // Cache miss. 
let page_reader = match &self.column_chunks[i] { None => { return Err(ParquetError::General(format!( @@ -283,25 +332,34 @@ impl<'a> InMemoryRowGroup<'a> { }; let Some(cache) = &self.cache_manager else { - // Cache is disabled. return Ok(Box::new(page_reader)); }; - // We collect all pages and put them into the cache. - let pages = page_reader.collect::>>()?; - let page_value = Arc::new(PageValue::new(pages)); - let page_key = PageKey { - region_id: self.region_id, - file_id: self.file_id, - row_group_idx: self.row_group_idx, - column_idx: i, - }; - cache.put_pages(page_key, page_value.clone()); + let column = self.metadata.column(i); + if cache_uncompressed_pages(column) { + // This column use row group level page cache. + // We collect all pages and put them into the cache. + let pages = page_reader.collect::>>()?; + let page_value = Arc::new(PageValue::new_row_group(pages)); + let page_key = + PageKey::new_uncompressed(self.region_id, self.file_id, self.row_group_idx, i); + cache.put_pages(page_key, page_value.clone()); - Ok(Box::new(CachedPageReader::new(&page_value.pages))) + return Ok(Box::new(RowGroupCachedReader::new(&page_value.row_group))); + } + + // This column don't cache uncompressed pages. + Ok(Box::new(page_reader)) } } +/// Returns whether we cache uncompressed pages for the column. +fn cache_uncompressed_pages(column: &ColumnChunkMetaData) -> bool { + // If the row group only has a data page, cache the whole row group as + // it might be faster than caching a compressed page. 
+ column.uncompressed_size() as usize <= DEFAULT_PAGE_SIZE +} + impl<'a> RowGroups for InMemoryRowGroup<'a> { fn num_rows(&self) -> usize { self.row_count @@ -318,7 +376,7 @@ impl<'a> RowGroups for InMemoryRowGroup<'a> { /// An in-memory column chunk #[derive(Clone)] -enum ColumnChunkData { +pub(crate) enum ColumnChunkData { /// Column chunk data representing only a subset of data pages Sparse { /// Length of the full column chunk diff --git a/src/operator/src/delete.rs b/src/operator/src/delete.rs index ac78350a50..756195c83a 100644 --- a/src/operator/src/delete.rs +++ b/src/operator/src/delete.rs @@ -232,7 +232,7 @@ impl Deleter { async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result { self.catalog_manager - .table(catalog, schema, table) + .table(catalog, schema, table, None) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { diff --git a/src/operator/src/insert.rs b/src/operator/src/insert.rs index 018021f471..c56e51c7a3 100644 --- a/src/operator/src/insert.rs +++ b/src/operator/src/insert.rs @@ -32,7 +32,6 @@ use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE}; use common_query::Output; use common_telemetry::tracing_context::TracingContext; use common_telemetry::{error, info, warn}; -use datatypes::schema::Schema; use futures_util::future; use meter_macros::write_meter; use partition::manager::PartitionRuleManagerRef; @@ -45,7 +44,7 @@ use store_api::metric_engine_consts::{ }; use store_api::mito_engine_options::{APPEND_MODE_KEY, MERGE_MODE_KEY}; use store_api::storage::{RegionId, TableId}; -use table::requests::{InsertRequest as TableInsertRequest, TTL_KEY}; +use table::requests::{InsertRequest as TableInsertRequest, AUTO_CREATE_TABLE_KEY, TTL_KEY}; use table::table_reference::TableReference; use table::TableRef; @@ -462,21 +461,49 @@ impl Inserter { auto_create_table_type: AutoCreateTableType, statement_executor: &StatementExecutor, ) -> Result> { - let mut table_name_to_ids = 
HashMap::with_capacity(requests.inserts.len()); - let mut create_tables = vec![]; - let mut alter_tables = vec![]; let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND .with_label_values(&[auto_create_table_type.as_str()]) .start_timer(); + + let catalog = ctx.current_catalog(); + let schema = ctx.current_schema(); + let mut table_name_to_ids = HashMap::with_capacity(requests.inserts.len()); + // If `auto_create_table` hint is disabled, skip creating/altering tables. + let auto_create_table_hint = ctx + .extension(AUTO_CREATE_TABLE_KEY) + .map(|v| v.parse::()) + .transpose() + .map_err(|_| { + InvalidInsertRequestSnafu { + reason: "`auto_create_table` hint must be a boolean", + } + .build() + })? + .unwrap_or(true); + if !auto_create_table_hint { + for req in &requests.inserts { + let table = self + .get_table(catalog, &schema, &req.table_name) + .await? + .context(InvalidInsertRequestSnafu { + reason: format!( + "Table `{}` does not exist, and `auto_create_table` hint is disabled", + req.table_name + ), + })?; + let table_info = table.table_info(); + table_name_to_ids.insert(table_info.name.clone(), table_info.table_id()); + } + return Ok(table_name_to_ids); + } + + let mut create_tables = vec![]; + let mut alter_tables = vec![]; for req in &requests.inserts { - let catalog = ctx.current_catalog(); - let schema = ctx.current_schema(); - let table = self.get_table(catalog, &schema, &req.table_name).await?; - match table { + match self.get_table(catalog, &schema, &req.table_name).await? { Some(table) => { let table_info = table.table_info(); table_name_to_ids.insert(table_info.name.clone(), table_info.table_id()); - validate_request_with_table(req, &table)?; if let Some(alter_expr) = self.get_alter_table_expr_on_demand(req, table, ctx)? 
{ @@ -536,6 +563,7 @@ impl Inserter { } } } + Ok(table_name_to_ids) } @@ -608,7 +636,7 @@ impl Inserter { table: &str, ) -> Result> { self.catalog_manager - .table(catalog, schema, table) + .table(catalog, schema, table, None) .await .context(CatalogSnafu) } @@ -796,87 +824,9 @@ fn validate_column_count_match(requests: &RowInsertRequests) -> Result<()> { Ok(()) } -fn validate_request_with_table(req: &RowInsertRequest, table: &TableRef) -> Result<()> { - let request_schema = req.rows.as_ref().unwrap().schema.as_slice(); - let table_schema = table.schema(); - - validate_required_columns(request_schema, &table_schema)?; - - Ok(()) -} - -fn validate_required_columns(request_schema: &[ColumnSchema], table_schema: &Schema) -> Result<()> { - for column_schema in table_schema.column_schemas() { - if column_schema.is_nullable() || column_schema.default_constraint().is_some() { - continue; - } - if !request_schema - .iter() - .any(|c| c.column_name == column_schema.name) - { - return InvalidInsertRequestSnafu { - reason: format!( - "Expecting insert data to be presented on a not null or no default value column '{}'.", - &column_schema.name - ) - }.fail(); - } - } - Ok(()) -} - fn build_create_table_expr( table: &TableReference, request_schema: &[ColumnSchema], ) -> Result { CreateExprFactory.create_table_expr_by_column_schemas(table, request_schema, default_engine()) } - -#[cfg(test)] -mod tests { - use datatypes::prelude::{ConcreteDataType, Value as DtValue}; - use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema as DtColumnSchema}; - - use super::*; - - #[test] - fn test_validate_required_columns() { - let schema = Schema::new(vec![ - DtColumnSchema::new("a", ConcreteDataType::int32_datatype(), true) - .with_default_constraint(None) - .unwrap(), - DtColumnSchema::new("b", ConcreteDataType::int32_datatype(), true) - .with_default_constraint(Some(ColumnDefaultConstraint::Value(DtValue::Int32(100)))) - .unwrap(), - ]); - let request_schema = &[ColumnSchema { - 
column_name: "c".to_string(), - ..Default::default() - }]; - // If nullable is true, it doesn't matter whether the insert request has the column. - validate_required_columns(request_schema, &schema).unwrap(); - - let schema = Schema::new(vec![ - DtColumnSchema::new("a", ConcreteDataType::int32_datatype(), false) - .with_default_constraint(None) - .unwrap(), - DtColumnSchema::new("b", ConcreteDataType::int32_datatype(), false) - .with_default_constraint(Some(ColumnDefaultConstraint::Value(DtValue::Int32(-100)))) - .unwrap(), - ]); - let request_schema = &[ColumnSchema { - column_name: "a".to_string(), - ..Default::default() - }]; - // If nullable is false, but the column is defined with default value, - // it also doesn't matter whether the insert request has the column. - validate_required_columns(request_schema, &schema).unwrap(); - - let request_schema = &[ColumnSchema { - column_name: "b".to_string(), - ..Default::default() - }]; - // Neither of the above cases. - assert!(validate_required_columns(request_schema, &schema).is_err()); - } -} diff --git a/src/operator/src/req_convert/delete/row_to_region.rs b/src/operator/src/req_convert/delete/row_to_region.rs index 1b1316c904..d04659c6c8 100644 --- a/src/operator/src/req_convert/delete/row_to_region.rs +++ b/src/operator/src/req_convert/delete/row_to_region.rs @@ -64,7 +64,7 @@ impl<'a> RowToRegion<'a> { let catalog_name = self.ctx.current_catalog(); let schema_name = self.ctx.current_schema(); self.catalog_manager - .table(catalog_name, &schema_name, table_name) + .table(catalog_name, &schema_name, table_name, None) .await .context(CatalogSnafu)? 
.with_context(|| TableNotFoundSnafu { diff --git a/src/operator/src/req_convert/insert/stmt_to_region.rs b/src/operator/src/req_convert/insert/stmt_to_region.rs index 37d55e6c9e..8124edc195 100644 --- a/src/operator/src/req_convert/insert/stmt_to_region.rs +++ b/src/operator/src/req_convert/insert/stmt_to_region.rs @@ -139,7 +139,7 @@ impl<'a> StatementToRegion<'a> { async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result { self.catalog_manager - .table(catalog, schema, table) + .table(catalog, schema, table, None) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { diff --git a/src/operator/src/request.rs b/src/operator/src/request.rs index 64a6a75c31..0c1db682c3 100644 --- a/src/operator/src/request.rs +++ b/src/operator/src/request.rs @@ -219,7 +219,7 @@ impl Requester { ) -> Result> { let table = self .catalog_manager - .table(catalog, schema, table_name) + .table(catalog, schema, table_name, None) .await .context(CatalogSnafu)?; diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index a2f0251495..35e6752d08 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -286,7 +286,7 @@ impl StatementExecutor { let table_ref = self .catalog_manager - .table(&catalog, &schema, &table) + .table(&catalog, &schema, &table, Some(&query_ctx)) .await .context(CatalogSnafu)? .context(TableNotFoundSnafu { table_name: &table })?; @@ -313,7 +313,7 @@ impl StatementExecutor { let catalog = query_ctx.current_catalog(); ensure!( self.catalog_manager - .schema_exists(catalog, db.as_ref()) + .schema_exists(catalog, db.as_ref(), Some(&query_ctx)) .await .context(CatalogSnafu)?, SchemaNotFoundSnafu { schema_info: &db } @@ -382,7 +382,7 @@ impl StatementExecutor { table, } = table_ref; self.catalog_manager - .table(catalog, schema, table) + .table(catalog, schema, table, None) .await .context(CatalogSnafu)? 
.with_context(|| TableNotFoundSnafu { diff --git a/src/operator/src/statement/copy_database.rs b/src/operator/src/statement/copy_database.rs index 134dd23559..662c2a9fbf 100644 --- a/src/operator/src/statement/copy_database.rs +++ b/src/operator/src/statement/copy_database.rs @@ -57,7 +57,7 @@ impl StatementExecutor { ); let table_names = self .catalog_manager - .table_names(&req.catalog_name, &req.schema_name) + .table_names(&req.catalog_name, &req.schema_name, Some(&ctx)) .await .context(CatalogSnafu)?; diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index afef6d590d..aa1a070875 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -106,7 +106,7 @@ impl StatementExecutor { .context(error::ExternalSnafu)?; let table_ref = self .catalog_manager - .table(&catalog, &schema, &table) + .table(&catalog, &schema, &table, Some(&ctx)) .await .context(CatalogSnafu)? .context(TableNotFoundSnafu { table_name: &table })?; @@ -207,6 +207,7 @@ impl StatementExecutor { &create_table.catalog_name, &create_table.schema_name, &create_table.table_name, + Some(&query_ctx), ) .await .context(CatalogSnafu)? @@ -487,7 +488,12 @@ impl StatementExecutor { // if view or table exists. if let Some(table) = self .catalog_manager - .table(&expr.catalog_name, &expr.schema_name, &expr.view_name) + .table( + &expr.catalog_name, + &expr.schema_name, + &expr.view_name, + Some(&ctx), + ) .await .context(CatalogSnafu)? { @@ -656,7 +662,7 @@ impl StatementExecutor { ) -> Result { let view_info = if let Some(view) = self .catalog_manager - .table(&catalog, &schema, &view) + .table(&catalog, &schema, &view, None) .await .context(CatalogSnafu)? { @@ -766,6 +772,7 @@ impl StatementExecutor { &table_name.catalog_name, &table_name.schema_name, &table_name.table_name, + Some(&query_context), ) .await .context(CatalogSnafu)? 
@@ -816,7 +823,7 @@ impl StatementExecutor { if self .catalog_manager - .schema_exists(&catalog, &schema) + .schema_exists(&catalog, &schema, None) .await .context(CatalogSnafu)? { @@ -858,6 +865,7 @@ impl StatementExecutor { &table_name.catalog_name, &table_name.schema_name, &table_name.table_name, + Some(&query_context), ) .await .context(CatalogSnafu)? @@ -944,7 +952,12 @@ impl StatementExecutor { let table = self .catalog_manager - .table(&catalog_name, &schema_name, &table_name) + .table( + &catalog_name, + &schema_name, + &table_name, + Some(&query_context), + ) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { @@ -1167,9 +1180,10 @@ impl StatementExecutor { if !self .catalog_manager - .schema_exists(catalog, database) + .schema_exists(catalog, database, None) .await .context(CatalogSnafu)? + && !self.catalog_manager.is_reserved_schema_name(database) { self.create_database_procedure( catalog.to_string(), diff --git a/src/operator/src/statement/describe.rs b/src/operator/src/statement/describe.rs index d40990e4dd..02dd58dbd4 100644 --- a/src/operator/src/statement/describe.rs +++ b/src/operator/src/statement/describe.rs @@ -39,7 +39,7 @@ impl StatementExecutor { let table = self .catalog_manager - .table(&catalog, &schema, &table) + .table(&catalog, &schema, &table, Some(&query_ctx)) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { diff --git a/src/operator/src/statement/show.rs b/src/operator/src/statement/show.rs index 5b4817226b..eb69983f01 100644 --- a/src/operator/src/statement/show.rs +++ b/src/operator/src/statement/show.rs @@ -143,7 +143,7 @@ impl StatementExecutor { let table_ref = self .catalog_manager - .table(&catalog, &schema, &view) + .table(&catalog, &schema, &view, Some(&query_ctx)) .await .context(CatalogSnafu)? 
.context(ViewNotFoundSnafu { view_name: &view })?; diff --git a/src/pipeline/src/manager/pipeline_operator.rs b/src/pipeline/src/manager/pipeline_operator.rs index 049cd80b45..2e838144a4 100644 --- a/src/pipeline/src/manager/pipeline_operator.rs +++ b/src/pipeline/src/manager/pipeline_operator.rs @@ -110,7 +110,12 @@ impl PipelineOperator { // exist in catalog, just open if let Some(table) = self .catalog_manager - .table(&expr.catalog_name, &expr.schema_name, &expr.table_name) + .table( + &expr.catalog_name, + &expr.schema_name, + &expr.table_name, + Some(&ctx), + ) .await .context(CatalogSnafu)? { @@ -130,7 +135,7 @@ impl PipelineOperator { // get from catalog let table = self .catalog_manager - .table(catalog, schema, table_name) + .table(catalog, schema, table_name, Some(&ctx)) .await .context(CatalogSnafu)? .context(PipelineTableNotFoundSnafu)?; diff --git a/src/query/src/datafusion.rs b/src/query/src/datafusion.rs index 907b14c20d..03eadfde97 100644 --- a/src/query/src/datafusion.rs +++ b/src/query/src/datafusion.rs @@ -116,7 +116,7 @@ impl DatafusionQueryEngine { let default_catalog = &query_ctx.current_catalog().to_owned(); let default_schema = &query_ctx.current_schema(); let table_name = dml.table_name.resolve(default_catalog, default_schema); - let table = self.find_table(&table_name).await?; + let table = self.find_table(&table_name, &query_ctx).await?; let output = self .exec_query_plan(LogicalPlan::DfPlan((*dml.input).clone()), query_ctx.clone()) @@ -241,14 +241,18 @@ impl DatafusionQueryEngine { .context(TableMutationSnafu) } - async fn find_table(&self, table_name: &ResolvedTableReference) -> Result { + async fn find_table( + &self, + table_name: &ResolvedTableReference, + query_context: &QueryContextRef, + ) -> Result { let catalog_name = table_name.catalog.as_ref(); let schema_name = table_name.schema.as_ref(); let table_name = table_name.table.as_ref(); self.state .catalog_manager() - .table(catalog_name, schema_name, table_name) + 
.table(catalog_name, schema_name, table_name, Some(query_context)) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { table: table_name }) @@ -529,7 +533,7 @@ mod tests { use datatypes::prelude::ConcreteDataType; use datatypes::schema::ColumnSchema; use datatypes::vectors::{Helper, UInt32Vector, UInt64Vector, VectorRef}; - use session::context::QueryContext; + use session::context::{QueryContext, QueryContextBuilder}; use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME}; use super::*; @@ -618,12 +622,16 @@ mod tests { .as_any() .downcast_ref::() .unwrap(); + let query_ctx = Arc::new(QueryContextBuilder::default().build()); let table = engine - .find_table(&ResolvedTableReference { - catalog: "greptime".into(), - schema: "public".into(), - table: "numbers".into(), - }) + .find_table( + &ResolvedTableReference { + catalog: "greptime".into(), + schema: "public".into(), + table: "numbers".into(), + }, + &query_ctx, + ) .await .unwrap(); diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs index 3c66efeb18..c3e8e1f544 100644 --- a/src/query/src/datafusion/planner.rs +++ b/src/query/src/datafusion/planner.rs @@ -61,7 +61,7 @@ impl DfContextProviderAdapter { let mut table_provider = DfTableSourceProvider::new( engine_state.catalog_manager().clone(), engine_state.disallow_cross_catalog_query(), - query_ctx.as_ref(), + query_ctx.clone(), Arc::new(DefaultPlanDecoder::new(session_state.clone(), &query_ctx)?), session_state .config_options() diff --git a/src/query/src/dist_plan/merge_scan.rs b/src/query/src/dist_plan/merge_scan.rs index c8a4ebcc77..a3fb8004cf 100644 --- a/src/query/src/dist_plan/merge_scan.rs +++ b/src/query/src/dist_plan/merge_scan.rs @@ -156,20 +156,22 @@ impl MergeScanExec { query_ctx: QueryContextRef, target_partition: usize, ) -> Result { - let arrow_schema_without_metadata = Self::arrow_schema_without_metadata(arrow_schema); + // TODO(CookiePieWw): Initially we removed the metadata from the 
schema in #2000, but we have to + // keep it for #4619 to identify json type in src/datatypes/src/schema/column_schema.rs. + // Reconsider if it's possible to remove it. + let arrow_schema = Arc::new(arrow_schema.clone()); let properties = PlanProperties::new( - EquivalenceProperties::new(arrow_schema_without_metadata.clone()), + EquivalenceProperties::new(arrow_schema.clone()), Partitioning::UnknownPartitioning(target_partition), ExecutionMode::Bounded, ); - let schema_without_metadata = - Self::arrow_schema_to_schema(arrow_schema_without_metadata.clone())?; + let schema = Self::arrow_schema_to_schema(arrow_schema.clone())?; Ok(Self { table, regions, plan, - schema: schema_without_metadata, - arrow_schema: arrow_schema_without_metadata, + schema, + arrow_schema, region_query_handler, metric: ExecutionPlanMetricsSet::new(), sub_stage_metrics: Arc::default(), @@ -288,20 +290,6 @@ impl MergeScanExec { })) } - fn arrow_schema_without_metadata(arrow_schema: &ArrowSchema) -> ArrowSchemaRef { - Arc::new(ArrowSchema::new( - arrow_schema - .fields() - .iter() - .map(|field| { - let field = field.as_ref().clone(); - let field_without_metadata = field.with_metadata(Default::default()); - Arc::new(field_without_metadata) - }) - .collect::>(), - )) - } - fn arrow_schema_to_schema(arrow_schema: ArrowSchemaRef) -> Result { let schema = Schema::try_from(arrow_schema).context(ConvertSchemaSnafu)?; Ok(Arc::new(schema)) diff --git a/src/query/src/dist_plan/planner.rs b/src/query/src/dist_plan/planner.rs index 73168ff1bd..a94a798461 100644 --- a/src/query/src/dist_plan/planner.rs +++ b/src/query/src/dist_plan/planner.rs @@ -128,6 +128,7 @@ impl DistExtensionPlanner { &table_name.catalog_name, &table_name.schema_name, &table_name.table_name, + None, ) .await .context(CatalogSnafu)? 
diff --git a/src/query/src/planner.rs b/src/query/src/planner.rs index d59ee8a72e..4c09860335 100644 --- a/src/query/src/planner.rs +++ b/src/query/src/planner.rs @@ -68,7 +68,7 @@ impl DfLogicalPlanner { let table_provider = DfTableSourceProvider::new( self.engine_state.catalog_manager().clone(), self.engine_state.disallow_cross_catalog_query(), - query_ctx.as_ref(), + query_ctx.clone(), Arc::new(DefaultPlanDecoder::new( self.session_state.clone(), &query_ctx, @@ -144,14 +144,15 @@ impl DfLogicalPlanner { #[tracing::instrument(skip_all)] async fn plan_pql(&self, stmt: EvalStmt, query_ctx: QueryContextRef) -> Result { + let plan_decoder = Arc::new(DefaultPlanDecoder::new( + self.session_state.clone(), + &query_ctx, + )?); let table_provider = DfTableSourceProvider::new( self.engine_state.catalog_manager().clone(), self.engine_state.disallow_cross_catalog_query(), - query_ctx.as_ref(), - Arc::new(DefaultPlanDecoder::new( - self.session_state.clone(), - &query_ctx, - )?), + query_ctx, + plan_decoder, self.session_state .config_options() .sql_parser diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index e42449d0dc..ad00c900c8 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -2379,7 +2379,7 @@ mod test { DfTableSourceProvider::new( catalog_list, false, - QueryContext::arc().as_ref(), + QueryContext::arc(), DummyDecoder::arc(), false, ) @@ -3219,7 +3219,7 @@ mod test { DfTableSourceProvider::new( catalog_list.clone(), false, - QueryContext::arc().as_ref(), + QueryContext::arc(), DummyDecoder::arc(), true, ), @@ -3249,7 +3249,7 @@ mod test { DfTableSourceProvider::new( catalog_list.clone(), false, - QueryContext::arc().as_ref(), + QueryContext::arc(), DummyDecoder::arc(), true, ), diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 8b0c09bb62..ca79ef7416 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -232,6 +232,7 @@ async fn query_from_information_schema_table( 
query_ctx.current_catalog(), INFORMATION_SCHEMA_NAME, table_name, + Some(&query_ctx), ) .await .context(error::CatalogSnafu)? diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 5abd1466bb..626fdaa404 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -58,6 +58,7 @@ humantime-serde.workspace = true hyper = { version = "0.14", features = ["full"] } influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" } itertools.workspace = true +jsonb.workspace = true lazy_static.workspace = true mime_guess = "2.0" notify.workspace = true @@ -70,7 +71,7 @@ parking_lot = "0.12" pgwire = "0.20" pin-project = "1.0" pipeline.workspace = true -postgres-types = { version = "0.2", features = ["with-chrono-0_4"] } +postgres-types = { version = "0.2", features = ["with-chrono-0_4", "with-serde_json-1"] } pprof = { version = "0.13", features = [ "flamegraph", "prost-codec", diff --git a/src/servers/src/http/prometheus.rs b/src/servers/src/http/prometheus.rs index 2aef58c48c..941cac2539 100644 --- a/src/servers/src/http/prometheus.rs +++ b/src/servers/src/http/prometheus.rs @@ -405,11 +405,11 @@ async fn get_all_column_names( schema: &str, manager: &CatalogManagerRef, ) -> std::result::Result, catalog::error::Error> { - let table_names = manager.table_names(catalog, schema).await?; + let table_names = manager.table_names(catalog, schema, None).await?; let mut labels = HashSet::new(); for table_name in table_names { - let Some(table) = manager.table(catalog, schema, &table_name).await? else { + let Some(table) = manager.table(catalog, schema, &table_name, None).await? else { continue; }; for column in table.primary_key_columns() { @@ -436,6 +436,7 @@ async fn retrieve_series_from_query_result( query_ctx.current_catalog(), &query_ctx.current_schema(), table_name, + Some(query_ctx), ) .await .context(CatalogSnafu)? 
@@ -691,7 +692,7 @@ pub async fn label_values_query( if label_name == METRIC_NAME_LABEL { let mut table_names = match handler .catalog_manager() - .table_names(&catalog, &schema) + .table_names(&catalog, &schema, Some(&query_ctx)) .await { Ok(table_names) => table_names, @@ -777,7 +778,11 @@ async fn retrieve_field_names( if matches.is_empty() { // query all tables if no matcher is provided - while let Some(table) = manager.tables(catalog, &schema).next().await { + while let Some(table) = manager + .tables(catalog, &schema, Some(query_ctx)) + .next() + .await + { let table = table.context(CatalogSnafu)?; for column in table.field_columns() { field_columns.insert(column.name); @@ -788,7 +793,7 @@ async fn retrieve_field_names( for table_name in matches { let table = manager - .table(catalog, &schema, &table_name) + .table(catalog, &schema, &table_name, Some(query_ctx)) .await .context(CatalogSnafu)? .with_context(|| TableNotFoundSnafu { diff --git a/src/servers/src/mysql/writer.rs b/src/servers/src/mysql/writer.rs index bf4d967aa5..d957edaa55 100644 --- a/src/servers/src/mysql/writer.rs +++ b/src/servers/src/mysql/writer.rs @@ -168,6 +168,7 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { &mut row_writer, &record_batch, query_context.clone(), + &column_def, ) .await? 
} @@ -191,9 +192,10 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { row_writer: &mut RowWriter<'_, W>, recordbatch: &RecordBatch, query_context: QueryContextRef, + column_def: &[Column], ) -> Result<()> { for row in recordbatch.rows() { - for value in row.into_iter() { + for (value, column) in row.into_iter().zip(column_def.iter()) { match value { Value::Null => row_writer.write_col(None::)?, Value::Boolean(v) => row_writer.write_col(v as i8)?, @@ -208,7 +210,14 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { Value::Float32(v) => row_writer.write_col(v.0)?, Value::Float64(v) => row_writer.write_col(v.0)?, Value::String(v) => row_writer.write_col(v.as_utf8())?, - Value::Binary(v) => row_writer.write_col(v.deref())?, + Value::Binary(v) => match column.coltype { + ColumnType::MYSQL_TYPE_JSON => { + row_writer.write_col(jsonb::to_string(&v))?; + } + _ => { + row_writer.write_col(v.deref())?; + } + }, Value::Date(v) => row_writer.write_col(v.to_chrono_date())?, // convert datetime and timestamp to timezone of current connection Value::DateTime(v) => row_writer.write_col( @@ -281,6 +290,7 @@ pub(crate) fn create_mysql_column( ConcreteDataType::Interval(_) => Ok(ColumnType::MYSQL_TYPE_VARCHAR), ConcreteDataType::Duration(_) => Ok(ColumnType::MYSQL_TYPE_TIME), ConcreteDataType::Decimal128(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL), + ConcreteDataType::Json(_) => Ok(ColumnType::MYSQL_TYPE_JSON), _ => error::UnsupportedDataTypeSnafu { data_type, reason: "not implemented", diff --git a/src/servers/src/postgres.rs b/src/servers/src/postgres.rs index 42683ff680..36f6730b43 100644 --- a/src/servers/src/postgres.rs +++ b/src/servers/src/postgres.rs @@ -48,7 +48,7 @@ pub(crate) struct GreptimeDBStartupParameters { impl GreptimeDBStartupParameters { fn new() -> GreptimeDBStartupParameters { GreptimeDBStartupParameters { - version: format!("16.3-greptime-{}", env!("CARGO_PKG_VERSION")), + version: format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION")), } 
} } diff --git a/src/servers/src/postgres/handler.rs b/src/servers/src/postgres/handler.rs index 5d0c041cf2..190684ed34 100644 --- a/src/servers/src/postgres/handler.rs +++ b/src/servers/src/postgres/handler.rs @@ -150,8 +150,8 @@ where .map(move |row| { row.and_then(|row| { let mut encoder = DataRowEncoder::new(pg_schema_ref.clone()); - for value in row.iter() { - encode_value(&query_ctx, value, &mut encoder)?; + for (value, column) in row.iter().zip(schema.column_schemas()) { + encode_value(&query_ctx, value, &mut encoder, &column.data_type)?; } encoder.finish() }) diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index 6d47f65183..2bec6c2999 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -62,6 +62,7 @@ pub(super) fn encode_value( query_ctx: &QueryContextRef, value: &Value, builder: &mut DataRowEncoder, + datatype: &ConcreteDataType, ) -> PgWireResult<()> { match value { Value::Null => builder.encode_field(&None::<&i8>), @@ -77,13 +78,18 @@ pub(super) fn encode_value( Value::Float32(v) => builder.encode_field(&v.0), Value::Float64(v) => builder.encode_field(&v.0), Value::String(v) => builder.encode_field(&v.as_utf8()), - Value::Binary(v) => { - let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); - match *bytea_output { - PGByteaOutputValue::ESCAPE => builder.encode_field(&EscapeOutputBytea(v.deref())), - PGByteaOutputValue::HEX => builder.encode_field(&HexOutputBytea(v.deref())), + Value::Binary(v) => match datatype { + ConcreteDataType::Json(_) => builder.encode_field(&jsonb::to_string(v)), + _ => { + let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); + match *bytea_output { + PGByteaOutputValue::ESCAPE => { + builder.encode_field(&EscapeOutputBytea(v.deref())) + } + PGByteaOutputValue::HEX => builder.encode_field(&HexOutputBytea(v.deref())), + } } - } + }, Value::Date(v) => { if let Some(date) = v.to_chrono_date() { let (style, 
order) = *query_ctx.configuration_parameter().pg_datetime_style(); @@ -154,6 +160,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { &ConcreteDataType::Time(_) => Ok(Type::TIME), &ConcreteDataType::Interval(_) => Ok(Type::INTERVAL), &ConcreteDataType::Decimal128(_) => Ok(Type::NUMERIC), + &ConcreteDataType::Json(_) => Ok(Type::JSON), &ConcreteDataType::Duration(_) | &ConcreteDataType::List(_) | &ConcreteDataType::Dictionary(_) => server_error::UnsupportedDataTypeSnafu { @@ -549,6 +556,23 @@ pub(super) fn parameters_to_scalar_values( } } } + &Type::JSONB => { + let data = portal.parameter::(idx, &client_type)?; + match server_type { + ConcreteDataType::Binary(_) => { + ScalarValue::Binary(data.map(|d| jsonb::Value::from(d).to_vec())) + } + _ => { + return Err(invalid_parameter_error( + "invalid_parameter_type", + Some(&format!( + "Expected: {}, found: {}", + server_type, client_type + )), + )); + } + } + } _ => Err(invalid_parameter_error( "unsupported_parameter_value", Some(&format!("Found type: {}", client_type)), @@ -581,6 +605,8 @@ pub(super) fn param_types_to_pg_types( mod test { use std::sync::Arc; + use common_time::interval::IntervalUnit; + use common_time::timestamp::TimeUnit; use datatypes::schema::{ColumnSchema, Schema}; use datatypes::value::ListValue; use pgwire::api::results::{FieldFormat, FieldInfo}; @@ -778,6 +804,35 @@ mod test { ), ]; + let datatypes = vec![ + ConcreteDataType::null_datatype(), + ConcreteDataType::boolean_datatype(), + ConcreteDataType::uint8_datatype(), + ConcreteDataType::uint16_datatype(), + ConcreteDataType::uint32_datatype(), + ConcreteDataType::uint64_datatype(), + ConcreteDataType::int8_datatype(), + ConcreteDataType::int8_datatype(), + ConcreteDataType::int16_datatype(), + ConcreteDataType::int16_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int64_datatype(), + ConcreteDataType::int64_datatype(), + ConcreteDataType::float32_datatype(), + 
ConcreteDataType::float32_datatype(), + ConcreteDataType::float32_datatype(), + ConcreteDataType::float64_datatype(), + ConcreteDataType::float64_datatype(), + ConcreteDataType::float64_datatype(), + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ConcreteDataType::date_datatype(), + ConcreteDataType::time_datatype(TimeUnit::Second), + ConcreteDataType::datetime_datatype(), + ConcreteDataType::timestamp_datatype(TimeUnit::Second), + ConcreteDataType::interval_datatype(IntervalUnit::YearMonth), + ]; let values = vec![ Value::Null, Value::Boolean(true), @@ -812,14 +867,15 @@ mod test { .build() .into(); let mut builder = DataRowEncoder::new(Arc::new(schema)); - for i in values.iter() { - encode_value(&query_context, i, &mut builder).unwrap(); + for (value, datatype) in values.iter().zip(datatypes) { + encode_value(&query_context, value, &mut builder, &datatype).unwrap(); } let err = encode_value( &query_context, &Value::List(ListValue::new(vec![], ConcreteDataType::int16_datatype())), &mut builder, + &ConcreteDataType::list_datatype(ConcreteDataType::int16_datatype()), ) .unwrap_err(); match err { diff --git a/src/session/src/context.rs b/src/session/src/context.rs index 28ecca6a3f..70168d9498 100644 --- a/src/session/src/context.rs +++ b/src/session/src/context.rs @@ -261,6 +261,7 @@ impl QueryContext { impl QueryContextBuilder { pub fn build(self) -> QueryContext { + let channel = self.channel.unwrap_or_default(); QueryContext { current_catalog: self .current_catalog @@ -270,8 +271,10 @@ impl QueryContextBuilder { .sql_dialect .unwrap_or_else(|| Arc::new(GreptimeDbDialect {})), extensions: self.extensions.unwrap_or_default(), - configuration_parameter: self.configuration_parameter.unwrap_or_default(), - channel: self.channel.unwrap_or_default(), + configuration_parameter: self + .configuration_parameter + .unwrap_or_else(|| Arc::new(ConfigurationVariables::default())), + channel, } } diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml 
index a9ed77e8ea..e459c1d015 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -26,6 +26,7 @@ datatypes.workspace = true hex = "0.4" iso8601 = "0.6.1" itertools.workspace = true +jsonb.workspace = true lazy_static.workspace = true regex.workspace = true serde_json.workspace = true diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index a042473503..30af7ae517 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -124,6 +124,16 @@ fn parse_string_to_value( } } ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())), + ConcreteDataType::Json(_) => { + if let Ok(json) = jsonb::parse_value(s.as_bytes()) { + Ok(Value::Binary(json.to_vec().into())) + } else { + ParseSqlValueSnafu { + msg: format!("Failed to parse {s} to Json value"), + } + .fail() + } + } _ => { unreachable!() } @@ -250,7 +260,19 @@ pub fn sql_value_to_value( SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => { parse_string_to_value(column_name, s.clone(), data_type, timezone)? } - SqlValue::HexStringLiteral(s) => parse_hex_string(s)?, + SqlValue::HexStringLiteral(s) => { + // Should not directly write binary into json column + ensure!( + !matches!(data_type, ConcreteDataType::Json(_)), + ColumnTypeMismatchSnafu { + column_name, + expect: ConcreteDataType::binary_datatype(), + actual: ConcreteDataType::json_datatype(), + } + ); + + parse_hex_string(s)? 
+ } SqlValue::Placeholder(s) => return InvalidSqlValueSnafu { value: s }.fail(), // TODO(dennis): supports binary string @@ -571,6 +593,7 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result Ok(ConcreteDataType::json_datatype()), _ => error::SqlTypeNotSupportedSnafu { t: data_type.clone(), } @@ -607,6 +630,7 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu ConcreteDataType::Decimal128(d) => Ok(SqlDataType::Decimal( ExactNumberInfo::PrecisionAndScale(d.precision() as u64, d.scale() as u64), )), + ConcreteDataType::Json(_) => Ok(SqlDataType::JSON), ConcreteDataType::Duration(_) | ConcreteDataType::Null(_) | ConcreteDataType::List(_) @@ -872,6 +896,35 @@ mod tests { ); assert!(v.is_err()); assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",); + + let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::json_datatype(), + &sql_val, + None, + None, + ); + assert!(v.is_err()); + + let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::json_datatype(), + &sql_val, + None, + None, + ) + .unwrap(); + assert_eq!( + Value::Binary(Bytes::from( + jsonb::parse_value(r#"{"a":"b"}"#.as_bytes()) + .unwrap() + .to_vec() + .as_slice() + )), + v + ); } #[test] @@ -1037,6 +1090,36 @@ mod tests { } } + #[test] + fn test_parse_json_to_jsonb() { + match parse_string_to_value( + "json_col", + r#"{"a": "b"}"#.to_string(), + &ConcreteDataType::json_datatype(), + None, + ) { + Ok(Value::Binary(b)) => { + assert_eq!( + b, + jsonb::parse_value(r#"{"a": "b"}"#.as_bytes()) + .unwrap() + .to_vec() + ); + } + _ => { + unreachable!() + } + } + + assert!(parse_string_to_value( + "json_col", + r#"Nicola Kovac is the best rifler in the world"#.to_string(), + &ConcreteDataType::json_datatype(), + None, + ) + .is_err()) + } + #[test] pub fn 
test_parse_column_default_constraint() { let bool_value = sqlparser::ast::Value::Boolean(true); diff --git a/src/store-api/src/mito_engine_options.rs b/src/store-api/src/mito_engine_options.rs index 9a6181e64c..e641a1d2fc 100644 --- a/src/store-api/src/mito_engine_options.rs +++ b/src/store-api/src/mito_engine_options.rs @@ -21,6 +21,8 @@ use common_wal::options::WAL_OPTIONS_KEY; pub const APPEND_MODE_KEY: &str = "append_mode"; /// Option key for merge mode. pub const MERGE_MODE_KEY: &str = "merge_mode"; +/// Option key for TTL(time-to-live) +pub const TTL_KEY: &str = "ttl"; /// Returns true if the `key` is a valid option key for the mito engine. pub fn is_mito_engine_option_key(key: &str) -> bool { diff --git a/src/table/src/requests.rs b/src/table/src/requests.rs index a00b25eacb..2aebc47055 100644 --- a/src/table/src/requests.rs +++ b/src/table/src/requests.rs @@ -83,6 +83,7 @@ pub const WRITE_BUFFER_SIZE_KEY: &str = "write_buffer_size"; pub const TTL_KEY: &str = "ttl"; pub const STORAGE_KEY: &str = "storage"; pub const COMMENT_KEY: &str = "comment"; +pub const AUTO_CREATE_TABLE_KEY: &str = "auto_create_table"; impl TableOptions { pub fn try_from_iter>( diff --git a/tests-integration/src/grpc.rs b/tests-integration/src/grpc.rs index 5975138431..fa88de07de 100644 --- a/tests-integration/src/grpc.rs +++ b/tests-integration/src/grpc.rs @@ -181,7 +181,8 @@ mod test { .table( "greptime", "database_created_through_grpc", - "table_created_through_grpc" + "table_created_through_grpc", + None, ) .await .unwrap() @@ -510,7 +511,7 @@ CREATE TABLE {table_name} ( let table = instance .frontend() .catalog_manager() - .table("greptime", "public", table_name) + .table("greptime", "public", table_name, None) .await .unwrap() .unwrap(); diff --git a/tests-integration/src/instance.rs b/tests-integration/src/instance.rs index a456f0a75d..b3f966c811 100644 --- a/tests-integration/src/instance.rs +++ b/tests-integration/src/instance.rs @@ -278,7 +278,7 @@ mod tests { assert!(instance 
.frontend() .catalog_manager() - .table("greptime", "public", "demo") + .table("greptime", "public", "demo", None) .await .unwrap() .is_none()) diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs index aefa437532..b0bc7f4c88 100644 --- a/tests-integration/src/tests/instance_test.rs +++ b/tests-integration/src/tests/instance_test.rs @@ -462,7 +462,6 @@ async fn test_execute_show_databases_tables(instance: Arc) { +--------------------+ | greptime_private | | information_schema | -| pg_catalog | | public | +--------------------+\ "; @@ -1900,7 +1899,6 @@ async fn test_show_databases(instance: Arc) { +--------------------+ | greptime_private | | information_schema | -| pg_catalog | | public | +--------------------+"; check_output_stream(output, expected).await; @@ -1914,7 +1912,6 @@ async fn test_show_databases(instance: Arc) { | Database | +--------------------+ | information_schema | -| pg_catalog | +--------------------+"; check_output_stream(output, expected).await; } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 56307e0427..fe28387cd6 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -906,6 +906,7 @@ fn drop_lines_with_inconsistent_results(input: String) -> String { "metadata_cache_size =", "content_cache_size =", "name =", + "recovery_parallelism =", ]; input diff --git a/tests-integration/tests/region_migration.rs b/tests-integration/tests/region_migration.rs index 98e10c8b2d..3f72ee0cca 100644 --- a/tests-integration/tests/region_migration.rs +++ b/tests-integration/tests/region_migration.rs @@ -1013,7 +1013,7 @@ async fn prepare_testing_metric_table(cluster: &GreptimeDbCluster) -> TableId { let table = cluster .frontend .catalog_manager() - .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy") + .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy", None) .await .unwrap() .unwrap(); @@ -1039,7 +1039,12 @@ async fn 
prepare_testing_table(cluster: &GreptimeDbCluster) -> TableId { let table = cluster .frontend .catalog_manager() - .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, TEST_TABLE_NAME) + .table( + DEFAULT_CATALOG_NAME, + DEFAULT_SCHEMA_NAME, + TEST_TABLE_NAME, + None, + ) .await .unwrap() .unwrap(); diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/sql.rs index a41968496b..1e87c54e5f 100644 --- a/tests-integration/tests/sql.rs +++ b/tests-integration/tests/sql.rs @@ -383,26 +383,42 @@ pub async fn test_postgres_crud(store_type: StorageType) { .await .unwrap(); - sqlx::query("create table demo(i bigint, ts timestamp time index, d date, dt datetime)") - .execute(&pool) - .await - .unwrap(); + sqlx::query( + "create table demo(i bigint, ts timestamp time index, d date, dt datetime, b blob, j json)", + ) + .execute(&pool) + .await + .unwrap(); for i in 0..10 { let d = NaiveDate::from_yo_opt(2015, 100).unwrap(); let dt = d.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp_millis(); + let bytes = "hello".as_bytes(); + let json = serde_json::json!({ + "code": 200, + "success": true, + "payload": { + "features": [ + "serde", + "json" + ], + "homepage": null + } + }); - sqlx::query("insert into demo values($1, $2, $3, $4)") + sqlx::query("insert into demo values($1, $2, $3, $4, $5, $6)") .bind(i) .bind(i) .bind(d) .bind(dt) + .bind(bytes) + .bind(json) .execute(&pool) .await .unwrap(); } - let rows = sqlx::query("select i,d,dt from demo") + let rows = sqlx::query("select i,d,dt,b,j from demo") .fetch_all(&pool) .await .unwrap(); @@ -412,6 +428,8 @@ pub async fn test_postgres_crud(store_type: StorageType) { let ret: i64 = row.get("i"); let d: NaiveDate = row.get("d"); let dt: NaiveDateTime = row.get("dt"); + let bytes: Vec = row.get("b"); + let json: serde_json::Value = row.get("j"); assert_eq!(ret, i as i64); @@ -422,6 +440,20 @@ pub async fn test_postgres_crud(store_type: StorageType) { .and_then(|d| d.and_hms_opt(0, 0, 0)) .unwrap(); 
assert_eq!(expected_dt, dt); + assert_eq!("hello".as_bytes(), bytes); + + let expected_j = serde_json::json!({ + "code": 200, + "success": true, + "payload": { + "features": [ + "serde", + "json" + ], + "homepage": null + } + }); + assert_eq!(json.to_string(), expected_j.to_string()); } let rows = sqlx::query("select i from demo where i=$1") diff --git a/tests/cases/distributed/explain/analyze.result b/tests/cases/distributed/explain/analyze.result index 2f3955c163..76aac4fa13 100644 --- a/tests/cases/distributed/explain/analyze.result +++ b/tests/cases/distributed/explain/analyze.result @@ -36,7 +36,7 @@ explain analyze SELECT count(*) FROM system_metrics; |_|_|_CoalescePartitionsExec REDACTED |_|_|_AggregateExec: mode=Partial, gby=[], aggr=[COUNT(system_REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 1_| +-+-+-+ diff --git a/tests/cases/standalone/common/aggregate/multi_regions.result b/tests/cases/standalone/common/aggregate/multi_regions.result index 66dcf01f40..a7c6907eaf 100644 --- a/tests/cases/standalone/common/aggregate/multi_regions.result +++ b/tests/cases/standalone/common/aggregate/multi_regions.result @@ -34,7 +34,7 @@ select sum(val) from t group by host; |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[SUM(t.val)] REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| | 1_| 1_|_ProjectionExec: expr=[SUM(t.val)@1 as SUM(t.val)] REDACTED |_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[SUM(t.val)] REDACTED @@ -43,7 +43,7 @@ select sum(val) 
from t group by host; |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[SUM(t.val)] REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 0_| +-+-+-+ @@ -66,9 +66,9 @@ select sum(val) from t; |_|_|_ProjectionExec: expr=[val@1 as val] REDACTED |_|_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| -| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 1_| +-+-+-+ @@ -95,9 +95,9 @@ select sum(val) from t group by idc; |_|_|_ProjectionExec: expr=[val@1 as val, idc@3 as idc] REDACTED |_|_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| -| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED +| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 0_| +-+-+-+ diff --git a/tests/cases/standalone/common/create/create_database.result b/tests/cases/standalone/common/create/create_database.result index f485162edd..3eafc27d6f 100644 --- a/tests/cases/standalone/common/create/create_database.result +++ b/tests/cases/standalone/common/create/create_database.result @@ -18,7 +18,6 @@ show databases; | greptime_private | | illegal-database | 
| information_schema | -| pg_catalog | | public | +--------------------+ diff --git a/tests/cases/standalone/common/create/create_database_opts.result b/tests/cases/standalone/common/create/create_database_opts.result index 93ac8bfcef..0177a92298 100644 --- a/tests/cases/standalone/common/create/create_database_opts.result +++ b/tests/cases/standalone/common/create/create_database_opts.result @@ -10,7 +10,6 @@ SHOW DATABASES; | greptime_private | | information_schema | | mydb | -| pg_catalog | | public | +--------------------+ @@ -22,7 +21,6 @@ SHOW FULL DATABASES; | greptime_private | | | information_schema | | | mydb | ttl='1h' | -| pg_catalog | | | public | | +--------------------+----------+ @@ -78,7 +76,6 @@ SHOW DATABASES; +--------------------+ | greptime_private | | information_schema | -| pg_catalog | | public | +--------------------+ diff --git a/tests/cases/standalone/common/create/create_metric_table.result b/tests/cases/standalone/common/create/create_metric_table.result index 37a59598ef..e1ea76c2bf 100644 --- a/tests/cases/standalone/common/create/create_metric_table.result +++ b/tests/cases/standalone/common/create/create_metric_table.result @@ -169,3 +169,53 @@ DROP TABLE `auT`; Affected Rows: 0 +-- append-only metric table +CREATE TABLE + phy (ts timestamp time index, val double) engine = metric +with +( + "physical_metric_table" = "", + "append_mode" = "true" +); + +Affected Rows: 0 + +CREATE TABLE t1(ts timestamp time index, val double, host string primary key) engine=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO t1 (ts, val, host) VALUES + ('2022-01-01 00:00:00', 1.23, 'example.com'), + ('2022-01-02 00:00:00', 4.56, 'example.com'), + ('2022-01-03 00:00:00', 7.89, 'example.com'), + ('2022-01-01 00:00:00', 1.23, 'example.com'), + ('2022-01-02 00:00:00', 4.56, 'example.com'), + ('2022-01-03 00:00:00', 7.89, 'example.com'); + +Affected Rows: 6 + +SELECT * FROM t1; + ++-------------+---------------------+------+ +| 
host | ts | val | ++-------------+---------------------+------+ +| example.com | 2022-01-01T00:00:00 | 1.23 | +| example.com | 2022-01-01T00:00:00 | 1.23 | +| example.com | 2022-01-02T00:00:00 | 4.56 | +| example.com | 2022-01-02T00:00:00 | 4.56 | +| example.com | 2022-01-03T00:00:00 | 7.89 | +| example.com | 2022-01-03T00:00:00 | 7.89 | ++-------------+---------------------+------+ + +DROP TABLE t1; + +Affected Rows: 0 + +DESC TABLE t1; + +Error: 4001(TableNotFound), Table not found: t1 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/create/create_metric_table.sql b/tests/cases/standalone/common/create/create_metric_table.sql index a444986e9e..749926fb17 100644 --- a/tests/cases/standalone/common/create/create_metric_table.sql +++ b/tests/cases/standalone/common/create/create_metric_table.sql @@ -59,3 +59,30 @@ CREATE TABLE `auT`( DESC TABLE `auT`; DROP TABLE `auT`; + +-- append-only metric table +CREATE TABLE + phy (ts timestamp time index, val double) engine = metric +with +( + "physical_metric_table" = "", + "append_mode" = "true" +); + +CREATE TABLE t1(ts timestamp time index, val double, host string primary key) engine=metric with ("on_physical_table" = "phy"); + +INSERT INTO t1 (ts, val, host) VALUES + ('2022-01-01 00:00:00', 1.23, 'example.com'), + ('2022-01-02 00:00:00', 4.56, 'example.com'), + ('2022-01-03 00:00:00', 7.89, 'example.com'), + ('2022-01-01 00:00:00', 1.23, 'example.com'), + ('2022-01-02 00:00:00', 4.56, 'example.com'), + ('2022-01-03 00:00:00', 7.89, 'example.com'); + +SELECT * FROM t1; + +DROP TABLE t1; + +DESC TABLE t1; + +DROP TABLE phy; diff --git a/tests/cases/standalone/common/function/system.result b/tests/cases/standalone/common/function/system.result index ba03e47f13..8abcb1e915 100644 --- a/tests/cases/standalone/common/function/system.result +++ b/tests/cases/standalone/common/function/system.result @@ -8,12 +8,12 @@ SELECT build(); 
++|build()|++|branch:BRANCH|commit:COMMIT|commit_short:COMMIT_SHORT|clean:CLEAN|version:VERSION++ --- SQLNESS REPLACE greptimedb-[\d\.]+ greptimedb-VERSION +-- SQLNESS REPLACE [\d\.]+ VERSION SELECT version(); -+-------------------------+ -| version() | -+-------------------------+ -| 5.7.20-greptimedb-VERSION | -+-------------------------+ ++-----------+ +| version() | ++-----------+ +| VERSION | ++-----------+ diff --git a/tests/cases/standalone/common/function/system.sql b/tests/cases/standalone/common/function/system.sql index d945c8baf8..368dcdc5c5 100644 --- a/tests/cases/standalone/common/function/system.sql +++ b/tests/cases/standalone/common/function/system.sql @@ -6,5 +6,5 @@ -- SQLNESS REPLACE [\s\-]+ SELECT build(); --- SQLNESS REPLACE greptimedb-[\d\.]+ greptimedb-VERSION +-- SQLNESS REPLACE [\d\.]+ VERSION SELECT version(); diff --git a/tests/cases/standalone/common/information_schema/tables.result b/tests/cases/standalone/common/information_schema/tables.result index 28416fc072..93a93a9c98 100644 --- a/tests/cases/standalone/common/information_schema/tables.result +++ b/tests/cases/standalone/common/information_schema/tables.result @@ -24,16 +24,13 @@ Affected Rows: 0 select table_catalog, table_schema, table_name from information_schema.tables where table_schema != 'information_schema'; -+---------------+--------------+--------------+ -| table_catalog | table_schema | table_name | -+---------------+--------------+--------------+ -| greptime | abc | t | -| greptime | abcde | t | -| greptime | pg_catalog | pg_class | -| greptime | pg_catalog | pg_type | -| greptime | pg_catalog | pg_namespace | -| greptime | public | numbers | -+---------------+--------------+--------------+ ++---------------+--------------+------------+ +| table_catalog | table_schema | table_name | ++---------------+--------------+------------+ +| greptime | abc | t | +| greptime | abcde | t | +| greptime | public | numbers | ++---------------+--------------+------------+ use 
public; diff --git a/tests/cases/standalone/common/range/nest.result b/tests/cases/standalone/common/range/nest.result index 952b9fd8b4..236c0f297a 100644 --- a/tests/cases/standalone/common/range/nest.result +++ b/tests/cases/standalone/common/range/nest.result @@ -77,7 +77,7 @@ EXPLAIN ANALYZE SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s'; |_|_|_CoalescePartitionsExec REDACTED |_|_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 10_| +-+-+-+ diff --git a/tests/cases/standalone/common/select/prune.result b/tests/cases/standalone/common/select/prune.result index f2718926c9..13ddee5510 100644 --- a/tests/cases/standalone/common/select/prune.result +++ b/tests/cases/standalone/common/select/prune.result @@ -89,7 +89,7 @@ explain analyze select * from demo where idc='idc1'; +-+-+-+ | 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 2_| +-+-+-+ diff --git a/tests/cases/standalone/common/show/show_databases_tables.result b/tests/cases/standalone/common/show/show_databases_tables.result index 706b23905b..fa50fb2aab 100644 --- a/tests/cases/standalone/common/show/show_databases_tables.result +++ b/tests/cases/standalone/common/show/show_databases_tables.result @@ -5,7 +5,6 @@ SHOW DATABASES; +--------------------+ | greptime_private | | information_schema | -| pg_catalog | | public | +--------------------+ @@ -16,7 +15,6 @@ SHOW FULL DATABASES; +--------------------+---------+ | greptime_private | | | information_schema | | -| pg_catalog | | | public | | +--------------------+---------+ diff --git 
a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index f3ee1db016..cbb0c12b6f 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -45,9 +45,6 @@ order by table_schema, table_name; |greptime|information_schema|tables|LOCALTEMPORARY|3|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| |greptime|information_schema|triggers|LOCALTEMPORARY|24|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| |greptime|information_schema|views|LOCALTEMPORARY|32|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|pg_catalog|pg_class|LOCALTEMPORARY|256|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|pg_catalog|pg_namespace|LOCALTEMPORARY|258|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|pg_catalog|pg_type|LOCALTEMPORARY|257|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| |greptime|public|numbers|LOCALTEMPORARY|2|0|0|0|0|0|test_engine|11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| +++++++++++++++++++++++++ @@ -413,16 +410,6 @@ select * from information_schema.columns order by table_schema, table_name, colu | greptime | information_schema | views | table_name | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | views | table_schema | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | views | view_definition | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | pg_catalog | pg_class | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | pg_catalog | pg_class | relkind | 4 | 2147483647 | 2147483647 | | | | utf8 | 
utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | pg_catalog | pg_class | relname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | pg_catalog | pg_class | relnamespace | 3 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | pg_catalog | pg_class | relowner | 5 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | pg_catalog | pg_namespace | nspname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | pg_catalog | pg_namespace | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | pg_catalog | pg_type | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | pg_catalog | pg_type | typlen | 3 | | | 5 | 0 | | | | | | select,insert | | Int16 | smallint | FIELD | | No | smallint | | | -| greptime | pg_catalog | pg_type | typname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | public | numbers | number | 1 | | | 10 | 0 | | | | PRI | | select,insert | | UInt32 | int unsigned | TAG | | No | int unsigned | | | +---------------+--------------------+---------------------------------------+-----------------------------------+------------------+--------------------------+------------------------+-------------------+---------------+--------------------+--------------------+----------------+------------+-------+---------------+-----------------------+----------------------+-----------------+---------------+----------------+-------------+-----------------+----------------+--------+ @@ -596,7 +583,6 @@ select * from schemata 
where catalog_name = 'greptime' and schema_name != 'publi +--------------+--------------------+----------------------------+------------------------+----------+---------+ | greptime | greptime_private | utf8 | utf8_bin | | | | greptime | information_schema | utf8 | utf8_bin | | | -| greptime | pg_catalog | utf8 | utf8_bin | | | +--------------+--------------------+----------------------------+------------------------+----------+---------+ -- test engines diff --git a/tests/cases/standalone/common/system/pg_catalog.result b/tests/cases/standalone/common/system/pg_catalog.result index 261211902d..d30355352f 100644 --- a/tests/cases/standalone/common/system/pg_catalog.result +++ b/tests/cases/standalone/common/system/pg_catalog.result @@ -5,30 +5,7 @@ Error: 1004(InvalidArguments), Schema pg_catalog already exists select * from pg_catalog.pg_type order by oid; -+-----+-----------+--------+ -| oid | typname | typlen | -+-----+-----------+--------+ -| 1 | String | -1 | -| 2 | Binary | -1 | -| 3 | Int8 | 1 | -| 4 | Int16 | 2 | -| 5 | Int32 | 4 | -| 6 | Int64 | 8 | -| 7 | UInt8 | 1 | -| 8 | UInt16 | 2 | -| 9 | UInt32 | 4 | -| 10 | UInt64 | 8 | -| 11 | Float32 | 4 | -| 12 | Float64 | 8 | -| 13 | Decimal | 16 | -| 14 | Date | 4 | -| 15 | DateTime | 8 | -| 16 | Timestamp | 8 | -| 17 | Time | 8 | -| 18 | Duration | 8 | -| 19 | Interval | 16 | -| 20 | List | -1 | -+-----+-----------+--------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_type -- \d SELECT n.nspname as "Schema", @@ -44,11 +21,7 @@ WHERE c.relkind IN ('r','p','v','m','S','f','') AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; -+--------+---------+-------+-------+ -| Schema | Name | Type | Owner | -+--------+---------+-------+-------+ -| public | numbers | table | | -+--------+---------+-------+-------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class -- \dt SELECT n.nspname as "Schema", @@ -64,11 +37,7 @@ WHERE 
c.relkind IN ('r','p','') AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; -+--------+---------+-------+-------+ -| Schema | Name | Type | Owner | -+--------+---------+-------+-------+ -| public | numbers | table | | -+--------+---------+-------+-------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class -- make sure oid of namespace keep stable SELECT * FROM pg_namespace ORDER BY oid; @@ -100,11 +69,7 @@ where relnamespace = ( where nspname = 'my_db' ); -+---------+ -| relname | -+---------+ -| foo | -+---------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class -- \dt SELECT n.nspname as "Schema", @@ -120,12 +85,7 @@ WHERE c.relkind IN ('r','p','') AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; -+--------+---------+-------+-------+ -| Schema | Name | Type | Owner | -+--------+---------+-------+-------+ -| my_db | foo | table | | -| public | numbers | table | | -+--------+---------+-------+-------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class -- show tables in `my_db`, `public` select relname @@ -137,12 +97,7 @@ where relnamespace in ( ) order by relname; -+---------+ -| relname | -+---------+ -| foo | -| numbers | -+---------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class select relname from pg_catalog.pg_class @@ -152,11 +107,7 @@ where relnamespace in ( where nspname like 'my%' ); -+---------+ -| relname | -+---------+ -| foo | -+---------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class select relnamespace, relname, relkind from pg_catalog.pg_class @@ -169,11 +120,7 @@ where relnamespace in ( ) order by relnamespace, relname; -+--------------+---------+---------+ -| relnamespace | relname | relkind | -+--------------+---------+---------+ -| 434869349 | foo | r | -+--------------+---------+---------+ +Error: 
4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class use public; @@ -190,24 +137,11 @@ Affected Rows: 0 -- pg_class desc table pg_class; -+--------------+--------+-----+------+---------+---------------+ -| Column | Type | Key | Null | Default | Semantic Type | -+--------------+--------+-----+------+---------+---------------+ -| oid | UInt32 | | NO | | FIELD | -| relname | String | | NO | | FIELD | -| relnamespace | UInt32 | | NO | | FIELD | -| relkind | String | | NO | | FIELD | -| relowner | UInt32 | | NO | | FIELD | -+--------------+--------+-----+------+---------+---------------+ +Error: 4001(TableNotFound), Table not found: pg_class desc table pg_namespace; -+---------+--------+-----+------+---------+---------------+ -| Column | Type | Key | Null | Default | Semantic Type | -+---------+--------+-----+------+---------+---------------+ -| oid | UInt32 | | NO | | FIELD | -| nspname | String | | NO | | FIELD | -+---------+--------+-----+------+---------+---------------+ +Error: 4001(TableNotFound), Table not found: pg_namespace drop table my_db.foo; diff --git a/tests/cases/standalone/common/tql-explain-analyze/analyze.result b/tests/cases/standalone/common/tql-explain-analyze/analyze.result index fe8af2e753..5a08da5517 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/analyze.result +++ b/tests/cases/standalone/common/tql-explain-analyze/analyze.result @@ -32,7 +32,7 @@ TQL ANALYZE (0, 10, '5s') test; |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -63,7 +63,7 @@ TQL ANALYZE (0, 10, '1s', '2s') test; |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_FilterExec: 
j@1 >= -2000 AND j@1 <= 12000 REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -93,7 +93,7 @@ TQL ANALYZE ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -125,7 +125,7 @@ TQL ANALYZE VERBOSE (0, 10, '5s') test; |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED |_|_|_RepartitionExec: partitioning=REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ diff --git a/tests/cases/standalone/common/types/json/json.result b/tests/cases/standalone/common/types/json/json.result new file mode 100644 index 0000000000..710d0b230a --- /dev/null +++ b/tests/cases/standalone/common/types/json/json.result @@ -0,0 +1,156 @@ +CREATE TABLE jsons (j JSON, t timestamp time index); + +Affected Rows: 0 + +--Insert valid json strings-- +INSERT INTO jsons VALUES('[null]', 0), +('[true]', 1), +('[false]', 2), +('[0]', 3), +('["foo"]', 4), +('[]', 5), +('{}', 6), +('[0,1]', 7), +('{"foo":"bar"}', 8), +('{"a":null,"foo":"bar"}', 9), +('[-1]', 10), +('{"entities": { + "description": { + "urls": [ + { + "url": "http://t.co/QMLJeFmfMT", + "expanded_url": "http://www.pixiv.net/member.php?id=4776", + 
"display_url": "pixiv.net/member.php?id=…", + "indices": [ + 58, + 80 + ] + }, + { + "url": "http://t.co/LU8T7vmU3h", + "expanded_url": "http://ask.fm/KATANA77", + "display_url": "ask.fm/KATANA77", + "indices": [ + 95, + 117 + ] + } + ] + } +}}', 11); + +Affected Rows: 12 + +INSERT INTO jsons VALUES(to_json('[null]'), 12), +(to_json('[true]'), 13), +(to_json('[false]'), 14), +(to_json('[0]'), 15), +(to_json('["foo"]'), 16), +(to_json('[]'), 17), +(to_json('{}'), 18), +(to_json('[0,1]'), 19), +(to_json('{"foo":"bar"}'), 20), +(to_json('{"a":null,"foo":"bar"}'), 21), +(to_json('[-1]'), 22), +(to_json('[-2147483648]'), 23), +(to_json('{"entities": { + "description": { + "urls": [ + { + "url": "http://t.co/QMLJeFmfMT", + "expanded_url": "http://www.pixiv.net/member.php?id=4776", + "display_url": "pixiv.net/member.php?id=…", + "indices": [ + 58, + 80 + ] + }, + { + "url": "http://t.co/LU8T7vmU3h", + "expanded_url": "http://ask.fm/KATANA77", + "display_url": "ask.fm/KATANA77", + "indices": [ + 95, + 117 + ] + } + ] + } + }}'), 24); + +Affected Rows: 13 + +SELECT json_to_string(j), t FROM jsons; + ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+ +| json_to_string(jsons.j) | t | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+ +| [null] | 1970-01-01T00:00:00 | +| [true] | 1970-01-01T00:00:00.001 | +| [false] | 1970-01-01T00:00:00.002 | +| [0] | 1970-01-01T00:00:00.003 | +| ["foo"] | 
1970-01-01T00:00:00.004 | +| [] | 1970-01-01T00:00:00.005 | +| {} | 1970-01-01T00:00:00.006 | +| [0,1] | 1970-01-01T00:00:00.007 | +| {"foo":"bar"} | 1970-01-01T00:00:00.008 | +| {"a":null,"foo":"bar"} | 1970-01-01T00:00:00.009 | +| [-1] | 1970-01-01T00:00:00.010 | +| {"entities":{"description":{"urls":[{"display_url":"pixiv.net/member.php?id=…","expanded_url":"http://www.pixiv.net/member.php?id=4776","indices":[58,80],"url":"http://t.co/QMLJeFmfMT"},{"display_url":"ask.fm/KATANA77","expanded_url":"http://ask.fm/KATANA77","indices":[95,117],"url":"http://t.co/LU8T7vmU3h"}]}}} | 1970-01-01T00:00:00.011 | +| [null] | 1970-01-01T00:00:00.012 | +| [true] | 1970-01-01T00:00:00.013 | +| [false] | 1970-01-01T00:00:00.014 | +| [0] | 1970-01-01T00:00:00.015 | +| ["foo"] | 1970-01-01T00:00:00.016 | +| [] | 1970-01-01T00:00:00.017 | +| {} | 1970-01-01T00:00:00.018 | +| [0,1] | 1970-01-01T00:00:00.019 | +| {"foo":"bar"} | 1970-01-01T00:00:00.020 | +| {"a":null,"foo":"bar"} | 1970-01-01T00:00:00.021 | +| [-1] | 1970-01-01T00:00:00.022 | +| [-2147483648] | 1970-01-01T00:00:00.023 | +| {"entities":{"description":{"urls":[{"display_url":"pixiv.net/member.php?id=…","expanded_url":"http://www.pixiv.net/member.php?id=4776","indices":[58,80],"url":"http://t.co/QMLJeFmfMT"},{"display_url":"ask.fm/KATANA77","expanded_url":"http://ask.fm/KATANA77","indices":[95,117],"url":"http://t.co/LU8T7vmU3h"}]}}} | 1970-01-01T00:00:00.024 | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+ + +--Insert invalid json strings-- +DELETE FROM jsons; + +Affected Rows: 25 + +INSERT INTO jsons VALUES(to_json('{"a":1, "b":2, "c":3'), 4); + +Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot 
convert the string to json, have: {"a":1, "b":2, "c":3 + +INSERT INTO jsons VALUES(to_json('Morning my friends, have a nice day :)'), 5); + +Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot convert the string to json, have: Morning my friends, have a nice day :) + +SELECT json_to_string(j), t FROM jsons; + +++ +++ + +CREATE TABLE json_empty (j JSON, t timestamp time index); + +Affected Rows: 0 + +INSERT INTO json_empty VALUES(NULL, 2); + +Affected Rows: 1 + +SELECT json_to_string(j), t FROM json_empty; + ++------------------------------+-------------------------+ +| json_to_string(json_empty.j) | t | ++------------------------------+-------------------------+ +| | 1970-01-01T00:00:00.002 | ++------------------------------+-------------------------+ + +drop table jsons; + +Affected Rows: 0 + +drop table json_empty; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/json/json.sql b/tests/cases/standalone/common/types/json/json.sql new file mode 100644 index 0000000000..57fce9a8ea --- /dev/null +++ b/tests/cases/standalone/common/types/json/json.sql @@ -0,0 +1,96 @@ +CREATE TABLE jsons (j JSON, t timestamp time index); + +--Insert valid json strings-- +INSERT INTO jsons VALUES('[null]', 0), +('[true]', 1), +('[false]', 2), +('[0]', 3), +('["foo"]', 4), +('[]', 5), +('{}', 6), +('[0,1]', 7), +('{"foo":"bar"}', 8), +('{"a":null,"foo":"bar"}', 9), +('[-1]', 10), +('{"entities": { + "description": { + "urls": [ + { + "url": "http://t.co/QMLJeFmfMT", + "expanded_url": "http://www.pixiv.net/member.php?id=4776", + "display_url": "pixiv.net/member.php?id=…", + "indices": [ + 58, + 80 + ] + }, + { + "url": "http://t.co/LU8T7vmU3h", + "expanded_url": "http://ask.fm/KATANA77", + "display_url": "ask.fm/KATANA77", + "indices": [ + 95, + 117 + ] + } + ] + } +}}', 11); + +INSERT INTO jsons VALUES(to_json('[null]'), 12), +(to_json('[true]'), 13), +(to_json('[false]'), 14), +(to_json('[0]'), 15), +(to_json('["foo"]'), 16), 
+(to_json('[]'), 17), +(to_json('{}'), 18), +(to_json('[0,1]'), 19), +(to_json('{"foo":"bar"}'), 20), +(to_json('{"a":null,"foo":"bar"}'), 21), +(to_json('[-1]'), 22), +(to_json('[-2147483648]'), 23), +(to_json('{"entities": { + "description": { + "urls": [ + { + "url": "http://t.co/QMLJeFmfMT", + "expanded_url": "http://www.pixiv.net/member.php?id=4776", + "display_url": "pixiv.net/member.php?id=…", + "indices": [ + 58, + 80 + ] + }, + { + "url": "http://t.co/LU8T7vmU3h", + "expanded_url": "http://ask.fm/KATANA77", + "display_url": "ask.fm/KATANA77", + "indices": [ + 95, + 117 + ] + } + ] + } + }}'), 24); + +SELECT json_to_string(j), t FROM jsons; + +--Insert invalid json strings-- +DELETE FROM jsons; + +INSERT INTO jsons VALUES(to_json('{"a":1, "b":2, "c":3'), 4); + +INSERT INTO jsons VALUES(to_json('Morning my friends, have a nice day :)'), 5); + +SELECT json_to_string(j), t FROM jsons; + +CREATE TABLE json_empty (j JSON, t timestamp time index); + +INSERT INTO json_empty VALUES(NULL, 2); + +SELECT json_to_string(j), t FROM json_empty; + +drop table jsons; + +drop table json_empty; diff --git a/tests/cases/standalone/common/view/create.result b/tests/cases/standalone/common/view/create.result index bb0ea87834..4cf8084cd3 100644 --- a/tests/cases/standalone/common/view/create.result +++ b/tests/cases/standalone/common/view/create.result @@ -77,11 +77,7 @@ WHERE c.relkind IN ('v','') AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; -+--------+-----------+------+-------+ -| Schema | Name | Type | Owner | -+--------+-----------+------+-------+ -| public | test_view | view | | -+--------+-----------+------+-------+ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class -- SQLNESS REPLACE (\s\d+\s) ID -- SQLNESS REPLACE (\s[\-0-9T:\.]{15,}) DATETIME @@ -110,9 +106,6 @@ SELECT * FROM INFORMATION_SCHEMA.TABLES ORDER BY TABLE_NAME, TABLE_TYPE; 
|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| |greptime|information_schema|parameters|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| |greptime|information_schema|partitions|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|pg_catalog|pg_class|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|pg_catalog|pg_namespace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|pg_catalog|pg_type|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| |greptime|information_schema|profiling|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| |greptime|information_schema|referential_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| |greptime|information_schema|region_peers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| @@ -205,6 +198,5 @@ WHERE c.relkind IN ('v','') AND pg_catalog.pg_table_is_visible(c.oid) ORDER BY 1,2; -++ -++ +Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class diff --git a/tests/cases/standalone/optimizer/last_value.result b/tests/cases/standalone/optimizer/last_value.result index ab3f12bce1..790a6a4748 100644 --- a/tests/cases/standalone/optimizer/last_value.result +++ b/tests/cases/standalone/optimizer/last_value.result @@ -48,7 +48,7 @@ explain analyze |_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED |_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED |_|_|_RepartitionExec: REDACTED -|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges), 
selector=LastRow REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), selector=LastRow REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+