mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-31 12:20:38 +00:00
Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
32
Cargo.lock
generated
32
Cargo.lock
generated
@@ -1812,10 +1812,12 @@ name = "common-base"
|
||||
version = "0.9.3"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"async-trait",
|
||||
"bitvec",
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"futures",
|
||||
"paste",
|
||||
"serde",
|
||||
"snafu 0.8.4",
|
||||
@@ -1952,6 +1954,7 @@ dependencies = [
|
||||
"datatypes",
|
||||
"geohash",
|
||||
"h3o",
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
@@ -2293,6 +2296,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"futures-util",
|
||||
"humantime-serde",
|
||||
"num_cpus",
|
||||
"rskafka",
|
||||
"rustls 0.23.10",
|
||||
"rustls-native-certs",
|
||||
@@ -3166,6 +3170,7 @@ dependencies = [
|
||||
"datafusion-common",
|
||||
"enum_dispatch",
|
||||
"greptime-proto",
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 3.9.2",
|
||||
@@ -3698,6 +3703,12 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fast-float"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||
|
||||
[[package]]
|
||||
name = "fastdivide"
|
||||
version = "0.4.1"
|
||||
@@ -4302,7 +4313,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=157cfdb52709e489cf1f3ce8e3042ed4ee8a524a#157cfdb52709e489cf1f3ce8e3042ed4ee8a524a"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=973f49cde88a582fb65755cc572ebcf6fb93ccf7#973f49cde88a582fb65755cc572ebcf6fb93ccf7"
|
||||
dependencies = [
|
||||
"prost 0.12.6",
|
||||
"serde",
|
||||
@@ -5409,6 +5420,21 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonb"
|
||||
version = "0.4.1"
|
||||
source = "git+https://github.com/CookiePieWw/jsonb.git?rev=d0166c130fce903bf6c58643417a3173a6172d31#d0166c130fce903bf6c58643417a3173a6172d31"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"fast-float",
|
||||
"itoa",
|
||||
"nom",
|
||||
"ordered-float 4.2.0",
|
||||
"rand",
|
||||
"ryu",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.5.1"
|
||||
@@ -8062,6 +8088,8 @@ dependencies = [
|
||||
"chrono",
|
||||
"fallible-iterator",
|
||||
"postgres-protocol",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -10400,6 +10428,7 @@ dependencies = [
|
||||
"hyper 0.14.29",
|
||||
"influxdb_line_protocol",
|
||||
"itertools 0.10.5",
|
||||
"jsonb",
|
||||
"lazy_static",
|
||||
"mime_guess",
|
||||
"mysql_async",
|
||||
@@ -10779,6 +10808,7 @@ dependencies = [
|
||||
"hex",
|
||||
"iso8601",
|
||||
"itertools 0.10.5",
|
||||
"jsonb",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"serde_json",
|
||||
|
||||
@@ -120,10 +120,11 @@ etcd-client = { version = "0.13" }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
|
||||
mockall = "0.11.4"
|
||||
|
||||
@@ -68,6 +68,7 @@
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
|
||||
@@ -381,6 +382,7 @@
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
|
||||
@@ -170,6 +170,9 @@ prefill_log_files = false
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
## Parallelism during WAL recovery.
|
||||
recovery_parallelism = 2
|
||||
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
@@ -174,6 +174,9 @@ prefill_log_files = false
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
## Parallelism during WAL recovery.
|
||||
recovery_parallelism = 2
|
||||
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
@@ -42,7 +42,8 @@ use greptime_proto::v1::greptime_request::Request;
|
||||
use greptime_proto::v1::query_request::Query;
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType,
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest,
|
||||
Row, SemanticType,
|
||||
};
|
||||
use paste::paste;
|
||||
use snafu::prelude::*;
|
||||
@@ -103,7 +104,17 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ColumnDataType::Uint64 => ConcreteDataType::uint64_datatype(),
|
||||
ColumnDataType::Float32 => ConcreteDataType::float32_datatype(),
|
||||
ColumnDataType::Float64 => ConcreteDataType::float64_datatype(),
|
||||
ColumnDataType::Binary => ConcreteDataType::binary_datatype(),
|
||||
ColumnDataType::Binary => {
|
||||
if let Some(TypeExt::JsonType(_)) = datatype_wrapper
|
||||
.datatype_ext
|
||||
.as_ref()
|
||||
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
|
||||
{
|
||||
ConcreteDataType::json_datatype()
|
||||
} else {
|
||||
ConcreteDataType::binary_datatype()
|
||||
}
|
||||
}
|
||||
ColumnDataType::String => ConcreteDataType::string_datatype(),
|
||||
ColumnDataType::Date => ConcreteDataType::date_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
|
||||
@@ -236,7 +247,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
|
||||
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
|
||||
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
|
||||
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
@@ -276,6 +287,16 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
})),
|
||||
})
|
||||
}
|
||||
ColumnDataType::Binary => {
|
||||
if datatype == ConcreteDataType::json_datatype() {
|
||||
// Json is the same as binary in proto. The extension marks the binary in proto is actually a json.
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
Ok(Self {
|
||||
@@ -649,7 +670,8 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
| ConcreteDataType::Duration(_) => {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Json(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
@@ -813,7 +835,8 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
| ConcreteDataType::Duration(_) => {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Json(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
@@ -831,7 +854,13 @@ pub fn is_column_type_value_eq(
|
||||
expect_type: &ConcreteDataType,
|
||||
) -> bool {
|
||||
ColumnDataTypeWrapper::try_new(type_value, type_extension)
|
||||
.map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type)
|
||||
.map(|wrapper| {
|
||||
let datatype = ConcreteDataType::from(wrapper);
|
||||
(datatype == *expect_type)
|
||||
// Json type leverage binary type in pb, so this is valid.
|
||||
|| (datatype == ConcreteDataType::binary_datatype()
|
||||
&& *expect_type == ConcreteDataType::json_datatype())
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ use futures_util::{StreamExt, TryStreamExt};
|
||||
use meta_client::client::MetaClient;
|
||||
use moka::sync::Cache;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use session::context::{Channel, QueryContext};
|
||||
use snafu::prelude::*;
|
||||
use table::dist_table::DistTable;
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
@@ -152,7 +153,11 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
|
||||
async fn schema_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
@@ -163,12 +168,17 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListSchemasSnafu { catalog })?;
|
||||
|
||||
keys.extend(self.system_catalog.schema_names());
|
||||
keys.extend(self.system_catalog.schema_names(query_ctx));
|
||||
|
||||
Ok(keys.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
|
||||
async fn table_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
@@ -181,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.into_iter()
|
||||
.map(|(k, _)| k)
|
||||
.collect::<Vec<_>>();
|
||||
tables.extend_from_slice(&self.system_catalog.table_names(schema));
|
||||
tables.extend_from_slice(&self.system_catalog.table_names(schema, query_ctx));
|
||||
|
||||
Ok(tables.into_iter().collect())
|
||||
}
|
||||
@@ -194,8 +204,13 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
if self.system_catalog.schema_exists(schema) {
|
||||
async fn schema_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool> {
|
||||
if self.system_catalog.schema_exists(schema, query_ctx) {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
@@ -206,8 +221,14 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
if self.system_catalog.table_exists(schema, table) {
|
||||
async fn table_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool> {
|
||||
if self.system_catalog.table_exists(schema, table, query_ctx) {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
@@ -225,10 +246,12 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Option<TableRef>> {
|
||||
if let Some(table) = self
|
||||
.system_catalog
|
||||
.table(catalog_name, schema_name, table_name)
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
if let Some(table) =
|
||||
self.system_catalog
|
||||
.table(catalog_name, schema_name, table_name, query_ctx)
|
||||
{
|
||||
return Ok(Some(table));
|
||||
}
|
||||
@@ -236,23 +259,45 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu {
|
||||
name: "table_cache",
|
||||
})?;
|
||||
|
||||
table_cache
|
||||
if let Some(table) = table_cache
|
||||
.get_by_ref(&TableName {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
schema_name: schema_name.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
})
|
||||
.await
|
||||
.context(GetTableCacheSnafu)
|
||||
.context(GetTableCacheSnafu)?
|
||||
{
|
||||
return Ok(Some(table));
|
||||
}
|
||||
|
||||
if channel == Channel::Postgres {
|
||||
// falldown to pg_catalog
|
||||
if let Some(table) =
|
||||
self.system_catalog
|
||||
.table(catalog_name, PG_CATALOG_NAME, table_name, query_ctx)
|
||||
{
|
||||
return Ok(Some(table));
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
|
||||
fn tables<'a>(
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
query_ctx: Option<&'a QueryContext>,
|
||||
) -> BoxStream<'a, Result<TableRef>> {
|
||||
let sys_tables = try_stream!({
|
||||
// System tables
|
||||
let sys_table_names = self.system_catalog.table_names(schema);
|
||||
let sys_table_names = self.system_catalog.table_names(schema, query_ctx);
|
||||
for table_name in sys_table_names {
|
||||
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
|
||||
if let Some(table) =
|
||||
self.system_catalog
|
||||
.table(catalog, schema, &table_name, query_ctx)
|
||||
{
|
||||
yield table;
|
||||
}
|
||||
}
|
||||
@@ -320,18 +365,27 @@ struct SystemCatalog {
|
||||
}
|
||||
|
||||
impl SystemCatalog {
|
||||
// TODO(j0hn50n133): remove the duplicated hard-coded table names logic
|
||||
fn schema_names(&self) -> Vec<String> {
|
||||
vec![
|
||||
INFORMATION_SCHEMA_NAME.to_string(),
|
||||
PG_CATALOG_NAME.to_string(),
|
||||
]
|
||||
fn schema_names(&self, query_ctx: Option<&QueryContext>) -> Vec<String> {
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
match channel {
|
||||
// pg_catalog only visible under postgres protocol
|
||||
Channel::Postgres => vec![
|
||||
INFORMATION_SCHEMA_NAME.to_string(),
|
||||
PG_CATALOG_NAME.to_string(),
|
||||
],
|
||||
_ => {
|
||||
vec![INFORMATION_SCHEMA_NAME.to_string()]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn table_names(&self, schema: &str) -> Vec<String> {
|
||||
fn table_names(&self, schema: &str, query_ctx: Option<&QueryContext>) -> Vec<String> {
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
match schema {
|
||||
INFORMATION_SCHEMA_NAME => self.information_schema_provider.table_names(),
|
||||
PG_CATALOG_NAME => self.pg_catalog_provider.table_names(),
|
||||
PG_CATALOG_NAME if channel == Channel::Postgres => {
|
||||
self.pg_catalog_provider.table_names()
|
||||
}
|
||||
DEFAULT_SCHEMA_NAME => {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
}
|
||||
@@ -339,23 +393,35 @@ impl SystemCatalog {
|
||||
}
|
||||
}
|
||||
|
||||
fn schema_exists(&self, schema: &str) -> bool {
|
||||
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
|
||||
fn schema_exists(&self, schema: &str, query_ctx: Option<&QueryContext>) -> bool {
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
match channel {
|
||||
Channel::Postgres => schema == PG_CATALOG_NAME || schema == INFORMATION_SCHEMA_NAME,
|
||||
_ => schema == INFORMATION_SCHEMA_NAME,
|
||||
}
|
||||
}
|
||||
|
||||
fn table_exists(&self, schema: &str, table: &str) -> bool {
|
||||
fn table_exists(&self, schema: &str, table: &str, query_ctx: Option<&QueryContext>) -> bool {
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
self.information_schema_provider.table(table).is_some()
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
table == NUMBERS_TABLE_NAME
|
||||
} else if schema == PG_CATALOG_NAME {
|
||||
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
|
||||
self.pg_catalog_provider.table(table).is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
|
||||
fn table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Option<TableRef> {
|
||||
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
let information_schema_provider =
|
||||
self.catalog_cache.get_with_by_ref(catalog, move || {
|
||||
@@ -366,7 +432,7 @@ impl SystemCatalog {
|
||||
))
|
||||
});
|
||||
information_schema_provider.table(table_name)
|
||||
} else if schema == PG_CATALOG_NAME {
|
||||
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
|
||||
if catalog == DEFAULT_CATALOG_NAME {
|
||||
self.pg_catalog_provider.table(table_name)
|
||||
} else {
|
||||
|
||||
@@ -20,8 +20,10 @@ use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::CreateTableExpr;
|
||||
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME};
|
||||
use futures::future::BoxFuture;
|
||||
use futures_util::stream::BoxStream;
|
||||
use session::context::QueryContext;
|
||||
use table::metadata::TableId;
|
||||
use table::TableRef;
|
||||
|
||||
@@ -44,15 +46,35 @@ pub trait CatalogManager: Send + Sync {
|
||||
|
||||
async fn catalog_names(&self) -> Result<Vec<String>>;
|
||||
|
||||
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>;
|
||||
async fn schema_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>>;
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>;
|
||||
async fn table_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>>;
|
||||
|
||||
async fn catalog_exists(&self, catalog: &str) -> Result<bool>;
|
||||
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool>;
|
||||
async fn schema_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool>;
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;
|
||||
async fn table_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool>;
|
||||
|
||||
/// Returns the table by catalog, schema and table name.
|
||||
async fn table(
|
||||
@@ -60,10 +82,25 @@ pub trait CatalogManager: Send + Sync {
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Option<TableRef>>;
|
||||
|
||||
/// Returns all tables with a stream by catalog and schema.
|
||||
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>>;
|
||||
fn tables<'a>(
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
query_ctx: Option<&'a QueryContext>,
|
||||
) -> BoxStream<'a, Result<TableRef>>;
|
||||
|
||||
/// Check if `schema` is a reserved schema name
|
||||
fn is_reserved_schema_name(&self, schema: &str) -> bool {
|
||||
// We have to check whether a schema name is reserved before create schema.
|
||||
// We need this rather than use schema_exists directly because `pg_catalog` is
|
||||
// only visible via postgres protocol. So if we don't check, a mysql client may
|
||||
// create a schema named `pg_catalog` which is somehow malformed.
|
||||
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
|
||||
}
|
||||
}
|
||||
|
||||
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
|
||||
|
||||
@@ -26,6 +26,7 @@ use common_catalog::consts::{
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use futures_util::stream::BoxStream;
|
||||
use session::context::QueryContext;
|
||||
use snafu::OptionExt;
|
||||
use table::TableRef;
|
||||
|
||||
@@ -53,7 +54,11 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
|
||||
}
|
||||
|
||||
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
|
||||
async fn schema_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
@@ -67,7 +72,12 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
|
||||
async fn table_names(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
@@ -87,11 +97,22 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
self.catalog_exist_sync(catalog)
|
||||
}
|
||||
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
async fn schema_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool> {
|
||||
self.schema_exist_sync(catalog, schema)
|
||||
}
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
async fn table_exists(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table: &str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> Result<bool> {
|
||||
let catalogs = self.catalogs.read().unwrap();
|
||||
Ok(catalogs
|
||||
.get(catalog)
|
||||
@@ -108,6 +129,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> Result<Option<TableRef>> {
|
||||
let result = try {
|
||||
self.catalogs
|
||||
@@ -121,7 +143,12 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
|
||||
fn tables<'a>(
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
_query_ctx: Option<&QueryContext>,
|
||||
) -> BoxStream<'a, Result<TableRef>> {
|
||||
let catalogs = self.catalogs.read().unwrap();
|
||||
|
||||
let Some(schemas) = catalogs.get(catalog) else {
|
||||
@@ -371,11 +398,12 @@ mod tests {
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
NUMBERS_TABLE_NAME,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
|
||||
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, None);
|
||||
let tables = stream.try_collect::<Vec<_>>().await.unwrap();
|
||||
assert_eq!(tables.len(), 1);
|
||||
assert_eq!(
|
||||
@@ -384,7 +412,12 @@ mod tests {
|
||||
);
|
||||
|
||||
assert!(catalog_list
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
|
||||
.table(
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
"not_exists",
|
||||
None
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
@@ -411,7 +444,7 @@ mod tests {
|
||||
};
|
||||
catalog.register_table_sync(register_table_req).unwrap();
|
||||
assert!(catalog
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_some());
|
||||
@@ -423,7 +456,7 @@ mod tests {
|
||||
};
|
||||
catalog.deregister_table_sync(deregister_table_req).unwrap();
|
||||
assert!(catalog
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
|
||||
@@ -257,8 +257,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
|
||||
@@ -212,8 +212,8 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let mut primary_constraints = vec![];
|
||||
|
||||
@@ -240,9 +240,9 @@ impl InformationSchemaPartitionsBuilder {
|
||||
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let table_info_stream = catalog_manager
|
||||
.tables(&catalog_name, &schema_name)
|
||||
.tables(&catalog_name, &schema_name, None)
|
||||
.try_filter_map(|t| async move {
|
||||
let table_info = t.table_info();
|
||||
if table_info.table_type == TableType::Temporary {
|
||||
|
||||
@@ -176,9 +176,9 @@ impl InformationSchemaRegionPeersBuilder {
|
||||
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let table_id_stream = catalog_manager
|
||||
.tables(&catalog_name, &schema_name)
|
||||
.tables(&catalog_name, &schema_name, None)
|
||||
.try_filter_map(|t| async move {
|
||||
let table_info = t.table_info();
|
||||
if table_info.table_type == TableType::Temporary {
|
||||
|
||||
@@ -171,7 +171,7 @@ impl InformationSchemaSchemataBuilder {
|
||||
let table_metadata_manager = utils::table_meta_manager(&self.catalog_manager)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let opts = if let Some(table_metadata_manager) = &table_metadata_manager {
|
||||
table_metadata_manager
|
||||
.schema_manager()
|
||||
|
||||
@@ -176,8 +176,8 @@ impl InformationSchemaTableConstraintsBuilder {
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
|
||||
@@ -234,8 +234,8 @@ impl InformationSchemaTablesBuilder {
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
|
||||
@@ -192,8 +192,8 @@ impl InformationSchemaViewsBuilder {
|
||||
.context(CastManagerSnafu)?
|
||||
.view_info_cache()?;
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
|
||||
@@ -18,15 +18,16 @@ mod pg_namespace;
|
||||
mod table_names;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Weak};
|
||||
use std::sync::{Arc, LazyLock, Weak};
|
||||
|
||||
use common_catalog::consts::{self, PG_CATALOG_NAME};
|
||||
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, PG_CATALOG_NAME};
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use lazy_static::lazy_static;
|
||||
use paste::paste;
|
||||
use pg_catalog_memory_table::get_schema_columns;
|
||||
use pg_class::PGClass;
|
||||
use pg_namespace::PGNamespace;
|
||||
use session::context::{Channel, QueryContext};
|
||||
use table::TableRef;
|
||||
pub use table_names::*;
|
||||
|
||||
@@ -142,3 +143,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider {
|
||||
&self.catalog_name
|
||||
}
|
||||
}
|
||||
|
||||
/// Query context handed to [`CatalogManager`] methods.
///
/// Built lazily on first access from `DEFAULT_CATALOG_NAME`, `DEFAULT_SCHEMA_NAME`
/// and `Channel::Postgres`.
|
||||
static PG_QUERY_CTX: LazyLock<QueryContext> = LazyLock::new(|| {
|
||||
QueryContext::with_channel(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, Channel::Postgres)
|
||||
});
|
||||
|
||||
/// Returns a reference to the shared [`PG_QUERY_CTX`], wrapped in `Some`.
///
/// The static is initialized on the first call that dereferences it.
fn query_ctx() -> Option<&'static QueryContext> {
|
||||
Some(&PG_QUERY_CTX)
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ use store_api::storage::ScanRequest;
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
|
||||
use super::{OID_COLUMN_NAME, PG_CLASS};
|
||||
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
@@ -202,8 +202,11 @@ impl PGClassBuilder {
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
|
||||
for schema_name in catalog_manager
|
||||
.schema_names(&catalog_name, query_ctx())
|
||||
.await?
|
||||
{
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, query_ctx());
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
self.add_class(
|
||||
|
||||
@@ -31,7 +31,7 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
|
||||
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
@@ -180,7 +180,10 @@ impl PGNamespaceBuilder {
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
for schema_name in catalog_manager
|
||||
.schema_names(&catalog_name, query_ctx())
|
||||
.await?
|
||||
{
|
||||
self.add_namespace(&predicates, &schema_name);
|
||||
}
|
||||
self.finish()
|
||||
|
||||
@@ -23,7 +23,7 @@ use datafusion::datasource::view::ViewTable;
|
||||
use datafusion::datasource::{provider_as_source, TableProvider};
|
||||
use datafusion::logical_expr::TableSource;
|
||||
use itertools::Itertools;
|
||||
use session::context::QueryContext;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
@@ -45,6 +45,7 @@ pub struct DfTableSourceProvider {
|
||||
disallow_cross_catalog_query: bool,
|
||||
default_catalog: String,
|
||||
default_schema: String,
|
||||
query_ctx: QueryContextRef,
|
||||
plan_decoder: SubstraitPlanDecoderRef,
|
||||
enable_ident_normalization: bool,
|
||||
}
|
||||
@@ -53,7 +54,7 @@ impl DfTableSourceProvider {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
disallow_cross_catalog_query: bool,
|
||||
query_ctx: &QueryContext,
|
||||
query_ctx: QueryContextRef,
|
||||
plan_decoder: SubstraitPlanDecoderRef,
|
||||
enable_ident_normalization: bool,
|
||||
) -> Self {
|
||||
@@ -63,6 +64,7 @@ impl DfTableSourceProvider {
|
||||
resolved_tables: HashMap::new(),
|
||||
default_catalog: query_ctx.current_catalog().to_owned(),
|
||||
default_schema: query_ctx.current_schema(),
|
||||
query_ctx,
|
||||
plan_decoder,
|
||||
enable_ident_normalization,
|
||||
}
|
||||
@@ -71,8 +73,7 @@ impl DfTableSourceProvider {
|
||||
pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
|
||||
if self.disallow_cross_catalog_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
TableReference::Partial { .. } => {}
|
||||
TableReference::Bare { .. } | TableReference::Partial { .. } => {}
|
||||
TableReference::Full {
|
||||
catalog, schema, ..
|
||||
} => {
|
||||
@@ -107,7 +108,7 @@ impl DfTableSourceProvider {
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog_name, schema_name, table_name)
|
||||
.table(catalog_name, schema_name, table_name, Some(&self.query_ctx))
|
||||
.await?
|
||||
.with_context(|| TableNotExistSnafu {
|
||||
table: format_full_table_name(catalog_name, schema_name, table_name),
|
||||
@@ -210,12 +211,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_validate_table_ref() {
|
||||
let query_ctx = &QueryContext::with("greptime", "public");
|
||||
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
|
||||
|
||||
let table_provider = DfTableSourceProvider::new(
|
||||
MemoryCatalogManager::with_default_setup(),
|
||||
true,
|
||||
query_ctx,
|
||||
query_ctx.clone(),
|
||||
DummyDecoder::arc(),
|
||||
true,
|
||||
);
|
||||
@@ -308,7 +309,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_resolve_view() {
|
||||
let query_ctx = &QueryContext::with("greptime", "public");
|
||||
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
|
||||
let backend = Arc::new(MemoryKvBackend::default());
|
||||
let layered_cache_builder = LayeredCacheRegistryBuilder::default()
|
||||
.add_cache_registry(CacheRegistryBuilder::default().build());
|
||||
@@ -344,8 +345,13 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut table_provider =
|
||||
DfTableSourceProvider::new(catalog_manager, true, query_ctx, MockDecoder::arc(), true);
|
||||
let mut table_provider = DfTableSourceProvider::new(
|
||||
catalog_manager,
|
||||
true,
|
||||
query_ctx.clone(),
|
||||
MockDecoder::arc(),
|
||||
true,
|
||||
);
|
||||
|
||||
// View not found
|
||||
let table_ref = TableReference::bare("not_exists_view");
|
||||
|
||||
@@ -112,7 +112,7 @@ impl SchemaProvider for DummySchemaProvider {
|
||||
async fn table(&self, name: &str) -> datafusion::error::Result<Option<Arc<dyn TableProvider>>> {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(&self.catalog_name, &self.schema_name, name)
|
||||
.table(&self.catalog_name, &self.schema_name, name, None)
|
||||
.await?
|
||||
.with_context(|| TableNotExistSnafu {
|
||||
table: format_full_table_name(&self.catalog_name, &self.schema_name, name),
|
||||
|
||||
@@ -65,6 +65,7 @@ fn test_load_datanode_example_config() {
|
||||
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
|
||||
dir: Some("/tmp/greptimedb/wal".to_string()),
|
||||
sync_period: Some(Duration::from_secs(10)),
|
||||
recovery_parallelism: 2,
|
||||
..Default::default()
|
||||
}),
|
||||
storage: StorageConfig {
|
||||
@@ -207,6 +208,7 @@ fn test_load_standalone_example_config() {
|
||||
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
|
||||
dir: Some("/tmp/greptimedb/wal".to_string()),
|
||||
sync_period: Some(Duration::from_secs(10)),
|
||||
recovery_parallelism: 2,
|
||||
..Default::default()
|
||||
}),
|
||||
region_engine: vec![
|
||||
|
||||
@@ -9,10 +9,12 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
async-trait.workspace = true
|
||||
bitvec = "1.0"
|
||||
bytes.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
futures.workspace = true
|
||||
paste = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::io::{Read, Write};
|
||||
|
||||
use bytes::{Buf, BufMut, BytesMut};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_macro::stack_trace_debug;
|
||||
use paste::paste;
|
||||
use snafu::{ensure, Location, ResultExt, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display(
|
||||
"Destination buffer overflow, src_len: {}, dst_len: {}",
|
||||
src_len,
|
||||
dst_len
|
||||
))]
|
||||
Overflow {
|
||||
src_len: usize,
|
||||
dst_len: usize,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Buffer underflow"))]
|
||||
Underflow {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("IO operation reach EOF"))]
|
||||
Eof {
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_read_le {
|
||||
( $($num_ty: ty), *) => {
|
||||
$(
|
||||
paste!{
|
||||
// TODO(hl): default implementation requires allocating a
|
||||
// temp buffer. maybe use more efficient impls in concrete buffers.
|
||||
// see https://github.com/GrepTimeTeam/greptimedb/pull/97#discussion_r930798941
|
||||
fn [<read_ $num_ty _le>](&mut self) -> Result<$num_ty> {
|
||||
let mut buf = [0u8; std::mem::size_of::<$num_ty>()];
|
||||
self.read_to_slice(&mut buf)?;
|
||||
Ok($num_ty::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
fn [<peek_ $num_ty _le>](&mut self) -> Result<$num_ty> {
|
||||
let mut buf = [0u8; std::mem::size_of::<$num_ty>()];
|
||||
self.peek_to_slice(&mut buf)?;
|
||||
Ok($num_ty::from_le_bytes(buf))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_write_le {
|
||||
( $($num_ty: ty), *) => {
|
||||
$(
|
||||
paste!{
|
||||
fn [<write_ $num_ty _le>](&mut self, n: $num_ty) -> Result<()> {
|
||||
self.write_from_slice(&n.to_le_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Buffer {
|
||||
/// Returns remaining data size for read.
|
||||
fn remaining_size(&self) -> usize;
|
||||
|
||||
/// Returns true if buffer has no data for read.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.remaining_size() == 0
|
||||
}
|
||||
|
||||
/// Peeks data into dst. This method should not change internal cursor,
|
||||
/// invoke `advance_by` if needed.
|
||||
/// # Panics
|
||||
/// This method **may** panic if buffer does not have enough data to be copied to dst.
|
||||
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()>;
|
||||
|
||||
/// Reads data into dst. This method will change internal cursor.
|
||||
/// # Panics
|
||||
/// This method **may** panic if buffer does not have enough data to be copied to dst.
|
||||
fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> {
|
||||
self.peek_to_slice(dst)?;
|
||||
self.advance_by(dst.len());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Advances internal cursor for next read.
|
||||
/// # Panics
|
||||
/// This method **may** panic if the offset after advancing exceeds the length of underlying buffer.
|
||||
fn advance_by(&mut self, by: usize);
|
||||
|
||||
impl_read_le![u8, i8, u16, i16, u32, i32, u64, i64, f32, f64];
|
||||
}
|
||||
|
||||
macro_rules! impl_buffer_for_bytes {
|
||||
( $($buf_ty:ty), *) => {
|
||||
$(
|
||||
impl Buffer for $buf_ty {
|
||||
fn remaining_size(&self) -> usize{
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> {
|
||||
let dst_len = dst.len();
|
||||
ensure!(self.remaining() >= dst.len(), OverflowSnafu {
|
||||
src_len: self.remaining_size(),
|
||||
dst_len,
|
||||
}
|
||||
);
|
||||
dst.copy_from_slice(&self[0..dst_len]);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn advance_by(&mut self, by: usize) {
|
||||
self.advance(by);
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
impl_buffer_for_bytes![bytes::Bytes, bytes::BytesMut];
|
||||
|
||||
impl Buffer for &[u8] {
|
||||
fn remaining_size(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> {
|
||||
let dst_len = dst.len();
|
||||
ensure!(
|
||||
self.len() >= dst.len(),
|
||||
OverflowSnafu {
|
||||
src_len: self.remaining_size(),
|
||||
dst_len,
|
||||
}
|
||||
);
|
||||
dst.copy_from_slice(&self[0..dst_len]);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> {
|
||||
ensure!(
|
||||
self.len() >= dst.len(),
|
||||
OverflowSnafu {
|
||||
src_len: self.remaining_size(),
|
||||
dst_len: dst.len(),
|
||||
}
|
||||
);
|
||||
self.read_exact(dst).context(EofSnafu)
|
||||
}
|
||||
|
||||
fn advance_by(&mut self, by: usize) {
|
||||
*self = &self[by..];
|
||||
}
|
||||
}
|
||||
|
||||
/// Mutable buffer.
|
||||
pub trait BufferMut {
|
||||
fn as_slice(&self) -> &[u8];
|
||||
|
||||
fn write_from_slice(&mut self, src: &[u8]) -> Result<()>;
|
||||
|
||||
impl_write_le![i8, u8, i16, u16, i32, u32, i64, u64, f32, f64];
|
||||
}
|
||||
|
||||
impl BufferMut for BytesMut {
|
||||
fn as_slice(&self) -> &[u8] {
|
||||
self
|
||||
}
|
||||
|
||||
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
|
||||
self.put_slice(src);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl BufferMut for &mut [u8] {
|
||||
fn as_slice(&self) -> &[u8] {
|
||||
self
|
||||
}
|
||||
|
||||
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
|
||||
// see std::io::Write::write_all
|
||||
// https://doc.rust-lang.org/src/std/io/impls.rs.html#363
|
||||
self.write_all(src).map_err(|_| {
|
||||
OverflowSnafu {
|
||||
src_len: src.len(),
|
||||
dst_len: self.as_slice().len(),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl BufferMut for Vec<u8> {
|
||||
fn as_slice(&self) -> &[u8] {
|
||||
self
|
||||
}
|
||||
|
||||
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
|
||||
self.extend_from_slice(src);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -44,6 +44,12 @@ impl From<Vec<u8>> for Bytes {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Bytes> for Vec<u8> {
|
||||
fn from(bytes: Bytes) -> Vec<u8> {
|
||||
bytes.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Bytes {
|
||||
type Target = [u8];
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod bit_vec;
|
||||
pub mod buffer;
|
||||
pub mod bytes;
|
||||
pub mod plugins;
|
||||
pub mod range_read;
|
||||
#[allow(clippy::all)]
|
||||
pub mod readable_size;
|
||||
pub mod secrets;
|
||||
|
||||
80
src/common/base/src/range_read.rs
Normal file
80
src/common/base/src/range_read.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use futures::{AsyncReadExt, AsyncSeekExt};
|
||||
|
||||
/// `Metadata` contains the metadata of a source.
///
/// It is a plain value type, so it derives the standard comparison, copy and
/// debug traits to be usable in assertions and log output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Metadata {
    /// The length of the source in bytes.
    pub content_length: u64,
}
|
||||
|
||||
/// `RangeReader` reads a range of bytes from a source.
|
||||
#[async_trait]
|
||||
pub trait RangeReader: Send + Unpin {
|
||||
/// Returns the metadata of the source.
|
||||
async fn metadata(&mut self) -> io::Result<Metadata>;
|
||||
|
||||
/// Reads the bytes in the given range.
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes>;
|
||||
|
||||
/// Reads the bytes in the given range into the buffer.
|
||||
///
|
||||
/// Handles the buffer based on its capacity:
|
||||
/// - If the buffer is insufficient to hold the bytes, it will either:
|
||||
/// - Allocate additional space (e.g., for `Vec<u8>`)
|
||||
/// - Panic (e.g., for `&mut [u8]`)
|
||||
async fn read_into(
|
||||
&mut self,
|
||||
range: Range<u64>,
|
||||
buf: &mut (impl BufMut + Send),
|
||||
) -> io::Result<()> {
|
||||
let bytes = self.read(range).await?;
|
||||
buf.put_slice(&bytes);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reads the bytes in the given ranges.
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
|
||||
let mut result = Vec::with_capacity(ranges.len());
|
||||
for range in ranges {
|
||||
result.push(self.read(range.clone()).await?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement `RangeReader` for a type that implements `AsyncRead + AsyncSeek`.
|
||||
///
|
||||
/// TODO(zhongzc): It's a temporary solution for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`.
|
||||
/// Until the codebase is fully ported to `RangeReader`, remove this implementation.
|
||||
#[async_trait]
|
||||
impl<R: futures::AsyncRead + futures::AsyncSeek + Send + Unpin> RangeReader for R {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
let content_length = self.seek(io::SeekFrom::End(0)).await?;
|
||||
Ok(Metadata { content_length })
|
||||
}
|
||||
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
let mut buf = vec![0; (range.end - range.start) as usize];
|
||||
self.seek(io::SeekFrom::Start(range.start)).await?;
|
||||
self.read_exact(&mut buf).await?;
|
||||
Ok(Bytes::from(buf))
|
||||
}
|
||||
}
|
||||
@@ -1,182 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(assert_matches)]
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use common_base::buffer::Error::Overflow;
|
||||
use common_base::buffer::{Buffer, BufferMut};
|
||||
use paste::paste;
|
||||
|
||||
#[test]
|
||||
pub fn test_buffer_read_write() {
|
||||
let mut buf = BytesMut::with_capacity(16);
|
||||
buf.write_u64_le(1234u64).unwrap();
|
||||
let result = buf.peek_u64_le().unwrap();
|
||||
assert_eq!(1234u64, result);
|
||||
buf.advance_by(8);
|
||||
|
||||
buf.write_from_slice("hello, world".as_bytes()).unwrap();
|
||||
let mut content = vec![0u8; 5];
|
||||
buf.peek_to_slice(&mut content).unwrap();
|
||||
let read = String::from_utf8_lossy(&content);
|
||||
assert_eq!("hello", read);
|
||||
buf.advance_by(5);
|
||||
// after read, buffer should still have 7 bytes to read.
|
||||
assert_eq!(7, buf.remaining());
|
||||
|
||||
let mut content = vec![0u8; 6];
|
||||
buf.read_to_slice(&mut content).unwrap();
|
||||
let read = String::from_utf8_lossy(&content);
|
||||
assert_eq!(", worl", read);
|
||||
// after read, buffer should still have 1 byte to read.
|
||||
assert_eq!(1, buf.remaining());
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_buffer_read() {
|
||||
let mut bytes = Bytes::from_static("hello".as_bytes());
|
||||
assert_eq!(5, bytes.remaining_size());
|
||||
assert_eq!(b'h', bytes.peek_u8_le().unwrap());
|
||||
bytes.advance_by(1);
|
||||
assert_eq!(4, bytes.remaining_size());
|
||||
}
|
||||
|
||||
macro_rules! test_primitive_read_write {
|
||||
( $($num_ty: ty), *) => {
|
||||
$(
|
||||
paste!{
|
||||
#[test]
|
||||
fn [<test_read_write_ $num_ty>]() {
|
||||
assert_eq!($num_ty::MAX,(&mut $num_ty::MAX.to_le_bytes() as &[u8]).[<read_ $num_ty _le>]().unwrap());
|
||||
assert_eq!($num_ty::MIN,(&mut $num_ty::MIN.to_le_bytes() as &[u8]).[<read_ $num_ty _le>]().unwrap());
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
test_primitive_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
|
||||
|
||||
#[test]
|
||||
pub fn test_read_write_from_slice_buffer() {
|
||||
let mut buf = "hello".as_bytes();
|
||||
assert_eq!(104, buf.peek_u8_le().unwrap());
|
||||
buf.advance_by(1);
|
||||
assert_eq!(101, buf.peek_u8_le().unwrap());
|
||||
buf.advance_by(1);
|
||||
assert_eq!(108, buf.peek_u8_le().unwrap());
|
||||
buf.advance_by(1);
|
||||
assert_eq!(108, buf.peek_u8_le().unwrap());
|
||||
buf.advance_by(1);
|
||||
assert_eq!(111, buf.peek_u8_le().unwrap());
|
||||
buf.advance_by(1);
|
||||
assert_matches!(buf.peek_u8_le(), Err(Overflow { .. }));
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_read_u8_from_slice_buffer() {
|
||||
let mut buf = "hello".as_bytes();
|
||||
assert_eq!(104, buf.read_u8_le().unwrap());
|
||||
assert_eq!(101, buf.read_u8_le().unwrap());
|
||||
assert_eq!(108, buf.read_u8_le().unwrap());
|
||||
assert_eq!(108, buf.read_u8_le().unwrap());
|
||||
assert_eq!(111, buf.read_u8_le().unwrap());
|
||||
assert_matches!(buf.read_u8_le(), Err(Overflow { .. }));
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_read_write_numbers() {
|
||||
let mut buf: Vec<u8> = vec![];
|
||||
buf.write_u64_le(1234).unwrap();
|
||||
assert_eq!(1234, (&buf[..]).read_u64_le().unwrap());
|
||||
|
||||
buf.write_u32_le(4242).unwrap();
|
||||
let mut p = &buf[..];
|
||||
assert_eq!(1234, p.read_u64_le().unwrap());
|
||||
assert_eq!(4242, p.read_u32_le().unwrap());
|
||||
}
|
||||
|
||||
macro_rules! test_primitive_vec_read_write {
|
||||
( $($num_ty: ty), *) => {
|
||||
$(
|
||||
paste!{
|
||||
#[test]
|
||||
fn [<test_read_write_ $num_ty _from_vec_buffer>]() {
|
||||
let mut buf = vec![];
|
||||
let _ = buf.[<write_ $num_ty _le>]($num_ty::MAX).unwrap();
|
||||
assert_eq!($num_ty::MAX, buf.as_slice().[<read_ $num_ty _le>]().unwrap());
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
test_primitive_vec_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
|
||||
|
||||
#[test]
|
||||
pub fn test_peek_write_from_vec_buffer() {
|
||||
let mut buf: Vec<u8> = vec![];
|
||||
buf.write_from_slice("hello".as_bytes()).unwrap();
|
||||
let mut slice = buf.as_slice();
|
||||
assert_eq!(104, slice.peek_u8_le().unwrap());
|
||||
slice.advance_by(1);
|
||||
assert_eq!(101, slice.peek_u8_le().unwrap());
|
||||
slice.advance_by(1);
|
||||
assert_eq!(108, slice.peek_u8_le().unwrap());
|
||||
slice.advance_by(1);
|
||||
assert_eq!(108, slice.peek_u8_le().unwrap());
|
||||
slice.advance_by(1);
|
||||
assert_eq!(111, slice.peek_u8_le().unwrap());
|
||||
slice.advance_by(1);
|
||||
assert_matches!(slice.read_u8_le(), Err(Overflow { .. }));
|
||||
}
|
||||
|
||||
macro_rules! test_primitive_bytes_read_write {
|
||||
( $($num_ty: ty), *) => {
|
||||
$(
|
||||
paste!{
|
||||
#[test]
|
||||
fn [<test_read_write_ $num_ty _from_bytes>]() {
|
||||
let mut bytes = bytes::Bytes::from($num_ty::MAX.to_le_bytes().to_vec());
|
||||
assert_eq!($num_ty::MAX, bytes.[<read_ $num_ty _le>]().unwrap());
|
||||
|
||||
let mut bytes = bytes::Bytes::from($num_ty::MIN.to_le_bytes().to_vec());
|
||||
assert_eq!($num_ty::MIN, bytes.[<read_ $num_ty _le>]().unwrap());
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
test_primitive_bytes_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
|
||||
|
||||
#[test]
|
||||
pub fn test_write_overflow() {
|
||||
let mut buf = [0u8; 4];
|
||||
assert_matches!(
|
||||
(&mut buf[..]).write_from_slice("hell".as_bytes()),
|
||||
Ok { .. }
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
(&mut buf[..]).write_from_slice("hello".as_bytes()),
|
||||
Err(common_base::buffer::Error::Overflow { .. })
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -29,6 +29,7 @@ datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
|
||||
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
|
||||
use crate::scalars::date::DateFunction;
|
||||
use crate::scalars::expression::ExpressionFunction;
|
||||
use crate::scalars::json::JsonFunction;
|
||||
use crate::scalars::matches::MatchesFunction;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::numpy::NumpyFunction;
|
||||
@@ -116,6 +117,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
SystemFunction::register(&function_registry);
|
||||
TableFunction::register(&function_registry);
|
||||
|
||||
// Json related functions
|
||||
JsonFunction::register(&function_registry);
|
||||
|
||||
// Geo functions
|
||||
#[cfg(feature = "geo")]
|
||||
crate::scalars::geo::GeoFunctions::register(&function_registry);
|
||||
|
||||
@@ -17,9 +17,11 @@ pub(crate) mod date;
|
||||
pub mod expression;
|
||||
#[cfg(feature = "geo")]
|
||||
pub mod geo;
|
||||
pub mod json;
|
||||
pub mod matches;
|
||||
pub mod math;
|
||||
pub mod numpy;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test;
|
||||
pub(crate) mod timestamp;
|
||||
|
||||
31
src/common/function/src/scalars/json.rs
Normal file
31
src/common/function/src/scalars/json.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
mod json_to_string;
|
||||
mod to_json;
|
||||
|
||||
use json_to_string::JsonToStringFunction;
|
||||
use to_json::ToJsonFunction;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// Entry point for registering the JSON-related scalar functions.
pub(crate) struct JsonFunction;
|
||||
|
||||
impl JsonFunction {
|
||||
/// Registers [`JsonToStringFunction`] and [`ToJsonFunction`] in `registry`.
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(JsonToStringFunction));
|
||||
registry.register(Arc::new(ToJsonFunction));
|
||||
}
|
||||
}
|
||||
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Converts `JSONB` values into their `String` representation, which is useful
/// for displaying JSONB content.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct JsonToStringFunction;
|
||||
|
||||
/// Name of the function, as returned by `name()` and reported in error messages.
const NAME: &str = "json_to_string";
|
||||
|
||||
impl Function for JsonToStringFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::json_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let jsons = &columns[0];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let result = match json {
|
||||
Ok(Some(json)) => match jsonb::from_slice(json) {
|
||||
Ok(json) => {
|
||||
let json = json.to_string();
|
||||
Some(json)
|
||||
}
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json binary: {:?}", json),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonToStringFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "JSON_TO_STRING")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::BinaryVector;

    use super::*;

    /// Checks the metadata (name, return type, signature) of `json_to_string`,
    /// the round trip of valid JSONB values, and the error on invalid input.
    #[test]
    fn test_json_to_string_function() {
        let json_to_string = JsonToStringFunction;

        assert_eq!("json_to_string", json_to_string.name());
        assert_eq!(
            ConcreteDataType::string_datatype(),
            json_to_string
                .return_type(&[ConcreteDataType::json_datatype()])
                .unwrap()
        );

        assert!(matches!(json_to_string.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::json_datatype()]
        ));

        let json_strings = [
            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
        ];

        // Encode each JSON text into its JSONB binary form.
        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();

        let json_vector = BinaryVector::from_vec(jsonbs);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
        let vector = json_to_string
            .eval(FunctionContext::default(), &args)
            .unwrap();

        assert_eq!(3, vector.len());
        for (i, gt) in json_strings.iter().enumerate() {
            let result = vector.get_ref(i);
            let result = result.as_string().unwrap().unwrap();
            // remove whitespaces
            assert_eq!(gt.replace(" ", ""), result);
        }

        // Bytes that are not valid JSONB must be rejected.
        let invalid_jsonb = vec![b"invalid json"];
        let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb);
        let args: Vec<VectorRef> = vec![Arc::new(invalid_json_vector)];
        let vector = json_to_string.eval(FunctionContext::default(), &args);
        assert!(vector.is_err());
    }
}
|
||||
165
src/common/function/src/scalars/json/to_json.rs
Normal file
165
src/common/function/src/scalars/json/to_json.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Scalar function `to_json`: parses a `String` column into `JSONB` binary values.
///
/// Its `eval` fails for the whole batch if any non-null row is not parseable as JSON.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ToJsonFunction;
|
||||
|
||||
const NAME: &str = "to_json";
|
||||
|
||||
impl Function for ToJsonFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::json_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let json_strings = &columns[0];
|
||||
|
||||
let size = json_strings.len();
|
||||
let datatype = json_strings.data_type();
|
||||
let mut results = BinaryVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
ConcreteDataType::String(_) => {
|
||||
for i in 0..size {
|
||||
let json_string = json_strings.get_ref(i);
|
||||
|
||||
let json_string = json_string.as_string();
|
||||
let result = match json_string {
|
||||
Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) {
|
||||
Ok(json) => Some(json.to_vec()),
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"Cannot convert the string to json, have: {}",
|
||||
json_string
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ToJsonFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TO_JSON")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::StringVector;

    use super::*;

    /// Checks the metadata of `to_json`, the encoding of valid JSON strings,
    /// and the rejection of a string that is not valid JSON.
    #[test]
    fn test_to_json_function() {
        let to_json = ToJsonFunction;

        assert_eq!("to_json", to_json.name());
        // Probe with the type the signature actually accepts.
        assert_eq!(
            ConcreteDataType::json_datatype(),
            to_json
                .return_type(&[ConcreteDataType::string_datatype()])
                .unwrap()
        );

        assert!(matches!(to_json.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::string_datatype()]
        ));

        let json_strings = [
            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
        ];

        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();

        let json_string_vector = StringVector::from_vec(json_strings.to_vec());
        let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
        let vector = to_json.eval(FunctionContext::default(), &args).unwrap();

        assert_eq!(3, vector.len());
        for (i, gt) in jsonbs.iter().enumerate() {
            let result = vector.get_ref(i);
            let result = result.as_binary().unwrap().unwrap();
            // The output must be exactly the JSONB encoding of the input string.
            assert_eq!(gt, result);
        }

        // A truncated document is not JSON and must fail the evaluation.
        let invalid_vector = StringVector::from_vec(vec![r#"{"a": "#]);
        let args: Vec<VectorRef> = vec![Arc::new(invalid_vector)];
        assert!(to_json.eval(FunctionContext::default(), &args).is_err());
    }
}
|
||||
@@ -19,6 +19,7 @@ use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::vectors::{StringVector, VectorRef};
|
||||
use session::context::Channel;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
@@ -44,11 +45,22 @@ impl Function for VersionFunction {
|
||||
Signature::exact(vec![], Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
let result = StringVector::from(vec![format!(
|
||||
"5.7.20-greptimedb-{}",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)]);
|
||||
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
let version = match func_ctx.query_ctx.channel() {
|
||||
Channel::Mysql => {
|
||||
format!(
|
||||
"{}-greptimedb-{}",
|
||||
std::env::var("GREPTIMEDB_MYSQL_SERVER_VERSION")
|
||||
.unwrap_or_else(|_| "8.4.2".to_string()),
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)
|
||||
}
|
||||
Channel::Postgres => {
|
||||
format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION"))
|
||||
}
|
||||
_ => env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
let result = StringVector::from(vec![version]);
|
||||
Ok(Arc::new(result))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,11 +14,10 @@
|
||||
|
||||
use api::helper;
|
||||
use api::v1::column::Values;
|
||||
use api::v1::{AddColumns, Column, CreateTableExpr};
|
||||
use api::v1::{Column, CreateTableExpr};
|
||||
use common_base::BitVec;
|
||||
use datatypes::data_type::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
@@ -27,11 +26,6 @@ use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu};
|
||||
use crate::util;
|
||||
use crate::util::ColumnExpr;
|
||||
|
||||
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
|
||||
let column_exprs = ColumnExpr::from_columns(columns);
|
||||
util::extract_new_columns(schema, column_exprs)
|
||||
}
|
||||
|
||||
/// Try to build create table request from insert data.
|
||||
pub fn build_create_expr_from_insertion(
|
||||
catalog_name: &str,
|
||||
@@ -114,7 +108,6 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::error;
|
||||
use crate::error::ColumnDataTypeSnafu;
|
||||
use crate::insert::find_new_columns;
|
||||
|
||||
#[inline]
|
||||
fn build_column_schema(
|
||||
@@ -281,11 +274,18 @@ mod tests {
|
||||
|
||||
let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
|
||||
|
||||
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
|
||||
assert!(
|
||||
util::extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
|
||||
.unwrap()
|
||||
.is_none()
|
||||
);
|
||||
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();
|
||||
let add_columns =
|
||||
util::extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(5, add_columns.add_columns.len());
|
||||
let host_column = &add_columns.add_columns[0];
|
||||
|
||||
@@ -19,4 +19,4 @@ pub mod insert;
|
||||
pub mod util;
|
||||
|
||||
pub use alter::{alter_expr_to_request, create_table_schema};
|
||||
pub use insert::{build_create_expr_from_insertion, find_new_columns};
|
||||
pub use insert::build_create_expr_from_insertion;
|
||||
|
||||
@@ -70,7 +70,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
return Ok(vals);
|
||||
},
|
||||
)+
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) | ConcreteDataType::Json(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::find_leaders;
|
||||
use crate::{cache_invalidator, metrics, ClusterId};
|
||||
use crate::{metrics, ClusterId};
|
||||
|
||||
pub struct AlterLogicalTablesProcedure {
|
||||
pub context: DdlContext,
|
||||
@@ -170,12 +170,11 @@ impl AlterLogicalTablesProcedure {
|
||||
}
|
||||
|
||||
pub(crate) async fn on_invalidate_table_cache(&mut self) -> Result<Status> {
|
||||
let ctx = cache_invalidator::Context::default();
|
||||
let to_invalidate = self.build_table_cache_keys_to_invalidate();
|
||||
|
||||
self.context
|
||||
.cache_invalidator
|
||||
.invalidate(&ctx, &to_invalidate)
|
||||
.invalidate(&Default::default(), &to_invalidate)
|
||||
.await?;
|
||||
Ok(Status::done())
|
||||
}
|
||||
|
||||
@@ -441,11 +441,9 @@ async fn handle_alter_table_task(
|
||||
.table_metadata_manager()
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get(table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu { table_id })?
|
||||
.into_inner();
|
||||
|
||||
.context(TableRouteNotFoundSnafu { table_id })?;
|
||||
ensure!(
|
||||
table_route_value.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
|
||||
@@ -90,6 +90,7 @@
|
||||
pub mod catalog_name;
|
||||
pub mod datanode_table;
|
||||
pub mod flow;
|
||||
pub mod node_address;
|
||||
pub mod schema_name;
|
||||
pub mod table_info;
|
||||
pub mod table_name;
|
||||
@@ -102,7 +103,7 @@ pub mod view_info;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
@@ -134,6 +135,7 @@ use self::table_route::{TableRouteManager, TableRouteValue};
|
||||
use self::tombstone::TombstoneManager;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::{self, Result, SerdeJsonSnafu};
|
||||
use crate::key::node_address::NodeAddressValue;
|
||||
use crate::key::table_route::TableRouteKey;
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
@@ -152,12 +154,15 @@ pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
|
||||
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
|
||||
pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
|
||||
pub const TABLE_ROUTE_PREFIX: &str = "__table_route";
|
||||
pub const NODE_ADDRESS_PREFIX: &str = "__node_address";
|
||||
|
||||
pub const CACHE_KEY_PREFIXES: [&str; 4] = [
|
||||
/// The keys with these prefixes will be loaded into the cache when the leader starts.
|
||||
pub const CACHE_KEY_PREFIXES: [&str; 5] = [
|
||||
TABLE_NAME_KEY_PREFIX,
|
||||
CATALOG_NAME_KEY_PREFIX,
|
||||
SCHEMA_NAME_KEY_PREFIX,
|
||||
TABLE_ROUTE_PREFIX,
|
||||
NODE_ADDRESS_PREFIX,
|
||||
];
|
||||
|
||||
pub type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;
|
||||
@@ -210,6 +215,11 @@ lazy_static! {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Matches a whole node address key of the form
// `{NODE_ADDRESS_PREFIX}/<digits>/<digits>`, capturing the two digit runs as
// groups 1 and 2. The pattern only guarantees ASCII digits; it does not bound
// their length, so a captured group may not fit a fixed-width integer.
lazy_static! {
|
||||
static ref NODE_ADDRESS_PATTERN: Regex =
|
||||
Regex::new(&format!("^{NODE_ADDRESS_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap();
|
||||
}
|
||||
|
||||
/// The key of metadata.
|
||||
pub trait MetadataKey<'a, T> {
|
||||
fn to_bytes(&self) -> Vec<u8>;
|
||||
@@ -306,6 +316,12 @@ impl<T: DeserializeOwned + Serialize> Deref for DeserializedValueWithBytes<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gives mutable access to the deserialized `inner` value through the wrapper.
// NOTE(review): this only hands out `&mut self.inner`; presumably the raw bytes
// kept by the wrapper are not updated by such mutations — confirm with callers.
impl<T: DeserializeOwned + Serialize> DerefMut for DeserializedValueWithBytes<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DeserializeOwned + Serialize + Debug> Debug for DeserializedValueWithBytes<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
@@ -1230,7 +1246,8 @@ impl_metadata_value! {
|
||||
FlowInfoValue,
|
||||
FlowNameValue,
|
||||
FlowRouteValue,
|
||||
TableFlowValue
|
||||
TableFlowValue,
|
||||
NodeAddressValue
|
||||
}
|
||||
|
||||
impl_optional_metadata_value! {
|
||||
@@ -1952,7 +1969,7 @@ mod tests {
|
||||
let table_route_value = table_metadata_manager
|
||||
.table_route_manager
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -2005,7 +2022,7 @@ mod tests {
|
||||
let table_route_value = table_metadata_manager
|
||||
.table_route_manager
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
114
src/common/meta/src/key/node_address.rs
Normal file
114
src/common/meta/src/key/node_address.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use api::v1::meta::Role;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{InvalidMetadataSnafu, Result};
|
||||
use crate::key::{MetadataKey, NODE_ADDRESS_PATTERN, NODE_ADDRESS_PREFIX};
|
||||
use crate::peer::Peer;
|
||||
|
||||
/// The metadata key under which a node's address is stored.
|
||||
///
|
||||
/// The layout: `__node_address/{role}/{node_id}`
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct NodeAddressKey {
|
||||
// Role of the node; rendered into the key as its `i32` value.
pub role: Role,
|
||||
// Identifier of the node; rendered into the key as a decimal number.
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl NodeAddressKey {
|
||||
pub fn new(role: Role, node_id: u64) -> Self {
|
||||
Self { role, node_id }
|
||||
}
|
||||
|
||||
pub fn with_datanode(node_id: u64) -> Self {
|
||||
Self::new(Role::Datanode, node_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// The value stored under a [`NodeAddressKey`]: the peer information of the node.
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
||||
pub struct NodeAddressValue {
|
||||
pub peer: Peer,
|
||||
}
|
||||
|
||||
impl NodeAddressValue {
|
||||
pub fn new(peer: Peer) -> Self {
|
||||
Self { peer }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> MetadataKey<'a, NodeAddressKey> for NodeAddressKey {
|
||||
fn to_bytes(&self) -> Vec<u8> {
|
||||
self.to_string().into_bytes()
|
||||
}
|
||||
|
||||
fn from_bytes(bytes: &[u8]) -> Result<NodeAddressKey> {
|
||||
let key = std::str::from_utf8(bytes).map_err(|e| {
|
||||
InvalidMetadataSnafu {
|
||||
err_msg: format!(
|
||||
"NodeAddressKey '{}' is not a valid UTF8 string: {e}",
|
||||
String::from_utf8_lossy(bytes)
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let captures = NODE_ADDRESS_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid NodeAddressKey '{key}'"),
|
||||
})?;
|
||||
// Safety: pass the regex check above
|
||||
let role = captures[1].parse::<i32>().unwrap();
|
||||
let role = Role::try_from(role).map_err(|_| {
|
||||
InvalidMetadataSnafu {
|
||||
err_msg: format!("Invalid Role value: {role}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let node_id = captures[2].parse::<u64>().unwrap();
|
||||
Ok(NodeAddressKey::new(role, node_id))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for NodeAddressKey {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}/{}/{}",
|
||||
NODE_ADDRESS_PREFIX, self.role as i32, self.node_id
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A key must survive an encode/decode round trip unchanged.
    #[test]
    fn test_node_address_key() {
        let keys = [
            NodeAddressKey::new(Role::Datanode, 1),
            NodeAddressKey::new(Role::Flownode, 3),
        ];

        for key in keys {
            let bytes = key.to_bytes();
            let decoded = NodeAddressKey::from_bytes(&bytes).unwrap();
            assert_eq!(key, decoded);
        }
    }

    /// Malformed keys must be reported as errors.
    #[test]
    fn test_node_address_key_rejects_malformed_bytes() {
        let malformed: [&[u8]; 5] = [
            // Not valid UTF-8.
            &[0xff, 0xfe],
            // Wrong prefix.
            b"__table_route/0/1",
            // Missing node id segment.
            b"__node_address/0",
            // Role segment is not numeric.
            b"__node_address/datanode/1",
            // Numeric role that is not a known `Role` value.
            b"__node_address/100/1",
        ];

        for bytes in malformed {
            assert!(NodeAddressKey::from_bytes(bytes).is_err());
        }
    }
}
|
||||
@@ -22,9 +22,10 @@ use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error::{
|
||||
self, InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu,
|
||||
TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu,
|
||||
InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, TableRouteNotFoundSnafu,
|
||||
UnexpectedLogicalRouteTableSnafu,
|
||||
};
|
||||
use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::key::{
|
||||
DeserializedValueWithBytes, MetadataKey, MetadataValue, RegionDistribution,
|
||||
@@ -85,7 +86,7 @@ impl TableRouteValue {
|
||||
debug_assert_eq!(region.region.id.table_id(), physical_table_id);
|
||||
RegionId::new(table_id, region.region.id.region_number())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
.collect();
|
||||
TableRouteValue::logical(physical_table_id, region_routes)
|
||||
}
|
||||
}
|
||||
@@ -189,12 +190,12 @@ impl TableRouteValue {
|
||||
.region_routes
|
||||
.iter()
|
||||
.map(|region_route| region_route.region.id.region_number())
|
||||
.collect::<Vec<_>>(),
|
||||
.collect(),
|
||||
TableRouteValue::Logical(x) => x
|
||||
.region_ids()
|
||||
.iter()
|
||||
.map(|region_id| region_id.region_number())
|
||||
.collect::<Vec<_>>(),
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -301,7 +302,7 @@ impl TableRouteManager {
|
||||
Some(route) => {
|
||||
ensure!(
|
||||
route.is_physical(),
|
||||
error::UnexpectedLogicalRouteTableSnafu {
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{route:?} is a non-physical TableRouteValue.")
|
||||
}
|
||||
);
|
||||
@@ -321,7 +322,7 @@ impl TableRouteManager {
|
||||
) -> Result<TableId> {
|
||||
let table_route = self
|
||||
.storage
|
||||
.get(logical_or_physical_table_id)
|
||||
.get_inner(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
@@ -335,7 +336,7 @@ impl TableRouteManager {
|
||||
|
||||
/// Returns the [TableRouteValue::Physical] recursively.
|
||||
///
|
||||
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
|
||||
/// Returns a [TableRouteNotFound](error::Error::TableRouteNotFound) Error if:
|
||||
/// - the physical table(`logical_or_physical_table_id`) does not exist
|
||||
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
|
||||
pub async fn get_physical_table_route(
|
||||
@@ -528,6 +529,15 @@ impl TableRouteStorage {
|
||||
|
||||
/// Returns the [`TableRouteValue`].
|
||||
pub async fn get(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
|
||||
let mut table_route = self.get_inner(table_id).await?;
|
||||
if let Some(table_route) = &mut table_route {
|
||||
self.remap_route_address(table_route).await?;
|
||||
};
|
||||
|
||||
Ok(table_route)
|
||||
}
|
||||
|
||||
async fn get_inner(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
|
||||
let key = TableRouteKey::new(table_id);
|
||||
self.kv_backend
|
||||
.get(&key.to_bytes())
|
||||
@@ -537,7 +547,19 @@ impl TableRouteStorage {
|
||||
}
|
||||
|
||||
/// Returns the [`TableRouteValue`] wrapped with [`DeserializedValueWithBytes`].
|
||||
pub async fn get_raw(
|
||||
pub async fn get_with_raw_bytes(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
|
||||
let mut table_route = self.get_with_raw_bytes_inner(table_id).await?;
|
||||
if let Some(table_route) = &mut table_route {
|
||||
self.remap_route_address(table_route).await?;
|
||||
};
|
||||
|
||||
Ok(table_route)
|
||||
}
|
||||
|
||||
async fn get_with_raw_bytes_inner(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
|
||||
@@ -554,27 +576,27 @@ impl TableRouteStorage {
|
||||
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
|
||||
/// - the physical table(`logical_or_physical_table_id`) does not exist
|
||||
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
|
||||
pub async fn get_raw_physical_table_route(
|
||||
pub async fn get_physical_table_route_with_raw_bytes(
|
||||
&self,
|
||||
logical_or_physical_table_id: TableId,
|
||||
) -> Result<(TableId, DeserializedValueWithBytes<TableRouteValue>)> {
|
||||
let table_route =
|
||||
self.get_raw(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?;
|
||||
let table_route = self
|
||||
.get_with_raw_bytes(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?;
|
||||
|
||||
match table_route.get_inner_ref() {
|
||||
TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)),
|
||||
TableRouteValue::Logical(x) => {
|
||||
let physical_table_id = x.physical_table_id();
|
||||
let physical_table_route =
|
||||
self.get_raw(physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
let physical_table_route = self
|
||||
.get_with_raw_bytes(physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
Ok((physical_table_id, physical_table_route))
|
||||
}
|
||||
}
|
||||
@@ -582,6 +604,13 @@ impl TableRouteStorage {
|
||||
|
||||
/// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`.
|
||||
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
let mut table_routes = self.batch_get_inner(table_ids).await?;
|
||||
self.remap_routes_addresses(&mut table_routes).await?;
|
||||
|
||||
Ok(table_routes)
|
||||
}
|
||||
|
||||
async fn batch_get_inner(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
let keys = table_ids
|
||||
.iter()
|
||||
.map(|id| TableRouteKey::new(*id).to_bytes())
|
||||
@@ -604,8 +633,107 @@ impl TableRouteStorage {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn remap_routes_addresses(
|
||||
&self,
|
||||
table_routes: &mut [Option<TableRouteValue>],
|
||||
) -> Result<()> {
|
||||
let keys = table_routes
|
||||
.iter()
|
||||
.flat_map(|table_route| {
|
||||
table_route
|
||||
.as_ref()
|
||||
.map(extract_address_keys)
|
||||
.unwrap_or_default()
|
||||
})
|
||||
.collect::<HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
let node_addrs = self.get_node_addresses(keys).await?;
|
||||
for table_route in table_routes.iter_mut().flatten() {
|
||||
set_addresses(&node_addrs, table_route)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn remap_route_address(&self, table_route: &mut TableRouteValue) -> Result<()> {
|
||||
let keys = extract_address_keys(table_route).into_iter().collect();
|
||||
let node_addrs = self.get_node_addresses(keys).await?;
|
||||
set_addresses(&node_addrs, table_route)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_node_addresses(
|
||||
&self,
|
||||
keys: Vec<Vec<u8>>,
|
||||
) -> Result<HashMap<u64, NodeAddressValue>> {
|
||||
if keys.is_empty() {
|
||||
return Ok(HashMap::default());
|
||||
}
|
||||
|
||||
self.kv_backend
|
||||
.batch_get(BatchGetRequest { keys })
|
||||
.await?
|
||||
.kvs
|
||||
.into_iter()
|
||||
.map(|kv| {
|
||||
let node_id = NodeAddressKey::from_bytes(&kv.key)?.node_id;
|
||||
let node_addr = NodeAddressValue::try_from_raw_value(&kv.value)?;
|
||||
Ok((node_id, node_addr))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn set_addresses(
|
||||
node_addrs: &HashMap<u64, NodeAddressValue>,
|
||||
table_route: &mut TableRouteValue,
|
||||
) -> Result<()> {
|
||||
let TableRouteValue::Physical(physical_table_route) = table_route else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
for region_route in &mut physical_table_route.region_routes {
|
||||
if let Some(leader) = &mut region_route.leader_peer {
|
||||
if let Some(node_addr) = node_addrs.get(&leader.id) {
|
||||
leader.addr = node_addr.peer.addr.clone();
|
||||
}
|
||||
}
|
||||
for follower in &mut region_route.follower_peers {
|
||||
if let Some(node_addr) = node_addrs.get(&follower.id) {
|
||||
follower.addr = node_addr.peer.addr.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_address_keys(table_route: &TableRouteValue) -> HashSet<Vec<u8>> {
|
||||
let TableRouteValue::Physical(physical_table_route) = table_route else {
|
||||
return HashSet::default();
|
||||
};
|
||||
|
||||
physical_table_route
|
||||
.region_routes
|
||||
.iter()
|
||||
.flat_map(|region_route| {
|
||||
region_route
|
||||
.follower_peers
|
||||
.iter()
|
||||
.map(|peer| NodeAddressKey::with_datanode(peer.id).to_bytes())
|
||||
.chain(
|
||||
region_route
|
||||
.leader_peer
|
||||
.as_ref()
|
||||
.map(|leader| NodeAddressKey::with_datanode(leader.id).to_bytes()),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -614,7 +742,9 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::TxnService;
|
||||
use crate::kv_backend::{KvBackend, TxnService};
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::store::PutRequest;
|
||||
|
||||
#[test]
|
||||
fn test_table_route_compatibility() {
|
||||
@@ -643,18 +773,18 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_route_storage_get_raw_empty() {
|
||||
async fn test_table_route_storage_get_with_raw_bytes_empty() {
|
||||
let kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_route_storage = TableRouteStorage::new(kv);
|
||||
let table_route = table_route_storage.get_raw(1024).await.unwrap();
|
||||
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
|
||||
assert!(table_route.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_route_storage_get_raw() {
|
||||
async fn test_table_route_storage_get_with_raw_bytes() {
|
||||
let kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_route_storage = TableRouteStorage::new(kv.clone());
|
||||
let table_route = table_route_storage.get_raw(1024).await.unwrap();
|
||||
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
|
||||
assert!(table_route.is_none());
|
||||
let table_route_manager = TableRouteManager::new(kv.clone());
|
||||
let table_route_value = TableRouteValue::Logical(LogicalTableRouteValue {
|
||||
@@ -667,7 +797,7 @@ mod tests {
|
||||
.unwrap();
|
||||
let r = kv.txn(txn).await.unwrap();
|
||||
assert!(r.succeeded);
|
||||
let table_route = table_route_storage.get_raw(1024).await.unwrap();
|
||||
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
|
||||
assert!(table_route.is_some());
|
||||
let got = table_route.unwrap().inner;
|
||||
assert_eq!(got, table_route_value);
|
||||
@@ -718,4 +848,61 @@ mod tests {
|
||||
assert!(results[2].is_none());
|
||||
assert_eq!(results[3].as_ref().unwrap(), &routes[0].1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn remap_route_address_updates_addresses() {
|
||||
let kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_route_storage = TableRouteStorage::new(kv.clone());
|
||||
let mut table_route = TableRouteValue::Physical(PhysicalTableRouteValue {
|
||||
region_routes: vec![RegionRoute {
|
||||
leader_peer: Some(Peer {
|
||||
id: 1,
|
||||
..Default::default()
|
||||
}),
|
||||
follower_peers: vec![Peer {
|
||||
id: 2,
|
||||
..Default::default()
|
||||
}],
|
||||
..Default::default()
|
||||
}],
|
||||
version: 0,
|
||||
});
|
||||
|
||||
kv.put(PutRequest {
|
||||
key: NodeAddressKey::with_datanode(1).to_bytes(),
|
||||
value: NodeAddressValue {
|
||||
peer: Peer {
|
||||
addr: "addr1".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
.try_as_raw_value()
|
||||
.unwrap(),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
table_route_storage
|
||||
.remap_route_address(&mut table_route)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
if let TableRouteValue::Physical(physical_table_route) = table_route {
|
||||
assert_eq!(
|
||||
physical_table_route.region_routes[0]
|
||||
.leader_peer
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.addr,
|
||||
"addr1"
|
||||
);
|
||||
assert_eq!(
|
||||
physical_table_route.region_routes[0].follower_peers[0].addr,
|
||||
""
|
||||
);
|
||||
} else {
|
||||
panic!("Expected PhysicalTableRouteValue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures-util.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
num_cpus.workspace = true
|
||||
rskafka.workspace = true
|
||||
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
|
||||
rustls-native-certs = "0.7"
|
||||
|
||||
@@ -41,6 +41,8 @@ pub struct RaftEngineConfig {
|
||||
/// Duration for fsyncing log files.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub sync_period: Option<Duration>,
|
||||
/// Parallelism during log recovery.
|
||||
pub recovery_parallelism: usize,
|
||||
}
|
||||
|
||||
impl Default for RaftEngineConfig {
|
||||
@@ -55,6 +57,7 @@ impl Default for RaftEngineConfig {
|
||||
enable_log_recycle: true,
|
||||
prefill_log_files: false,
|
||||
sync_period: None,
|
||||
recovery_parallelism: num_cpus::get(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -454,7 +454,7 @@ impl DatanodeBuilder {
|
||||
"Creating raft-engine logstore with config: {:?} and storage path: {}",
|
||||
config, &wal_dir
|
||||
);
|
||||
let logstore = RaftEngineLogStore::try_new(wal_dir, config.clone())
|
||||
let logstore = RaftEngineLogStore::try_new(wal_dir, config)
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
.context(OpenLogStoreSnafu)?;
|
||||
|
||||
@@ -192,7 +192,7 @@ impl HeartbeatTask {
|
||||
let (outgoing_tx, mut outgoing_rx) = mpsc::channel(16);
|
||||
let mailbox = Arc::new(HeartbeatMailbox::new(outgoing_tx));
|
||||
|
||||
let quit_signal = Arc::new(tokio::sync::Notify::new());
|
||||
let quit_signal = Arc::new(Notify::new());
|
||||
|
||||
let mut tx = Self::create_streams(
|
||||
&meta_client,
|
||||
|
||||
@@ -25,6 +25,7 @@ common-time.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
greptime-proto.workspace = true
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
|
||||
@@ -33,8 +33,8 @@ use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
|
||||
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
|
||||
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType,
|
||||
NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
|
||||
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
@@ -81,6 +81,9 @@ pub enum ConcreteDataType {
|
||||
// Compound types:
|
||||
List(ListType),
|
||||
Dictionary(DictionaryType),
|
||||
|
||||
// JSON type:
|
||||
Json(JsonType),
|
||||
}
|
||||
|
||||
impl fmt::Display for ConcreteDataType {
|
||||
@@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType {
|
||||
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -162,6 +166,7 @@ impl ConcreteDataType {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Decimal128(_)
|
||||
| ConcreteDataType::Binary(_)
|
||||
| ConcreteDataType::Json(_)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -216,6 +221,10 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Decimal128(_))
|
||||
}
|
||||
|
||||
pub fn is_json(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Json(_))
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
@@ -404,7 +413,7 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String
|
||||
Binary, Date, DateTime, String, Json
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
|
||||
@@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
|
||||
use crate::prelude::DataType;
|
||||
pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
|
||||
TIME_INDEX_KEY,
|
||||
@@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema;
|
||||
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
pub const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store actual column type in field metadata.
|
||||
pub const TYPE_KEY: &str = "greptime:type";
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
@@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
|
||||
if column_schema.is_time_index() && timestamp_index.is_none() {
|
||||
timestamp_index = Some(index);
|
||||
}
|
||||
let field = Field::try_from(column_schema)?;
|
||||
let mut field = Field::try_from(column_schema)?;
|
||||
|
||||
// Json column performs the same as binary column in Arrow, so we need to mark it
|
||||
if column_schema.data_type.is_json() {
|
||||
let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]);
|
||||
field = field.with_metadata(metadata);
|
||||
}
|
||||
fields.push(field);
|
||||
ensure!(
|
||||
name_to_index
|
||||
|
||||
@@ -22,6 +22,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
use crate::schema::TYPE_KEY;
|
||||
use crate::types::JSON_TYPE_NAME;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
@@ -268,7 +270,14 @@ impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
// Override the data type if it is specified in the metadata.
|
||||
if field.metadata().contains_key(TYPE_KEY) {
|
||||
data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() {
|
||||
JSON_TYPE_NAME => ConcreteDataType::json_datatype(),
|
||||
_ => data_type,
|
||||
};
|
||||
}
|
||||
let mut metadata = field.metadata().clone();
|
||||
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => {
|
||||
@@ -528,4 +537,32 @@ mod tests {
|
||||
assert_eq!(formatted_int8, "test_column_1 Int8 null");
|
||||
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_field_to_column_schema() {
|
||||
let field = Field::new("test", ArrowDataType::Int32, true);
|
||||
let column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!("test", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
|
||||
assert!(column_schema.is_nullable);
|
||||
assert!(!column_schema.is_time_index);
|
||||
assert!(column_schema.default_constraint.is_none());
|
||||
assert!(column_schema.metadata.is_empty());
|
||||
|
||||
let field = Field::new("test", ArrowDataType::Binary, true);
|
||||
let field = field.with_metadata(Metadata::from([(
|
||||
TYPE_KEY.to_string(),
|
||||
ConcreteDataType::json_datatype().name(),
|
||||
)]));
|
||||
let column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!("test", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
|
||||
assert!(column_schema.is_nullable);
|
||||
assert!(!column_schema.is_time_index);
|
||||
assert!(column_schema.default_constraint.is_none());
|
||||
assert_eq!(
|
||||
column_schema.metadata.get(TYPE_KEY).unwrap(),
|
||||
&ConcreteDataType::json_datatype().name()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,6 +68,8 @@ pub enum LogicalTypeId {
|
||||
|
||||
List,
|
||||
Dictionary,
|
||||
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogicalTypeId {
|
||||
@@ -126,6 +128,7 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::DurationMicrosecond => ConcreteDataType::duration_microsecond_datatype(),
|
||||
LogicalTypeId::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(),
|
||||
LogicalTypeId::Decimal128 => ConcreteDataType::decimal128_default_datatype(),
|
||||
LogicalTypeId::Json => ConcreteDataType::json_datatype(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ mod decimal_type;
|
||||
mod dictionary_type;
|
||||
mod duration_type;
|
||||
mod interval_type;
|
||||
mod json_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_type;
|
||||
@@ -42,6 +43,7 @@ pub use duration_type::{
|
||||
pub use interval_type::{
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType,
|
||||
};
|
||||
pub use json_type::{JsonType, JSON_TYPE_NAME};
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_type::{
|
||||
|
||||
67
src/datatypes/src/types/json_type.rs
Normal file
67
src/datatypes/src/types/json_type.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
|
||||
/// JsonType is a data type for JSON data. It is stored as binary data of jsonb format.
|
||||
/// It utilizes current binary value and vector implementation.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct JsonType;
|
||||
|
||||
impl JsonType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for JsonType {
|
||||
fn name(&self) -> String {
|
||||
JSON_TYPE_NAME.to_string()
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Json
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Bytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Binary
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
match from {
|
||||
Value::Binary(v) => Some(Value::Binary(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -342,7 +342,8 @@ impl Value {
|
||||
let value_type_id = self.logical_type_id();
|
||||
let output_type_id = output_type.logical_type_id();
|
||||
ensure!(
|
||||
output_type_id == value_type_id || self.is_null(),
|
||||
// Json type leverage Value(Binary) for storage.
|
||||
output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json && value_type_id == LogicalTypeId::Binary),
|
||||
error::ToScalarValueSnafu {
|
||||
reason: format!(
|
||||
"expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}",
|
||||
@@ -484,7 +485,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
|
||||
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
|
||||
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
|
||||
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
|
||||
ConcreteDataType::Binary(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
|
||||
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
|
||||
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
|
||||
@@ -1994,6 +1995,10 @@ mod tests {
|
||||
&ConcreteDataType::duration_nanosecond_datatype(),
|
||||
&Value::Duration(Duration::new_nanosecond(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::decimal128_datatype(38, 10),
|
||||
&Value::Decimal128(Decimal128::new(1, 38, 10)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2178,6 +2183,14 @@ mod tests {
|
||||
ValueRef::List(ListValueRef::Ref { val: &list }),
|
||||
Value::List(list.clone()).as_value_ref()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ValueRef::Binary(jsonb_value.clone().as_slice()),
|
||||
Value::Binary(jsonb_value.into()).as_value_ref()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2391,6 +2404,16 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::binary_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(jsonb_value.clone())),
|
||||
Value::Binary(jsonb_value.into())
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2523,6 +2546,12 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::duration_nanosecond_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(None),
|
||||
Value::Null
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -80,7 +80,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
match lhs.data_type() {
|
||||
Null(_) => true,
|
||||
Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
|
||||
Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
Binary(_) | Json(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
|
||||
@@ -356,9 +356,10 @@ impl SqlQueryHandler for Instance {
|
||||
|
||||
async fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
self.catalog_manager
|
||||
.schema_exists(catalog, schema)
|
||||
.schema_exists(catalog, schema, None)
|
||||
.await
|
||||
.context(error::CatalogSnafu)
|
||||
.map(|b| b && !self.catalog_manager.is_reserved_schema_name(schema))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ impl Instance {
|
||||
) -> Result<Output> {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog_name, schema_name, table_name)
|
||||
.table(catalog_name, schema_name, table_name, Some(ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -152,7 +152,12 @@ mod python {
|
||||
|
||||
if let Some(table) = self
|
||||
.catalog_manager
|
||||
.table(&expr.catalog_name, &expr.schema_name, &expr.table_name)
|
||||
.table(
|
||||
&expr.catalog_name,
|
||||
&expr.schema_name,
|
||||
&expr.table_name,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
@@ -185,6 +190,7 @@ mod python {
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
|
||||
@@ -12,15 +12,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::SeekFrom;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
||||
use common_base::range_read::RangeReader;
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu};
|
||||
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
|
||||
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::MIN_BLOB_SIZE;
|
||||
@@ -49,28 +48,28 @@ impl<R> InvertedIndexBlobReader<R> {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
|
||||
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
|
||||
self.source
|
||||
.seek(SeekFrom::Start(0))
|
||||
.read_into(0..metadata.content_length, dest)
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
self.source.read_to_end(dest).await.context(ReadSnafu)
|
||||
.context(CommonIoSnafu)?;
|
||||
Ok(metadata.content_length as usize)
|
||||
}
|
||||
|
||||
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(offset))
|
||||
let buf = self
|
||||
.source
|
||||
.read(offset..offset + size as u64)
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
let mut buf = vec![0u8; size as usize];
|
||||
self.source.read(&mut buf).await.context(ReadSnafu)?;
|
||||
Ok(buf)
|
||||
.context(CommonIoSnafu)?;
|
||||
Ok(buf.into())
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
|
||||
let end = SeekFrom::End(0);
|
||||
let blob_size = self.source.seek(end).await.context(SeekSnafu)?;
|
||||
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
|
||||
let blob_size = metadata.content_length;
|
||||
Self::validate_blob_size(blob_size)?;
|
||||
|
||||
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
|
||||
|
||||
@@ -12,32 +12,30 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::SeekFrom;
|
||||
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
||||
use common_base::range_read::RangeReader;
|
||||
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
|
||||
use prost::Message;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{
|
||||
DecodeProtoSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedFooterPayloadSizeSnafu,
|
||||
CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
|
||||
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
|
||||
};
|
||||
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
|
||||
/// InvertedIndeFooterReader is for reading the footer section of the blob.
|
||||
pub struct InvertedIndeFooterReader<R> {
|
||||
source: R,
|
||||
pub struct InvertedIndeFooterReader<'a, R> {
|
||||
source: &'a mut R,
|
||||
blob_size: u64,
|
||||
}
|
||||
|
||||
impl<R> InvertedIndeFooterReader<R> {
|
||||
pub fn new(source: R, blob_size: u64) -> Self {
|
||||
impl<'a, R> InvertedIndeFooterReader<'a, R> {
|
||||
pub fn new(source: &'a mut R, blob_size: u64) -> Self {
|
||||
Self { source, blob_size }
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {
|
||||
impl<'a, R: RangeReader> InvertedIndeFooterReader<'a, R> {
|
||||
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
|
||||
let payload_size = self.read_payload_size().await?;
|
||||
let metas = self.read_payload(payload_size).await?;
|
||||
@@ -45,26 +43,26 @@ impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {
|
||||
}
|
||||
|
||||
async fn read_payload_size(&mut self) -> Result<u64> {
|
||||
let size_offset = SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE);
|
||||
self.source.seek(size_offset).await.context(SeekSnafu)?;
|
||||
let size_buf = &mut [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
|
||||
self.source.read_exact(size_buf).await.context(ReadSnafu)?;
|
||||
let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
|
||||
let end = self.blob_size;
|
||||
let start = end - FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
self.source
|
||||
.read_into(start..end, &mut &mut size_buf[..])
|
||||
.await
|
||||
.context(CommonIoSnafu)?;
|
||||
|
||||
let payload_size = u32::from_le_bytes(*size_buf) as u64;
|
||||
let payload_size = u32::from_le_bytes(size_buf) as u64;
|
||||
self.validate_payload_size(payload_size)?;
|
||||
|
||||
Ok(payload_size)
|
||||
}
|
||||
|
||||
async fn read_payload(&mut self, payload_size: u64) -> Result<InvertedIndexMetas> {
|
||||
let payload_offset =
|
||||
SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE - payload_size);
|
||||
self.source.seek(payload_offset).await.context(SeekSnafu)?;
|
||||
let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
let start = end - payload_size;
|
||||
let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?;
|
||||
|
||||
let payload = &mut vec![0u8; payload_size as usize];
|
||||
self.source.read_exact(payload).await.context(ReadSnafu)?;
|
||||
|
||||
let metas = InvertedIndexMetas::decode(&payload[..]).context(DecodeProtoSnafu)?;
|
||||
let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?;
|
||||
self.validate_metas(&metas, payload_size)?;
|
||||
|
||||
Ok(metas)
|
||||
@@ -144,7 +142,8 @@ mod tests {
|
||||
|
||||
let payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
|
||||
let mut cursor = Cursor::new(payload_buf);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let metas = reader.read_payload(payload_size).await.unwrap();
|
||||
@@ -164,7 +163,8 @@ mod tests {
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
|
||||
let mut cursor = Cursor::new(payload_buf);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size);
|
||||
|
||||
let payload_size_result = reader.read_payload_size().await;
|
||||
assert!(payload_size_result.is_err());
|
||||
@@ -181,7 +181,8 @@ mod tests {
|
||||
|
||||
let payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
|
||||
let mut cursor = Cursor::new(payload_buf);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut cursor, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let payload_result = reader.read_payload(payload_size).await;
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::collections::{hash_map, HashMap};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::atomic::{AtomicI64, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_stream::stream;
|
||||
use common_runtime::{RepeatedTask, TaskFunction};
|
||||
@@ -40,7 +41,9 @@ use crate::raft_engine::protos::logstore::{EntryImpl, NamespaceImpl};
|
||||
const NAMESPACE_PREFIX: &str = "$sys/";
|
||||
|
||||
pub struct RaftEngineLogStore {
|
||||
config: RaftEngineConfig,
|
||||
sync_write: bool,
|
||||
sync_period: Option<Duration>,
|
||||
read_batch_size: usize,
|
||||
engine: Arc<Engine>,
|
||||
gc_task: RepeatedTask<Error>,
|
||||
last_sync_time: AtomicI64,
|
||||
@@ -76,7 +79,7 @@ impl TaskFunction<Error> for PurgeExpiredFilesFunction {
|
||||
}
|
||||
|
||||
impl RaftEngineLogStore {
|
||||
pub async fn try_new(dir: String, config: RaftEngineConfig) -> Result<Self> {
|
||||
pub async fn try_new(dir: String, config: &RaftEngineConfig) -> Result<Self> {
|
||||
let raft_engine_config = Config {
|
||||
dir,
|
||||
purge_threshold: ReadableSize(config.purge_threshold.0),
|
||||
@@ -85,6 +88,7 @@ impl RaftEngineLogStore {
|
||||
target_file_size: ReadableSize(config.file_size.0),
|
||||
enable_log_recycle: config.enable_log_recycle,
|
||||
prefill_for_recycle: config.prefill_log_files,
|
||||
recovery_threads: config.recovery_parallelism,
|
||||
..Default::default()
|
||||
};
|
||||
let engine = Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?);
|
||||
@@ -96,7 +100,9 @@ impl RaftEngineLogStore {
|
||||
);
|
||||
|
||||
let log_store = Self {
|
||||
config,
|
||||
sync_write: config.sync_write,
|
||||
sync_period: config.sync_period,
|
||||
read_batch_size: config.read_batch_size,
|
||||
engine,
|
||||
gc_task,
|
||||
last_sync_time: AtomicI64::new(0),
|
||||
@@ -196,7 +202,9 @@ impl RaftEngineLogStore {
|
||||
impl Debug for RaftEngineLogStore {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RaftEngineLogsStore")
|
||||
.field("config", &self.config)
|
||||
.field("sync_write", &self.sync_write)
|
||||
.field("sync_period", &self.sync_period)
|
||||
.field("read_batch_size", &self.read_batch_size)
|
||||
.field("started", &self.gc_task.started())
|
||||
.finish()
|
||||
}
|
||||
@@ -228,9 +236,9 @@ impl LogStore for RaftEngineLogStore {
|
||||
|
||||
let (mut batch, last_entry_ids) = self.entries_to_batch(entries)?;
|
||||
|
||||
let mut sync = self.config.sync_write;
|
||||
let mut sync = self.sync_write;
|
||||
|
||||
if let Some(sync_period) = &self.config.sync_period {
|
||||
if let Some(sync_period) = &self.sync_period {
|
||||
let now = common_time::util::current_time_millis();
|
||||
if now - self.last_sync_time.load(Ordering::Relaxed) >= sync_period.as_millis() as i64 {
|
||||
self.last_sync_time.store(now, Ordering::Relaxed);
|
||||
@@ -276,7 +284,7 @@ impl LogStore for RaftEngineLogStore {
|
||||
entry_id,
|
||||
self.span(ns)
|
||||
);
|
||||
let max_batch_size = self.config.read_batch_size;
|
||||
let max_batch_size = self.read_batch_size;
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(max_batch_size);
|
||||
let _handle = common_runtime::spawn_global(async move {
|
||||
while start_index <= last_index {
|
||||
@@ -489,7 +497,7 @@ mod tests {
|
||||
let dir = create_temp_dir("raft-engine-logstore-test");
|
||||
let logstore = RaftEngineLogStore::try_new(
|
||||
dir.path().to_str().unwrap().to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
&RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -502,7 +510,7 @@ mod tests {
|
||||
let dir = create_temp_dir("raft-engine-logstore-test");
|
||||
let logstore = RaftEngineLogStore::try_new(
|
||||
dir.path().to_str().unwrap().to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
&RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -528,7 +536,7 @@ mod tests {
|
||||
let dir = create_temp_dir("raft-engine-logstore-test");
|
||||
let logstore = RaftEngineLogStore::try_new(
|
||||
dir.path().to_str().unwrap().to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
&RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -570,7 +578,7 @@ mod tests {
|
||||
{
|
||||
let logstore = RaftEngineLogStore::try_new(
|
||||
dir.path().to_str().unwrap().to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
&RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -590,7 +598,7 @@ mod tests {
|
||||
|
||||
let logstore = RaftEngineLogStore::try_new(
|
||||
dir.path().to_str().unwrap().to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
&RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -634,7 +642,7 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
RaftEngineLogStore::try_new(path, config).await.unwrap()
|
||||
RaftEngineLogStore::try_new(path, &config).await.unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -29,7 +29,7 @@ pub async fn create_tmp_local_file_log_store<P: AsRef<Path>>(path: P) -> RaftEng
|
||||
file_size: ReadableSize::kb(128),
|
||||
..Default::default()
|
||||
};
|
||||
RaftEngineLogStore::try_new(path, cfg).await.unwrap()
|
||||
RaftEngineLogStore::try_new(path, &cfg).await.unwrap()
|
||||
}
|
||||
|
||||
/// Create a [KafkaLogStore].
|
||||
|
||||
@@ -655,13 +655,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid heartbeat request: {}", err_msg))]
|
||||
InvalidHeartbeatRequest {
|
||||
err_msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to publish message"))]
|
||||
PublishMessage {
|
||||
#[snafu(source)]
|
||||
@@ -809,7 +802,6 @@ impl ErrorExt for Error {
|
||||
| Error::UnsupportedSelectorType { .. }
|
||||
| Error::InvalidArguments { .. }
|
||||
| Error::InitExportMetricsTask { .. }
|
||||
| Error::InvalidHeartbeatRequest { .. }
|
||||
| Error::ProcedureNotFound { .. }
|
||||
| Error::TooManyPartitions { .. }
|
||||
| Error::TomlFormat { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
@@ -15,8 +15,12 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, Role};
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
|
||||
use common_meta::key::{MetadataKey, MetadataValue};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::store::PutRequest;
|
||||
use common_telemetry::warn;
|
||||
use common_telemetry::{error, warn};
|
||||
use dashmap::DashMap;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -120,6 +124,13 @@ impl HeartbeatHandler for CollectStatsHandler {
|
||||
true
|
||||
};
|
||||
|
||||
// Need to refresh the [datanode -> address] mapping
|
||||
if refresh {
|
||||
// Safety: `epoch_stats.stats` is not empty
|
||||
let last = epoch_stats.stats.last().unwrap();
|
||||
rewrite_node_address(ctx, last).await;
|
||||
}
|
||||
|
||||
if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY {
|
||||
return Ok(HandleControl::Continue);
|
||||
}
|
||||
@@ -131,7 +142,7 @@ impl HeartbeatHandler for CollectStatsHandler {
|
||||
let put = PutRequest {
|
||||
key,
|
||||
value,
|
||||
..Default::default()
|
||||
prev_kv: false,
|
||||
};
|
||||
|
||||
let _ = ctx
|
||||
@@ -144,6 +155,44 @@ impl HeartbeatHandler for CollectStatsHandler {
|
||||
}
|
||||
}
|
||||
|
||||
async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {
|
||||
let peer = Peer {
|
||||
id: stat.id,
|
||||
addr: stat.addr.clone(),
|
||||
};
|
||||
let key = NodeAddressKey::with_datanode(peer.id).to_bytes();
|
||||
if let Ok(value) = NodeAddressValue::new(peer.clone()).try_as_raw_value() {
|
||||
let put = PutRequest {
|
||||
key,
|
||||
value,
|
||||
prev_kv: false,
|
||||
};
|
||||
|
||||
match ctx.leader_cached_kv_backend.put(put).await {
|
||||
Ok(_) => {
|
||||
// broadcast invalidating cache
|
||||
let cache_idents = stat
|
||||
.table_ids()
|
||||
.into_iter()
|
||||
.map(CacheIdent::TableId)
|
||||
.collect::<Vec<_>>();
|
||||
if let Err(e) = ctx
|
||||
.cache_invalidator
|
||||
.invalidate(&Default::default(), &cache_idents)
|
||||
.await
|
||||
{
|
||||
error!(e; "Failed to invalidate {} `NodeAddressKey` cache, peer: {:?}", cache_idents.len(), peer);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(e; "Failed to update NodeAddressValue: {:?}", peer);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("Failed to serialize NodeAddressValue: {:?}", peer);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, Role};
|
||||
use common_telemetry::warn;
|
||||
use common_telemetry::{info, warn};
|
||||
|
||||
use super::node_stat::Stat;
|
||||
use crate::error::Result;
|
||||
@@ -40,12 +40,15 @@ impl HeartbeatHandler for ExtractStatHandler {
|
||||
return Ok(HandleControl::Continue);
|
||||
}
|
||||
|
||||
match Stat::try_from(req.clone()) {
|
||||
match Stat::try_from(req) {
|
||||
Ok(stat) => {
|
||||
let _ = acc.stat.insert(stat);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(err; "Incomplete heartbeat data: {:?}", req);
|
||||
Err(Some(header)) => {
|
||||
info!("New handshake request: {:?}", header);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("Incomplete heartbeat data: {:?}", req);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -14,14 +14,14 @@
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use api::v1::meta::HeartbeatRequest;
|
||||
use api::v1::meta::{HeartbeatRequest, RequestHeader};
|
||||
use common_meta::ClusterId;
|
||||
use common_time::util as time_util;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error::{Error, InvalidHeartbeatRequestSnafu};
|
||||
use crate::key::DatanodeStatKey;
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
@@ -79,6 +79,11 @@ impl Stat {
|
||||
self.region_stats.iter().map(|s| (s.id, s.role)).collect()
|
||||
}
|
||||
|
||||
/// Returns all table ids in the region stats.
|
||||
pub fn table_ids(&self) -> HashSet<TableId> {
|
||||
self.region_stats.iter().map(|s| s.id.table_id()).collect()
|
||||
}
|
||||
|
||||
pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
|
||||
if inactive_region_ids.is_empty() {
|
||||
return;
|
||||
@@ -92,10 +97,10 @@ impl Stat {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<HeartbeatRequest> for Stat {
|
||||
type Error = Error;
|
||||
impl TryFrom<&HeartbeatRequest> for Stat {
|
||||
type Error = Option<RequestHeader>;
|
||||
|
||||
fn try_from(value: HeartbeatRequest) -> Result<Self, Self::Error> {
|
||||
fn try_from(value: &HeartbeatRequest) -> Result<Self, Self::Error> {
|
||||
let HeartbeatRequest {
|
||||
header,
|
||||
peer,
|
||||
@@ -107,9 +112,9 @@ impl TryFrom<HeartbeatRequest> for Stat {
|
||||
match (header, peer) {
|
||||
(Some(header), Some(peer)) => {
|
||||
let region_stats = region_stats
|
||||
.into_iter()
|
||||
.map(RegionStat::try_from)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
.iter()
|
||||
.map(RegionStat::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Self {
|
||||
timestamp_millis: time_util::current_time_millis(),
|
||||
@@ -117,35 +122,30 @@ impl TryFrom<HeartbeatRequest> for Stat {
|
||||
// datanode id
|
||||
id: peer.id,
|
||||
// datanode address
|
||||
addr: peer.addr,
|
||||
addr: peer.addr.clone(),
|
||||
rcus: region_stats.iter().map(|s| s.rcus).sum(),
|
||||
wcus: region_stats.iter().map(|s| s.wcus).sum(),
|
||||
region_num: region_stats.len() as u64,
|
||||
region_stats,
|
||||
node_epoch,
|
||||
node_epoch: *node_epoch,
|
||||
})
|
||||
}
|
||||
_ => InvalidHeartbeatRequestSnafu {
|
||||
err_msg: "missing header or peer",
|
||||
}
|
||||
.fail(),
|
||||
(header, _) => Err(header.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<api::v1::meta::RegionStat> for RegionStat {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: api::v1::meta::RegionStat) -> Result<Self, Self::Error> {
|
||||
Ok(Self {
|
||||
impl From<&api::v1::meta::RegionStat> for RegionStat {
|
||||
fn from(value: &api::v1::meta::RegionStat) -> Self {
|
||||
Self {
|
||||
id: RegionId::from_u64(value.region_id),
|
||||
rcus: value.rcus,
|
||||
wcus: value.wcus,
|
||||
approximate_bytes: value.approximate_bytes,
|
||||
engine: value.engine.to_string(),
|
||||
role: RegionRole::from(value.role()),
|
||||
extensions: value.extensions,
|
||||
})
|
||||
extensions: value.extensions.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -232,7 +232,7 @@ impl Context {
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
|
||||
@@ -160,6 +160,11 @@ impl DowngradeLeaderRegion {
|
||||
"Trying to downgrade the region {} on Datanode {}, but region doesn't exist!",
|
||||
region_id, leader
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"Region {} leader is downgraded, last_entry_id: {:?}",
|
||||
region_id, last_entry_id
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(last_entry_id) = last_entry_id {
|
||||
|
||||
@@ -146,7 +146,7 @@ mod tests {
|
||||
let original_table_route = table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -140,7 +140,7 @@ mod tests {
|
||||
let old_table_route = table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -337,7 +337,7 @@ mod tests {
|
||||
let original_table_route = table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_raw(table_id)
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -33,6 +33,7 @@ use store_api::metric_engine_consts::{
|
||||
METADATA_SCHEMA_VALUE_COLUMN_INDEX, METADATA_SCHEMA_VALUE_COLUMN_NAME,
|
||||
PHYSICAL_TABLE_METADATA_KEY,
|
||||
};
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest};
|
||||
use store_api::storage::consts::ReservedColumnId;
|
||||
@@ -426,9 +427,10 @@ impl MetricEngineInner {
|
||||
// concat region dir
|
||||
let metadata_region_dir = join_dir(&request.region_dir, METADATA_REGION_SUBDIR);
|
||||
|
||||
// remove TTL option
|
||||
// remove TTL and APPEND_MODE option
|
||||
let mut options = request.options.clone();
|
||||
options.remove("ttl");
|
||||
options.remove(TTL_KEY);
|
||||
options.remove(APPEND_MODE_KEY);
|
||||
|
||||
RegionCreateRequest {
|
||||
engine: MITO_ENGINE_NAME.to_string(),
|
||||
|
||||
@@ -25,6 +25,7 @@ pub(crate) mod write_cache;
|
||||
use std::mem;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use moka::notification::RemovalCause;
|
||||
@@ -393,20 +394,59 @@ impl SstMetaKey {
|
||||
}
|
||||
}
|
||||
|
||||
/// Path to column pages in the SST file.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ColumnPagePath {
|
||||
/// Region id of the SST file to cache.
|
||||
region_id: RegionId,
|
||||
/// Id of the SST file to cache.
|
||||
file_id: FileId,
|
||||
/// Index of the row group.
|
||||
row_group_idx: usize,
|
||||
/// Index of the column in the row group.
|
||||
column_idx: usize,
|
||||
}
|
||||
|
||||
/// Cache key for pages of a SST row group.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct PageKey {
|
||||
/// Region id of the SST file to cache.
|
||||
pub region_id: RegionId,
|
||||
/// Id of the SST file to cache.
|
||||
pub file_id: FileId,
|
||||
/// Index of the row group.
|
||||
pub row_group_idx: usize,
|
||||
/// Index of the column in the row group.
|
||||
pub column_idx: usize,
|
||||
pub enum PageKey {
|
||||
/// Cache key for a compressed page in a row group.
|
||||
Compressed(ColumnPagePath),
|
||||
/// Cache key for all uncompressed pages in a row group.
|
||||
Uncompressed(ColumnPagePath),
|
||||
}
|
||||
|
||||
impl PageKey {
|
||||
/// Creates a key for a compressed page.
|
||||
pub fn new_compressed(
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> PageKey {
|
||||
PageKey::Compressed(ColumnPagePath {
|
||||
region_id,
|
||||
file_id,
|
||||
row_group_idx,
|
||||
column_idx,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a key for all uncompressed pages in a row group.
|
||||
pub fn new_uncompressed(
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> PageKey {
|
||||
PageKey::Uncompressed(ColumnPagePath {
|
||||
region_id,
|
||||
file_id,
|
||||
row_group_idx,
|
||||
column_idx,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns memory used by the key (estimated).
|
||||
fn estimated_size(&self) -> usize {
|
||||
mem::size_of::<Self>()
|
||||
@@ -414,21 +454,41 @@ impl PageKey {
|
||||
}
|
||||
|
||||
/// Cached row group pages for a column.
|
||||
// We don't use enum here to make it easier to mock and use the struct.
|
||||
#[derive(Default)]
|
||||
pub struct PageValue {
|
||||
/// Compressed page of the column in the row group.
|
||||
pub compressed: Bytes,
|
||||
/// All pages of the column in the row group.
|
||||
pub pages: Vec<Page>,
|
||||
pub row_group: Vec<Page>,
|
||||
}
|
||||
|
||||
impl PageValue {
|
||||
/// Creates a new page value.
|
||||
pub fn new(pages: Vec<Page>) -> PageValue {
|
||||
PageValue { pages }
|
||||
/// Creates a new value from a compressed page.
|
||||
pub fn new_compressed(bytes: Bytes) -> PageValue {
|
||||
PageValue {
|
||||
compressed: bytes,
|
||||
row_group: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new value from all pages in a row group.
|
||||
pub fn new_row_group(pages: Vec<Page>) -> PageValue {
|
||||
PageValue {
|
||||
compressed: Bytes::new(),
|
||||
row_group: pages,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns memory used by the value (estimated).
|
||||
fn estimated_size(&self) -> usize {
|
||||
// We only consider heap size of all pages.
|
||||
self.pages.iter().map(|page| page.buffer().len()).sum()
|
||||
mem::size_of::<Self>()
|
||||
+ self.compressed.len()
|
||||
+ self
|
||||
.row_group
|
||||
.iter()
|
||||
.map(|page| page.buffer().len())
|
||||
.sum::<usize>()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,13 +567,8 @@ mod tests {
|
||||
.get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
|
||||
.is_none());
|
||||
|
||||
let key = PageKey {
|
||||
region_id,
|
||||
file_id,
|
||||
row_group_idx: 0,
|
||||
column_idx: 0,
|
||||
};
|
||||
let pages = Arc::new(PageValue::new(Vec::new()));
|
||||
let key = PageKey::new_uncompressed(region_id, file_id, 0, 0);
|
||||
let pages = Arc::new(PageValue::default());
|
||||
cache.put_pages(key.clone(), pages);
|
||||
assert!(cache.get_pages(&key).is_none());
|
||||
|
||||
@@ -562,14 +617,9 @@ mod tests {
|
||||
let cache = CacheManager::builder().page_cache_size(1000).build();
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let file_id = FileId::random();
|
||||
let key = PageKey {
|
||||
region_id,
|
||||
file_id,
|
||||
row_group_idx: 0,
|
||||
column_idx: 0,
|
||||
};
|
||||
let key = PageKey::new_compressed(region_id, file_id, 0, 0);
|
||||
assert!(cache.get_pages(&key).is_none());
|
||||
let pages = Arc::new(PageValue::new(Vec::new()));
|
||||
let pages = Arc::new(PageValue::default());
|
||||
cache.put_pages(key.clone(), pages);
|
||||
assert!(cache.get_pages(&key).is_some());
|
||||
}
|
||||
|
||||
@@ -133,6 +133,7 @@ lazy_static! {
|
||||
vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref READ_STAGE_FETCH_PAGES: Histogram = READ_STAGE_ELAPSED.with_label_values(&["fetch_pages"]);
|
||||
/// Counter of rows read from different source.
|
||||
pub static ref READ_ROWS_TOTAL: IntCounterVec =
|
||||
register_int_counter_vec!("greptime_mito_read_rows_total", "mito read rows total", &[TYPE_LABEL]).unwrap();
|
||||
|
||||
@@ -89,6 +89,9 @@ impl Drop for MergeReader {
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["merge"])
|
||||
.observe(self.metrics.scan_cost.as_secs_f64());
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["merge_fetch"])
|
||||
.observe(self.metrics.fetch_cost.as_secs_f64());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -731,14 +731,28 @@ impl ScanInput {
|
||||
}
|
||||
|
||||
for file in &self.files {
|
||||
let range = PartitionRange {
|
||||
start: file.meta_ref().time_range.0,
|
||||
end: file.meta_ref().time_range.1,
|
||||
num_rows: file.meta_ref().num_rows as usize,
|
||||
identifier: id,
|
||||
};
|
||||
id += 1;
|
||||
container.push(range);
|
||||
if self.append_mode {
|
||||
// For append mode, we can parallelize reading row groups.
|
||||
for _ in 0..file.meta_ref().num_row_groups {
|
||||
let range = PartitionRange {
|
||||
start: file.time_range().0,
|
||||
end: file.time_range().1,
|
||||
num_rows: file.num_rows(),
|
||||
identifier: id,
|
||||
};
|
||||
id += 1;
|
||||
container.push(range);
|
||||
}
|
||||
} else {
|
||||
let range = PartitionRange {
|
||||
start: file.meta_ref().time_range.0,
|
||||
end: file.meta_ref().time_range.1,
|
||||
num_rows: file.meta_ref().num_rows as usize,
|
||||
identifier: id,
|
||||
};
|
||||
id += 1;
|
||||
container.push(range);
|
||||
}
|
||||
}
|
||||
|
||||
container
|
||||
@@ -887,10 +901,21 @@ impl ScanPartList {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the number of files.
|
||||
pub(crate) fn num_files(&self) -> usize {
|
||||
self.0.as_ref().map_or(0, |parts| {
|
||||
parts.iter().map(|part| part.file_ranges.len()).sum()
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the number of file ranges.
|
||||
pub(crate) fn num_file_ranges(&self) -> usize {
|
||||
self.0.as_ref().map_or(0, |parts| {
|
||||
parts.iter().map(|part| part.file_ranges.len()).sum()
|
||||
parts
|
||||
.iter()
|
||||
.flat_map(|part| part.file_ranges.iter())
|
||||
.map(|ranges| ranges.len())
|
||||
.sum()
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -933,9 +958,10 @@ impl StreamContext {
|
||||
Ok(inner) => match t {
|
||||
DisplayFormatType::Default => write!(
|
||||
f,
|
||||
"partition_count={} ({} memtable ranges, {} file ranges)",
|
||||
"partition_count={} ({} memtable ranges, {} file {} ranges)",
|
||||
inner.0.len(),
|
||||
inner.0.num_mem_ranges(),
|
||||
inner.0.num_files(),
|
||||
inner.0.num_file_ranges()
|
||||
)?,
|
||||
DisplayFormatType::Verbose => write!(f, "{:?}", inner.0)?,
|
||||
|
||||
@@ -166,8 +166,8 @@ impl SeqScan {
|
||||
reader_metrics.merge_from(reader.metrics());
|
||||
}
|
||||
debug!(
|
||||
"Seq scan region {}, file {}, {} ranges finished, metrics: {:?}",
|
||||
region_id, file_id, range_num, reader_metrics
|
||||
"Seq scan region {}, file {}, {} ranges finished, metrics: {:?}, compaction: {}",
|
||||
region_id, file_id, range_num, reader_metrics, compaction
|
||||
);
|
||||
// Reports metrics.
|
||||
reader_metrics.observe_rows(read_type);
|
||||
@@ -238,11 +238,12 @@ impl SeqScan {
|
||||
let maybe_reader = Self::build_reader_from_sources(stream_ctx, sources, semaphore).await;
|
||||
let build_reader_cost = build_start.elapsed();
|
||||
metrics.build_reader_cost += build_reader_cost;
|
||||
common_telemetry::debug!(
|
||||
"Build reader region: {}, range_id: {}, from sources, build_reader_cost: {:?}",
|
||||
debug!(
|
||||
"Build reader region: {}, range_id: {}, from sources, build_reader_cost: {:?}, compaction: {}",
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
range_id,
|
||||
build_reader_cost
|
||||
build_reader_cost,
|
||||
compaction,
|
||||
);
|
||||
|
||||
maybe_reader
|
||||
@@ -354,11 +355,12 @@ impl SeqScan {
|
||||
metrics.observe_metrics_on_finish();
|
||||
|
||||
debug!(
|
||||
"Seq scan finished, region_id: {:?}, partition: {}, metrics: {:?}, first_poll: {:?}",
|
||||
"Seq scan finished, region_id: {:?}, partition: {}, metrics: {:?}, first_poll: {:?}, compaction: {}",
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
metrics,
|
||||
first_poll,
|
||||
compaction,
|
||||
);
|
||||
}
|
||||
};
|
||||
@@ -450,13 +452,14 @@ impl SeqScan {
|
||||
metrics.total_cost = stream_ctx.query_start.elapsed();
|
||||
metrics.observe_metrics_on_finish();
|
||||
|
||||
common_telemetry::debug!(
|
||||
"Seq scan finished, region_id: {}, partition: {}, id: {}, metrics: {:?}, first_poll: {:?}",
|
||||
debug!(
|
||||
"Seq scan finished, region_id: {}, partition: {}, id: {}, metrics: {:?}, first_poll: {:?}, compaction: {}",
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
id,
|
||||
metrics,
|
||||
first_poll,
|
||||
compaction,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -152,7 +152,6 @@ impl RegionScanner for UnorderedScan {
|
||||
let parallelism = self.properties.num_partitions();
|
||||
let stream = try_stream! {
|
||||
let first_poll = stream_ctx.query_start.elapsed();
|
||||
|
||||
let part = {
|
||||
let mut parts = stream_ctx.parts.lock().await;
|
||||
maybe_init_parts(&stream_ctx.input, &mut parts, &mut metrics, parallelism)
|
||||
@@ -180,6 +179,7 @@ impl RegionScanner for UnorderedScan {
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
metrics.build_reader_cost = build_reader_start.elapsed();
|
||||
|
||||
let query_start = stream_ctx.query_start;
|
||||
let cache = stream_ctx.input.cache_manager.as_deref();
|
||||
// Scans memtables first.
|
||||
@@ -217,8 +217,8 @@ impl RegionScanner for UnorderedScan {
|
||||
metrics.total_cost = query_start.elapsed();
|
||||
metrics.observe_metrics_on_finish();
|
||||
debug!(
|
||||
"Unordered scan partition {} finished, region_id: {}, metrics: {:?}, reader_metrics: {:?}, first_poll: {:?}",
|
||||
partition, mapper.metadata().region_id, metrics, reader_metrics, first_poll,
|
||||
"Unordered scan partition {} finished, region_id: {}, metrics: {:?}, reader_metrics: {:?}, first_poll: {:?}, ranges: {}",
|
||||
partition, mapper.metadata().region_id, metrics, reader_metrics, first_poll, part.file_ranges[0].len(),
|
||||
);
|
||||
};
|
||||
let stream = Box::pin(RecordBatchStreamWrapper::new(
|
||||
@@ -343,14 +343,14 @@ impl UnorderedDistributor {
|
||||
|
||||
let mems_per_part = ((self.mem_ranges.len() + parallelism - 1) / parallelism).max(1);
|
||||
let ranges_per_part = ((self.file_ranges.len() + parallelism - 1) / parallelism).max(1);
|
||||
common_telemetry::debug!(
|
||||
"Parallel scan is enabled, parallelism: {}, {} mem_ranges, {} file_ranges, mems_per_part: {}, ranges_per_part: {}",
|
||||
parallelism,
|
||||
self.mem_ranges.len(),
|
||||
self.file_ranges.len(),
|
||||
mems_per_part,
|
||||
ranges_per_part
|
||||
);
|
||||
debug!(
|
||||
"Parallel scan is enabled, parallelism: {}, {} mem_ranges, {} file_ranges, mems_per_part: {}, ranges_per_part: {}",
|
||||
parallelism,
|
||||
self.mem_ranges.len(),
|
||||
self.file_ranges.len(),
|
||||
mems_per_part,
|
||||
ranges_per_part
|
||||
);
|
||||
let mut scan_parts = self
|
||||
.mem_ranges
|
||||
.chunks(mems_per_part)
|
||||
|
||||
@@ -68,7 +68,7 @@ impl SortField {
|
||||
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
|
||||
ConcreteDataType::Float32(_) => 5,
|
||||
ConcreteDataType::Float64(_) => 9,
|
||||
ConcreteDataType::Binary(_) => 11,
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => 11,
|
||||
ConcreteDataType::String(_) => 11, // a non-empty string takes at least 11 bytes.
|
||||
ConcreteDataType::Date(_) => 5,
|
||||
ConcreteDataType::DateTime(_) => 9,
|
||||
@@ -146,7 +146,8 @@ impl SortField {
|
||||
Time, time,
|
||||
Interval, interval,
|
||||
Duration, duration,
|
||||
Decimal128, decimal128
|
||||
Decimal128, decimal128,
|
||||
Json, binary
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -169,7 +170,7 @@ impl SortField {
|
||||
Ok(Value::from(Option::<$f>::deserialize(deserializer).context(error::DeserializeFieldSnafu)?))
|
||||
}
|
||||
)*
|
||||
ConcreteDataType::Binary(_) => Ok(Value::from(
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => Ok(Value::from(
|
||||
Option::<Vec<u8>>::deserialize(deserializer)
|
||||
.context(error::DeserializeFieldSnafu)?
|
||||
.map(Bytes::from),
|
||||
@@ -237,7 +238,7 @@ impl SortField {
|
||||
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
|
||||
ConcreteDataType::Float32(_) => 5,
|
||||
ConcreteDataType::Float64(_) => 9,
|
||||
ConcreteDataType::Binary(_) => {
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => {
|
||||
// Now the encoder encode binary as a list of bytes so we can't use
|
||||
// skip bytes.
|
||||
let pos_before = deserializer.position();
|
||||
|
||||
@@ -216,22 +216,16 @@ mod tests {
|
||||
.await;
|
||||
}
|
||||
|
||||
// Doesn't have compressed page cached.
|
||||
let page_key = PageKey::new_compressed(metadata.region_id, handle.file_id(), 0, 0);
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
|
||||
|
||||
// Cache 4 row groups.
|
||||
for i in 0..4 {
|
||||
let page_key = PageKey {
|
||||
region_id: metadata.region_id,
|
||||
file_id: handle.file_id(),
|
||||
row_group_idx: i,
|
||||
column_idx: 0,
|
||||
};
|
||||
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), i, 0);
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_some());
|
||||
}
|
||||
let page_key = PageKey {
|
||||
region_id: metadata.region_id,
|
||||
file_id: handle.file_id(),
|
||||
row_group_idx: 5,
|
||||
column_idx: 0,
|
||||
};
|
||||
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), 5, 0);
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
|
||||
}
|
||||
|
||||
|
||||
@@ -19,14 +19,14 @@ use std::collections::VecDeque;
|
||||
use parquet::column::page::{Page, PageMetadata, PageReader};
|
||||
use parquet::errors::Result;
|
||||
|
||||
/// A reader that reads from cached pages.
|
||||
pub(crate) struct CachedPageReader {
|
||||
/// A reader that reads all pages from a cache.
|
||||
pub(crate) struct RowGroupCachedReader {
|
||||
/// Cached pages.
|
||||
pages: VecDeque<Page>,
|
||||
}
|
||||
|
||||
impl CachedPageReader {
|
||||
/// Returns a new reader from existing pages.
|
||||
impl RowGroupCachedReader {
|
||||
/// Returns a new reader from pages of a column in a row group.
|
||||
pub(crate) fn new(pages: &[Page]) -> Self {
|
||||
Self {
|
||||
pages: pages.iter().cloned().collect(),
|
||||
@@ -34,7 +34,7 @@ impl CachedPageReader {
|
||||
}
|
||||
}
|
||||
|
||||
impl PageReader for CachedPageReader {
|
||||
impl PageReader for RowGroupCachedReader {
|
||||
fn get_next_page(&mut self) -> Result<Option<Page>> {
|
||||
Ok(self.pages.pop_front())
|
||||
}
|
||||
@@ -55,9 +55,8 @@ impl PageReader for CachedPageReader {
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for CachedPageReader {
|
||||
impl Iterator for RowGroupCachedReader {
|
||||
type Item = Result<Page>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.get_next_page().transpose()
|
||||
}
|
||||
|
||||
@@ -23,33 +23,37 @@ use parquet::arrow::arrow_reader::{RowGroups, RowSelection};
|
||||
use parquet::arrow::ProjectionMask;
|
||||
use parquet::column::page::{PageIterator, PageReader};
|
||||
use parquet::errors::{ParquetError, Result};
|
||||
use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData};
|
||||
use parquet::file::metadata::{ColumnChunkMetaData, ParquetMetaData, RowGroupMetaData};
|
||||
use parquet::file::properties::DEFAULT_PAGE_SIZE;
|
||||
use parquet::file::reader::{ChunkReader, Length};
|
||||
use parquet::file::serialized_reader::SerializedPageReader;
|
||||
use parquet::format::PageLocation;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::task::yield_now;
|
||||
|
||||
use crate::cache::file_cache::{FileType, IndexKey};
|
||||
use crate::cache::{CacheManagerRef, PageKey, PageValue};
|
||||
use crate::metrics::READ_STAGE_ELAPSED;
|
||||
use crate::metrics::{READ_STAGE_ELAPSED, READ_STAGE_FETCH_PAGES};
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::parquet::helper::fetch_byte_ranges;
|
||||
use crate::sst::parquet::page_reader::CachedPageReader;
|
||||
use crate::sst::parquet::page_reader::RowGroupCachedReader;
|
||||
|
||||
/// An in-memory collection of column chunks
|
||||
pub struct InMemoryRowGroup<'a> {
|
||||
metadata: &'a RowGroupMetaData,
|
||||
page_locations: Option<&'a [Vec<PageLocation>]>,
|
||||
/// Compressed page of each column.
|
||||
column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
|
||||
row_count: usize,
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Cached pages for each column.
|
||||
/// Row group level cached pages for each column.
|
||||
///
|
||||
/// `column_cached_pages.len()` equals to `column_chunks.len()`.
|
||||
column_cached_pages: Vec<Option<Arc<PageValue>>>,
|
||||
/// These pages are uncompressed pages of a row group.
|
||||
/// `column_uncompressed_pages.len()` equals to `column_chunks.len()`.
|
||||
column_uncompressed_pages: Vec<Option<Arc<PageValue>>>,
|
||||
file_path: &'a str,
|
||||
/// Object store.
|
||||
object_store: ObjectStore,
|
||||
@@ -86,7 +90,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
file_id,
|
||||
row_group_idx,
|
||||
cache_manager,
|
||||
column_cached_pages: vec![None; metadata.columns().len()],
|
||||
column_uncompressed_pages: vec![None; metadata.columns().len()],
|
||||
file_path,
|
||||
object_store,
|
||||
}
|
||||
@@ -161,16 +165,20 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
// Now we only use cache in dense chunk data.
|
||||
self.fetch_pages_from_cache(projection);
|
||||
|
||||
// Release the CPU to avoid blocking the runtime. Since `fetch_pages_from_cache`
|
||||
// is a synchronous, CPU-bound operation.
|
||||
yield_now().await;
|
||||
|
||||
let fetch_ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.zip(&self.column_cached_pages)
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
// Don't need to fetch column data if we already cache the column's pages.
|
||||
.filter(|&(idx, (chunk, cached_pages))| {
|
||||
chunk.is_none() && projection.leaf_included(idx) && cached_pages.is_none()
|
||||
.filter(|&(idx, (chunk, uncompressed_pages))| {
|
||||
// Don't need to fetch column data if we already cache the column's pages.
|
||||
chunk.is_none() && projection.leaf_included(idx) && uncompressed_pages.is_none()
|
||||
})
|
||||
.map(|(idx, (_chunk, _cached_pages))| {
|
||||
.map(|(idx, (_chunk, _pages))| {
|
||||
let column = self.metadata.column(idx);
|
||||
let (start, length) = column.byte_range();
|
||||
start..(start + length)
|
||||
@@ -184,22 +192,41 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
|
||||
let mut chunk_data = self.fetch_bytes(&fetch_ranges).await?.into_iter();
|
||||
|
||||
for (idx, (chunk, cached_pages)) in self
|
||||
for (idx, (chunk, row_group_pages)) in self
|
||||
.column_chunks
|
||||
.iter_mut()
|
||||
.zip(&self.column_cached_pages)
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
{
|
||||
if chunk.is_some() || !projection.leaf_included(idx) || cached_pages.is_some() {
|
||||
if chunk.is_some() || !projection.leaf_included(idx) || row_group_pages.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(data) = chunk_data.next() {
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
offset: self.metadata.column(idx).byte_range().0 as usize,
|
||||
data,
|
||||
}));
|
||||
// Get the fetched page.
|
||||
let Some(data) = chunk_data.next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let column = self.metadata.column(idx);
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
if !cache_uncompressed_pages(column) {
|
||||
// For columns that have multiple uncompressed pages, we only cache the compressed page
|
||||
// to save memory.
|
||||
let page_key = PageKey::new_compressed(
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
cache
|
||||
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
|
||||
}
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
offset: column.byte_range().0 as usize,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,20 +234,42 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
}
|
||||
|
||||
/// Fetches pages for columns if cache is enabled.
|
||||
/// If the page is in the cache, sets the column chunk or `column_uncompressed_pages` for the column.
|
||||
fn fetch_pages_from_cache(&mut self, projection: &ProjectionMask) {
|
||||
let _timer = READ_STAGE_FETCH_PAGES.start_timer();
|
||||
self.column_chunks
|
||||
.iter()
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx))
|
||||
.for_each(|(idx, _chunk)| {
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
let page_key = PageKey {
|
||||
region_id: self.region_id,
|
||||
file_id: self.file_id,
|
||||
row_group_idx: self.row_group_idx,
|
||||
column_idx: idx,
|
||||
};
|
||||
self.column_cached_pages[idx] = cache.get_pages(&page_key);
|
||||
.filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx))
|
||||
.for_each(|(idx, chunk)| {
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return;
|
||||
};
|
||||
let column = self.metadata.column(idx);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// Fetches uncompressed pages for the row group.
|
||||
let page_key = PageKey::new_uncompressed(
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
self.column_uncompressed_pages[idx] = cache.get_pages(&page_key);
|
||||
} else {
|
||||
// Fetches the compressed page from the cache.
|
||||
let page_key = PageKey::new_compressed(
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
|
||||
*chunk = cache.get_pages(&page_key).map(|page_value| {
|
||||
Arc::new(ColumnChunkData::Dense {
|
||||
offset: column.byte_range().0 as usize,
|
||||
data: page_value.compressed.clone(),
|
||||
})
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -259,12 +308,12 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
|
||||
/// Creates a page reader to read column at `i`.
|
||||
fn column_page_reader(&self, i: usize) -> Result<Box<dyn PageReader>> {
|
||||
if let Some(cached_pages) = &self.column_cached_pages[i] {
|
||||
// Already in cache.
|
||||
return Ok(Box::new(CachedPageReader::new(&cached_pages.pages)));
|
||||
if let Some(cached_pages) = &self.column_uncompressed_pages[i] {
|
||||
debug_assert!(!cached_pages.row_group.is_empty());
|
||||
// Hits the row group level page cache.
|
||||
return Ok(Box::new(RowGroupCachedReader::new(&cached_pages.row_group)));
|
||||
}
|
||||
|
||||
// Cache miss.
|
||||
let page_reader = match &self.column_chunks[i] {
|
||||
None => {
|
||||
return Err(ParquetError::General(format!(
|
||||
@@ -283,25 +332,34 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
};
|
||||
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
// Cache is disabled.
|
||||
return Ok(Box::new(page_reader));
|
||||
};
|
||||
|
||||
// We collect all pages and put them into the cache.
|
||||
let pages = page_reader.collect::<Result<Vec<_>>>()?;
|
||||
let page_value = Arc::new(PageValue::new(pages));
|
||||
let page_key = PageKey {
|
||||
region_id: self.region_id,
|
||||
file_id: self.file_id,
|
||||
row_group_idx: self.row_group_idx,
|
||||
column_idx: i,
|
||||
};
|
||||
cache.put_pages(page_key, page_value.clone());
|
||||
let column = self.metadata.column(i);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// This column uses the row group level page cache.
|
||||
// We collect all pages and put them into the cache.
|
||||
let pages = page_reader.collect::<Result<Vec<_>>>()?;
|
||||
let page_value = Arc::new(PageValue::new_row_group(pages));
|
||||
let page_key =
|
||||
PageKey::new_uncompressed(self.region_id, self.file_id, self.row_group_idx, i);
|
||||
cache.put_pages(page_key, page_value.clone());
|
||||
|
||||
Ok(Box::new(CachedPageReader::new(&page_value.pages)))
|
||||
return Ok(Box::new(RowGroupCachedReader::new(&page_value.row_group)));
|
||||
}
|
||||
|
||||
// This column doesn't cache uncompressed pages.
|
||||
Ok(Box::new(page_reader))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether we cache uncompressed pages for the column.
|
||||
fn cache_uncompressed_pages(column: &ColumnChunkMetaData) -> bool {
|
||||
// If the row group only has a data page, cache the whole row group as
|
||||
// it might be faster than caching a compressed page.
|
||||
column.uncompressed_size() as usize <= DEFAULT_PAGE_SIZE
|
||||
}
|
||||
|
||||
impl<'a> RowGroups for InMemoryRowGroup<'a> {
|
||||
fn num_rows(&self) -> usize {
|
||||
self.row_count
|
||||
@@ -318,7 +376,7 @@ impl<'a> RowGroups for InMemoryRowGroup<'a> {
|
||||
|
||||
/// An in-memory column chunk
|
||||
#[derive(Clone)]
|
||||
enum ColumnChunkData {
|
||||
pub(crate) enum ColumnChunkData {
|
||||
/// Column chunk data representing only a subset of data pages
|
||||
Sparse {
|
||||
/// Length of the full column chunk
|
||||
|
||||
@@ -232,7 +232,7 @@ impl Deleter {
|
||||
|
||||
async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result<TableRef> {
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table)
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -32,7 +32,6 @@ use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use datatypes::schema::Schema;
|
||||
use futures_util::future;
|
||||
use meter_macros::write_meter;
|
||||
use partition::manager::PartitionRuleManagerRef;
|
||||
@@ -45,7 +44,7 @@ use store_api::metric_engine_consts::{
|
||||
};
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, MERGE_MODE_KEY};
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::requests::{InsertRequest as TableInsertRequest, TTL_KEY};
|
||||
use table::requests::{InsertRequest as TableInsertRequest, AUTO_CREATE_TABLE_KEY, TTL_KEY};
|
||||
use table::table_reference::TableReference;
|
||||
use table::TableRef;
|
||||
|
||||
@@ -462,21 +461,49 @@ impl Inserter {
|
||||
auto_create_table_type: AutoCreateTableType,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<HashMap<String, TableId>> {
|
||||
let mut table_name_to_ids = HashMap::with_capacity(requests.inserts.len());
|
||||
let mut create_tables = vec![];
|
||||
let mut alter_tables = vec![];
|
||||
let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND
|
||||
.with_label_values(&[auto_create_table_type.as_str()])
|
||||
.start_timer();
|
||||
|
||||
let catalog = ctx.current_catalog();
|
||||
let schema = ctx.current_schema();
|
||||
let mut table_name_to_ids = HashMap::with_capacity(requests.inserts.len());
|
||||
// If `auto_create_table` hint is disabled, skip creating/altering tables.
|
||||
let auto_create_table_hint = ctx
|
||||
.extension(AUTO_CREATE_TABLE_KEY)
|
||||
.map(|v| v.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| {
|
||||
InvalidInsertRequestSnafu {
|
||||
reason: "`auto_create_table` hint must be a boolean",
|
||||
}
|
||||
.build()
|
||||
})?
|
||||
.unwrap_or(true);
|
||||
if !auto_create_table_hint {
|
||||
for req in &requests.inserts {
|
||||
let table = self
|
||||
.get_table(catalog, &schema, &req.table_name)
|
||||
.await?
|
||||
.context(InvalidInsertRequestSnafu {
|
||||
reason: format!(
|
||||
"Table `{}` does not exist, and `auto_create_table` hint is disabled",
|
||||
req.table_name
|
||||
),
|
||||
})?;
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
}
|
||||
return Ok(table_name_to_ids);
|
||||
}
|
||||
|
||||
let mut create_tables = vec![];
|
||||
let mut alter_tables = vec![];
|
||||
for req in &requests.inserts {
|
||||
let catalog = ctx.current_catalog();
|
||||
let schema = ctx.current_schema();
|
||||
let table = self.get_table(catalog, &schema, &req.table_name).await?;
|
||||
match table {
|
||||
match self.get_table(catalog, &schema, &req.table_name).await? {
|
||||
Some(table) => {
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
validate_request_with_table(req, &table)?;
|
||||
if let Some(alter_expr) =
|
||||
self.get_alter_table_expr_on_demand(req, table, ctx)?
|
||||
{
|
||||
@@ -536,6 +563,7 @@ impl Inserter {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(table_name_to_ids)
|
||||
}
|
||||
|
||||
@@ -608,7 +636,7 @@ impl Inserter {
|
||||
table: &str,
|
||||
) -> Result<Option<TableRef>> {
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table)
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)
|
||||
}
|
||||
@@ -796,87 +824,9 @@ fn validate_column_count_match(requests: &RowInsertRequests) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_request_with_table(req: &RowInsertRequest, table: &TableRef) -> Result<()> {
|
||||
let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
|
||||
let table_schema = table.schema();
|
||||
|
||||
validate_required_columns(request_schema, &table_schema)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_required_columns(request_schema: &[ColumnSchema], table_schema: &Schema) -> Result<()> {
|
||||
for column_schema in table_schema.column_schemas() {
|
||||
if column_schema.is_nullable() || column_schema.default_constraint().is_some() {
|
||||
continue;
|
||||
}
|
||||
if !request_schema
|
||||
.iter()
|
||||
.any(|c| c.column_name == column_schema.name)
|
||||
{
|
||||
return InvalidInsertRequestSnafu {
|
||||
reason: format!(
|
||||
"Expecting insert data to be presented on a not null or no default value column '{}'.",
|
||||
&column_schema.name
|
||||
)
|
||||
}.fail();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_create_table_expr(
|
||||
table: &TableReference,
|
||||
request_schema: &[ColumnSchema],
|
||||
) -> Result<CreateTableExpr> {
|
||||
CreateExprFactory.create_table_expr_by_column_schemas(table, request_schema, default_engine())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use datatypes::prelude::{ConcreteDataType, Value as DtValue};
    use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema as DtColumnSchema};

    use super::*;

    /// Builds a request schema holding a single column called `name`.
    fn single_column_request(name: &str) -> Vec<ColumnSchema> {
        vec![ColumnSchema {
            column_name: name.to_string(),
            ..Default::default()
        }]
    }

    /// Builds a table schema with two int32 columns sharing `nullable`:
    /// `a` without a default and `b` defaulting to `b_default`.
    fn two_column_table(nullable: bool, b_default: i32) -> Schema {
        let a = DtColumnSchema::new("a", ConcreteDataType::int32_datatype(), nullable)
            .with_default_constraint(None)
            .unwrap();
        let b = DtColumnSchema::new("b", ConcreteDataType::int32_datatype(), nullable)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(DtValue::Int32(
                b_default,
            ))))
            .unwrap();
        Schema::new(vec![a, b])
    }

    #[test]
    fn test_validate_required_columns() {
        // If nullable is true, it doesn't matter whether the insert request has the column.
        let nullable_table = two_column_table(true, 100);
        validate_required_columns(&single_column_request("c"), &nullable_table).unwrap();

        // If nullable is false, but the column is defined with default value,
        // it also doesn't matter whether the insert request has the column.
        let strict_table = two_column_table(false, -100);
        validate_required_columns(&single_column_request("a"), &strict_table).unwrap();

        // Neither of the above cases: `a` is required but only `b` is supplied.
        assert!(validate_required_columns(&single_column_request("b"), &strict_table).is_err());
    }
}
|
||||
|
||||
@@ -64,7 +64,7 @@ impl<'a> RowToRegion<'a> {
|
||||
let catalog_name = self.ctx.current_catalog();
|
||||
let schema_name = self.ctx.current_schema();
|
||||
self.catalog_manager
|
||||
.table(catalog_name, &schema_name, table_name)
|
||||
.table(catalog_name, &schema_name, table_name, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -139,7 +139,7 @@ impl<'a> StatementToRegion<'a> {
|
||||
|
||||
async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result<TableRef> {
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table)
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -219,7 +219,7 @@ impl Requester {
|
||||
) -> Result<Vec<PartitionInfo>> {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog, schema, table_name)
|
||||
.table(catalog, schema, table_name, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
|
||||
|
||||
@@ -286,7 +286,7 @@ impl StatementExecutor {
|
||||
|
||||
let table_ref = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &table)
|
||||
.table(&catalog, &schema, &table, Some(&query_ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(TableNotFoundSnafu { table_name: &table })?;
|
||||
@@ -313,7 +313,7 @@ impl StatementExecutor {
|
||||
let catalog = query_ctx.current_catalog();
|
||||
ensure!(
|
||||
self.catalog_manager
|
||||
.schema_exists(catalog, db.as_ref())
|
||||
.schema_exists(catalog, db.as_ref(), Some(&query_ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?,
|
||||
SchemaNotFoundSnafu { schema_info: &db }
|
||||
@@ -382,7 +382,7 @@ impl StatementExecutor {
|
||||
table,
|
||||
} = table_ref;
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table)
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -57,7 +57,7 @@ impl StatementExecutor {
|
||||
);
|
||||
let table_names = self
|
||||
.catalog_manager
|
||||
.table_names(&req.catalog_name, &req.schema_name)
|
||||
.table_names(&req.catalog_name, &req.schema_name, Some(&ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@ impl StatementExecutor {
|
||||
.context(error::ExternalSnafu)?;
|
||||
let table_ref = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &table)
|
||||
.table(&catalog, &schema, &table, Some(&ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(TableNotFoundSnafu { table_name: &table })?;
|
||||
@@ -207,6 +207,7 @@ impl StatementExecutor {
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
Some(&query_ctx),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
@@ -487,7 +488,12 @@ impl StatementExecutor {
|
||||
// if view or table exists.
|
||||
if let Some(table) = self
|
||||
.catalog_manager
|
||||
.table(&expr.catalog_name, &expr.schema_name, &expr.view_name)
|
||||
.table(
|
||||
&expr.catalog_name,
|
||||
&expr.schema_name,
|
||||
&expr.view_name,
|
||||
Some(&ctx),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
@@ -656,7 +662,7 @@ impl StatementExecutor {
|
||||
) -> Result<Output> {
|
||||
let view_info = if let Some(view) = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &view)
|
||||
.table(&catalog, &schema, &view, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
@@ -766,6 +772,7 @@ impl StatementExecutor {
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
Some(&query_context),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
@@ -816,7 +823,7 @@ impl StatementExecutor {
|
||||
|
||||
if self
|
||||
.catalog_manager
|
||||
.schema_exists(&catalog, &schema)
|
||||
.schema_exists(&catalog, &schema, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
@@ -858,6 +865,7 @@ impl StatementExecutor {
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
Some(&query_context),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
@@ -944,7 +952,12 @@ impl StatementExecutor {
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
Some(&query_context),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
@@ -1167,9 +1180,10 @@ impl StatementExecutor {
|
||||
|
||||
if !self
|
||||
.catalog_manager
|
||||
.schema_exists(catalog, database)
|
||||
.schema_exists(catalog, database, None)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
&& !self.catalog_manager.is_reserved_schema_name(database)
|
||||
{
|
||||
self.create_database_procedure(
|
||||
catalog.to_string(),
|
||||
|
||||
@@ -39,7 +39,7 @@ impl StatementExecutor {
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &table)
|
||||
.table(&catalog, &schema, &table, Some(&query_ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
|
||||
@@ -143,7 +143,7 @@ impl StatementExecutor {
|
||||
|
||||
let table_ref = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &view)
|
||||
.table(&catalog, &schema, &view, Some(&query_ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(ViewNotFoundSnafu { view_name: &view })?;
|
||||
|
||||
@@ -110,7 +110,12 @@ impl PipelineOperator {
|
||||
// exist in catalog, just open
|
||||
if let Some(table) = self
|
||||
.catalog_manager
|
||||
.table(&expr.catalog_name, &expr.schema_name, &expr.table_name)
|
||||
.table(
|
||||
&expr.catalog_name,
|
||||
&expr.schema_name,
|
||||
&expr.table_name,
|
||||
Some(&ctx),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
@@ -130,7 +135,7 @@ impl PipelineOperator {
|
||||
// get from catalog
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog, schema, table_name)
|
||||
.table(catalog, schema, table_name, Some(&ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(PipelineTableNotFoundSnafu)?;
|
||||
|
||||
@@ -116,7 +116,7 @@ impl DatafusionQueryEngine {
|
||||
let default_catalog = &query_ctx.current_catalog().to_owned();
|
||||
let default_schema = &query_ctx.current_schema();
|
||||
let table_name = dml.table_name.resolve(default_catalog, default_schema);
|
||||
let table = self.find_table(&table_name).await?;
|
||||
let table = self.find_table(&table_name, &query_ctx).await?;
|
||||
|
||||
let output = self
|
||||
.exec_query_plan(LogicalPlan::DfPlan((*dml.input).clone()), query_ctx.clone())
|
||||
@@ -241,14 +241,18 @@ impl DatafusionQueryEngine {
|
||||
.context(TableMutationSnafu)
|
||||
}
|
||||
|
||||
async fn find_table(&self, table_name: &ResolvedTableReference) -> Result<TableRef> {
|
||||
async fn find_table(
|
||||
&self,
|
||||
table_name: &ResolvedTableReference,
|
||||
query_context: &QueryContextRef,
|
||||
) -> Result<TableRef> {
|
||||
let catalog_name = table_name.catalog.as_ref();
|
||||
let schema_name = table_name.schema.as_ref();
|
||||
let table_name = table_name.table.as_ref();
|
||||
|
||||
self.state
|
||||
.catalog_manager()
|
||||
.table(catalog_name, schema_name, table_name)
|
||||
.table(catalog_name, schema_name, table_name, Some(query_context))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu { table: table_name })
|
||||
@@ -529,7 +533,7 @@ mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use datatypes::vectors::{Helper, UInt32Vector, UInt64Vector, VectorRef};
|
||||
use session::context::QueryContext;
|
||||
use session::context::{QueryContext, QueryContextBuilder};
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
|
||||
use super::*;
|
||||
@@ -618,12 +622,16 @@ mod tests {
|
||||
.as_any()
|
||||
.downcast_ref::<DatafusionQueryEngine>()
|
||||
.unwrap();
|
||||
let query_ctx = Arc::new(QueryContextBuilder::default().build());
|
||||
let table = engine
|
||||
.find_table(&ResolvedTableReference {
|
||||
catalog: "greptime".into(),
|
||||
schema: "public".into(),
|
||||
table: "numbers".into(),
|
||||
})
|
||||
.find_table(
|
||||
&ResolvedTableReference {
|
||||
catalog: "greptime".into(),
|
||||
schema: "public".into(),
|
||||
table: "numbers".into(),
|
||||
},
|
||||
&query_ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ impl DfContextProviderAdapter {
|
||||
let mut table_provider = DfTableSourceProvider::new(
|
||||
engine_state.catalog_manager().clone(),
|
||||
engine_state.disallow_cross_catalog_query(),
|
||||
query_ctx.as_ref(),
|
||||
query_ctx.clone(),
|
||||
Arc::new(DefaultPlanDecoder::new(session_state.clone(), &query_ctx)?),
|
||||
session_state
|
||||
.config_options()
|
||||
|
||||
@@ -156,20 +156,22 @@ impl MergeScanExec {
|
||||
query_ctx: QueryContextRef,
|
||||
target_partition: usize,
|
||||
) -> Result<Self> {
|
||||
let arrow_schema_without_metadata = Self::arrow_schema_without_metadata(arrow_schema);
|
||||
// TODO(CookiePieWw): Initially we removed the metadata from the schema in #2000, but we have to
|
||||
// keep it for #4619 to identify json type in src/datatypes/src/schema/column_schema.rs.
|
||||
// Reconsider if it's possible to remove it.
|
||||
let arrow_schema = Arc::new(arrow_schema.clone());
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(arrow_schema_without_metadata.clone()),
|
||||
EquivalenceProperties::new(arrow_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(target_partition),
|
||||
ExecutionMode::Bounded,
|
||||
);
|
||||
let schema_without_metadata =
|
||||
Self::arrow_schema_to_schema(arrow_schema_without_metadata.clone())?;
|
||||
let schema = Self::arrow_schema_to_schema(arrow_schema.clone())?;
|
||||
Ok(Self {
|
||||
table,
|
||||
regions,
|
||||
plan,
|
||||
schema: schema_without_metadata,
|
||||
arrow_schema: arrow_schema_without_metadata,
|
||||
schema,
|
||||
arrow_schema,
|
||||
region_query_handler,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
sub_stage_metrics: Arc::default(),
|
||||
@@ -288,20 +290,6 @@ impl MergeScanExec {
|
||||
}))
|
||||
}
|
||||
|
||||
fn arrow_schema_without_metadata(arrow_schema: &ArrowSchema) -> ArrowSchemaRef {
|
||||
Arc::new(ArrowSchema::new(
|
||||
arrow_schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| {
|
||||
let field = field.as_ref().clone();
|
||||
let field_without_metadata = field.with_metadata(Default::default());
|
||||
Arc::new(field_without_metadata)
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
))
|
||||
}
|
||||
|
||||
fn arrow_schema_to_schema(arrow_schema: ArrowSchemaRef) -> Result<SchemaRef> {
|
||||
let schema = Schema::try_from(arrow_schema).context(ConvertSchemaSnafu)?;
|
||||
Ok(Arc::new(schema))
|
||||
|
||||
@@ -128,6 +128,7 @@ impl DistExtensionPlanner {
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
|
||||
@@ -68,7 +68,7 @@ impl DfLogicalPlanner {
|
||||
let table_provider = DfTableSourceProvider::new(
|
||||
self.engine_state.catalog_manager().clone(),
|
||||
self.engine_state.disallow_cross_catalog_query(),
|
||||
query_ctx.as_ref(),
|
||||
query_ctx.clone(),
|
||||
Arc::new(DefaultPlanDecoder::new(
|
||||
self.session_state.clone(),
|
||||
&query_ctx,
|
||||
@@ -144,14 +144,15 @@ impl DfLogicalPlanner {
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn plan_pql(&self, stmt: EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
|
||||
let plan_decoder = Arc::new(DefaultPlanDecoder::new(
|
||||
self.session_state.clone(),
|
||||
&query_ctx,
|
||||
)?);
|
||||
let table_provider = DfTableSourceProvider::new(
|
||||
self.engine_state.catalog_manager().clone(),
|
||||
self.engine_state.disallow_cross_catalog_query(),
|
||||
query_ctx.as_ref(),
|
||||
Arc::new(DefaultPlanDecoder::new(
|
||||
self.session_state.clone(),
|
||||
&query_ctx,
|
||||
)?),
|
||||
query_ctx,
|
||||
plan_decoder,
|
||||
self.session_state
|
||||
.config_options()
|
||||
.sql_parser
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user