mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-30 03:40:37 +00:00
feat: support to create external table (#1372)
* feat: support to create external table * chore: apply suggestions from CR * test: add create external table without ts type * chore: apply suggestions from CR * fix: fix import typo * refactor: move consts to table crate * chore: apply suggestions from CR * refactor: rename create_table_schema
This commit is contained in:
@@ -12,6 +12,7 @@ catalog = { path = "../catalog" }
|
||||
chrono.workspace = true
|
||||
common-base = { path = "../common/base" }
|
||||
common-catalog = { path = "../common/catalog" }
|
||||
common-datasource = { path = "../common/datasource" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-function = { path = "../common/function" }
|
||||
common-query = { path = "../common/query" }
|
||||
@@ -29,9 +30,11 @@ futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
humantime = "2.1"
|
||||
metrics.workspace = true
|
||||
object-store = { path = "../object-store" }
|
||||
once_cell = "1.10"
|
||||
promql = { path = "../promql" }
|
||||
promql-parser = "0.1.0"
|
||||
regex = "1.6"
|
||||
serde.workspace = true
|
||||
serde_json = "1.0"
|
||||
session = { path = "../session" }
|
||||
|
||||
@@ -126,6 +126,54 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: sql::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse SQL, source: {}", source))]
|
||||
ParseSql {
|
||||
#[snafu(backtrace)]
|
||||
source: sql::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing required field: {}", name))]
|
||||
MissingRequiredField { name: String, location: Location },
|
||||
|
||||
#[snafu(display("Failed to regex, source: {}", source))]
|
||||
BuildRegex {
|
||||
location: Location,
|
||||
source: regex::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build data source backend, source: {}", source))]
|
||||
BuildBackend {
|
||||
#[snafu(backtrace)]
|
||||
source: common_datasource::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list objects, source: {}", source))]
|
||||
ListObjects {
|
||||
#[snafu(backtrace)]
|
||||
source: common_datasource::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported file format: {}", format))]
|
||||
UnsupportedFileFormat { format: String, location: Location },
|
||||
|
||||
#[snafu(display("Failed to parse file format: {}", source))]
|
||||
ParseFileFormat {
|
||||
#[snafu(backtrace)]
|
||||
source: common_datasource::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to infer schema: {}", source))]
|
||||
InferSchema {
|
||||
#[snafu(backtrace)]
|
||||
source: common_datasource::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert datafusion schema, source: {}", source))]
|
||||
ConvertSchema {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -139,12 +187,22 @@ impl ErrorExt for Error {
|
||||
| SchemaNotFound { .. }
|
||||
| TableNotFound { .. }
|
||||
| ParseTimestamp { .. }
|
||||
| ParseFloat { .. } => StatusCode::InvalidArguments,
|
||||
| ParseFloat { .. }
|
||||
| MissingRequiredField { .. }
|
||||
| BuildRegex { .. }
|
||||
| UnsupportedFileFormat { .. }
|
||||
| ConvertSchema { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
BuildBackend { .. } | ListObjects { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
ParseFileFormat { source, .. } | InferSchema { source, .. } => source.status_code(),
|
||||
|
||||
QueryAccessDenied { .. } => StatusCode::AccessDenied,
|
||||
Catalog { source } => source.status_code(),
|
||||
VectorComputation { source } | ConvertDatafusionSchema { source } => {
|
||||
source.status_code()
|
||||
}
|
||||
ParseSql { source } => source.status_code(),
|
||||
CreateRecordBatch { source } => source.status_code(),
|
||||
QueryExecution { source } | QueryPlan { source } => source.status_code(),
|
||||
DataFusion { .. } | MissingTimestampColumn { .. } => StatusCode::Internal,
|
||||
|
||||
@@ -14,20 +14,35 @@
|
||||
|
||||
mod show;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::consts::DEFAULT_CATALOG_NAME;
|
||||
use common_datasource::file_format::csv::CsvFormat;
|
||||
use common_datasource::file_format::json::JsonFormat;
|
||||
use common_datasource::file_format::parquet::ParquetFormat;
|
||||
use common_datasource::file_format::{infer_schemas, FileFormat};
|
||||
use common_datasource::lister::{Lister, Source};
|
||||
use common_datasource::object_store::build_backend;
|
||||
use common_datasource::util::find_dir_and_filename;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
|
||||
use datatypes::vectors::{Helper, StringVector};
|
||||
use object_store::ObjectStore;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::ast::ColumnDef;
|
||||
use sql::statements::column_def_to_schema;
|
||||
use sql::statements::create::Partitions;
|
||||
use sql::statements::show::{ShowDatabases, ShowKind, ShowTables};
|
||||
use table::requests::{
|
||||
IMMUTABLE_TABLE_FORMAT_KEY, IMMUTABLE_TABLE_LOCATION_KEY, IMMUTABLE_TABLE_PATTERN_KEY,
|
||||
};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -250,6 +265,105 @@ fn describe_column_semantic_types(
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn prepare_immutable_file_table_files_and_schema(
|
||||
options: &HashMap<String, String>,
|
||||
columns: &Vec<ColumnDef>,
|
||||
) -> Result<(Vec<String>, RawSchema)> {
|
||||
let (object_store, files) = prepare_immutable_file_table(options).await?;
|
||||
let schema = if !columns.is_empty() {
|
||||
let columns_schemas: Vec<_> = columns
|
||||
.iter()
|
||||
.map(|column| column_def_to_schema(column, false).context(error::ParseSqlSnafu))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
RawSchema::new(columns_schemas)
|
||||
} else {
|
||||
let format = parse_immutable_file_table_format(options)?;
|
||||
infer_immutable_file_table_schema(&object_store, &*format, &files).await?
|
||||
};
|
||||
|
||||
Ok((files, schema))
|
||||
}
|
||||
|
||||
// lists files in the frontend to reduce unnecessary scan requests repeated in each datanode.
|
||||
async fn prepare_immutable_file_table(
|
||||
options: &HashMap<String, String>,
|
||||
) -> Result<(ObjectStore, Vec<String>)> {
|
||||
let url =
|
||||
options
|
||||
.get(IMMUTABLE_TABLE_LOCATION_KEY)
|
||||
.context(error::MissingRequiredFieldSnafu {
|
||||
name: IMMUTABLE_TABLE_LOCATION_KEY,
|
||||
})?;
|
||||
|
||||
let (dir, filename) = find_dir_and_filename(url);
|
||||
let source = if let Some(filename) = filename {
|
||||
Source::Filename(filename)
|
||||
} else {
|
||||
Source::Dir
|
||||
};
|
||||
let regex = options
|
||||
.get(IMMUTABLE_TABLE_PATTERN_KEY)
|
||||
.map(|x| Regex::new(x))
|
||||
.transpose()
|
||||
.context(error::BuildRegexSnafu)?;
|
||||
let object_store = build_backend(url, options).context(error::BuildBackendSnafu)?;
|
||||
let lister = Lister::new(object_store.clone(), source, dir, regex);
|
||||
// If we scan files in a directory every time the database restarts,
|
||||
// then it might lead to a potential undefined behavior:
|
||||
// If a user adds a file with an incompatible schema to that directory,
|
||||
// it will make the external table unavailable.
|
||||
let files = lister
|
||||
.list()
|
||||
.await
|
||||
.context(error::ListObjectsSnafu)?
|
||||
.into_iter()
|
||||
.filter_map(|entry| {
|
||||
if entry.path().ends_with('/') {
|
||||
None
|
||||
} else {
|
||||
Some(entry.path().to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok((object_store, files))
|
||||
}
|
||||
|
||||
fn parse_immutable_file_table_format(
|
||||
options: &HashMap<String, String>,
|
||||
) -> Result<Box<dyn FileFormat>> {
|
||||
let format = options
|
||||
.get(IMMUTABLE_TABLE_FORMAT_KEY)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.to_uppercase();
|
||||
|
||||
match format.as_str() {
|
||||
"CSV" => {
|
||||
let file_format = CsvFormat::try_from(options).context(error::ParseFileFormatSnafu)?;
|
||||
Ok(Box::new(file_format))
|
||||
}
|
||||
"JSON" => {
|
||||
let file_format = JsonFormat::try_from(options).context(error::ParseFileFormatSnafu)?;
|
||||
Ok(Box::new(file_format))
|
||||
}
|
||||
"PARQUET" => Ok(Box::new(ParquetFormat {})),
|
||||
format => error::UnsupportedFileFormatSnafu { format }.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn infer_immutable_file_table_schema(
|
||||
object_store: &ObjectStore,
|
||||
file_format: &dyn FileFormat,
|
||||
files: &[String],
|
||||
) -> Result<RawSchema> {
|
||||
let merged = infer_schemas(object_store, files, file_format)
|
||||
.await
|
||||
.context(error::InferSchemaSnafu)?;
|
||||
Ok(RawSchema::from(
|
||||
&Schema::try_from(merged).context(error::ConvertSchemaSnafu)?,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
Reference in New Issue
Block a user