feat: support to create external table (#1372)

* feat: support to create external table

* chore: apply suggestions from CR

* test: add create external table without ts type

* chore: apply suggestions from CR

* fix: fix import typo

* refactor: move consts to table crate

* chore: apply suggestions from CR

* refactor: rename create_table_schema
This commit is contained in:
Weny Xu
2023-04-24 15:43:12 +09:00
committed by GitHub
parent 17daf4cdff
commit f2167663b2
26 changed files with 527 additions and 72 deletions

View File

@@ -26,13 +26,14 @@ use std::sync::Arc;
use std::task::Poll;
use arrow::record_batch::RecordBatch;
use arrow_schema::{ArrowError, Schema};
use arrow_schema::{ArrowError, Schema as ArrowSchema};
use async_trait::async_trait;
use bytes::{Buf, Bytes};
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::file_format::FileOpenFuture;
use futures::StreamExt;
use object_store::ObjectStore;
use snafu::ResultExt;
use self::csv::CsvFormat;
use self::json::JsonFormat;
@@ -73,7 +74,7 @@ impl TryFrom<&HashMap<String, String>> for Format {
#[async_trait]
pub trait FileFormat: Send + Sync + std::fmt::Debug {
async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<Schema>;
async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<ArrowSchema>;
}
pub trait ArrowDecoder: Send + 'static {
@@ -154,3 +155,15 @@ pub fn open_with_decoder<T: ArrowDecoder, F: Fn() -> DataFusionResult<T>>(
Ok(stream.boxed())
}))
}
pub async fn infer_schemas(
store: &ObjectStore,
files: &[String],
file_format: &dyn FileFormat,
) -> Result<ArrowSchema> {
let mut schemas = Vec::with_capacity(files.len());
for file in files {
schemas.push(file_format.infer_schema(store, file.to_string()).await?)
}
ArrowSchema::try_merge(schemas).context(error::MergeSchemaSnafu)
}

View File

@@ -92,7 +92,7 @@ pub fn alter_expr_to_request(expr: AlterExpr) -> Result<AlterTableRequest> {
}
}
pub fn create_table_schema(expr: &CreateTableExpr) -> Result<RawSchema> {
pub fn create_table_schema(expr: &CreateTableExpr, require_time_index: bool) -> Result<RawSchema> {
let column_schemas = expr
.column_defs
.iter()
@@ -101,14 +101,17 @@ pub fn create_table_schema(expr: &CreateTableExpr) -> Result<RawSchema> {
})
.collect::<Result<Vec<ColumnSchema>>>()?;
ensure!(
column_schemas
.iter()
.any(|column| column.name == expr.time_index),
MissingTimestampColumnSnafu {
msg: format!("CreateExpr: {expr:?}")
}
);
// allow external table schema without the time index
if require_time_index {
ensure!(
column_schemas
.iter()
.any(|column| column.name == expr.time_index),
MissingTimestampColumnSnafu {
msg: format!("CreateExpr: {expr:?}")
}
);
}
let column_schemas = column_schemas
.into_iter()
@@ -127,8 +130,9 @@ pub fn create_table_schema(expr: &CreateTableExpr) -> Result<RawSchema> {
pub fn create_expr_to_request(
table_id: TableId,
expr: CreateTableExpr,
require_time_index: bool,
) -> Result<CreateTableRequest> {
let schema = create_table_schema(&expr)?;
let schema = create_table_schema(&expr, require_time_index)?;
let primary_key_indices = expr
.primary_keys
.iter()