mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-30 03:40:37 +00:00
feat(mito): Implement SST format for mito2 (#2178)
* chore: update comment * feat: stream writer takes arrow's types * feat: Define Batch struct * feat: arrow_schema_to_store * refactor: rename * feat: write parquet in new format with tsids * feat: reader support projection * feat: Impl read compat * refactor: rename SchemaCompat to CompatRecordBatch * feat: changing sst format * feat: make it compile * feat: remove tsid and some structs * feat: from_sst_record_batch wip * chore: push array * chore: wip * feat: decode batches from RecordBatch * feat: reader converts record batches * feat: remove compat mod * chore: remove some codes * feat: sort fields by column id * test: test to_sst_arrow_schema * feat: do not sort fields * test: more test helpers * feat: simplify projection * fix: projection indices is incorrect * refactor: define write/read format * test: test write format * test: test projection * test: test convert record batch * feat: remove unused errors * refactor: wrap get_field_batch_columns * chore: clippy * chore: fix clippy * feat: build arrow schema from region meta in ReadFormat * feat: initialize the parquet reader at `build()` * chore: fix typo
This commit is contained in:
@@ -23,6 +23,7 @@ use std::sync::Arc;
|
||||
use api::v1::SemanticType;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use datatypes::arrow::datatypes::FieldRef;
|
||||
use datatypes::prelude::DataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use serde::de::Error;
|
||||
@@ -124,6 +125,11 @@ impl<'de> Deserialize<'de> for RegionMetadata {
|
||||
}
|
||||
|
||||
impl RegionMetadata {
|
||||
/// Decode the metadata from a JSON str.
|
||||
pub fn from_json(s: &str) -> Result<Self> {
|
||||
serde_json::from_str(s).context(SerdeJsonSnafu)
|
||||
}
|
||||
|
||||
/// Encode the metadata to a JSON string.
|
||||
pub fn to_json(&self) -> Result<String> {
|
||||
serde_json::to_string(&self).context(SerdeJsonSnafu)
|
||||
@@ -136,6 +142,11 @@ impl RegionMetadata {
|
||||
.map(|index| &self.column_metadatas[*index])
|
||||
}
|
||||
|
||||
/// Find column index by id.
|
||||
pub fn column_index_by_id(&self, column_id: ColumnId) -> Option<usize> {
|
||||
self.id_to_index.get(&column_id).copied()
|
||||
}
|
||||
|
||||
/// Returns the time index column
|
||||
///
|
||||
/// # Panics
|
||||
@@ -145,6 +156,26 @@ impl RegionMetadata {
|
||||
&self.column_metadatas[index]
|
||||
}
|
||||
|
||||
/// Returns the arrow field of the time index column.
|
||||
pub fn time_index_field(&self) -> FieldRef {
|
||||
let index = self.id_to_index[&self.time_index];
|
||||
self.schema.arrow_schema().fields[index].clone()
|
||||
}
|
||||
|
||||
/// Finds a column by name.
|
||||
pub fn column_by_name(&self, name: &str) -> Option<&ColumnMetadata> {
|
||||
self.schema
|
||||
.column_index_by_name(name)
|
||||
.map(|index| &self.column_metadatas[index])
|
||||
}
|
||||
|
||||
/// Returns all field columns.
|
||||
pub fn field_columns(&self) -> impl Iterator<Item = &ColumnMetadata> {
|
||||
self.column_metadatas
|
||||
.iter()
|
||||
.filter(|column| column.semantic_type == SemanticType::Field)
|
||||
}
|
||||
|
||||
/// Checks whether the metadata is valid.
|
||||
fn validate(&self) -> Result<()> {
|
||||
// Id to name.
|
||||
@@ -264,6 +295,7 @@ impl RegionMetadata {
|
||||
|
||||
/// Checks whether it is a valid column.
|
||||
fn validate_column_metadata(column_metadata: &ColumnMetadata) -> Result<()> {
|
||||
// TODO(yingwen): Ensure column name is not internal columns.
|
||||
if column_metadata.semantic_type == SemanticType::Timestamp {
|
||||
ensure!(
|
||||
column_metadata
|
||||
|
||||
@@ -81,6 +81,9 @@ pub const SEQUENCE_COLUMN_NAME: &str = "__sequence";
|
||||
/// Name of the reserved `op_type` column (`__op_type`).
|
||||
pub const OP_TYPE_COLUMN_NAME: &str = "__op_type";
|
||||
|
||||
/// Name of the reserved `primary_key` column (`__primary_key`).
|
||||
pub const PRIMARY_KEY_COLUMN_NAME: &str = "__primary_key";
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// ---------- Default options --------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user