fix(repartition): improve physical region allocation and compaction read path correctness (#7621)

* fix: fix metadata region

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: adjust repartition flow and compaction read compatibility

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: remove logs

Signed-off-by: WenyXu <wenymedia@gmail.com>

* refactor: rename compaction mapper and pk projection

Signed-off-by: WenyXu <wenymedia@gmail.com>

* refactor: rename `CompactionProjectionMapper`

Signed-off-by: WenyXu <wenymedia@gmail.com>

* refactor: clarify compaction projection naming

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: add comments

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: fmt

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: allow create physical table with internal columns

Signed-off-by: WenyXu <wenymedia@gmail.com>

* test: add tests

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix template logic

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix unit test

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: update sqlness result

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
This commit is contained in:
Weny Xu
2026-01-28 12:04:05 +08:00
committed by GitHub
parent 238bc4fa2c
commit 5bfc728d32
19 changed files with 733 additions and 208 deletions

View File

@@ -71,6 +71,30 @@ impl RecordBatch {
})
}
/// Assembles a [`DfRecordBatch`] with schema `arrow_schema` from the given column vectors.
///
/// Every vector is converted to an Arrow array, the arrays are then adjusted to the schema
/// (view-array casting followed by JSON array alignment), and the batch is built from the
/// adjusted arrays.
///
/// # Errors
///
/// Propagates failures from `cast_view_arrays` and `maybe_align_json_array_with_schema`, and
/// attaches `NewDfRecordBatchSnafu` context when `DfRecordBatch::try_new` fails.
pub fn to_df_record_batch<I: IntoIterator<Item = VectorRef>>(
    arrow_schema: ArrowSchemaRef,
    columns: I,
) -> Result<DfRecordBatch> {
    let raw_arrays = columns
        .into_iter()
        .map(|vector| vector.to_arrow_array())
        .collect();

    // The cast below is a stop-gap for Arrow's view array types (`StringViewArray` and
    // `BinaryViewArray`). Vectors have no types corresponding to view arrays, so the arrays
    // built from them can only be cast afterwards to match the schema. The plan is to use
    // Arrow's RecordBatch directly in the read path, at which point this cast goes away.
    // TODO(LFC): Remove the casting here once `Batch` is no longer used.
    let view_casted = Self::cast_view_arrays(&arrow_schema, raw_arrays)?;
    let schema_aligned = maybe_align_json_array_with_schema(&arrow_schema, view_casted)?;

    let batch = DfRecordBatch::try_new(arrow_schema, schema_aligned)
        .context(error::NewDfRecordBatchSnafu)?;
    Ok(batch)
}
fn cast_view_arrays(
schema: &ArrowSchemaRef,
mut arrays: Vec<ArrayRef>,