chore: upgrade DataFusion family, again (#7578)

* chore: upgrade DataFusion family

Signed-off-by: luofucong <luofc@foxmail.com>

* chore: switch to released version of datafusion-pg-catalog

---------

Signed-off-by: luofucong <luofc@foxmail.com>
Co-authored-by: Ning Sun <sunning@greptime.com>
Co-authored-by: Ning Sun <sunng@protonmail.com>
This commit is contained in:
LFC
2026-03-03 15:36:39 +08:00
committed by GitHub
parent aab839b6e4
commit b2074e3863
135 changed files with 1589 additions and 2555 deletions

View File

@@ -21,7 +21,7 @@ use datatypes::arrow::error::ArrowError;
use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, RowGroups, RowSelection};
use parquet::arrow::{FieldLevels, ProjectionMask, parquet_to_arrow_field_levels};
use parquet::column::page::{PageIterator, PageReader};
use parquet::file::metadata::ParquetMetaData;
use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData};
use snafu::ResultExt;
use crate::error;
@@ -103,6 +103,14 @@ impl RowGroups for MemtableRowGroupPageFetcher<'_> {
reader: Some(self.column_page_reader(i)),
}))
}
/// Yields the metadata of the one row group this fetcher wraps, as
/// reported by `self.base`. The iterator always has exactly one item.
fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
    let only_row_group = self.base.row_group_metadata();
    Box::new(std::iter::once(only_row_group))
}
/// Returns the Parquet metadata exposed by `self.base`.
fn metadata(&self) -> &ParquetMetaData {
self.base.parquet_metadata()
}
}
impl RowGroupReaderContext for BulkIterContextRef {

View File

@@ -205,7 +205,8 @@ impl ParquetFetchMetrics {
}
pub(crate) struct RowGroupBase<'a> {
metadata: &'a RowGroupMetaData,
parquet_metadata: &'a ParquetMetaData,
row_group_idx: usize,
pub(crate) offset_index: Option<&'a [OffsetIndexMetaData]>,
/// Compressed page of each column.
column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
@@ -225,7 +226,8 @@ impl<'a> RowGroupBase<'a> {
.map(|x| x[row_group_idx].as_slice());
Self {
metadata,
parquet_metadata: parquet_meta,
row_group_idx,
offset_index,
column_chunks: vec![None; metadata.columns().len()],
row_count: metadata.num_rows() as usize,
@@ -244,7 +246,7 @@ impl<'a> RowGroupBase<'a> {
let ranges = self
.column_chunks
.iter()
.zip(self.metadata.columns())
.zip(self.row_group_metadata().columns())
.enumerate()
.filter(|&(idx, (chunk, _chunk_meta))| chunk.is_none() && projection.leaf_included(idx))
.flat_map(|(idx, (_chunk, chunk_meta))| {
@@ -293,8 +295,12 @@ impl<'a> RowGroupBase<'a> {
chunks.push(chunk_data.next().unwrap());
}
let column = self
.parquet_metadata
.row_group(self.row_group_idx)
.column(idx);
*chunk = Some(Arc::new(ColumnChunkData::Sparse {
length: self.metadata.column(idx).byte_range().1 as usize,
length: column.byte_range().1 as usize,
data: offsets.into_iter().zip(chunks).collect(),
}))
}
@@ -307,7 +313,7 @@ impl<'a> RowGroupBase<'a> {
.enumerate()
.filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx))
.map(|(idx, _chunk)| {
let column = self.metadata.column(idx);
let column = self.row_group_metadata().column(idx);
let (start, length) = column.byte_range();
start..(start + length)
})
@@ -333,7 +339,10 @@ impl<'a> RowGroupBase<'a> {
continue;
};
let column = self.metadata.column(idx);
let column = self
.parquet_metadata
.row_group(self.row_group_idx)
.column(idx);
*chunk = Some(Arc::new(ColumnChunkData::Dense {
offset: column.byte_range().0 as usize,
data,
@@ -360,7 +369,7 @@ impl<'a> RowGroupBase<'a> {
.map(|index| index[col_idx].page_locations.clone());
SerializedPageReader::new(
data.clone(),
self.metadata.column(col_idx),
self.row_group_metadata().column(col_idx),
self.row_count,
page_locations,
)?
@@ -369,6 +378,14 @@ impl<'a> RowGroupBase<'a> {
Ok(page_reader)
}
/// Returns the Parquet metadata reference stored in this `RowGroupBase`.
pub(crate) fn parquet_metadata(&self) -> &ParquetMetaData {
self.parquet_metadata
}
/// Returns the metadata of the row group at `self.row_group_idx`,
/// looked up in the stored Parquet metadata.
pub(crate) fn row_group_metadata(&self) -> &RowGroupMetaData {
    let row_group_idx = self.row_group_idx;
    self.parquet_metadata.row_group(row_group_idx)
}
}
/// An in-memory collection of column chunks
@@ -599,6 +616,14 @@ impl RowGroups for InMemoryRowGroup<'_> {
reader: Some(Ok(Box::new(page_reader))),
}))
}
/// Yields the metadata of the one row group held in memory, as reported
/// by `self.base`. The iterator always has exactly one item.
fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
    let only_row_group = self.base.row_group_metadata();
    Box::new(std::iter::once(only_row_group))
}
/// Returns the Parquet metadata exposed by `self.base`.
fn metadata(&self) -> &ParquetMetaData {
self.base.parquet_metadata()
}
}
/// An in-memory column chunk