mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-25 09:20:40 +00:00
refactor: unify flush and compaction to always use FlatSource (#7799)
* feat: support write flat as primary key format

  Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: migrate flush to always use FlatSource

  Add FormatType propagation in SstWriteRequest and use it to choose Flat vs PrimaryKey write paths (write_all_flat vs write_all_flat_as_primary_key) in AccessLayer and WriteCache. Make compactor and flush derive the sst_write_format from region options or engine config. Simplify flush logic and remove the old memtable_source helper. Update tests to set default sst_write_format.

  Signed-off-by: evenyag <realevenyag@gmail.com>

* refactor: compaction use flat source

  Signed-off-by: evenyag <realevenyag@gmail.com>

* refactor: read parquet sequentially as flat batches

  Signed-off-by: evenyag <realevenyag@gmail.com>

* refactor: remove new_batch_with_binary in favor of new_record_batch_with_binary

  Replace PrimaryKeyWriteFormat with FlatWriteFormat in test_read_large_binary test and use new_record_batch_with_binary directly, removing the now-unused new_batch_with_binary function and its BinaryArray import.

  Signed-off-by: evenyag <realevenyag@gmail.com>

* test: add tests for PrimaryKeyWriteFormat::convert_flat_batch

  Signed-off-by: evenyag <realevenyag@gmail.com>

* refactor: remove Either from SstWriteRequest

  Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: handle index build mode

  Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: consider sparse encoding and last non null in flush

  Signed-off-by: evenyag <realevenyag@gmail.com>

* test: add unit tests for field_column_start edge cases

  Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
This commit is contained in:
@@ -20,13 +20,14 @@ use clap::Parser;
|
||||
use colored::Colorize;
|
||||
use datanode::config::RegionEngineConfig;
|
||||
use datanode::store;
|
||||
use either::Either;
|
||||
use futures::stream;
|
||||
use mito2::access_layer::{
|
||||
AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
|
||||
};
|
||||
use mito2::cache::{CacheManager, CacheManagerRef};
|
||||
use mito2::config::{FulltextIndexConfig, MitoConfig, Mode};
|
||||
use mito2::read::Source;
|
||||
use mito2::read::FlatSource;
|
||||
use mito2::sst::FormatType;
|
||||
use mito2::sst::file::{FileHandle, FileMeta};
|
||||
use mito2::sst::file_purger::{FilePurger, FilePurgerRef};
|
||||
use mito2::sst::index::intermediate::IntermediateManager;
|
||||
@@ -210,6 +211,7 @@ impl ObjbenchCommand {
|
||||
object_store.clone(),
|
||||
)
|
||||
.expected_metadata(Some(region_meta.clone()))
|
||||
.flat_format(true)
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
@@ -231,6 +233,10 @@ impl ObjbenchCommand {
|
||||
let reader_build_elapsed = reader_build_start.elapsed();
|
||||
let total_rows = reader.parquet_metadata().file_metadata().num_rows();
|
||||
println!("{} Reader built in {:?}", "✓".green(), reader_build_elapsed);
|
||||
let reader_stream = Box::pin(stream::try_unfold(reader, |mut reader| async move {
|
||||
let batch = reader.next_record_batch().await?;
|
||||
Ok(batch.map(|batch| (batch, reader)))
|
||||
}));
|
||||
|
||||
// Build write request
|
||||
let fulltext_index_config = FulltextIndexConfig {
|
||||
@@ -241,10 +247,11 @@ impl ObjbenchCommand {
|
||||
let write_req = SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
metadata: region_meta,
|
||||
source: Either::Left(Source::Reader(Box::new(reader))),
|
||||
source: FlatSource::Stream(reader_stream),
|
||||
cache_manager,
|
||||
storage: None,
|
||||
max_sequence: None,
|
||||
sst_write_format: FormatType::PrimaryKey,
|
||||
index_options: Default::default(),
|
||||
index_config: mito_engine_config.index.clone(),
|
||||
inverted_index_config: MitoConfig::default().inverted_index,
|
||||
|
||||
Reference in New Issue
Block a user