Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-07 22:02:56 +00:00)
feat: override __sequence on creating SST to save space and CPU (#5252)
* override memtable sequence
* override sst sequence
* chore: changes per CR comments
* use correct sequence number
* wrap a method to get max sequence
* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
@@ -19,6 +19,7 @@ use object_store::util::{join_dir, with_instrument_layers};
 use object_store::ObjectStore;
 use snafu::ResultExt;
 use store_api::metadata::RegionMetadataRef;
+use store_api::storage::SequenceNumber;

 use crate::cache::write_cache::SstUploadRequest;
 use crate::cache::CacheManagerRef;
@@ -164,7 +165,9 @@ impl AccessLayer {
                 request.metadata,
                 indexer,
             );
-            writer.write_all(request.source, write_opts).await?
+            writer
+                .write_all(request.source, request.max_sequence, write_opts)
+                .await?
         };

         // Put parquet metadata to cache manager.
@@ -194,6 +197,7 @@ pub(crate) struct SstWriteRequest {
     pub(crate) cache_manager: CacheManagerRef,
     #[allow(dead_code)]
     pub(crate) storage: Option<String>,
+    pub(crate) max_sequence: Option<SequenceNumber>,

     /// Configs for index
     pub(crate) index_options: IndexOptions,
src/mito2/src/cache/write_cache.rs
@@ -138,7 +138,9 @@ impl WriteCache {
             indexer,
         );

-        let sst_info = writer.write_all(write_request.source, write_opts).await?;
+        let sst_info = writer
+            .write_all(write_request.source, write_request.max_sequence, write_opts)
+            .await?;

         timer.stop_and_record();
@@ -375,6 +377,7 @@ mod tests {
             metadata,
             source,
             storage: None,
+            max_sequence: None,
             cache_manager: Default::default(),
             index_options: IndexOptions::default(),
             inverted_index_config: Default::default(),
@@ -468,6 +471,7 @@ mod tests {
             metadata,
             source,
             storage: None,
+            max_sequence: None,
             cache_manager: cache_manager.clone(),
             index_options: IndexOptions::default(),
             inverted_index_config: Default::default(),
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::num::NonZero;
 use std::sync::Arc;
 use std::time::Duration;
@@ -303,6 +304,12 @@ impl Compactor for DefaultCompactor {
             let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
             let bloom_filter_index_config =
                 compaction_region.engine_config.bloom_filter_index.clone();
+            let max_sequence = output
+                .inputs
+                .iter()
+                .map(|f| f.meta_ref().sequence)
+                .max()
+                .flatten();
             futs.push(async move {
                 let reader = CompactionSstReaderBuilder {
                     metadata: region_metadata.clone(),
@@ -325,6 +332,7 @@ impl Compactor for DefaultCompactor {
                     source: Source::Reader(reader),
                     cache_manager,
                     storage,
+                    max_sequence: max_sequence.map(NonZero::get),
                     index_options,
                     inverted_index_config,
                     fulltext_index_config,
@@ -343,6 +351,7 @@ impl Compactor for DefaultCompactor {
                     index_file_size: sst_info.index_metadata.file_size,
                     num_rows: sst_info.num_rows as u64,
                     num_row_groups: sst_info.num_row_groups,
+                    sequence: max_sequence,
                 });
                 Ok(file_meta_opt)
             });
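The `.max().flatten()` chain above relies on how `Ord` is defined for `Option`: `None` sorts below any `Some`, so the largest known sequence among the compaction inputs wins, and `flatten` collapses the outer `Option` that `max()` produces for a possibly empty iterator. A minimal std-only sketch of the same logic; the `Meta` struct here is a hypothetical stand-in for the real file metadata:

    use std::num::NonZeroU64;

    // Hypothetical stand-in for an input file's metadata.
    struct Meta {
        sequence: Option<NonZeroU64>,
    }

    fn max_sequence(inputs: &[Meta]) -> Option<NonZeroU64> {
        inputs
            .iter()
            .map(|f| f.sequence)
            // max() returns None for an empty iterator; otherwise the largest
            // Option, where None < Some(_) under Option's Ord impl.
            .max()
            // Collapse Option<Option<NonZeroU64>> into Option<NonZeroU64>.
            .flatten()
    }

    fn main() {
        let inputs = [
            Meta { sequence: NonZeroU64::new(7) },
            Meta { sequence: None }, // unknown sequence sorts lowest
            Meta { sequence: NonZeroU64::new(42) },
        ];
        assert_eq!(max_sequence(&inputs), NonZeroU64::new(42));
        assert_eq!(max_sequence(&[]), None);
    }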
@@ -39,6 +39,7 @@ pub fn new_file_handle(
             index_file_size: 0,
             num_rows: 0,
             num_row_groups: 0,
+            sequence: None,
         },
         file_purger,
     )
@@ -63,6 +64,7 @@ pub(crate) fn new_file_handles(file_specs: &[(i64, i64, u64)]) -> Vec<FileHandle
                 index_file_size: 0,
                 num_rows: 0,
                 num_row_groups: 0,
+                sequence: None,
             },
             file_purger.clone(),
         )
@@ -760,6 +760,7 @@ mod tests {
             index_file_size: 0,
             num_rows: 0,
             num_row_groups: 0,
+            sequence: None,
         },
         Arc::new(NoopFilePurger),
     )
@@ -580,7 +580,7 @@ async fn test_region_usage() {
     flush_region(&engine, region_id, None).await;

     let region_stat = region.region_statistic();
-    assert_eq!(region_stat.sst_size, 2790);
+    assert!(region_stat.sst_size > 0); // Chief says this assert can ensure the size is counted.
     assert_eq!(region_stat.num_rows, 10);

     // region total usage
@@ -15,6 +15,7 @@
 //! Flush related utilities and structs.

 use std::collections::HashMap;
+use std::num::NonZeroU64;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
@@ -345,6 +346,7 @@ impl RegionFlushTask {
                 continue;
             }

+            let max_sequence = mem.stats().max_sequence();
             let file_id = FileId::random();
             let iter = mem.iter(None, None)?;
             let source = Source::Iter(iter);
@@ -357,6 +359,7 @@ impl RegionFlushTask {
                 source,
                 cache_manager: self.cache_manager.clone(),
                 storage: version.options.storage.clone(),
+                max_sequence: Some(max_sequence),
                 index_options: self.index_options.clone(),
                 inverted_index_config: self.engine_config.inverted_index.clone(),
                 fulltext_index_config: self.engine_config.fulltext_index.clone(),
@@ -382,6 +385,7 @@ impl RegionFlushTask {
                 index_file_size: sst_info.index_metadata.file_size,
                 num_rows: sst_info.num_rows as u64,
                 num_row_groups: sst_info.num_row_groups,
+                sequence: NonZeroU64::new(max_sequence),
             };
             file_metas.push(file_meta);
         }
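On the flush path the memtable's max sequence is a plain `u64`, with `0` doubling as "no data / unknown"; `NonZeroU64::new` maps that sentinel to `None` and any real sequence to `Some(_)`, which is exactly the encoding `FileMeta::sequence` uses. A quick illustration:

    use std::num::NonZeroU64;

    fn main() {
        // A memtable that never saw a write reports max_sequence == 0,
        // which NonZeroU64::new turns into None ("sequence unknown").
        assert_eq!(NonZeroU64::new(0), None);

        // Any flushed data carries a real sequence, which becomes Some(_).
        let max_sequence: u64 = 1024;
        assert_eq!(NonZeroU64::new(max_sequence).map(NonZeroU64::get), Some(1024));
    }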
@@ -225,6 +225,7 @@ async fn checkpoint_with_different_compression_types() {
         index_file_size: 0,
         num_rows: 0,
         num_row_groups: 0,
+        sequence: None,
     };
     let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
         files_to_add: vec![file_meta],
@@ -23,7 +23,7 @@ pub use bulk::part::BulkPart;
 use common_time::Timestamp;
 use serde::{Deserialize, Serialize};
 use store_api::metadata::RegionMetadataRef;
-use store_api::storage::ColumnId;
+use store_api::storage::{ColumnId, SequenceNumber};
 use table::predicate::Predicate;

 use crate::config::MitoConfig;
@@ -77,6 +77,8 @@ pub struct MemtableStats {
     num_rows: usize,
     /// Total number of ranges in the memtable.
     num_ranges: usize,
+    /// The maximum sequence number in the memtable.
+    max_sequence: SequenceNumber,
 }

 impl MemtableStats {
@@ -106,6 +108,11 @@ impl MemtableStats {
     pub fn num_ranges(&self) -> usize {
         self.num_ranges
     }
+
+    /// Returns the maximum sequence number in the memtable.
+    pub fn max_sequence(&self) -> SequenceNumber {
+        self.max_sequence
+    }
 }

 pub type BoxedBatchIterator = Box<dyn Iterator<Item = Result<Batch>> + Send>;
@@ -63,6 +63,25 @@ impl KeyValues {
         // Safety: rows is not None.
         self.mutation.rows.as_ref().unwrap().rows.len()
     }
+
+    /// Returns if this container is empty
+    pub fn is_empty(&self) -> bool {
+        self.mutation.rows.is_none()
+    }
+
+    /// Return the max sequence in this container.
+    ///
+    /// When the mutation has no rows, the sequence is the same as the mutation sequence.
+    pub fn max_sequence(&self) -> SequenceNumber {
+        let mut sequence = self.mutation.sequence;
+        let num_rows = self.mutation.rows.as_ref().unwrap().rows.len() as u64;
+        sequence += num_rows;
+        if num_rows > 0 {
+            sequence -= 1;
+        }
+
+        sequence
+    }
 }

 /// Key value view of a mutation.
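The arithmetic in `max_sequence` follows from how sequences are assigned within a mutation: if the mutation starts at sequence S and carries n rows, the rows occupy S, S+1, …, S+n-1, so the maximum is S + n - 1 (and just S when n == 0). A standalone sketch of the same computation:

    /// Max sequence of a mutation that starts at `start` and holds `num_rows` rows.
    /// Rows are numbered start, start + 1, ..., start + num_rows - 1.
    fn max_sequence(start: u64, num_rows: u64) -> u64 {
        let mut sequence = start + num_rows;
        if num_rows > 0 {
            sequence -= 1;
        }
        sequence
    }

    fn main() {
        assert_eq!(max_sequence(100, 4), 103); // rows take sequences 100..=103
        assert_eq!(max_sequence(100, 0), 100); // empty mutation keeps its own sequence
    }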
@@ -24,7 +24,7 @@ mod shard_builder;
 mod tree;

 use std::fmt;
-use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
 use std::sync::Arc;

 use common_base::readable_size::ReadableSize;
@@ -113,6 +113,7 @@ pub struct PartitionTreeMemtable {
     alloc_tracker: AllocTracker,
     max_timestamp: AtomicI64,
     min_timestamp: AtomicI64,
+    max_sequence: AtomicU64,
     /// Total written rows in memtable. This also includes deleted and duplicated rows.
     num_rows: AtomicUsize,
 }
@@ -131,6 +132,10 @@ impl Memtable for PartitionTreeMemtable {
     }

     fn write(&self, kvs: &KeyValues) -> Result<()> {
+        if kvs.is_empty() {
+            return Ok(());
+        }
+
         // TODO(yingwen): Validate schema while inserting rows.

         let mut metrics = WriteMetrics::default();
@@ -140,6 +145,12 @@ impl Memtable for PartitionTreeMemtable {

         self.update_stats(&metrics);

+        // update max_sequence
+        if res.is_ok() {
+            let sequence = kvs.max_sequence();
+            self.max_sequence.fetch_max(sequence, Ordering::Relaxed);
+        }
+
         self.num_rows.fetch_add(kvs.num_rows(), Ordering::Relaxed);
         res
     }
@@ -152,6 +163,12 @@ impl Memtable for PartitionTreeMemtable {

         self.update_stats(&metrics);

+        // update max_sequence
+        if res.is_ok() {
+            self.max_sequence
+                .fetch_max(key_value.sequence(), Ordering::Relaxed);
+        }
+
         self.num_rows.fetch_add(1, Ordering::Relaxed);
         res
     }
@@ -210,6 +227,7 @@ impl Memtable for PartitionTreeMemtable {
                 time_range: None,
                 num_rows: 0,
                 num_ranges: 0,
+                max_sequence: 0,
             };
         }
@@ -229,6 +247,7 @@ impl Memtable for PartitionTreeMemtable {
             time_range: Some((min_timestamp, max_timestamp)),
             num_rows: self.num_rows.load(Ordering::Relaxed),
             num_ranges: 1,
+            max_sequence: self.max_sequence.load(Ordering::Relaxed),
         }
     }
@@ -267,6 +286,7 @@ impl PartitionTreeMemtable {
             max_timestamp: AtomicI64::new(i64::MIN),
             min_timestamp: AtomicI64::new(i64::MAX),
             num_rows: AtomicUsize::new(0),
+            max_sequence: AtomicU64::new(0),
         }
     }
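Both memtables track the running maximum with `AtomicU64::fetch_max` under `Ordering::Relaxed`, which keeps concurrent writers lock-free: each writer proposes its batch's max and the atomic retains the larger value regardless of interleaving. A small std-only sketch of the pattern:

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::sync::Arc;
    use std::thread;

    fn main() {
        let max_sequence = Arc::new(AtomicU64::new(0));

        // Several writers race to record their batch's max sequence.
        let handles: Vec<_> = [5u64, 42, 17]
            .into_iter()
            .map(|seq| {
                let max_sequence = Arc::clone(&max_sequence);
                thread::spawn(move || {
                    // fetch_max atomically keeps the larger of the two values.
                    max_sequence.fetch_max(seq, Ordering::Relaxed);
                })
            })
            .collect();
        for h in handles {
            h.join().unwrap();
        }

        assert_eq!(max_sequence.load(Ordering::Relaxed), 42);
    }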
@@ -15,7 +15,7 @@
 use std::collections::btree_map::Entry;
 use std::collections::{BTreeMap, Bound, HashSet};
 use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
 use std::sync::{Arc, RwLock};
 use std::time::{Duration, Instant};
@@ -100,6 +100,7 @@ pub struct TimeSeriesMemtable {
     alloc_tracker: AllocTracker,
     max_timestamp: AtomicI64,
     min_timestamp: AtomicI64,
+    max_sequence: AtomicU64,
     dedup: bool,
     merge_mode: MergeMode,
     /// Total written rows in memtable. This also includes deleted and duplicated rows.
@@ -134,6 +135,7 @@ impl TimeSeriesMemtable {
             alloc_tracker: AllocTracker::new(write_buffer_manager),
             max_timestamp: AtomicI64::new(i64::MIN),
             min_timestamp: AtomicI64::new(i64::MAX),
+            max_sequence: AtomicU64::new(0),
             dedup,
             merge_mode,
             num_rows: Default::default(),
@@ -186,6 +188,10 @@ impl Memtable for TimeSeriesMemtable {
     }

     fn write(&self, kvs: &KeyValues) -> Result<()> {
+        if kvs.is_empty() {
+            return Ok(());
+        }
+
         let mut local_stats = WriteMetrics::default();

         for kv in kvs.iter() {
@@ -199,6 +205,10 @@ impl Memtable for TimeSeriesMemtable {
         // so that we can ensure writing to memtable will succeed.
         self.update_stats(local_stats);

+        // update max_sequence
+        let sequence = kvs.max_sequence();
+        self.max_sequence.fetch_max(sequence, Ordering::Relaxed);
+
         self.num_rows.fetch_add(kvs.num_rows(), Ordering::Relaxed);
         Ok(())
     }
@@ -209,6 +219,13 @@ impl Memtable for TimeSeriesMemtable {
         metrics.value_bytes += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();

         self.update_stats(metrics);

+        // update max_sequence
+        if res.is_ok() {
+            self.max_sequence
+                .fetch_max(key_value.sequence(), Ordering::Relaxed);
+        }
+
         self.num_rows.fetch_add(1, Ordering::Relaxed);
         res
     }
@@ -294,6 +311,7 @@ impl Memtable for TimeSeriesMemtable {
                 time_range: None,
                 num_rows: 0,
                 num_ranges: 0,
+                max_sequence: 0,
             };
         }
         let ts_type = self
@@ -311,6 +329,7 @@ impl Memtable for TimeSeriesMemtable {
             time_range: Some((min_timestamp, max_timestamp)),
             num_rows: self.num_rows.load(Ordering::Relaxed),
             num_ranges: 1,
+            max_sequence: self.max_sequence.load(Ordering::Relaxed),
         }
     }
@@ -15,6 +15,7 @@
 //! Structures to describe metadata of files.

 use std::fmt;
+use std::num::NonZeroU64;
 use std::str::FromStr;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
@@ -134,6 +135,11 @@ pub struct FileMeta {
     /// the default value `0` doesn't means the file doesn't contains any rows,
     /// but instead means the number of rows is unknown.
     pub num_row_groups: u64,
+    /// Sequence in this file.
+    ///
+    /// This sequence is the only sequence in this file. And it's retrieved from the max
+    /// sequence of the rows on generating this file.
+    pub sequence: Option<NonZeroU64>,
 }

 /// Type of index.
@@ -343,6 +349,7 @@ mod tests {
             index_file_size: 0,
             num_rows: 0,
             num_row_groups: 0,
+            sequence: None,
         }
     }
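A side note on the field's type: `Option<NonZeroU64>` is a deliberate choice for `FileMeta::sequence`, because the compiler reuses the forbidden zero value as the `None` representation (the niche optimization), so the optional field costs no more space than a bare `u64`:

    use std::mem::size_of;
    use std::num::NonZeroU64;

    fn main() {
        // Guaranteed by the language: Option<NonZeroU64> has the same size
        // as u64, since the forbidden value 0 doubles as None.
        assert_eq!(size_of::<Option<NonZeroU64>>(), size_of::<u64>());

        // Without the niche, an Option needs a separate discriminant;
        // Option<u64> is 16 bytes on current rustc targets.
        println!("Option<u64>: {} bytes", size_of::<Option<u64>>());
    }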
@@ -119,6 +119,8 @@ impl FilePurger for LocalFilePurger {

 #[cfg(test)]
 mod tests {
+    use std::num::NonZeroU64;
+
     use common_test_util::temp_dir::create_temp_dir;
     use object_store::services::Fs;
     use object_store::ObjectStore;
@@ -176,6 +178,7 @@ mod tests {
             index_file_size: 0,
             num_rows: 0,
             num_row_groups: 0,
+            sequence: None,
         },
         file_purger,
     );
@@ -238,6 +241,7 @@ mod tests {
             index_file_size: 4096,
             num_rows: 1024,
             num_row_groups: 1,
+            sequence: NonZeroU64::new(4096),
         },
         file_purger,
     );
@@ -134,7 +134,7 @@ mod tests {
     );

     let info = writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .unwrap();
@@ -189,7 +189,7 @@ mod tests {
     );

     writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .unwrap();
@@ -258,7 +258,7 @@ mod tests {
     );

     let sst_info = writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .expect("write_all should return sst info");
@@ -297,7 +297,7 @@ mod tests {
         Indexer::default(),
     );
     writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .unwrap();
@@ -350,7 +350,7 @@ mod tests {
         Indexer::default(),
     );
     writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .unwrap();
@@ -386,7 +386,7 @@ mod tests {
     );

     writer
-        .write_all(source, &write_opts)
+        .write_all(source, None, &write_opts)
         .await
         .unwrap()
         .unwrap();
@@ -42,7 +42,7 @@ use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData};
 use parquet::file::statistics::Statistics;
 use snafu::{ensure, OptionExt, ResultExt};
 use store_api::metadata::{ColumnMetadata, RegionMetadataRef};
-use store_api::storage::ColumnId;
+use store_api::storage::{ColumnId, SequenceNumber};

 use crate::error::{
     ConvertVectorSnafu, InvalidBatchSnafu, InvalidRecordBatchSnafu, NewRecordBatchSnafu, Result,
@@ -65,6 +65,7 @@ pub(crate) struct WriteFormat {
     metadata: RegionMetadataRef,
     /// SST file schema.
     arrow_schema: SchemaRef,
+    override_sequence: Option<SequenceNumber>,
 }

 impl WriteFormat {
@@ -74,9 +75,19 @@ impl WriteFormat {
         WriteFormat {
             metadata,
             arrow_schema,
+            override_sequence: None,
         }
     }

+    /// Set override sequence.
+    pub(crate) fn with_override_sequence(
+        mut self,
+        override_sequence: Option<SequenceNumber>,
+    ) -> Self {
+        self.override_sequence = override_sequence;
+        self
+    }
+
     /// Gets the arrow schema to store in parquet.
     pub(crate) fn arrow_schema(&self) -> &SchemaRef {
         &self.arrow_schema
@@ -107,7 +118,14 @@ impl WriteFormat {
         columns.push(batch.timestamps().to_arrow_array());
         // Add internal columns: primary key, sequences, op types.
         columns.push(new_primary_key_array(batch.primary_key(), batch.num_rows()));
-        columns.push(batch.sequences().to_arrow_array());
+
+        if let Some(override_sequence) = self.override_sequence {
+            let sequence_array =
+                Arc::new(UInt64Array::from(vec![override_sequence; batch.num_rows()]));
+            columns.push(sequence_array);
+        } else {
+            columns.push(batch.sequences().to_arrow_array());
+        }
         columns.push(batch.op_types().to_arrow_array());

         RecordBatch::try_new(self.arrow_schema.clone(), columns).context(NewRecordBatchSnafu)
@@ -756,6 +774,27 @@ mod tests {
         assert_eq!(expect_record, actual);
     }

+    #[test]
+    fn test_convert_batch_with_override_sequence() {
+        let metadata = build_test_region_metadata();
+        let write_format = WriteFormat::new(metadata).with_override_sequence(Some(415411));
+
+        let num_rows = 4;
+        let batch = new_batch(b"test", 1, 2, num_rows);
+        let columns: Vec<ArrayRef> = vec![
+            Arc::new(Int64Array::from(vec![2; num_rows])), // field1
+            Arc::new(Int64Array::from(vec![3; num_rows])), // field0
+            Arc::new(TimestampMillisecondArray::from(vec![1, 2, 3, 4])), // ts
+            build_test_pk_array(&[(b"test".to_vec(), num_rows)]), // primary key
+            Arc::new(UInt64Array::from(vec![415411; num_rows])), // sequence
+            Arc::new(UInt8Array::from(vec![TEST_OP_TYPE; num_rows])), // op type
+        ];
+        let expect_record = RecordBatch::try_new(build_test_arrow_schema(), columns).unwrap();
+
+        let actual = write_format.convert_batch(&batch).unwrap();
+        assert_eq!(expect_record, actual);
+    }
+
     #[test]
     fn test_projection_indices() {
         let metadata = build_test_region_metadata();
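The override path in `convert_batch` simply materializes one repeated `u64` for the whole batch instead of the per-row sequences; since the file records its single sequence in `FileMeta::sequence` anyway, the per-row values carry no extra information and the constant column compresses to almost nothing. A sketch of that column construction, assuming the `arrow` crate as the dependency (GreptimeDB reaches the same array types through its own re-exports); `sequence_column` is a hypothetical helper:

    use std::sync::Arc;

    use arrow::array::{Array, ArrayRef, UInt64Array};

    /// Build the __sequence column for a batch, either overriding every row
    /// with one value or keeping the per-row sequences.
    fn sequence_column(override_sequence: Option<u64>, per_row: &[u64]) -> ArrayRef {
        if let Some(seq) = override_sequence {
            // One constant value repeated for the whole batch.
            Arc::new(UInt64Array::from(vec![seq; per_row.len()]))
        } else {
            Arc::new(UInt64Array::from(per_row.to_vec()))
        }
    }

    fn main() {
        let col = sequence_column(Some(415411), &[1, 2, 3, 4]);
        assert_eq!(col.len(), 4);
    }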
@@ -31,6 +31,7 @@ use parquet::schema::types::ColumnPath;
 use snafu::ResultExt;
 use store_api::metadata::RegionMetadataRef;
 use store_api::storage::consts::SEQUENCE_COLUMN_NAME;
+use store_api::storage::SequenceNumber;
 use tokio::io::AsyncWrite;
 use tokio_util::compat::{Compat, FuturesAsyncWriteCompatExt};
@@ -112,9 +113,11 @@ where
     pub async fn write_all(
         &mut self,
         mut source: Source,
+        override_sequence: Option<SequenceNumber>, // override the `sequence` field from `Source`
         opts: &WriteOptions,
     ) -> Result<Option<SstInfo>> {
-        let write_format = WriteFormat::new(self.metadata.clone());
+        let write_format =
+            WriteFormat::new(self.metadata.clone()).with_override_sequence(override_sequence);
         let mut stats = SourceStats::default();

         while let Some(res) = self
@@ -14,6 +14,7 @@

 //! Utilities for testing SSTs.

+use std::num::NonZeroU64;
 use std::sync::Arc;

 use api::v1::{OpType, SemanticType};
@@ -116,6 +117,7 @@ pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
             index_file_size: 0,
             num_rows: 0,
             num_row_groups: 0,
+            sequence: None,
         },
         file_purger,
     )
@@ -15,6 +15,7 @@
 //! Utilities to mock version.

 use std::collections::HashMap;
+use std::num::NonZeroU64;
 use std::sync::Arc;

 use api::v1::value::ValueData;
@@ -103,6 +104,7 @@ impl VersionControlBuilder {
                 index_file_size: 0,
                 num_rows: 0,
                 num_row_groups: 0,
+                sequence: None,
             },
         );
         self
@@ -194,6 +196,7 @@ pub(crate) fn apply_edit(
                 index_file_size: 0,
                 num_rows: 0,
                 num_row_groups: 0,
+                sequence: None,
             }
         })
         .collect();