diff --git a/Cargo.lock b/Cargo.lock index 3063e76900..48eff4ceb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12358,6 +12358,7 @@ dependencies = [ "sqlparser 0.55.0-greptime", "strum 0.27.1", "tokio", + "uuid", ] [[package]] diff --git a/src/mito2/src/access_layer.rs b/src/mito2/src/access_layer.rs index a2f2eeeb84..6af56265cb 100644 --- a/src/mito2/src/access_layer.rs +++ b/src/mito2/src/access_layer.rs @@ -27,7 +27,7 @@ use snafu::ResultExt; use store_api::metadata::RegionMetadataRef; use store_api::region_request::PathType; use store_api::sst_entry::StorageSstEntry; -use store_api::storage::{RegionId, SequenceNumber}; +use store_api::storage::{FileId, RegionId, SequenceNumber}; use crate::cache::CacheManagerRef; use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey}; @@ -37,7 +37,7 @@ use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu use crate::metrics::{COMPACTION_STAGE_ELAPSED, FLUSH_ELAPSED}; use crate::read::{FlatSource, Source}; use crate::region::options::IndexOptions; -use crate::sst::file::{FileHandle, FileId, RegionFileId}; +use crate::sst::file::{FileHandle, RegionFileId}; use crate::sst::index::IndexerBuilderImpl; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 0fe98cbb89..9b313f1fa8 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -35,7 +35,7 @@ use moka::notification::RemovalCause; use moka::sync::Cache; use parquet::file::metadata::ParquetMetaData; use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef}; -use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector}; +use store_api::storage::{ConcreteDataType, FileId, RegionId, TimeSeriesRowSelector}; use crate::cache::cache_size::parquet_meta_size; use crate::cache::file_cache::{FileType, IndexKey}; @@ -43,7 +43,7 @@ use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCache use crate::cache::write_cache::WriteCacheRef; use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS}; use crate::read::Batch; -use crate::sst::file::{FileId, RegionFileId}; +use crate::sst::file::RegionFileId; /// Metrics type key for sst meta. const SST_META_TYPE: &str = "sst_meta"; diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index cb62b427cf..3fed4b9916 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -30,12 +30,11 @@ use object_store::util::join_path; use object_store::{ErrorKind, ObjectStore, Reader}; use parquet::file::metadata::ParquetMetaData; use snafu::ResultExt; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use crate::cache::FILE_TYPE; use crate::error::{OpenDalSnafu, Result}; use crate::metrics::{CACHE_BYTES, CACHE_HIT, CACHE_MISS}; -use crate::sst::file::FileId; use crate::sst::parquet::helper::fetch_byte_ranges; use crate::sst::parquet::metadata::MetadataLoader; diff --git a/src/mito2/src/cache/index/bloom_filter_index.rs b/src/mito2/src/cache/index/bloom_filter_index.rs index 95e17c390c..f4b9477b83 100644 --- a/src/mito2/src/cache/index/bloom_filter_index.rs +++ b/src/mito2/src/cache/index/bloom_filter_index.rs @@ -20,11 +20,10 @@ use async_trait::async_trait; use bytes::Bytes; use index::bloom_filter::error::Result; use index::bloom_filter::reader::BloomFilterReader; -use store_api::storage::ColumnId; +use store_api::storage::{ColumnId, FileId}; use crate::cache::index::{INDEX_METADATA_TYPE, IndexCache, PageKey}; use crate::metrics::{CACHE_HIT, CACHE_MISS}; -use crate::sst::file::FileId; const INDEX_TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index"; diff --git a/src/mito2/src/cache/index/inverted_index.rs b/src/mito2/src/cache/index/inverted_index.rs index 6add80edfa..5caf998a12 100644 --- a/src/mito2/src/cache/index/inverted_index.rs +++ b/src/mito2/src/cache/index/inverted_index.rs @@ -21,10 +21,10 @@ use bytes::Bytes; use index::inverted_index::error::Result; use index::inverted_index::format::reader::InvertedIndexReader; use prost::Message; +use store_api::storage::FileId; use crate::cache::index::{INDEX_METADATA_TYPE, IndexCache, PageKey}; use crate::metrics::{CACHE_HIT, CACHE_MISS}; -use crate::sst::file::FileId; const INDEX_TYPE_INVERTED_INDEX: &str = "inverted_index"; diff --git a/src/mito2/src/cache/index/result_cache.rs b/src/mito2/src/cache/index/result_cache.rs index ee6bd44b41..5ae8425cc8 100644 --- a/src/mito2/src/cache/index/result_cache.rs +++ b/src/mito2/src/cache/index/result_cache.rs @@ -19,10 +19,9 @@ use index::bloom_filter::applier::InListPredicate; use index::inverted_index::search::predicate::{Predicate, RangePredicate}; use moka::notification::RemovalCause; use moka::sync::Cache; -use store_api::storage::ColumnId; +use store_api::storage::{ColumnId, FileId}; use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS}; -use crate::sst::file::FileId; use crate::sst::index::fulltext_index::applier::builder::{ FulltextQuery, FulltextRequest, FulltextTerm, }; diff --git a/src/mito2/src/compaction/buckets.rs b/src/mito2/src/compaction/buckets.rs index 8e47ea3370..f6030bb0a6 100644 --- a/src/mito2/src/compaction/buckets.rs +++ b/src/mito2/src/compaction/buckets.rs @@ -80,9 +80,10 @@ pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([ #[cfg(test)] mod tests { + use store_api::storage::FileId; + use super::*; use crate::compaction::test_util::new_file_handle; - use crate::sst::file::FileId; #[test] fn test_time_bucket() { diff --git a/src/mito2/src/compaction/picker.rs b/src/mito2/src/compaction/picker.rs index 5256cc93f8..16540e1e02 100644 --- a/src/mito2/src/compaction/picker.rs +++ b/src/mito2/src/compaction/picker.rs @@ -147,9 +147,10 @@ pub fn new_picker( #[cfg(test)] mod tests { + use store_api::storage::FileId; + use super::*; use crate::compaction::test_util::new_file_handle; - use crate::sst::file::FileId; use crate::test_util::new_noop_file_purger; #[test] diff --git a/src/mito2/src/compaction/test_util.rs b/src/mito2/src/compaction/test_util.rs index eb29a7c427..b785d36bcb 100644 --- a/src/mito2/src/compaction/test_util.rs +++ b/src/mito2/src/compaction/test_util.rs @@ -15,8 +15,9 @@ use std::num::NonZeroU64; use common_time::Timestamp; +use store_api::storage::FileId; -use crate::sst::file::{FileHandle, FileId, FileMeta, Level}; +use crate::sst::file::{FileHandle, FileMeta, Level}; use crate::test_util::new_noop_file_purger; /// Test util to create file handles. diff --git a/src/mito2/src/compaction/twcs.rs b/src/mito2/src/compaction/twcs.rs index d65ec3c5fd..5196eff6b1 100644 --- a/src/mito2/src/compaction/twcs.rs +++ b/src/mito2/src/compaction/twcs.rs @@ -350,11 +350,13 @@ fn find_latest_window_in_seconds<'a>( mod tests { use std::collections::HashSet; + use store_api::storage::FileId; + use super::*; use crate::compaction::test_util::{ new_file_handle, new_file_handle_with_sequence, new_file_handle_with_size_and_sequence, }; - use crate::sst::file::{FileId, Level}; + use crate::sst::file::Level; #[test] fn test_get_latest_window_in_seconds() { diff --git a/src/mito2/src/compaction/window.rs b/src/mito2/src/compaction/window.rs index 9b23f27e19..8a5a878257 100644 --- a/src/mito2/src/compaction/window.rs +++ b/src/mito2/src/compaction/window.rs @@ -206,12 +206,12 @@ mod tests { use common_time::Timestamp; use common_time::range::TimestampRange; - use store_api::storage::RegionId; + use store_api::storage::{FileId, RegionId}; use crate::compaction::compactor::CompactionVersion; use crate::compaction::window::{WindowedCompactionPicker, file_time_bucket_span}; use crate::region::options::RegionOptions; - use crate::sst::file::{FileId, FileMeta, Level}; + use crate::sst::file::{FileMeta, Level}; use crate::sst::file_purger::NoopFilePurger; use crate::sst::version::SstVersion; use crate::test_util::memtable_util::metadata_for_test; diff --git a/src/mito2/src/engine/edit_region_test.rs b/src/mito2/src/engine/edit_region_test.rs index 1a9407fb3b..63601b6cd6 100644 --- a/src/mito2/src/engine/edit_region_test.rs +++ b/src/mito2/src/engine/edit_region_test.rs @@ -19,7 +19,7 @@ use common_time::util::current_time_millis; use object_store::ObjectStore; use store_api::region_engine::RegionEngine; use store_api::region_request::RegionRequest; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use tokio::sync::{Barrier, oneshot}; use crate::config::MitoConfig; @@ -28,7 +28,7 @@ use crate::engine::flush_test::MockTimeProvider; use crate::engine::listener::EventListener; use crate::manifest::action::RegionEdit; use crate::region::MitoRegionRef; -use crate::sst::file::{FileId, FileMeta}; +use crate::sst::file::FileMeta; use crate::test_util::{CreateRequestBuilder, TestEnv}; #[tokio::test] diff --git a/src/mito2/src/engine/listener.rs b/src/mito2/src/engine/listener.rs index 8a2a444882..e2ede93b37 100644 --- a/src/mito2/src/engine/listener.rs +++ b/src/mito2/src/engine/listener.rs @@ -20,11 +20,9 @@ use std::time::Duration; use async_trait::async_trait; use common_telemetry::info; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use tokio::sync::Notify; -use crate::sst::file::FileId; - /// Mito engine background event listener. #[async_trait] pub trait EventListener: Send + Sync { diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index f1b4191aa1..2c3d739262 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -29,13 +29,12 @@ use prost::DecodeError; use snafu::{Location, Snafu}; use store_api::ManifestVersion; use store_api::logstore::provider::Provider; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use tokio::time::error::Elapsed; use crate::cache::file_cache::FileType; use crate::region::RegionRoleState; use crate::schedule::remote_job_scheduler::JobId; -use crate::sst::file::FileId; use crate::worker::WorkerId; #[derive(Snafu)] diff --git a/src/mito2/src/manifest/action.rs b/src/mito2/src/manifest/action.rs index 351e935734..e0d81bc305 100644 --- a/src/mito2/src/manifest/action.rs +++ b/src/mito2/src/manifest/action.rs @@ -22,14 +22,14 @@ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::ManifestVersion; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{RegionId, SequenceNumber}; +use store_api::storage::{FileId, RegionId, SequenceNumber}; use strum::Display; use crate::error::{ DurationOutOfRangeSnafu, RegionMetadataNotFoundSnafu, Result, SerdeJsonSnafu, Utf8Snafu, }; use crate::manifest::manager::RemoveFileOptions; -use crate::sst::file::{FileId, FileMeta}; +use crate::sst::file::FileMeta; use crate::wal::EntryId; /// Actions that can be applied to region manifest. diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index 3408c978a8..e10d3aad46 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use std::time::Duration; use common_datasource::compression::CompressionType; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use strum::IntoEnumIterator; use crate::error::Error::ChecksumMismatch; @@ -28,7 +28,7 @@ use crate::manifest::manager::RegionManifestManager; use crate::manifest::storage::CheckpointMetadata; use crate::manifest::tests::utils::basic_region_metadata; use crate::region::{RegionLeaderState, RegionRoleState}; -use crate::sst::file::{FileId, FileMeta}; +use crate::sst::file::FileMeta; use crate::test_util::TestEnv; async fn build_manager( diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs index 102aae52ca..e7a7e0d1c3 100644 --- a/src/mito2/src/memtable/bulk.rs +++ b/src/mito2/src/memtable/bulk.rs @@ -30,7 +30,7 @@ use datatypes::arrow::datatypes::SchemaRef; use mito_codec::key_values::KeyValue; use rayon::prelude::*; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, RegionId, SequenceNumber}; +use store_api::storage::{ColumnId, FileId, RegionId, SequenceNumber}; use tokio::sync::Semaphore; use crate::error::{Result, UnsupportedOperationSnafu}; @@ -47,7 +47,6 @@ use crate::memtable::{ use crate::read::flat_dedup::{FlatDedupIterator, FlatLastNonNull, FlatLastRow}; use crate::read::flat_merge::FlatMergeIterator; use crate::region::options::MergeMode; -use crate::sst::file::FileId; use crate::sst::parquet::format::FIXED_POS_COLUMN_NUM; use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, DEFAULT_ROW_GROUP_SIZE}; use crate::sst::{FlatSchemaOptions, to_flat_sst_arrow_schema}; diff --git a/src/mito2/src/memtable/bulk/part.rs b/src/mito2/src/memtable/bulk/part.rs index 8e823aaa37..c3ac1e3245 100644 --- a/src/mito2/src/memtable/bulk/part.rs +++ b/src/mito2/src/memtable/bulk/part.rs @@ -51,8 +51,8 @@ use parquet::file::properties::WriterProperties; use snafu::{OptionExt, ResultExt, Snafu}; use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::{RegionMetadata, RegionMetadataRef}; -use store_api::storage::SequenceNumber; use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME; +use store_api::storage::{FileId, SequenceNumber}; use table::predicate::Predicate; use crate::error::{ @@ -63,7 +63,6 @@ use crate::memtable::BoxedRecordBatchIterator; use crate::memtable::bulk::context::BulkIterContextRef; use crate::memtable::bulk::part_reader::EncodedBulkPartIter; use crate::memtable::time_series::{ValueBuilder, Values}; -use crate::sst::file::FileId; use crate::sst::index::IndexOutput; use crate::sst::parquet::flat_format::primary_key_column_index; use crate::sst::parquet::format::{PrimaryKeyArray, PrimaryKeyArrayBuilder, ReadFormat}; diff --git a/src/mito2/src/read/last_row.rs b/src/mito2/src/read/last_row.rs index c604397d38..a00c039cec 100644 --- a/src/mito2/src/read/last_row.rs +++ b/src/mito2/src/read/last_row.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use async_trait::async_trait; use datatypes::vectors::UInt32Vector; -use store_api::storage::TimeSeriesRowSelector; +use store_api::storage::{FileId, TimeSeriesRowSelector}; use crate::cache::{ CacheStrategy, SelectorResultKey, SelectorResultValue, selector_result_cache_hit, @@ -26,7 +26,6 @@ use crate::cache::{ }; use crate::error::Result; use crate::read::{Batch, BatchReader, BoxedBatchReader}; -use crate::sst::file::FileId; use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader}; /// Reader to keep the last row for each time series. diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 4c20001c7f..3dca030734 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -17,7 +17,6 @@ use std::fmt; use std::fmt::{Debug, Formatter}; use std::num::NonZeroU64; -use std::str::FromStr; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; @@ -26,10 +25,8 @@ use common_time::Timestamp; use partition::expr::PartitionExpr; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; -use snafu::{ResultExt, Snafu}; use store_api::region_request::PathType; -use store_api::storage::RegionId; -use uuid::Uuid; +use store_api::storage::{FileId, RegionId}; use crate::sst::file_purger::FilePurgerRef; use crate::sst::location; @@ -79,52 +76,6 @@ pub type Level = u8; /// Maximum level of SSTs. pub const MAX_LEVEL: Level = 2; -#[derive(Debug, Snafu, PartialEq)] -pub struct ParseIdError { - source: uuid::Error, -} - -/// Unique id for [SST File]. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] -pub struct FileId(Uuid); - -impl FileId { - /// Returns a new unique [FileId] randomly. - pub fn random() -> FileId { - FileId(Uuid::new_v4()) - } - - /// Parses id from string. - pub fn parse_str(input: &str) -> std::result::Result { - Uuid::parse_str(input).map(FileId).context(ParseIdSnafu) - } - - /// Converts [FileId] as byte slice. - pub fn as_bytes(&self) -> &[u8] { - self.0.as_bytes() - } -} - -impl From for Uuid { - fn from(value: FileId) -> Self { - value.0 - } -} - -impl fmt::Display for FileId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl FromStr for FileId { - type Err = ParseIdError; - - fn from_str(s: &str) -> std::result::Result { - FileId::parse_str(s) - } -} - /// Cross-region file id. /// /// It contains a region id and a file id. The string representation is `{region_id}/{file_id}`. @@ -410,33 +361,13 @@ impl FileHandleInner { #[cfg(test)] mod tests { + use std::str::FromStr; + use datatypes::value::Value; use partition::expr::{PartitionExpr, col}; use super::*; - #[test] - fn test_file_id() { - let id = FileId::random(); - let uuid_str = id.to_string(); - assert_eq!(id.0.to_string(), uuid_str); - - let parsed = FileId::parse_str(&uuid_str).unwrap(); - assert_eq!(id, parsed); - let parsed = uuid_str.parse().unwrap(); - assert_eq!(id, parsed); - } - - #[test] - fn test_file_id_serialization() { - let id = FileId::random(); - let json = serde_json::to_string(&id).unwrap(); - assert_eq!(format!("\"{id}\""), json); - - let parsed = serde_json::from_str(&json).unwrap(); - assert_eq!(id, parsed); - } - fn create_file_meta(file_id: FileId, level: Level) -> FileMeta { FileMeta { region_id: 0.into(), diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index efed3f6721..4dc17315be 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -223,12 +223,12 @@ mod tests { use object_store::services::Fs; use smallvec::SmallVec; use store_api::region_request::PathType; - use store_api::storage::RegionId; + use store_api::storage::{FileId, RegionId}; use super::*; use crate::access_layer::AccessLayer; use crate::schedule::scheduler::{LocalScheduler, Scheduler}; - use crate::sst::file::{FileHandle, FileId, FileMeta, FileTimeRange, IndexType, RegionFileId}; + use crate::sst::file::{FileHandle, FileMeta, FileTimeRange, IndexType, RegionFileId}; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; use crate::sst::location; diff --git a/src/mito2/src/sst/file_ref.rs b/src/mito2/src/sst/file_ref.rs index 4a507a1fdd..c8b86ed0fd 100644 --- a/src/mito2/src/sst/file_ref.rs +++ b/src/mito2/src/sst/file_ref.rs @@ -19,12 +19,12 @@ use common_telemetry::debug; use dashmap::{DashMap, Entry}; use serde::{Deserialize, Serialize}; use store_api::ManifestVersion; -use store_api::storage::{RegionId, TableId}; +use store_api::storage::{FileId, RegionId, TableId}; use crate::error::Result; use crate::metrics::GC_REF_FILE_CNT; use crate::region::RegionMapRef; -use crate::sst::file::{FileId, FileMeta}; +use crate::sst::file::FileMeta; #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct FileRef { @@ -237,7 +237,7 @@ mod tests { use store_api::storage::RegionId; use super::*; - use crate::sst::file::{FileId, FileMeta, FileTimeRange, IndexType, RegionFileId}; + use crate::sst::file::{FileMeta, FileTimeRange, IndexType, RegionFileId}; #[tokio::test] async fn test_file_ref_mgr() { diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index e96e8ee2e7..fa53807ef3 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -30,14 +30,14 @@ use puffin_manager::SstPuffinManager; use smallvec::SmallVec; use statistics::{ByteCount, RowCount}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, RegionId}; +use store_api::storage::{ColumnId, FileId, RegionId}; use crate::access_layer::OperationType; use crate::config::{BloomFilterConfig, FulltextIndexConfig, InvertedIndexConfig}; use crate::metrics::INDEX_CREATE_MEMORY_USAGE; use crate::read::Batch; use crate::region::options::IndexOptions; -use crate::sst::file::{FileId, IndexType}; +use crate::sst::file::IndexType; use crate::sst::index::fulltext_index::creator::FulltextIndexer; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::inverted_index::creator::InvertedIndexer; diff --git a/src/mito2/src/sst/index/bloom_filter/applier.rs b/src/mito2/src/sst/index/bloom_filter/applier.rs index fcd54931f1..4562f01cdf 100644 --- a/src/mito2/src/sst/index/bloom_filter/applier.rs +++ b/src/mito2/src/sst/index/bloom_filter/applier.rs @@ -353,9 +353,9 @@ mod tests { use futures::future::BoxFuture; use puffin::puffin_manager::PuffinWriter; use store_api::metadata::RegionMetadata; + use store_api::storage::FileId; use super::*; - use crate::sst::file::FileId; use crate::sst::index::bloom_filter::creator::BloomFilterIndexer; use crate::sst::index::bloom_filter::creator::tests::{ mock_object_store, mock_region_metadata, new_batch, new_intm_mgr, diff --git a/src/mito2/src/sst/index/bloom_filter/creator.rs b/src/mito2/src/sst/index/bloom_filter/creator.rs index d373b0701c..387302aeb0 100644 --- a/src/mito2/src/sst/index/bloom_filter/creator.rs +++ b/src/mito2/src/sst/index/bloom_filter/creator.rs @@ -26,7 +26,7 @@ use mito_codec::row_converter::SortField; use puffin::puffin_manager::{PuffinWriter, PutOptions}; use snafu::{ResultExt, ensure}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::ColumnId; +use store_api::storage::{ColumnId, FileId}; use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; use crate::error::{ @@ -34,7 +34,6 @@ use crate::error::{ OperateAbortedIndexSnafu, PuffinAddBlobSnafu, PushBloomFilterValueSnafu, Result, }; use crate::read::Batch; -use crate::sst::file::FileId; use crate::sst::index::TYPE_BLOOM_FILTER_INDEX; use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE; use crate::sst::index::intermediate::{ diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index b5122e93d2..57e677b915 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -29,7 +29,7 @@ use puffin::blob_metadata::CompressionCodec; use puffin::puffin_manager::PutOptions; use snafu::{ResultExt, ensure}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, ConcreteDataType, RegionId}; +use store_api::storage::{ColumnId, ConcreteDataType, FileId, RegionId}; use crate::error::{ CastVectorSnafu, ComputeArrowSnafu, CreateFulltextCreatorSnafu, DataTypeMismatchSnafu, @@ -37,7 +37,6 @@ use crate::error::{ Result, }; use crate::read::Batch; -use crate::sst::file::FileId; use crate::sst::index::TYPE_FULLTEXT_INDEX; use crate::sst::index::fulltext_index::{INDEX_BLOB_TYPE_BLOOM, INDEX_BLOB_TYPE_TANTIVY}; use crate::sst::index::intermediate::{ @@ -436,12 +435,12 @@ mod tests { use puffin::puffin_manager::{PuffinManager, PuffinWriter}; use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef}; use store_api::region_request::PathType; - use store_api::storage::{ConcreteDataType, RegionId}; + use store_api::storage::{ConcreteDataType, FileId, RegionId}; use super::*; use crate::access_layer::RegionFilePathFactory; use crate::read::{Batch, BatchColumn}; - use crate::sst::file::{FileId, RegionFileId}; + use crate::sst::file::RegionFileId; use crate::sst::index::fulltext_index::applier::FulltextIndexApplier; use crate::sst::index::fulltext_index::applier::builder::{ FulltextQuery, FulltextRequest, FulltextTerm, diff --git a/src/mito2/src/sst/index/intermediate.rs b/src/mito2/src/sst/index/intermediate.rs index 79c863a795..24103179c4 100644 --- a/src/mito2/src/sst/index/intermediate.rs +++ b/src/mito2/src/sst/index/intermediate.rs @@ -24,7 +24,7 @@ use index::error::Result as IndexResult; use index::external_provider::ExternalTempFileProvider; use object_store::util::{self, normalize_dir}; use snafu::ResultExt; -use store_api::storage::{ColumnId, RegionId}; +use store_api::storage::{ColumnId, FileId, RegionId}; use uuid::Uuid; use crate::access_layer::new_fs_cache_store; @@ -34,7 +34,6 @@ use crate::metrics::{ INDEX_INTERMEDIATE_READ_OP_TOTAL, INDEX_INTERMEDIATE_SEEK_OP_TOTAL, INDEX_INTERMEDIATE_WRITE_BYTES_TOTAL, INDEX_INTERMEDIATE_WRITE_OP_TOTAL, }; -use crate::sst::file::FileId; use crate::sst::index::store::InstrumentedStore; const INTERMEDIATE_DIR: &str = "__intm"; @@ -275,10 +274,9 @@ mod tests { use common_test_util::temp_dir; use futures::{AsyncReadExt, AsyncWriteExt}; use regex::Regex; - use store_api::storage::RegionId; + use store_api::storage::{FileId, RegionId}; use super::*; - use crate::sst::file::FileId; #[tokio::test] async fn test_manager() { diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index 2a973db781..350880cc9f 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -251,9 +251,9 @@ mod tests { use index::inverted_index::search::index_apply::MockIndexApplier; use object_store::services::Memory; use puffin::puffin_manager::PuffinWriter; + use store_api::storage::FileId; use super::*; - use crate::sst::file::FileId; #[tokio::test] async fn test_index_applier_apply_basic() { diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 2121415f19..608b11b075 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -29,7 +29,7 @@ use mito_codec::row_converter::SortField; use puffin::puffin_manager::{PuffinWriter, PutOptions}; use snafu::{ResultExt, ensure}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::ColumnId; +use store_api::storage::{ColumnId, FileId}; use tokio::io::duplex; use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; @@ -38,7 +38,6 @@ use crate::error::{ PushIndexValueSnafu, Result, }; use crate::read::Batch; -use crate::sst::file::FileId; use crate::sst::index::TYPE_INVERTED_INDEX; use crate::sst::index::intermediate::{ IntermediateLocation, IntermediateManager, TempFileProvider, diff --git a/src/mito2/src/sst/index/puffin_manager.rs b/src/mito2/src/sst/index/puffin_manager.rs index 2c936d2afa..3f8d3f8819 100644 --- a/src/mito2/src/sst/index/puffin_manager.rs +++ b/src/mito2/src/sst/index/puffin_manager.rs @@ -181,9 +181,10 @@ mod tests { use object_store::services::Memory; use puffin::blob_metadata::CompressionCodec; use puffin::puffin_manager::{PuffinManager, PuffinReader, PuffinWriter, PutOptions}; + use store_api::storage::FileId; use super::*; - use crate::sst::file::{FileId, RegionFileId}; + use crate::sst::file::RegionFileId; struct TestFilePathProvider; diff --git a/src/mito2/src/sst/location.rs b/src/mito2/src/sst/location.rs index f1b4e052a6..0f630b3a09 100644 --- a/src/mito2/src/sst/location.rs +++ b/src/mito2/src/sst/location.rs @@ -56,10 +56,9 @@ pub fn index_file_path( #[cfg(test)] mod tests { - use store_api::storage::RegionId; + use store_api::storage::{FileId, RegionId}; use super::*; - use crate::sst::file::FileId; #[test] fn test_sst_file_path() { diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index f03e0947f8..ff2a52769d 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -18,9 +18,10 @@ use std::sync::Arc; use common_base::readable_size::ReadableSize; use parquet::file::metadata::ParquetMetaData; +use store_api::storage::FileId; use crate::sst::DEFAULT_WRITE_BUFFER_SIZE; -use crate::sst::file::{FileId, FileTimeRange}; +use crate::sst::file::FileTimeRange; use crate::sst::index::IndexOutput; pub(crate) mod file_range; diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 56848d7576..d1581f4b85 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -36,7 +36,7 @@ use parquet::format::KeyValue; use snafu::{OptionExt, ResultExt}; use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef}; use store_api::region_request::PathType; -use store_api::storage::ColumnId; +use store_api::storage::{ColumnId, FileId}; use table::predicate::Predicate; use crate::cache::CacheStrategy; @@ -51,7 +51,7 @@ use crate::metrics::{ }; use crate::read::prune::{PruneReader, Source}; use crate::read::{Batch, BatchReader}; -use crate::sst::file::{FileHandle, FileId}; +use crate::sst::file::FileHandle; use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierRef; use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef; use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef; diff --git a/src/mito2/src/sst/parquet/row_group.rs b/src/mito2/src/sst/parquet/row_group.rs index f31db23dfc..d10526057d 100644 --- a/src/mito2/src/sst/parquet/row_group.rs +++ b/src/mito2/src/sst/parquet/row_group.rs @@ -27,13 +27,12 @@ use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData}; use parquet::file::page_index::offset_index::OffsetIndexMetaData; use parquet::file::reader::{ChunkReader, Length}; use parquet::file::serialized_reader::SerializedPageReader; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use tokio::task::yield_now; use crate::cache::file_cache::{FileType, IndexKey}; use crate::cache::{CacheStrategy, PageKey, PageValue}; use crate::metrics::{READ_STAGE_ELAPSED, READ_STAGE_FETCH_PAGES}; -use crate::sst::file::FileId; use crate::sst::parquet::helper::{MERGE_GAP, fetch_byte_ranges}; pub(crate) struct RowGroupBase<'a> { diff --git a/src/mito2/src/sst/parquet/writer.rs b/src/mito2/src/sst/parquet/writer.rs index 3618a03235..4e55c5e367 100644 --- a/src/mito2/src/sst/parquet/writer.rs +++ b/src/mito2/src/sst/parquet/writer.rs @@ -40,8 +40,8 @@ use parquet::schema::types::ColumnPath; use smallvec::smallvec; use snafu::ResultExt; use store_api::metadata::RegionMetadataRef; -use store_api::storage::SequenceNumber; use store_api::storage::consts::SEQUENCE_COLUMN_NAME; +use store_api::storage::{FileId, SequenceNumber}; use tokio::io::AsyncWrite; use tokio_util::compat::{Compat, FuturesAsyncWriteCompatExt}; @@ -50,7 +50,7 @@ use crate::error::{ InvalidMetadataSnafu, OpenDalSnafu, Result, UnexpectedSnafu, WriteParquetSnafu, }; use crate::read::{Batch, FlatSource, Source}; -use crate::sst::file::{FileId, RegionFileId}; +use crate::sst::file::RegionFileId; use crate::sst::index::{Indexer, IndexerBuilder}; use crate::sst::parquet::flat_format::{FlatWriteFormat, time_index_column_index}; use crate::sst::parquet::format::PrimaryKeyWriteFormat; diff --git a/src/mito2/src/sst/version.rs b/src/mito2/src/sst/version.rs index 441b421fbc..e2cfc859e0 100644 --- a/src/mito2/src/sst/version.rs +++ b/src/mito2/src/sst/version.rs @@ -18,8 +18,9 @@ use std::fmt; use std::sync::Arc; use common_time::{TimeToLive, Timestamp}; +use store_api::storage::FileId; -use crate::sst::file::{FileHandle, FileId, FileMeta, Level, MAX_LEVEL}; +use crate::sst::file::{FileHandle, FileMeta, Level, MAX_LEVEL}; use crate::sst::file_purger::FilePurgerRef; /// A version of all SSTs in a region. diff --git a/src/mito2/src/test_util/sst_util.rs b/src/mito2/src/test_util/sst_util.rs index 928ed63c6b..5eacf06bd5 100644 --- a/src/mito2/src/test_util/sst_util.rs +++ b/src/mito2/src/test_util/sst_util.rs @@ -27,10 +27,10 @@ use parquet::file::metadata::ParquetMetaData; use store_api::metadata::{ ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef, }; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use crate::read::{Batch, BatchBuilder, Source}; -use crate::sst::file::{FileHandle, FileId, FileMeta}; +use crate::sst::file::{FileHandle, FileMeta}; use crate::test_util::{VecBatchReader, new_batch_builder, new_noop_file_purger}; /// Test region id. diff --git a/src/mito2/src/test_util/version_util.rs b/src/mito2/src/test_util/version_util.rs index d9d1b5a99c..86cc11eaf5 100644 --- a/src/mito2/src/test_util/version_util.rs +++ b/src/mito2/src/test_util/version_util.rs @@ -25,13 +25,13 @@ use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; use datatypes::schema::ColumnSchema; use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder}; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use crate::manifest::action::RegionEdit; use crate::memtable::time_partition::TimePartitions; use crate::memtable::{KeyValues, MemtableBuilderRef}; use crate::region::version::{Version, VersionBuilder, VersionControl}; -use crate::sst::file::{FileId, FileMeta}; +use crate::sst::file::FileMeta; use crate::sst::file_purger::FilePurgerRef; use crate::test_util::memtable_util::EmptyMemtableBuilder; use crate::test_util::{new_noop_file_purger, ts_ms_value}; diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index 6e60de5c28..ae0e357c21 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -47,7 +47,7 @@ use store_api::logstore::LogStore; use store_api::region_engine::{ SetRegionRoleStateResponse, SetRegionRoleStateSuccess, SettableRegionRoleState, }; -use store_api::storage::RegionId; +use store_api::storage::{FileId, RegionId}; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::{Mutex, mpsc, oneshot, watch}; @@ -66,7 +66,6 @@ use crate::request::{ WorkerRequest, WorkerRequestWithTime, }; use crate::schedule::scheduler::{LocalScheduler, SchedulerRef}; -use crate::sst::file::FileId; use crate::sst::file_ref::FileReferenceManagerRef; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 24d71a579a..3eee78b2d1 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -37,6 +37,7 @@ snafu.workspace = true sqlparser.workspace = true strum.workspace = true tokio.workspace = true +uuid.workspace = true [dev-dependencies] async-stream.workspace = true diff --git a/src/store-api/src/storage.rs b/src/store-api/src/storage.rs index 97e25542b1..a8f872cd70 100644 --- a/src/store-api/src/storage.rs +++ b/src/store-api/src/storage.rs @@ -16,6 +16,7 @@ pub mod consts; mod descriptors; +mod file; mod requests; mod types; @@ -25,5 +26,6 @@ pub use datatypes::schema::{ }; pub use self::descriptors::*; +pub use self::file::{FileId, ParseIdError}; pub use self::requests::{ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector}; pub use self::types::SequenceNumber; diff --git a/src/store-api/src/storage/file.rs b/src/store-api/src/storage/file.rs new file mode 100644 index 0000000000..6e2fa334e4 --- /dev/null +++ b/src/store-api/src/storage/file.rs @@ -0,0 +1,95 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; +use std::fmt::Debug; +use std::str::FromStr; + +use serde::{Deserialize, Serialize}; +use snafu::{ResultExt, Snafu}; +use uuid::Uuid; + +#[derive(Debug, Snafu, PartialEq)] +pub struct ParseIdError { + source: uuid::Error, +} + +/// Unique id for [SST File]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] +pub struct FileId(Uuid); + +impl FileId { + /// Returns a new unique [FileId] randomly. + pub fn random() -> FileId { + FileId(Uuid::new_v4()) + } + + /// Parses id from string. + pub fn parse_str(input: &str) -> std::result::Result { + Uuid::parse_str(input).map(FileId).context(ParseIdSnafu) + } + + /// Converts [FileId] as byte slice. + pub fn as_bytes(&self) -> &[u8] { + self.0.as_bytes() + } +} + +impl From for Uuid { + fn from(value: FileId) -> Self { + value.0 + } +} + +impl fmt::Display for FileId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for FileId { + type Err = ParseIdError; + + fn from_str(s: &str) -> std::result::Result { + FileId::parse_str(s) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_file_id() { + let id = FileId::random(); + let uuid_str = id.to_string(); + assert_eq!(id.0.to_string(), uuid_str); + + let parsed = FileId::parse_str(&uuid_str).unwrap(); + assert_eq!(id, parsed); + let parsed = uuid_str.parse().unwrap(); + assert_eq!(id, parsed); + } + + #[test] + fn test_file_id_serialization() { + let id = FileId::random(); + let json = serde_json::to_string(&id).unwrap(); + assert_eq!(format!("\"{id}\""), json); + + let parsed = serde_json::from_str(&json).unwrap(); + assert_eq!(id, parsed); + } +}