mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 12:00:40 +00:00
* feat: import resume part2 Signed-off-by: jeremyhi <fengjiachun@gmail.com> * fix: by AI comments Signed-off-by: jeremyhi <fengjiachun@gmail.com> * fix: by AI comments Signed-off-by: jeremyhi <fengjiachun@gmail.com> * fix: by comments Signed-off-by: jeremyhi <fengjiachun@gmail.com> * fix: condig docs Signed-off-by: jeremyhi <fengjiachun@gmail.com> --------- Signed-off-by: jeremyhi <fengjiachun@gmail.com>
975 lines
31 KiB
Rust
975 lines
31 KiB
Rust
// Copyright 2023 Greptime Team
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
//! Import V2 CLI command.
|
|
|
|
use std::collections::HashSet;
|
|
use std::time::Duration;
|
|
|
|
use async_trait::async_trait;
|
|
use clap::Parser;
|
|
use common_error::ext::BoxedError;
|
|
use common_telemetry::info;
|
|
use snafu::{OptionExt, ResultExt};
|
|
|
|
use crate::Tool;
|
|
use crate::common::ObjectStoreConfig;
|
|
use crate::data::export_v2::data::{build_copy_source, execute_copy_database_from};
|
|
use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, DataFormat, MANIFEST_VERSION};
|
|
use crate::data::import_v2::coordinator::{
|
|
ImportResumeConfig, ImportTaskExecutor, build_import_tasks, chunk_has_schema_files,
|
|
import_with_resume_session, prepare_import_resume,
|
|
};
|
|
use crate::data::import_v2::error::{
|
|
ChunkImportFailedSnafu, EmptyChunkManifestSnafu, ImportStatePathUnavailableSnafu,
|
|
IncompleteSnapshotSnafu, ManifestVersionMismatchSnafu, MissingChunkDataSnafu, Result,
|
|
SchemaNotInSnapshotSnafu, SnapshotStorageSnafu,
|
|
};
|
|
use crate::data::import_v2::executor::{DdlExecutor, DdlStatement};
|
|
use crate::data::import_v2::state::{ImportTaskKey, default_state_path};
|
|
use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
|
|
use crate::data::snapshot_storage::{OpenDalStorage, SnapshotStorage, validate_uri};
|
|
use crate::database::{DatabaseClient, parse_proxy_opts};
|
|
|
|
/// Import from a snapshot.
// NOTE: the `///` doc comments on the struct and its fields double as the
// `--help` text via clap's derive macro — they are user-facing and must stay
// accurate if flags change.
#[derive(Debug, Parser)]
pub struct ImportV2Command {
    /// Server address to connect (e.g., 127.0.0.1:4000).
    #[clap(long)]
    addr: String,

    /// Source snapshot location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    from: String,

    /// Target catalog name.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Schema list to import (default: all in snapshot).
    /// Can be specified multiple times or comma-separated.
    // An empty Vec is normalized to "all schemas" in `build()`.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Verify without importing (dry-run).
    #[clap(long)]
    dry_run: bool,

    /// Basic authentication (user:password).
    #[clap(long)]
    auth_basic: Option<String>,

    /// Request timeout.
    // Defaults to 60s in `build()` when not provided.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy server address.
    ///
    /// If set, it overrides the system proxy unless `--no-proxy` is specified.
    /// If neither `--proxy` nor `--no-proxy` is set, system proxy (env) may be used.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable all proxy usage (ignores `--proxy` and system proxy).
    ///
    /// When set and `--proxy` is not provided, this explicitly disables system proxy.
    #[clap(long)]
    no_proxy: bool,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
|
|
|
|
impl ImportV2Command {
    /// Validates the CLI arguments and assembles the boxed [`Import`] tool.
    ///
    /// Steps: validate the snapshot URI, normalize the schema filter (an
    /// empty list means "all schemas in the snapshot"), open the snapshot
    /// storage backend, and build the database client.
    ///
    /// # Errors
    /// Returns an error when the URI is malformed, the storage backend
    /// cannot be constructed, or the proxy options are invalid.
    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
        // Validate URI format
        validate_uri(&self.from)
            .context(SnapshotStorageSnafu)
            .map_err(BoxedError::new)?;

        // Parse schemas (empty vec means all schemas)
        let schemas = if self.schemas.is_empty() {
            None
        } else {
            Some(self.schemas.clone())
        };

        // Build storage
        let storage = OpenDalStorage::from_uri(&self.from, &self.storage)
            .context(SnapshotStorageSnafu)
            .map_err(BoxedError::new)?;

        // Build database client
        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
        let database_client = DatabaseClient::new(
            self.addr.clone(),
            self.catalog.clone(),
            self.auth_basic.clone(),
            // Default request timeout is 60s when not specified on the CLI.
            self.timeout.unwrap_or(Duration::from_secs(60)),
            proxy,
            self.no_proxy,
        );

        Ok(Box::new(Import {
            catalog: self.catalog.clone(),
            schemas,
            dry_run: self.dry_run,
            snapshot_uri: self.from.clone(),
            storage_config: self.storage.clone(),
            storage: Box::new(storage),
            database_client,
        }))
    }
}
|
|
|
|
/// Import tool implementation.
pub struct Import {
    /// Target catalog name on the destination server.
    catalog: String,
    /// Schema filter; `None` imports every schema found in the snapshot.
    schemas: Option<Vec<String>>,
    /// When true, only print the DDL statements and data plan, then exit.
    dry_run: bool,
    /// Snapshot location as given on the command line (e.g. `s3://…`).
    snapshot_uri: String,
    /// Object-store settings, reused to build per-chunk COPY sources.
    storage_config: ObjectStoreConfig,
    /// Snapshot storage backend used to read the manifest and DDL files.
    storage: Box<dyn SnapshotStorage>,
    /// Client used for DDL execution and COPY statements.
    database_client: DatabaseClient,
}
|
|
|
|
#[async_trait]
|
|
impl Tool for Import {
|
|
async fn do_work(&self) -> std::result::Result<(), BoxedError> {
|
|
self.run().await.map_err(BoxedError::new)
|
|
}
|
|
}
|
|
|
|
impl Import {
    /// Drives the whole import flow:
    /// 1. read and version-check the snapshot manifest,
    /// 2. resolve the schema list to import,
    /// 3. read and parse the DDL statements of those schemas,
    /// 4. in dry-run mode print the DDL and data plan, then exit,
    /// 5. otherwise execute DDL (unless a resume session recorded it as
    ///    already completed) and import data chunks with resume support.
    async fn run(&self) -> Result<()> {
        // 1. Read manifest
        let manifest = self
            .storage
            .read_manifest()
            .await
            .context(SnapshotStorageSnafu)?;

        info!(
            "Loading snapshot: {} (version: {}, schema_only: {})",
            manifest.snapshot_id, manifest.version, manifest.schema_only
        );

        // Check version compatibility
        if manifest.version != MANIFEST_VERSION {
            return ManifestVersionMismatchSnafu {
                expected: MANIFEST_VERSION,
                found: manifest.version,
            }
            .fail();
        }

        info!("Snapshot contains {} schema(s)", manifest.schemas.len());

        // 2. Determine schemas to import
        let schemas_to_import = match &self.schemas {
            Some(filter) => canonicalize_schema_filter(filter, &manifest.schemas)?,
            None => manifest.schemas.clone(),
        };

        info!("Importing schemas: {:?}", schemas_to_import);

        // 3. Read DDL statements
        let ddl_statements = self.read_ddl_statements(&schemas_to_import).await?;

        info!("Generated {} DDL statements", ddl_statements.len());

        // Build data-import tasks only for snapshots that carry data chunks.
        // Validation runs before the dry-run branch below so a broken
        // snapshot is reported even in dry-run mode.
        let data_tasks = if !manifest.schema_only && !manifest.chunks.is_empty() {
            validate_data_snapshot(self.storage.as_ref(), &manifest.chunks, &schemas_to_import)
                .await?;
            build_import_tasks(&manifest.chunks, &schemas_to_import)
        } else {
            Vec::new()
        };

        // 4. Dry-run mode: print DDL and exit
        if self.dry_run {
            info!("Dry-run mode - DDL statements to execute:");
            println!();
            for (i, stmt) in ddl_statements.iter().enumerate() {
                println!("-- Statement {}", i + 1);
                println!("{};", stmt.sql);
                println!();
            }
            if !manifest.schema_only && !manifest.chunks.is_empty() {
                for line in format_data_import_plan(&manifest.chunks, &schemas_to_import) {
                    println!("{line}");
                }
                println!();
            }
            return Ok(());
        }

        // Prepare a resume session keyed by snapshot/target/catalog/schemas
        // so an interrupted import can continue where it left off.
        let mut resume_session = if !data_tasks.is_empty() {
            let state_path = default_state_path(
                &manifest.snapshot_id.to_string(),
                self.database_client.addr(),
                &self.catalog,
                &schemas_to_import,
            )
            .context(ImportStatePathUnavailableSnafu {
                snapshot_id: manifest.snapshot_id.to_string(),
            })?;
            Some(
                prepare_import_resume(ImportResumeConfig {
                    snapshot_id: manifest.snapshot_id.to_string(),
                    target_addr: self.database_client.addr().to_string(),
                    catalog: self.catalog.clone(),
                    schemas: schemas_to_import.clone(),
                    state_path,
                    tasks: data_tasks,
                })
                .await?,
            )
        } else {
            None
        };

        // A previous run may have persisted DDL completion in the state file.
        let skip_ddl = resume_session
            .as_ref()
            .map(|session| session.should_skip_ddl())
            .unwrap_or(false);

        // 5. Execute DDL unless a previous run already completed it.
        let ddl_executed = if skip_ddl {
            info!(
                "Existing import state has DDL marked completed; skipping DDL execution and resuming data import"
            );
            false
        } else {
            let executor = DdlExecutor::new(&self.database_client);
            executor.execute_strict(&ddl_statements).await?;
            // Persist DDL completion so a later resume can skip it.
            if let Some(session) = resume_session.as_mut() {
                session.mark_ddl_completed().await?;
            }
            true
        };

        // Import data chunks (if any) through the resume session.
        if let Some(resume_session) = resume_session {
            let executor = CopyDatabaseImportTaskExecutor {
                import: self,
                format: manifest.format,
            };
            import_with_resume_session(resume_session, &executor).await?;
        }

        if ddl_executed {
            info!(
                "Import completed: {} DDL statements executed",
                ddl_statements.len()
            );
        } else {
            info!("Import completed: DDL execution skipped");
        }

        Ok(())
    }

    /// Reads each schema's DDL file from the snapshot, splits it into
    /// individual statements, and tags schema-scoped statements
    /// (tables/views) with the schema they must be executed in.
    async fn read_ddl_statements(&self, schemas: &[String]) -> Result<Vec<DdlStatement>> {
        let mut statements = Vec::new();
        for schema in schemas {
            let path = ddl_path_for_schema(schema);
            let content = self
                .storage
                .read_text(&path)
                .await
                .context(SnapshotStorageSnafu)?;
            statements.extend(
                parse_ddl_statements(&content)
                    .into_iter()
                    .map(|sql| ddl_statement_for_schema(schema, sql)),
            );
        }

        Ok(statements)
    }
}
|
|
|
|
/// Executes per-chunk import tasks by issuing `COPY DATABASE ... FROM`
/// statements against the target database.
struct CopyDatabaseImportTaskExecutor<'a> {
    /// Parent import context (client, catalog, snapshot location, storage config).
    import: &'a Import,
    /// Data file format taken from the snapshot manifest.
    format: DataFormat,
}
|
|
|
|
#[async_trait]
|
|
impl ImportTaskExecutor for CopyDatabaseImportTaskExecutor<'_> {
|
|
async fn import_task(&self, task: &ImportTaskKey) -> Result<()> {
|
|
let source = build_copy_source(
|
|
&self.import.snapshot_uri,
|
|
&self.import.storage_config,
|
|
&task.schema,
|
|
task.chunk_id,
|
|
)
|
|
.context(ChunkImportFailedSnafu {
|
|
chunk_id: task.chunk_id,
|
|
schema: task.schema.clone(),
|
|
})?;
|
|
|
|
execute_copy_database_from(
|
|
&self.import.database_client,
|
|
&self.import.catalog,
|
|
&task.schema,
|
|
&source,
|
|
self.format,
|
|
)
|
|
.await
|
|
.context(ChunkImportFailedSnafu {
|
|
chunk_id: task.chunk_id,
|
|
schema: task.schema.clone(),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Splits the raw DDL text of a schema into individual SQL statements.
///
/// Statements are separated by `;` at the top level. The splitter is aware of:
/// - single-quoted strings with `''` escapes (semicolons inside are kept),
/// - double-quoted identifiers with `""` escapes,
/// - `-- …` line comments and `/* … */` block comments (both are stripped).
///
/// Comments are replaced by whitespace (a newline for line comments, a space
/// for block comments) so that tokens on either side of a stripped comment do
/// not fuse together (e.g. `CREATE/*x*/TABLE` must stay `CREATE TABLE`, not
/// become `CREATETABLE`).
///
/// Empty statements (trailing semicolons, comment-only regions) are dropped;
/// each returned statement is trimmed.
fn parse_ddl_statements(content: &str) -> Vec<String> {
    let mut statements = Vec::new();
    let mut current = String::new();
    let mut chars = content.chars().peekable();
    // Lexer state: at most one of these is true at a time.
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut in_line_comment = false;
    let mut in_block_comment = false;

    while let Some(ch) = chars.next() {
        if in_line_comment {
            if ch == '\n' {
                in_line_comment = false;
                // Keep the newline so tokens on adjacent lines stay separated.
                current.push('\n');
            }
            continue;
        }

        if in_block_comment {
            if ch == '*' && chars.peek() == Some(&'/') {
                chars.next();
                in_block_comment = false;
                // A block comment acts as whitespace in SQL; emit a single
                // space so surrounding tokens do not fuse.
                current.push(' ');
            }
            continue;
        }

        if in_single_quote {
            current.push(ch);
            if ch == '\'' {
                // `''` inside a string is an escaped quote, not a terminator.
                if chars.peek() == Some(&'\'') {
                    current.push(chars.next().expect("peeked quote must exist"));
                } else {
                    in_single_quote = false;
                }
            }
            continue;
        }

        if in_double_quote {
            current.push(ch);
            if ch == '"' {
                // `""` inside a quoted identifier is an escaped quote.
                if chars.peek() == Some(&'"') {
                    current.push(chars.next().expect("peeked quote must exist"));
                } else {
                    in_double_quote = false;
                }
            }
            continue;
        }

        match ch {
            '-' if chars.peek() == Some(&'-') => {
                chars.next();
                in_line_comment = true;
            }
            '/' if chars.peek() == Some(&'*') => {
                chars.next();
                in_block_comment = true;
            }
            '\'' => {
                in_single_quote = true;
                current.push(ch);
            }
            '"' => {
                in_double_quote = true;
                current.push(ch);
            }
            ';' => {
                // Top-level statement terminator.
                let statement = current.trim();
                if !statement.is_empty() {
                    statements.push(statement.to_string());
                }
                current.clear();
            }
            _ => current.push(ch),
        }
    }

    // Flush a trailing statement that has no terminating semicolon.
    let statement = current.trim();
    if !statement.is_empty() {
        statements.push(statement.to_string());
    }

    statements
}
|
|
|
|
fn ddl_statement_for_schema(schema: &str, sql: String) -> DdlStatement {
|
|
if is_schema_scoped_statement(&sql) {
|
|
DdlStatement::with_execution_schema(sql, schema.to_string())
|
|
} else {
|
|
DdlStatement::new(sql)
|
|
}
|
|
}
|
|
|
|
/// Returns true when `sql` creates a schema-scoped object, i.e. it is a
/// `CREATE [OR REPLACE] [EXTERNAL] TABLE|VIEW …` statement (keywords matched
/// case-insensitively at word boundaries). Everything else — notably
/// `CREATE DATABASE` — is not schema-scoped.
fn is_schema_scoped_statement(sql: &str) -> bool {
    // Strips a leading keyword (case-insensitive, word-bounded) plus any
    // following whitespace; returns None when `input` does not begin with it.
    fn strip_keyword<'a>(input: &'a str, keyword: &str) -> Option<&'a str> {
        let head = input.get(..keyword.len())?;
        if !head.eq_ignore_ascii_case(keyword) {
            return None;
        }
        // The keyword must end at a word boundary (end of input or a
        // non-identifier byte).
        if let Some(b) = input.as_bytes().get(keyword.len()) {
            if b.is_ascii_alphanumeric() || *b == b'_' {
                return None;
            }
        }
        Some(input[keyword.len()..].trim_start())
    }

    let Some(mut rest) = strip_keyword(sql.trim_start(), "CREATE") else {
        return false;
    };

    // `CREATE OR REPLACE …` — a leading OR must be followed by REPLACE.
    if let Some(after_or) = strip_keyword(rest, "OR") {
        let Some(after_replace) = strip_keyword(after_or, "REPLACE") else {
            return false;
        };
        rest = after_replace;
    }

    // Optional EXTERNAL qualifier (external tables).
    if let Some(after_external) = strip_keyword(rest, "EXTERNAL") {
        rest = after_external;
    }

    strip_keyword(rest, "TABLE").is_some() || strip_keyword(rest, "VIEW").is_some()
}
|
|
|
|
/// Returns true when `input` begins with `keyword` (ASCII case-insensitive)
/// at a word boundary: the keyword is followed by end-of-input or by a byte
/// that cannot be part of an identifier (not alphanumeric, not `_`).
fn starts_with_keyword(input: &str, keyword: &str) -> bool {
    // `get` returns None when input is shorter than the keyword (or the slice
    // would split a multi-byte character) — not a match either way.
    let Some(head) = input.get(..keyword.len()) else {
        return false;
    };
    if !head.eq_ignore_ascii_case(keyword) {
        return false;
    }
    match input.as_bytes().get(keyword.len()) {
        None => true,
        Some(b) => !b.is_ascii_alphanumeric() && *b != b'_',
    }
}
|
|
|
|
fn canonicalize_schema_filter(
|
|
filter: &[String],
|
|
manifest_schemas: &[String],
|
|
) -> Result<Vec<String>> {
|
|
let mut canonicalized = Vec::new();
|
|
let mut seen = HashSet::new();
|
|
|
|
for schema in filter {
|
|
let canonical = manifest_schemas
|
|
.iter()
|
|
.find(|candidate| candidate.eq_ignore_ascii_case(schema))
|
|
.cloned()
|
|
.ok_or_else(|| {
|
|
SchemaNotInSnapshotSnafu {
|
|
schema: schema.clone(),
|
|
}
|
|
.build()
|
|
})?;
|
|
|
|
if seen.insert(canonical.to_ascii_lowercase()) {
|
|
canonicalized.push(canonical);
|
|
}
|
|
}
|
|
|
|
Ok(canonicalized)
|
|
}
|
|
|
|
fn validate_chunk_statuses(chunks: &[ChunkMeta]) -> Result<()> {
|
|
let invalid_chunk = chunks
|
|
.iter()
|
|
.find(|chunk| !matches!(chunk.status, ChunkStatus::Completed | ChunkStatus::Skipped));
|
|
|
|
if let Some(chunk) = invalid_chunk {
|
|
return IncompleteSnapshotSnafu {
|
|
chunk_id: chunk.id,
|
|
status: chunk.status,
|
|
}
|
|
.fail();
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn format_data_import_plan(chunks: &[ChunkMeta], schemas: &[String]) -> Vec<String> {
|
|
let mut lines = vec!["-- Data import plan:".to_string()];
|
|
for chunk in chunks {
|
|
lines.push(format!("-- Chunk {}: {:?}", chunk.id, chunk.status));
|
|
for schema in schemas {
|
|
if chunk_has_schema_files(chunk, schema) {
|
|
lines.push(format!("-- {} -> COPY DATABASE FROM", schema));
|
|
}
|
|
}
|
|
}
|
|
lines
|
|
}
|
|
|
|
async fn validate_data_snapshot(
|
|
storage: &dyn SnapshotStorage,
|
|
chunks: &[ChunkMeta],
|
|
schemas: &[String],
|
|
) -> Result<()> {
|
|
validate_chunk_statuses(chunks)?;
|
|
let actual_prefixes = collect_chunk_data_prefixes(storage).await?;
|
|
|
|
for chunk in chunks {
|
|
if chunk.status == ChunkStatus::Skipped {
|
|
continue;
|
|
}
|
|
if chunk.files.is_empty() {
|
|
return EmptyChunkManifestSnafu { chunk_id: chunk.id }.fail();
|
|
}
|
|
for schema in schemas {
|
|
validate_chunk_schema_files(chunk, schema, &actual_prefixes)?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn collect_chunk_data_prefixes(storage: &dyn SnapshotStorage) -> Result<HashSet<String>> {
|
|
let files = storage
|
|
.list_files_recursive("data/")
|
|
.await
|
|
.context(SnapshotStorageSnafu)?;
|
|
let mut prefixes = HashSet::new();
|
|
|
|
for path in files {
|
|
let normalized = path.trim_start_matches('/');
|
|
let mut parts = normalized.splitn(4, '/');
|
|
let Some(root) = parts.next() else {
|
|
continue;
|
|
};
|
|
let Some(schema) = parts.next() else {
|
|
continue;
|
|
};
|
|
let Some(chunk_id) = parts.next() else {
|
|
continue;
|
|
};
|
|
if root != "data" {
|
|
continue;
|
|
}
|
|
prefixes.insert(format!("data/{schema}/{chunk_id}/"));
|
|
}
|
|
|
|
Ok(prefixes)
|
|
}
|
|
|
|
fn validate_chunk_schema_files(
|
|
chunk: &ChunkMeta,
|
|
schema: &str,
|
|
actual_prefixes: &HashSet<String>,
|
|
) -> Result<bool> {
|
|
if !chunk_has_schema_files(chunk, schema) {
|
|
return Ok(false);
|
|
}
|
|
|
|
let prefix = data_dir_for_schema_chunk(schema, chunk.id);
|
|
if !actual_prefixes.contains(&prefix) {
|
|
return MissingChunkDataSnafu {
|
|
chunk_id: chunk.id,
|
|
schema: schema.to_string(),
|
|
path: prefix,
|
|
}
|
|
.fail();
|
|
}
|
|
|
|
Ok(true)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use std::collections::{HashMap, HashSet};

    use async_trait::async_trait;

    use super::*;
    use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, Manifest, TimeRange};
    use crate::data::export_v2::schema::SchemaSnapshot;
    use crate::data::snapshot_storage::SnapshotStorage;

    /// In-memory [`SnapshotStorage`] stub: serves a fixed manifest and a
    /// prefix-keyed file listing; every operation the command tests never
    /// exercise panics via `unimplemented!`.
    struct StubStorage {
        manifest: Manifest,
        files_by_prefix: HashMap<String, Vec<String>>,
    }

    #[async_trait]
    impl SnapshotStorage for StubStorage {
        async fn exists(&self) -> crate::data::export_v2::error::Result<bool> {
            Ok(true)
        }

        async fn read_manifest(&self) -> crate::data::export_v2::error::Result<Manifest> {
            Ok(self.manifest.clone())
        }

        async fn write_manifest(
            &self,
            _manifest: &Manifest,
        ) -> crate::data::export_v2::error::Result<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn read_text(&self, _path: &str) -> crate::data::export_v2::error::Result<String> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn write_text(
            &self,
            _path: &str,
            _content: &str,
        ) -> crate::data::export_v2::error::Result<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn write_schema(
            &self,
            _snapshot: &SchemaSnapshot,
        ) -> crate::data::export_v2::error::Result<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn create_dir_all(&self, _path: &str) -> crate::data::export_v2::error::Result<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn list_files_recursive(
            &self,
            prefix: &str,
        ) -> crate::data::export_v2::error::Result<Vec<String>> {
            // Return the union of all file lists whose key starts with `prefix`.
            Ok(self
                .files_by_prefix
                .iter()
                .filter(|(candidate, _)| candidate.starts_with(prefix))
                .flat_map(|(_, files)| files.clone())
                .collect())
        }

        async fn delete_snapshot(&self) -> crate::data::export_v2::error::Result<()> {
            unimplemented!("not needed in import_v2::command tests")
        }
    }

    // --- DDL statement splitting ---

    #[test]
    fn test_parse_ddl_statements() {
        let content = r#"
-- Schema: public
CREATE DATABASE public;
CREATE TABLE t (ts TIMESTAMP TIME INDEX, host STRING, PRIMARY KEY (host)) ENGINE=mito;

-- comment
CREATE VIEW v AS SELECT * FROM t;
"#;
        let statements = parse_ddl_statements(content);
        assert_eq!(statements.len(), 3);
        assert!(statements[0].starts_with("CREATE DATABASE public"));
        assert!(statements[1].starts_with("CREATE TABLE t"));
        assert!(statements[2].starts_with("CREATE VIEW v"));
    }

    #[test]
    fn test_parse_ddl_statements_preserves_semicolons_in_string_literals() {
        let content = r#"
CREATE TABLE t (
host STRING DEFAULT 'a;b'
);
CREATE VIEW v AS SELECT ';' AS marker;
"#;

        let statements = parse_ddl_statements(content);

        assert_eq!(statements.len(), 2);
        assert!(statements[0].contains("'a;b'"));
        assert!(statements[1].contains("';' AS marker"));
    }

    #[test]
    fn test_parse_ddl_statements_handles_comments_without_splitting() {
        let content = r#"
-- leading comment
CREATE TABLE t (ts TIMESTAMP TIME INDEX); /* block; comment */
CREATE VIEW v AS SELECT 1;
"#;

        let statements = parse_ddl_statements(content);

        assert_eq!(statements.len(), 2);
        assert!(statements[0].starts_with("CREATE TABLE t"));
        assert!(statements[1].starts_with("CREATE VIEW v"));
    }

    // --- Schema filter canonicalization ---

    #[test]
    fn test_canonicalize_schema_filter_uses_manifest_casing() {
        let filter = vec!["TEST_DB".to_string(), "PUBLIC".to_string()];
        let manifest_schemas = vec!["test_db".to_string(), "public".to_string()];

        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();

        assert_eq!(canonicalized, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_dedupes_case_insensitive_matches() {
        let filter = vec![
            "TEST_DB".to_string(),
            "test_db".to_string(),
            "PUBLIC".to_string(),
            "public".to_string(),
        ];
        let manifest_schemas = vec!["test_db".to_string(), "public".to_string()];

        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();

        assert_eq!(canonicalized, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_rejects_missing_schema() {
        let filter = vec!["missing".to_string()];
        let manifest_schemas = vec!["test_db".to_string()];

        let error = canonicalize_schema_filter(&filter, &manifest_schemas)
            .expect_err("missing schema should fail")
            .to_string();

        assert!(error.contains("missing"));
    }

    // --- Execution-schema tagging ---

    #[test]
    fn test_ddl_statement_for_schema_create_table_uses_execution_schema() {
        let stmt = ddl_statement_for_schema(
            "test_db",
            "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX) ENGINE=mito".to_string(),
        );
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_view_uses_execution_schema() {
        let stmt = ddl_statement_for_schema(
            "test_db",
            "CREATE VIEW metrics_view AS SELECT * FROM metrics".to_string(),
        );
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_or_replace_view_uses_execution_schema() {
        let stmt = ddl_statement_for_schema(
            "test_db",
            "CREATE OR REPLACE VIEW metrics_view AS SELECT * FROM metrics".to_string(),
        );
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_external_table_uses_execution_schema() {
        let stmt = ddl_statement_for_schema(
            "test_db",
            "CREATE EXTERNAL TABLE IF NOT EXISTS ext_metrics (ts TIMESTAMP TIME INDEX) ENGINE=file"
                .to_string(),
        );
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_database_uses_public_context() {
        // CREATE DATABASE is not schema-scoped, so no execution schema is set.
        let stmt = ddl_statement_for_schema("test_db", "CREATE DATABASE test_db".to_string());
        assert_eq!(stmt.execution_schema, None);
    }

    #[test]
    fn test_starts_with_keyword_requires_word_boundary() {
        assert!(starts_with_keyword("CREATE TABLE t", "CREATE"));
        assert!(!starts_with_keyword("CREATED TABLE t", "CREATE"));
        assert!(!starts_with_keyword("TABLESPACE foo", "TABLE"));
    }

    // --- Snapshot validation ---

    #[test]
    fn test_validate_chunk_statuses_rejects_failed_chunk() {
        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
        failed.status = ChunkStatus::Failed;

        let error = validate_chunk_statuses(&[failed]).expect_err("failed chunk should error");
        assert!(error.to_string().contains("Incomplete snapshot"));
    }

    #[test]
    fn test_validate_chunk_statuses_accepts_completed_and_skipped_chunks() {
        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());

        assert!(validate_chunk_statuses(&[completed, skipped]).is_ok());
    }

    #[test]
    fn test_chunk_has_schema_files_matches_encoded_schema_prefix() {
        // `%E6%B5%8B%E8%AF%95` is the percent-encoded form of `测试`.
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = vec![
            "data/public/7/a.parquet".to_string(),
            "data/%E6%B5%8B%E8%AF%95/7/b.parquet".to_string(),
        ];

        assert!(chunk_has_schema_files(&chunk, "public"));
        assert!(chunk_has_schema_files(&chunk, "测试"));
        assert!(!chunk_has_schema_files(&chunk, "metrics"));
    }

    #[test]
    fn test_format_data_import_plan_includes_matching_schemas_only() {
        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        completed.files = vec![
            "data/public/1/a.parquet".to_string(),
            "data/%E6%B5%8B%E8%AF%95/1/b.parquet".to_string(),
        ];
        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());

        let lines = format_data_import_plan(
            &[completed, skipped],
            &[
                "public".to_string(),
                "测试".to_string(),
                "metrics".to_string(),
            ],
        );

        assert_eq!(lines[0], "-- Data import plan:");
        assert!(lines.contains(&"-- Chunk 1: Completed".to_string()));
        assert!(lines.contains(&"-- public -> COPY DATABASE FROM".to_string()));
        assert!(lines.contains(&"-- 测试 -> COPY DATABASE FROM".to_string()));
        assert!(!lines.contains(&"-- metrics -> COPY DATABASE FROM".to_string()));
        assert!(lines.contains(&"-- Chunk 2: Skipped".to_string()));
    }

    #[tokio::test]
    async fn test_collect_chunk_data_prefixes_indexes_present_prefixes() {
        let storage = StubStorage {
            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
            files_by_prefix: HashMap::from([
                (
                    "data/public/7/".to_string(),
                    vec!["data/public/7/a.parquet".to_string()],
                ),
                (
                    "data/%E6%B5%8B%E8%AF%95/9/".to_string(),
                    vec!["data/%E6%B5%8B%E8%AF%95/9/b.parquet".to_string()],
                ),
            ]),
        };

        let prefixes = collect_chunk_data_prefixes(&storage).await.unwrap();

        assert!(prefixes.contains("data/public/7/"));
        assert!(prefixes.contains("data/%E6%B5%8B%E8%AF%95/9/"));
    }

    #[test]
    fn test_validate_chunk_schema_files_accepts_present_prefix() {
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = vec!["data/public/7/a.parquet".to_string()];
        let actual_prefixes = HashSet::from(["data/public/7/".to_string()]);

        assert!(validate_chunk_schema_files(&chunk, "public", &actual_prefixes).unwrap());
    }

    #[test]
    fn test_validate_chunk_schema_files_rejects_missing_prefix() {
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = vec!["data/public/7/a.parquet".to_string()];

        let error = validate_chunk_schema_files(&chunk, "public", &HashSet::new())
            .expect_err("missing chunk prefix should fail")
            .to_string();
        assert!(error.contains("marked completed but no files were found"));
    }

    #[test]
    fn test_validate_chunk_schema_files_skips_absent_schema() {
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = vec!["data/public/7/a.parquet".to_string()];

        assert!(!validate_chunk_schema_files(&chunk, "metrics", &HashSet::new()).unwrap());
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_failed_chunk_before_dry_run() {
        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
        failed.status = ChunkStatus::Failed;

        let storage = StubStorage {
            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
            files_by_prefix: HashMap::new(),
        };

        let error = validate_data_snapshot(&storage, &[failed], &["public".to_string()])
            .await
            .expect_err("failed chunk should reject dry-run validation")
            .to_string();
        assert!(error.contains("Incomplete snapshot"));
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_missing_chunk_prefix_before_dry_run() {
        let mut completed = ChunkMeta::new(7, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        completed.files = vec!["data/public/7/a.parquet".to_string()];

        let storage = StubStorage {
            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
            files_by_prefix: HashMap::new(),
        };

        let error = validate_data_snapshot(&storage, &[completed], &["public".to_string()])
            .await
            .expect_err("missing chunk prefix should reject dry-run validation")
            .to_string();
        assert!(error.contains("marked completed but no files were found"));
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_completed_chunk_with_empty_manifest() {
        let mut completed = ChunkMeta::new(7, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;

        let storage = StubStorage {
            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
            files_by_prefix: HashMap::new(),
        };

        let error = validate_data_snapshot(&storage, &[completed], &["public".to_string()])
            .await
            .expect_err("empty completed chunk should reject validation")
            .to_string();
        assert!(error.contains("file manifest is empty"));
    }
}
|