feat: don't validate external table's region schema (#7268)

* feat: don't validate external table's region schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-11-21 11:28:14 +08:00
committed by GitHub
parent ec8263b464
commit 25fab2ba7d
2 changed files with 154 additions and 2 deletions

View File

@@ -94,7 +94,9 @@ impl FileRegionManifest {
builder.push_column_metadata(column.clone());
}
builder.primary_key(self.primary_key.clone());
let metadata = builder.build().context(InvalidMetadataSnafu)?;
let metadata = builder
.build_without_validation()
.context(InvalidMetadataSnafu)?;
Ok(Arc::new(metadata))
}
@@ -127,3 +129,49 @@ impl FileRegionManifest {
.context(MissingRequiredFieldSnafu { name })
}
}
#[cfg(test)]
mod tests {
use api::v1::SemanticType;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use super::*;
#[test]
fn metadata_allows_internal_column_name() {
let manifest = FileRegionManifest {
region_id: RegionId::new(1, 0),
column_metadatas: vec![
ColumnMetadata {
column_schema: ColumnSchema::new(
"__primary_key",
ConcreteDataType::string_datatype(),
false,
),
semantic_type: SemanticType::Tag,
column_id: 1,
},
ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 2,
},
],
primary_key: vec![1],
options: HashMap::default(),
};
let metadata = manifest.metadata().unwrap();
assert!(
metadata
.column_metadatas
.iter()
.any(|c| c.column_schema.name == "__primary_key")
);
}
}

View File

@@ -640,6 +640,18 @@ impl RegionMetadataBuilder {
/// Consumes the builder and build a [RegionMetadata].
pub fn build(self) -> Result<RegionMetadata> {
self.build_with_options(true)
}
/// Builds metadata without running validation.
///
/// Intended for file/external engines that should accept arbitrary schemas
/// coming from files.
pub fn build_without_validation(self) -> Result<RegionMetadata> {
self.build_with_options(false)
}
fn build_with_options(self, validate: bool) -> Result<RegionMetadata> {
let skipped = SkippedFields::new(&self.column_metadatas)?;
let meta = RegionMetadata {
@@ -654,7 +666,9 @@ impl RegionMetadataBuilder {
partition_expr: self.partition_expr,
};
meta.validate()?;
if validate {
meta.validate()?;
}
Ok(meta)
}
@@ -1929,6 +1943,96 @@ mod test {
);
}
#[test]
fn test_allow_internal_column_name() {
let mut builder = create_builder();
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"__primary_key",
ConcreteDataType::string_datatype(),
false,
),
semantic_type: SemanticType::Tag,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 2,
})
.primary_key(vec![1]);
let metadata = builder.build_without_validation().unwrap();
assert_eq!(
"__primary_key",
metadata.column_metadatas[0].column_schema.name
);
}
#[test]
fn test_build_without_validation() {
// Primary key points to a Field column, which would normally fail validation.
let mut builder = create_builder();
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"field",
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id: 2,
})
.primary_key(vec![2]);
// Unvalidated build should succeed.
let metadata = builder.build_without_validation().unwrap();
assert_eq!(vec![2], metadata.primary_key);
// Validated build still rejects it.
let mut builder = create_builder();
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new(
"field",
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id: 2,
})
.primary_key(vec![2]);
let err = builder.build().unwrap_err();
assert!(
err.to_string()
.contains("semantic type of column field should be Tag"),
"unexpected err: {err}"
);
}
#[test]
fn test_debug_for_column_metadata() {
let region_metadata = build_test_region_metadata();