Applied PR comments.

This commit is contained in:
Mohammad Dashti
2026-01-15 15:34:51 -10:00
committed by PSeitz
parent 141c91d028
commit fc017c4c74
2 changed files with 40 additions and 67 deletions

View File

@@ -55,6 +55,31 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
]
}
/// Validates that a field exists in the schema and is configured as a fast field.
///
/// Returns an error if:
/// - The field doesn't exist in the schema
/// - The field exists but is not a fast field
fn validate_field_for_agg(reader: &SegmentReader, field_name: &str) -> crate::Result<()> {
let schema = reader.schema();
// Here we check two things:
// - the field is either directly in the schema or could be part of a json field present in the
// schema.
// - the field (or the json field holding it) is a fast field.
if let Some((field, _path)) = schema.find_field(field_name) {
let field_type = schema.get_field_entry(field).field_type();
if !field_type.is_fast() {
return Err(crate::TantivyError::SchemaError(format!(
"Field '{}' is not a fast field. Aggregations require fast fields.",
field_name
)));
}
Ok(())
} else {
Err(crate::TantivyError::FieldNotFound(field_name.to_string()))
}
}
/// Get fast field reader or return an error if the field doesn't exist (when strict validation is
/// enabled).
pub(crate) fn get_ff_reader(
@@ -66,37 +91,20 @@ pub(crate) fn get_ff_reader(
let ff_fields = reader.fast_fields();
let ff_field_with_type = ff_fields.u64_lenient_for_type(allowed_column_types, field_name)?;
match ff_field_with_type {
Some(field) => Ok(field),
None => {
if strict_validation {
// Check if the field exists in the schema but is not a fast field
let schema = reader.schema();
if let Some((field, _path)) = schema.find_field(field_name) {
let field_type = schema.get_field_entry(field).field_type();
if !field_type.is_fast() {
return Err(crate::TantivyError::SchemaError(format!(
"Field '{}' is not a fast field. Aggregations require fast fields.",
field_name
)));
}
}
// Field doesn't exist at all or has no values in this segment
// Check if it exists in schema to provide a better error message
if schema.find_field(field_name).is_none() {
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
}
}
// Field exists in schema and is a fast field, but has no values in this segment
// OR strict validation is disabled - return an empty column
Ok((
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
))
}
if let Some(field_with_type) = ff_field_with_type {
return Ok(field_with_type);
}
if strict_validation {
validate_field_for_agg(reader, field_name)?;
}
// Field exists in schema and is a fast field, but has no values in this segment
// OR strict validation is disabled - return an empty column
Ok((
Column::build_empty_column(reader.num_docs()),
ColumnType::U64,
))
}
pub(crate) fn get_dynamic_columns(
@@ -130,20 +138,7 @@ pub(crate) fn get_all_ff_reader_or_empty(
if ff_field_with_type.is_empty() {
if strict_validation {
// Check if the field exists in the schema but is not a fast field
let schema = reader.schema();
if let Some((field, _path)) = schema.find_field(field_name) {
let field_type = schema.get_field_entry(field).field_type();
if !field_type.is_fast() {
return Err(crate::TantivyError::SchemaError(format!(
"Field '{}' is not a fast field. Aggregations require fast fields.",
field_name
)));
}
} else {
// Field doesn't exist in the schema at all
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
}
validate_field_for_agg(reader, field_name)?;
}
// Field exists in schema and is a fast field, but has no values in this segment

View File

@@ -181,7 +181,7 @@ pub type BucketId = u32;
/// - `limits`: Memory and bucket limits for the aggregation
/// - `tokenizers`: TokenizerManager for parsing query strings in filter aggregations
/// - `strict_field_validation`: Whether to enforce strict field validation (default: false)
#[derive(Clone)]
#[derive(Clone, Default)]
pub struct AggContextParams {
/// Aggregation limits (memory and bucket count)
pub limits: AggregationLimitsGuard,
@@ -195,16 +195,6 @@ pub struct AggContextParams {
pub strict_field_validation: bool,
}
impl Default for AggContextParams {
fn default() -> Self {
Self {
limits: Default::default(),
tokenizers: Default::default(),
strict_field_validation: false,
}
}
}
impl AggContextParams {
/// Create new aggregation context parameters
pub fn new(limits: AggregationLimitsGuard, tokenizers: TokenizerManager) -> Self {
@@ -215,18 +205,6 @@ impl AggContextParams {
}
}
/// Create new aggregation context parameters with strict field validation enabled
pub fn with_strict_field_validation(
limits: AggregationLimitsGuard,
tokenizers: TokenizerManager,
) -> Self {
Self {
limits,
tokenizers,
strict_field_validation: true,
}
}
/// Enable or disable strict field validation
pub fn set_strict_field_validation(mut self, strict: bool) -> Self {
self.strict_field_validation = strict;