From fc017c4c749f4e295cd3f9136ed9de6df0f8832c Mon Sep 17 00:00:00 2001 From: Mohammad Dashti Date: Thu, 15 Jan 2026 15:34:51 -1000 Subject: [PATCH] Applied PR comments. --- src/aggregation/accessor_helpers.rs | 83 ++++++++++++++--------------- src/aggregation/mod.rs | 24 +-------- 2 files changed, 40 insertions(+), 67 deletions(-) diff --git a/src/aggregation/accessor_helpers.rs b/src/aggregation/accessor_helpers.rs index 42ddb6750..d7f0a8bdd 100644 --- a/src/aggregation/accessor_helpers.rs +++ b/src/aggregation/accessor_helpers.rs @@ -55,6 +55,31 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] { ] } +/// Validates that a field exists in the schema and is configured as a fast field. +/// +/// Returns an error if: +/// - The field doesn't exist in the schema +/// - The field exists but is not a fast field +fn validate_field_for_agg(reader: &SegmentReader, field_name: &str) -> crate::Result<()> { + let schema = reader.schema(); + // Here we check two things: + // - the field is either directly in the schema or could be part of a json field present in the + // schema. + // - the field (or the json field holding it) is a fast field. + if let Some((field, _path)) = schema.find_field(field_name) { + let field_type = schema.get_field_entry(field).field_type(); + if !field_type.is_fast() { + return Err(crate::TantivyError::SchemaError(format!( + "Field '{}' is not a fast field. Aggregations require fast fields.", + field_name + ))); + } + Ok(()) + } else { + Err(crate::TantivyError::FieldNotFound(field_name.to_string())) + } +} + /// Get fast field reader or return an error if the field doesn't exist (when strict validation is /// enabled). pub(crate) fn get_ff_reader( @@ -66,37 +91,20 @@ pub(crate) fn get_ff_reader( let ff_fields = reader.fast_fields(); let ff_field_with_type = ff_fields.u64_lenient_for_type(allowed_column_types, field_name)?; - match ff_field_with_type { - Some(field) => Ok(field), - None => { - if strict_validation { - // Check if the field exists in the schema but is not a fast field - let schema = reader.schema(); - if let Some((field, _path)) = schema.find_field(field_name) { - let field_type = schema.get_field_entry(field).field_type(); - if !field_type.is_fast() { - return Err(crate::TantivyError::SchemaError(format!( - "Field '{}' is not a fast field. Aggregations require fast fields.", - field_name - ))); - } - } - - // Field doesn't exist at all or has no values in this segment - // Check if it exists in schema to provide a better error message - if schema.find_field(field_name).is_none() { - return Err(crate::TantivyError::FieldNotFound(field_name.to_string())); - } - } - - // Field exists in schema and is a fast field, but has no values in this segment - // OR strict validation is disabled - return an empty column - Ok(( - Column::build_empty_column(reader.num_docs()), - ColumnType::U64, - )) - } + if let Some(field_with_type) = ff_field_with_type { + return Ok(field_with_type); } + + if strict_validation { + validate_field_for_agg(reader, field_name)?; + } + + // Field exists in schema and is a fast field, but has no values in this segment + // OR strict validation is disabled - return an empty column + Ok(( + Column::build_empty_column(reader.num_docs()), + ColumnType::U64, + )) } pub(crate) fn get_dynamic_columns( @@ -130,20 +138,7 @@ pub(crate) fn get_all_ff_reader_or_empty( if ff_field_with_type.is_empty() { if strict_validation { - // Check if the field exists in the schema but is not a fast field - let schema = reader.schema(); - if let Some((field, _path)) = schema.find_field(field_name) { - let field_type = schema.get_field_entry(field).field_type(); - if !field_type.is_fast() { - return Err(crate::TantivyError::SchemaError(format!( - "Field '{}' is not a fast field. Aggregations require fast fields.", - field_name - ))); - } - } else { - // Field doesn't exist in the schema at all - return Err(crate::TantivyError::FieldNotFound(field_name.to_string())); - } + validate_field_for_agg(reader, field_name)?; } // Field exists in schema and is a fast field, but has no values in this segment diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index aa6dd5a30..5ea7ae023 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -181,7 +181,7 @@ pub type BucketId = u32; /// - `limits`: Memory and bucket limits for the aggregation /// - `tokenizers`: TokenizerManager for parsing query strings in filter aggregations /// - `strict_field_validation`: Whether to enforce strict field validation (default: false) -#[derive(Clone)] +#[derive(Clone, Default)] pub struct AggContextParams { /// Aggregation limits (memory and bucket count) pub limits: AggregationLimitsGuard, @@ -195,16 +195,6 @@ pub struct AggContextParams { pub strict_field_validation: bool, } -impl Default for AggContextParams { - fn default() -> Self { - Self { - limits: Default::default(), - tokenizers: Default::default(), - strict_field_validation: false, - } - } -} - impl AggContextParams { /// Create new aggregation context parameters pub fn new(limits: AggregationLimitsGuard, tokenizers: TokenizerManager) -> Self { @@ -215,18 +205,6 @@ impl AggContextParams { } } - /// Create new aggregation context parameters with strict field validation enabled - pub fn with_strict_field_validation( - limits: AggregationLimitsGuard, - tokenizers: TokenizerManager, - ) -> Self { - Self { - limits, - tokenizers, - strict_field_validation: true, - } - } - /// Enable or disable strict field validation pub fn set_strict_field_validation(mut self, strict: bool) -> Self { self.strict_field_validation = strict;