diff --git a/src/aggregation/accessor_helpers.rs b/src/aggregation/accessor_helpers.rs index fa51041e4..cbd24c6b4 100644 --- a/src/aggregation/accessor_helpers.rs +++ b/src/aggregation/accessor_helpers.rs @@ -55,22 +55,44 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] { ] } -/// Get fast field reader or empty as default. +/// Get fast field reader or return an error if the field doesn't exist. pub(crate) fn get_ff_reader( reader: &SegmentReader, field_name: &str, allowed_column_types: Option<&[ColumnType]>, ) -> crate::Result<(columnar::Column, ColumnType)> { let ff_fields = reader.fast_fields(); - let ff_field_with_type = ff_fields - .u64_lenient_for_type(allowed_column_types, field_name)? - .unwrap_or_else(|| { - ( + let ff_field_with_type = ff_fields.u64_lenient_for_type(allowed_column_types, field_name)?; + + match ff_field_with_type { + Some(field) => Ok(field), + None => { + // Check if the field exists in the schema but is not a fast field + let schema = reader.schema(); + if let Some((field, _path)) = schema.find_field(field_name) { + let field_type = schema.get_field_entry(field).field_type(); + if !field_type.is_fast() { + return Err(crate::TantivyError::SchemaError(format!( + "Field '{}' is not a fast field. Aggregations require fast fields.", + field_name + ))); + } + } + + // Field doesn't exist at all or has no values in this segment + // Check if it exists in schema to provide a better error message + if schema.find_field(field_name).is_none() { + return Err(crate::TantivyError::FieldNotFound(field_name.to_string())); + } + + // Field exists in schema and is a fast field, but has no values in this segment + // This is acceptable - return an empty column + Ok(( Column::build_empty_column(reader.num_docs()), ColumnType::U64, - ) - }); - Ok(ff_field_with_type) + )) + } + } } pub(crate) fn get_dynamic_columns( @@ -89,6 +111,7 @@ pub(crate) fn get_dynamic_columns( /// Get all fast field reader or empty as default. /// /// Is guaranteed to return at least one column. +/// Returns an error if the field doesn't exist in the schema or is not a fast field. pub(crate) fn get_all_ff_reader_or_empty( reader: &SegmentReader, field_name: &str, @@ -98,7 +121,25 @@ pub(crate) fn get_all_ff_reader_or_empty( let ff_fields = reader.fast_fields(); let mut ff_field_with_type = ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?; + if ff_field_with_type.is_empty() { + // Check if the field exists in the schema but is not a fast field + let schema = reader.schema(); + if let Some((field, _path)) = schema.find_field(field_name) { + let field_type = schema.get_field_entry(field).field_type(); + if !field_type.is_fast() { + return Err(crate::TantivyError::SchemaError(format!( + "Field '{}' is not a fast field. Aggregations require fast fields.", + field_name + ))); + } + } else { + // Field doesn't exist in the schema at all + return Err(crate::TantivyError::FieldNotFound(field_name.to_string())); + } + + // Field exists in schema and is a fast field, but has no values in this segment + // This is acceptable - return an empty column ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type)); } Ok(ff_field_with_type) diff --git a/src/aggregation/agg_data.rs b/src/aggregation/agg_data.rs index deedb5781..40dcf3fe6 100644 --- a/src/aggregation/agg_data.rs +++ b/src/aggregation/agg_data.rs @@ -1057,7 +1057,7 @@ mod tests { "avg": {"field": "score"} })); let terms_string_with_child = agg_from_json(json!({ - "terms": {"field": "string_id"}, + "terms": {"field": "text"}, "aggs": { "histo": {"histogram": {"field": "score", "interval": 10.0}} } diff --git a/src/aggregation/agg_tests.rs b/src/aggregation/agg_tests.rs index fede0c7c7..2d5ffa769 100644 --- a/src/aggregation/agg_tests.rs +++ b/src/aggregation/agg_tests.rs @@ -1005,3 +1005,123 @@ fn test_aggregation_on_json_object_mixed_numerical_segments() { ) ); } + +#[test] +fn test_aggregation_invalid_field_returns_error() { + // Test that aggregations return an error when given an invalid field name + let index = get_test_index_2_segments(false).unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + + // Test with a field that doesn't exist at all + let agg_req_str = r#" + { + "date_histogram_test": { + "date_histogram": { + "field": "not_valid_field", + "fixed_interval": "30d" + } + } + }"#; + let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap(); + let collector = get_collector(agg); + let result = searcher.search(&AllQuery, &collector); + + assert!(result.is_err()); + match result { + Err(crate::TantivyError::FieldNotFound(field_name)) => { + assert_eq!(field_name, "not_valid_field"); + } + _ => panic!("Expected FieldNotFound error, got: {:?}", result), + } + + // Test with histogram aggregation on invalid field + let agg_req_str = r#" + { + "histogram_test": { + "histogram": { + "field": "invalid_histogram_field", + "interval": 10.0 + } + } + }"#; + let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap(); + let collector = get_collector(agg); + let result = searcher.search(&AllQuery, &collector); + + assert!(result.is_err()); + match result { + Err(crate::TantivyError::FieldNotFound(field_name)) => { + assert_eq!(field_name, "invalid_histogram_field"); + } + _ => panic!("Expected FieldNotFound error, got: {:?}", result), + } + + // Test with terms aggregation on invalid field + let agg_req_str = r#" + { + "terms_test": { + "terms": { + "field": "invalid_terms_field" + } + } + }"#; + let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap(); + let collector = get_collector(agg); + let result = searcher.search(&AllQuery, &collector); + + assert!(result.is_err()); + match result { + Err(crate::TantivyError::FieldNotFound(field_name)) => { + assert_eq!(field_name, "invalid_terms_field"); + } + _ => panic!("Expected FieldNotFound error, got: {:?}", result), + } + + // Test with avg metric aggregation on invalid field + let agg_req_str = r#" + { + "avg_test": { + "avg": { + "field": "invalid_avg_field" + } + } + }"#; + let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap(); + let collector = get_collector(agg); + let result = searcher.search(&AllQuery, &collector); + + assert!(result.is_err()); + match result { + Err(crate::TantivyError::FieldNotFound(field_name)) => { + assert_eq!(field_name, "invalid_avg_field"); + } + _ => panic!("Expected FieldNotFound error, got: {:?}", result), + } + + // Test with range aggregation on invalid field + let agg_req_str = r#" + { + "range_test": { + "range": { + "field": "invalid_range_field", + "ranges": [ + { "to": 10.0 }, + { "from": 10.0, "to": 20.0 }, + { "from": 20.0 } + ] + } + } + }"#; + let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap(); + let collector = get_collector(agg); + let result = searcher.search(&AllQuery, &collector); + + assert!(result.is_err()); + match result { + Err(crate::TantivyError::FieldNotFound(field_name)) => { + assert_eq!(field_name, "invalid_range_field"); + } + _ => panic!("Expected FieldNotFound error, got: {:?}", result), + } +} diff --git a/src/aggregation/bucket/term_missing_agg.rs b/src/aggregation/bucket/term_missing_agg.rs index 66f39927a..2baa7bbc8 100644 --- a/src/aggregation/bucket/term_missing_agg.rs +++ b/src/aggregation/bucket/term_missing_agg.rs @@ -255,6 +255,7 @@ mod tests { fn terms_aggregation_missing_mult_seg_empty() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let score = schema_builder.add_f64_field("score", FAST); + schema_builder.add_json_field("json", FAST); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer: IndexWriter = index.writer_for_tests().unwrap(); @@ -302,6 +303,7 @@ mod tests { fn terms_aggregation_missing_single_seg_empty() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let score = schema_builder.add_f64_field("score", FAST); + schema_builder.add_json_field("json", FAST); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();