mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Fixed agg validation
This commit is contained in:
@@ -55,22 +55,44 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get fast field reader or empty as default.
|
/// Get fast field reader or return an error if the field doesn't exist.
|
||||||
pub(crate) fn get_ff_reader(
|
pub(crate) fn get_ff_reader(
|
||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
field_name: &str,
|
field_name: &str,
|
||||||
allowed_column_types: Option<&[ColumnType]>,
|
allowed_column_types: Option<&[ColumnType]>,
|
||||||
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
|
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
|
||||||
let ff_fields = reader.fast_fields();
|
let ff_fields = reader.fast_fields();
|
||||||
let ff_field_with_type = ff_fields
|
let ff_field_with_type = ff_fields.u64_lenient_for_type(allowed_column_types, field_name)?;
|
||||||
.u64_lenient_for_type(allowed_column_types, field_name)?
|
|
||||||
.unwrap_or_else(|| {
|
match ff_field_with_type {
|
||||||
(
|
Some(field) => Ok(field),
|
||||||
|
None => {
|
||||||
|
// Check if the field exists in the schema but is not a fast field
|
||||||
|
let schema = reader.schema();
|
||||||
|
if let Some((field, _path)) = schema.find_field(field_name) {
|
||||||
|
let field_type = schema.get_field_entry(field).field_type();
|
||||||
|
if !field_type.is_fast() {
|
||||||
|
return Err(crate::TantivyError::SchemaError(format!(
|
||||||
|
"Field '{}' is not a fast field. Aggregations require fast fields.",
|
||||||
|
field_name
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Field doesn't exist at all or has no values in this segment
|
||||||
|
// Check if it exists in schema to provide a better error message
|
||||||
|
if schema.find_field(field_name).is_none() {
|
||||||
|
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Field exists in schema and is a fast field, but has no values in this segment
|
||||||
|
// This is acceptable - return an empty column
|
||||||
|
Ok((
|
||||||
Column::build_empty_column(reader.num_docs()),
|
Column::build_empty_column(reader.num_docs()),
|
||||||
ColumnType::U64,
|
ColumnType::U64,
|
||||||
)
|
))
|
||||||
});
|
}
|
||||||
Ok(ff_field_with_type)
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_dynamic_columns(
|
pub(crate) fn get_dynamic_columns(
|
||||||
@@ -89,6 +111,7 @@ pub(crate) fn get_dynamic_columns(
|
|||||||
/// Get all fast field reader or empty as default.
|
/// Get all fast field reader or empty as default.
|
||||||
///
|
///
|
||||||
/// Is guaranteed to return at least one column.
|
/// Is guaranteed to return at least one column.
|
||||||
|
/// Returns an error if the field doesn't exist in the schema or is not a fast field.
|
||||||
pub(crate) fn get_all_ff_reader_or_empty(
|
pub(crate) fn get_all_ff_reader_or_empty(
|
||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
field_name: &str,
|
field_name: &str,
|
||||||
@@ -98,7 +121,25 @@ pub(crate) fn get_all_ff_reader_or_empty(
|
|||||||
let ff_fields = reader.fast_fields();
|
let ff_fields = reader.fast_fields();
|
||||||
let mut ff_field_with_type =
|
let mut ff_field_with_type =
|
||||||
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
|
ff_fields.u64_lenient_for_type_all(allowed_column_types, field_name)?;
|
||||||
|
|
||||||
if ff_field_with_type.is_empty() {
|
if ff_field_with_type.is_empty() {
|
||||||
|
// Check if the field exists in the schema but is not a fast field
|
||||||
|
let schema = reader.schema();
|
||||||
|
if let Some((field, _path)) = schema.find_field(field_name) {
|
||||||
|
let field_type = schema.get_field_entry(field).field_type();
|
||||||
|
if !field_type.is_fast() {
|
||||||
|
return Err(crate::TantivyError::SchemaError(format!(
|
||||||
|
"Field '{}' is not a fast field. Aggregations require fast fields.",
|
||||||
|
field_name
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Field doesn't exist in the schema at all
|
||||||
|
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Field exists in schema and is a fast field, but has no values in this segment
|
||||||
|
// This is acceptable - return an empty column
|
||||||
ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type));
|
ff_field_with_type.push((Column::build_empty_column(reader.num_docs()), fallback_type));
|
||||||
}
|
}
|
||||||
Ok(ff_field_with_type)
|
Ok(ff_field_with_type)
|
||||||
|
|||||||
@@ -1057,7 +1057,7 @@ mod tests {
|
|||||||
"avg": {"field": "score"}
|
"avg": {"field": "score"}
|
||||||
}));
|
}));
|
||||||
let terms_string_with_child = agg_from_json(json!({
|
let terms_string_with_child = agg_from_json(json!({
|
||||||
"terms": {"field": "string_id"},
|
"terms": {"field": "text"},
|
||||||
"aggs": {
|
"aggs": {
|
||||||
"histo": {"histogram": {"field": "score", "interval": 10.0}}
|
"histo": {"histogram": {"field": "score", "interval": 10.0}}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1005,3 +1005,123 @@ fn test_aggregation_on_json_object_mixed_numerical_segments() {
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Aggregations that reference a field absent from the schema must fail with
/// `TantivyError::FieldNotFound` carrying the offending field name.
///
/// Covers one request per aggregation kind: date_histogram, histogram, terms, avg and range.
#[test]
fn test_aggregation_invalid_field_returns_error() {
    let index = get_test_index_2_segments(false).unwrap();
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();

    // Each case pairs an aggregation request with the unknown field name it references.
    let cases = [
        (
            r#"
            {
                "date_histogram_test": {
                    "date_histogram": {
                        "field": "not_valid_field",
                        "fixed_interval": "30d"
                    }
                }
            }"#,
            "not_valid_field",
        ),
        (
            r#"
            {
                "histogram_test": {
                    "histogram": {
                        "field": "invalid_histogram_field",
                        "interval": 10.0
                    }
                }
            }"#,
            "invalid_histogram_field",
        ),
        (
            r#"
            {
                "terms_test": {
                    "terms": {
                        "field": "invalid_terms_field"
                    }
                }
            }"#,
            "invalid_terms_field",
        ),
        (
            r#"
            {
                "avg_test": {
                    "avg": {
                        "field": "invalid_avg_field"
                    }
                }
            }"#,
            "invalid_avg_field",
        ),
        (
            r#"
            {
                "range_test": {
                    "range": {
                        "field": "invalid_range_field",
                        "ranges": [
                            { "to": 10.0 },
                            { "from": 10.0, "to": 20.0 },
                            { "from": 20.0 }
                        ]
                    }
                }
            }"#,
            "invalid_range_field",
        ),
    ];

    for (agg_req_str, expected_field) in cases {
        let agg: Aggregations = serde_json::from_str(agg_req_str).unwrap();
        let collector = get_collector(agg);

        // The match alone is sufficient: anything other than FieldNotFound (including Ok)
        // fails the test with the actual result in the message.
        match searcher.search(&AllQuery, &collector) {
            Err(crate::TantivyError::FieldNotFound(field_name)) => {
                assert_eq!(field_name, expected_field);
            }
            other => panic!(
                "Expected FieldNotFound error for field '{}', got: {:?}",
                expected_field, other
            ),
        }
    }
}
|
||||||
|
|||||||
@@ -255,6 +255,7 @@ mod tests {
|
|||||||
fn terms_aggregation_missing_mult_seg_empty() -> crate::Result<()> {
|
fn terms_aggregation_missing_mult_seg_empty() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let score = schema_builder.add_f64_field("score", FAST);
|
let score = schema_builder.add_f64_field("score", FAST);
|
||||||
|
schema_builder.add_json_field("json", FAST);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
||||||
@@ -302,6 +303,7 @@ mod tests {
|
|||||||
fn terms_aggregation_missing_single_seg_empty() -> crate::Result<()> {
|
fn terms_aggregation_missing_single_seg_empty() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let score = schema_builder.add_f64_field("score", FAST);
|
let score = schema_builder.add_f64_field("score", FAST);
|
||||||
|
schema_builder.add_json_field("json", FAST);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user