mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-18 09:10:41 +00:00
Applied PR comments.
This commit is contained in:
@@ -55,6 +55,31 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
|
||||
]
|
||||
}
|
||||
|
||||
/// Validates that a field exists in the schema and is configured as a fast field.
|
||||
///
|
||||
/// Returns an error if:
|
||||
/// - The field doesn't exist in the schema
|
||||
/// - The field exists but is not a fast field
|
||||
fn validate_field_for_agg(reader: &SegmentReader, field_name: &str) -> crate::Result<()> {
|
||||
let schema = reader.schema();
|
||||
// Here we check two things:
|
||||
// - the field is either directly in the schema or could be part of a json field present in the
|
||||
// schema.
|
||||
// - the field (or the json field holding it) is a fast field.
|
||||
if let Some((field, _path)) = schema.find_field(field_name) {
|
||||
let field_type = schema.get_field_entry(field).field_type();
|
||||
if !field_type.is_fast() {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field '{}' is not a fast field. Aggregations require fast fields.",
|
||||
field_name
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Err(crate::TantivyError::FieldNotFound(field_name.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get fast field reader or return an error if the field doesn't exist (when strict validation is
|
||||
/// enabled).
|
||||
pub(crate) fn get_ff_reader(
|
||||
@@ -66,37 +91,20 @@ pub(crate) fn get_ff_reader(
|
||||
let ff_fields = reader.fast_fields();
|
||||
let ff_field_with_type = ff_fields.u64_lenient_for_type(allowed_column_types, field_name)?;
|
||||
|
||||
match ff_field_with_type {
|
||||
Some(field) => Ok(field),
|
||||
None => {
|
||||
if strict_validation {
|
||||
// Check if the field exists in the schema but is not a fast field
|
||||
let schema = reader.schema();
|
||||
if let Some((field, _path)) = schema.find_field(field_name) {
|
||||
let field_type = schema.get_field_entry(field).field_type();
|
||||
if !field_type.is_fast() {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field '{}' is not a fast field. Aggregations require fast fields.",
|
||||
field_name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Field doesn't exist at all or has no values in this segment
|
||||
// Check if it exists in schema to provide a better error message
|
||||
if schema.find_field(field_name).is_none() {
|
||||
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
// Field exists in schema and is a fast field, but has no values in this segment
|
||||
// OR strict validation is disabled - return an empty column
|
||||
Ok((
|
||||
Column::build_empty_column(reader.num_docs()),
|
||||
ColumnType::U64,
|
||||
))
|
||||
}
|
||||
if let Some(field_with_type) = ff_field_with_type {
|
||||
return Ok(field_with_type);
|
||||
}
|
||||
|
||||
if strict_validation {
|
||||
validate_field_for_agg(reader, field_name)?;
|
||||
}
|
||||
|
||||
// Field exists in schema and is a fast field, but has no values in this segment
|
||||
// OR strict validation is disabled - return an empty column
|
||||
Ok((
|
||||
Column::build_empty_column(reader.num_docs()),
|
||||
ColumnType::U64,
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn get_dynamic_columns(
|
||||
@@ -130,20 +138,7 @@ pub(crate) fn get_all_ff_reader_or_empty(
|
||||
|
||||
if ff_field_with_type.is_empty() {
|
||||
if strict_validation {
|
||||
// Check if the field exists in the schema but is not a fast field
|
||||
let schema = reader.schema();
|
||||
if let Some((field, _path)) = schema.find_field(field_name) {
|
||||
let field_type = schema.get_field_entry(field).field_type();
|
||||
if !field_type.is_fast() {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field '{}' is not a fast field. Aggregations require fast fields.",
|
||||
field_name
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
// Field doesn't exist in the schema at all
|
||||
return Err(crate::TantivyError::FieldNotFound(field_name.to_string()));
|
||||
}
|
||||
validate_field_for_agg(reader, field_name)?;
|
||||
}
|
||||
|
||||
// Field exists in schema and is a fast field, but has no values in this segment
|
||||
|
||||
@@ -181,7 +181,7 @@ pub type BucketId = u32;
|
||||
/// - `limits`: Memory and bucket limits for the aggregation
|
||||
/// - `tokenizers`: TokenizerManager for parsing query strings in filter aggregations
|
||||
/// - `strict_field_validation`: Whether to enforce strict field validation (default: false)
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Default)]
|
||||
pub struct AggContextParams {
|
||||
/// Aggregation limits (memory and bucket count)
|
||||
pub limits: AggregationLimitsGuard,
|
||||
@@ -195,16 +195,6 @@ pub struct AggContextParams {
|
||||
pub strict_field_validation: bool,
|
||||
}
|
||||
|
||||
impl Default for AggContextParams {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
limits: Default::default(),
|
||||
tokenizers: Default::default(),
|
||||
strict_field_validation: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AggContextParams {
|
||||
/// Create new aggregation context parameters
|
||||
pub fn new(limits: AggregationLimitsGuard, tokenizers: TokenizerManager) -> Self {
|
||||
@@ -215,18 +205,6 @@ impl AggContextParams {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create new aggregation context parameters with strict field validation enabled
|
||||
pub fn with_strict_field_validation(
|
||||
limits: AggregationLimitsGuard,
|
||||
tokenizers: TokenizerManager,
|
||||
) -> Self {
|
||||
Self {
|
||||
limits,
|
||||
tokenizers,
|
||||
strict_field_validation: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Enable or disable strict field validation
|
||||
pub fn set_strict_field_validation(mut self, strict: bool) -> Self {
|
||||
self.strict_field_validation = strict;
|
||||
|
||||
Reference in New Issue
Block a user