add missing Bytes validation to term_agg (#2077)

returns empty for now instead of failing like before
This commit is contained in:
PSeitz
2023-06-12 16:38:07 +08:00
committed by GitHub
parent 3a82ef2560
commit 657f0cd3bd
2 changed files with 46 additions and 3 deletions

View File

@@ -74,14 +74,14 @@ impl AggregationWithAccessor {
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Bytes,
ColumnType::Str,
// ColumnType::Bytes Unsupported
// ColumnType::Bool Unsupported
// ColumnType::IpAddr Unsupported
// ColumnType::DateTime Unsupported
];
let mut columns =
get_all_ff_reader(reader, field_name, Some(&allowed_column_types))?;
get_all_ff_reader_or_empty(reader, field_name, Some(&allowed_column_types))?;
let first = columns.pop().unwrap();
accessor2 = columns.pop();
first
@@ -177,7 +177,7 @@ fn get_ff_reader(
/// Get all fast field reader or empty as default.
///
/// Is guaranteed to return at least one column.
fn get_all_ff_reader(
fn get_all_ff_reader_or_empty(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,

View File

@@ -428,6 +428,12 @@ impl SegmentTermCollector {
field_type: ColumnType,
accessor_idx: usize,
) -> crate::Result<Self> {
if field_type == ColumnType::Bytes || field_type == ColumnType::Bool {
return Err(TantivyError::InvalidArgument(format!(
"terms aggregation is not supported for column type {:?}",
field_type
)));
}
let term_buckets = TermBuckets::default();
if let Some(custom_order) = req.order.as_ref() {
@@ -1500,4 +1506,41 @@ mod tests {
Ok(())
}
#[test]
fn terms_aggregation_bytes() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let bytes_field = schema_builder.add_bytes_field("bytes", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
bytes_field => vec![1,2,3],
))?;
index_writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "bytes"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// TODO: Returning an error would be better instead of an empty result, since this is not a
// JSON field
assert_eq!(
res["my_texts"]["buckets"][0]["key"],
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
Ok(())
}
}