mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Adapting for quickwit2 (#1912)
* Adapting tantivy to make it possible to be plugged to quickwit. * Apply suggestions from code review Co-authored-by: PSeitz <PSeitz@users.noreply.github.com> * Added unit test --------- Co-authored-by: PSeitz <PSeitz@users.noreply.github.com>
This commit is contained in:
@@ -113,7 +113,6 @@ pub fn u64_to_f64(val: u64) -> f64 {
|
||||
///
|
||||
/// This function assumes that the needle is rarely contained in the bytes string
|
||||
/// and offers a fast path if the needle is not present.
|
||||
#[inline(always)]
|
||||
pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
|
||||
if !bytes.contains(&needle) {
|
||||
return;
|
||||
|
||||
@@ -344,6 +344,8 @@ mod tests {
|
||||
|
||||
use super::agg_req::Aggregations;
|
||||
use super::*;
|
||||
use crate::aggregation::agg_req::{Aggregation, BucketAggregation, BucketAggregationType};
|
||||
use crate::aggregation::bucket::TermsAggregation;
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::query::{AllQuery, TermQuery};
|
||||
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
|
||||
@@ -591,4 +593,50 @@ mod tests {
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
#[test]
fn test_aggregation_on_json_object() {
    // Schema with a single JSON fast field named `json`.
    let mut builder = Schema::builder();
    let json = builder.add_json_field("json", FAST);
    let index = Index::create_in_ram(builder.build());

    // Index one document per color (red first, then blue) and commit.
    let mut writer = index.writer_for_tests().unwrap();
    for color in ["red", "blue"] {
        writer
            .add_document(doc!(json => json!({ "color": color })))
            .unwrap();
    }
    writer.commit().unwrap();
    let searcher = index.reader().unwrap().searcher();

    // Terms aggregation addressing the JSON sub-path through its dotted name.
    let terms_on_color = TermsAggregation {
        field: "json.color".to_string(),
        ..Default::default()
    };
    let bucket = BucketAggregation {
        bucket_agg: BucketAggregationType::Terms(terms_on_color),
        sub_aggregation: Default::default(),
    };
    let agg: Aggregations =
        std::iter::once(("jsonagg".to_string(), Aggregation::Bucket(bucket))).collect();

    let collector = AggregationCollector::from_aggs(agg, None);
    let results = searcher.search(&AllQuery, &collector).unwrap();
    let actual = serde_json::to_value(results).unwrap();

    let expected = serde_json::json!({
        "jsonagg": {
            "buckets": [
                {"doc_count": 1, "key": "blue"},
                {"doc_count": 1, "key": "red"}
            ],
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0
        }
    });
    assert_eq!(&actual, &expected);
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
|
||||
use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP};
|
||||
use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
|
||||
use crate::schema::{Field, Type};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
use crate::time::{OffsetDateTime, UtcOffset};
|
||||
@@ -200,7 +200,7 @@ fn infer_type_from_str(text: &str) -> TextOrDateTime {
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to infer a JSON type from a string
|
||||
// Tries to infer a JSON type from a string.
|
||||
pub(crate) fn convert_to_fast_value_and_get_term(
|
||||
json_term_writer: &mut JsonTermWriter,
|
||||
phrase: &str,
|
||||
@@ -296,6 +296,32 @@ fn split_json_path(json_path: &str) -> Vec<String> {
|
||||
json_path_segments
|
||||
}
|
||||
|
||||
/// Takes a field name, a json path as supplied by a user, and whether we should expand dots, and
|
||||
/// return a column key, as expected by the columnar crate.
|
||||
///
|
||||
/// This function will detect unescaped dots in the path, and split over them.
|
||||
/// If expand_dots is enabled, then even escaped dots will be split over.
|
||||
///
|
||||
/// The resulting list of segment then gets stitched together, joined by \1 separator,
|
||||
/// as defined in the columnar crate.
|
||||
pub(crate) fn encode_column_name(
|
||||
field_name: &str,
|
||||
json_path: &str,
|
||||
expand_dots_enabled: bool,
|
||||
) -> String {
|
||||
let mut column_key: String = String::with_capacity(field_name.len() + json_path.len() + 1);
|
||||
column_key.push_str(field_name);
|
||||
for mut segment in split_json_path(json_path) {
|
||||
column_key.push_str(JSON_PATH_SEGMENT_SEP_STR);
|
||||
if expand_dots_enabled {
|
||||
// We need to replace `.` by JSON_PATH_SEGMENT_SEP.
|
||||
unsafe { replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, segment.as_bytes_mut()) };
|
||||
}
|
||||
column_key.push_str(&segment);
|
||||
}
|
||||
column_key
|
||||
}
|
||||
|
||||
impl<'a> JsonTermWriter<'a> {
|
||||
pub fn from_field_and_json_path(
|
||||
field: Field,
|
||||
@@ -2,6 +2,7 @@ mod executor;
|
||||
pub mod index;
|
||||
mod index_meta;
|
||||
mod inverted_index_reader;
|
||||
pub mod json_utils;
|
||||
pub mod searcher;
|
||||
mod segment;
|
||||
mod segment_component;
|
||||
|
||||
@@ -167,7 +167,7 @@ impl SegmentReader {
|
||||
let schema = segment.schema();
|
||||
|
||||
let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
|
||||
let fast_fields_readers = FastFieldReaders::open(fast_fields_data)?;
|
||||
let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
|
||||
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
|
||||
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
|
||||
|
||||
|
||||
@@ -90,8 +90,8 @@ mod tests {
|
||||
use crate::directory::{Directory, RamDirectory, WritePtr};
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
use crate::schema::{
|
||||
Document, Facet, FacetOptions, Field, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING,
|
||||
TEXT,
|
||||
Document, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, FAST,
|
||||
INDEXED, STORED, STRING, TEXT,
|
||||
};
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};
|
||||
@@ -131,7 +131,7 @@ mod tests {
|
||||
let file = directory.open_read(path).unwrap();
|
||||
|
||||
assert_eq!(file.len(), 161);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let column = fast_field_readers
|
||||
.u64("field")
|
||||
.unwrap()
|
||||
@@ -181,7 +181,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 189);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let col = fast_field_readers
|
||||
.u64("field")
|
||||
.unwrap()
|
||||
@@ -214,7 +214,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 162);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let fast_field_reader = fast_field_readers
|
||||
.u64("field")
|
||||
.unwrap()
|
||||
@@ -247,7 +247,7 @@ mod tests {
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 4557);
|
||||
{
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let col = fast_field_readers
|
||||
.u64("field")
|
||||
.unwrap()
|
||||
@@ -281,7 +281,7 @@ mod tests {
|
||||
assert_eq!(file.len(), 333_usize);
|
||||
|
||||
{
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = fast_field_readers
|
||||
.i64("field")
|
||||
.unwrap()
|
||||
@@ -318,7 +318,7 @@ mod tests {
|
||||
}
|
||||
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = fast_field_readers.i64("field").unwrap();
|
||||
assert_eq!(col.first(0), None);
|
||||
|
||||
@@ -351,7 +351,7 @@ mod tests {
|
||||
}
|
||||
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = fast_field_readers
|
||||
.date("date")
|
||||
.unwrap()
|
||||
@@ -387,7 +387,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let col = fast_field_readers
|
||||
.u64("field")
|
||||
.unwrap()
|
||||
@@ -773,7 +773,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 175);
|
||||
let fast_field_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let bool_col = fast_field_readers.bool("field_bool").unwrap();
|
||||
assert_eq!(bool_col.first(0), Some(true));
|
||||
assert_eq!(bool_col.first(1), Some(false));
|
||||
@@ -805,7 +805,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 187);
|
||||
let readers = FastFieldReaders::open(file).unwrap();
|
||||
let readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let bool_col = readers.bool("field_bool").unwrap();
|
||||
for i in 0..25 {
|
||||
assert_eq!(bool_col.first(i * 2), Some(true));
|
||||
@@ -830,7 +830,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 177);
|
||||
let fastfield_readers = FastFieldReaders::open(file).unwrap();
|
||||
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = fastfield_readers.bool("field_bool").unwrap();
|
||||
assert_eq!(col.first(0), None);
|
||||
let col = fastfield_readers
|
||||
@@ -892,7 +892,7 @@ mod tests {
|
||||
let directory = get_index(&docs[..], &schema).unwrap();
|
||||
let path = Path::new("test");
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let readers = FastFieldReaders::open(file).unwrap();
|
||||
let readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = readers.date("field").unwrap();
|
||||
|
||||
for (i, time) in times.iter().enumerate() {
|
||||
@@ -1068,13 +1068,133 @@ mod tests {
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let column_without_opt: Option<StrColumn> = fast_fields.str("without\u{1}hello").unwrap();
|
||||
let column_without_opt: Option<StrColumn> = fast_fields.str("without.hello").unwrap();
|
||||
assert!(column_without_opt.is_none());
|
||||
let column_with_opt: Option<StrColumn> = fast_fields.str("with\u{1}hello").unwrap();
|
||||
let column_with_opt: Option<StrColumn> = fast_fields.str("with.hello").unwrap();
|
||||
let column_with: StrColumn = column_with_opt.unwrap();
|
||||
assert!(column_with.term_ords(0).next().is_none());
|
||||
assert!(column_with.term_ords(1).eq([0]));
|
||||
assert!(column_with.term_ords(2).eq([2]));
|
||||
assert!(column_with.term_ords(3).eq([1]));
|
||||
}
|
||||
|
||||
#[test]
fn test_fast_field_in_json_field_expand_dots_disabled() {
    // JSON fast field with the default options (expand dots is not enabled here).
    let mut builder = Schema::builder();
    let json = builder.add_json_field("json", JsonObjectOptions::default().set_fast());
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_for_tests().unwrap();
    writer
        .add_document(doc!(json => json!({"attr.age": 32})))
        .unwrap();
    writer.commit().unwrap();

    let searcher = index.reader().unwrap().searcher();
    let fast_fields = searcher.segment_reader(0u32).fast_fields();

    // The unescaped dotted name must not resolve to the `attr.age` key.
    let unescaped_lookup = fast_fields.column_opt::<i64>("json.attr.age").unwrap();
    assert!(unescaped_lookup.is_none());

    // Escaping the dot addresses the key that literally contains a dot.
    let escaped_lookup = fast_fields
        .column_opt::<i64>(r#"json.attr\.age"#)
        .unwrap();
    let column = escaped_lookup.unwrap();
    let vals: Vec<i64> = column.values_for_doc(0u32).collect();
    assert_eq!(&vals, &[32]);
}
|
||||
|
||||
#[test]
fn test_fast_field_in_json_field_expand_dots_enabled() {
    // JSON fast field with expand dots enabled.
    let options = JsonObjectOptions::default()
        .set_fast()
        .set_expand_dots_enabled();
    let mut builder = Schema::builder();
    let json = builder.add_json_field("json", options);
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_for_tests().unwrap();
    writer
        .add_document(doc!(json => json!({"attr.age": 32})))
        .unwrap();
    writer.commit().unwrap();

    let searcher = index.reader().unwrap().searcher();
    let fast_fields = searcher.segment_reader(0u32).fast_fields();

    // Both the escaped and the unescaped spelling must reach the same column.
    for test_column_name in ["json.attr.age", "json.attr\\.age"] {
        let column_opt = fast_fields.column_opt::<i64>(test_column_name).unwrap();
        let column = column_opt.unwrap();
        let vals: Vec<i64> = column.values_for_doc(0u32).collect();
        assert_eq!(&vals, &[32]);
    }
}
|
||||
|
||||
#[test]
fn test_fast_field_dot_in_schema_field_name() {
    // A non-JSON fast field whose schema name itself contains dots.
    let mut builder = Schema::builder();
    let field_with_dot = builder.add_i64_field("field.with.dot", FAST);
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_for_tests().unwrap();
    writer
        .add_document(doc!(field_with_dot => 32i64))
        .unwrap();
    writer.commit().unwrap();

    let searcher = index.reader().unwrap().searcher();
    let fast_fields = searcher.segment_reader(0u32).fast_fields();

    // The full dotted name must resolve to the schema field.
    let column_opt = fast_fields.column_opt::<i64>("field.with.dot").unwrap();
    let column = column_opt.unwrap();
    let vals: Vec<i64> = column.values_for_doc(0u32).collect();
    assert_eq!(&vals, &[32]);
}
|
||||
|
||||
#[test]
fn test_shadowing_fast_field() {
    // Two JSON fields: the name of the second one is also a valid path into the first.
    let mut builder = Schema::builder();
    let json_field = builder.add_json_field("jsonfield", FAST);
    let shadowing_json_field = builder.add_json_field("jsonfield.attr", FAST);
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_for_tests().unwrap();
    writer
        .add_document(doc!(json_field=> json!({"attr": {"age": 32}}), shadowing_json_field=>json!({"age": 33})))
        .unwrap();
    writer.commit().unwrap();

    let searcher = index.reader().unwrap().searcher();
    let fast_fields = searcher.segment_reader(0u32).fast_fields();

    // The value stored under the field with the longer name (33) is the one expected.
    let column_opt = fast_fields
        .column_opt::<i64>("jsonfield.attr.age")
        .unwrap();
    let column = column_opt.unwrap();
    let vals: Vec<i64> = column.values_for_doc(0u32).collect();
    assert_eq!(&vals, &[33]);
}
|
||||
|
||||
#[test]
fn test_shadowing_fast_field_with_expand_dots() {
    // Same shadowing setup as above, but both JSON fields have expand dots enabled.
    let options = JsonObjectOptions::default()
        .set_fast()
        .set_expand_dots_enabled();
    let mut builder = Schema::builder();
    let json_field = builder.add_json_field("jsonfield", options.clone());
    let shadowing_json_field = builder.add_json_field("jsonfield.attr", options);
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_for_tests().unwrap();
    writer
        .add_document(doc!(json_field=> json!({"attr.age": 32}), shadowing_json_field=>json!({"age": 33})))
        .unwrap();
    writer.commit().unwrap();

    let searcher = index.reader().unwrap().searcher();
    let fast_fields = searcher.segment_reader(0u32).fast_fields();

    // The value stored under the field with the longer name (33) is the one expected.
    let column_opt = fast_fields
        .column_opt::<i64>("jsonfield.attr.age")
        .unwrap();
    let column = column_opt.unwrap();
    let vals: Vec<i64> = column.values_for_doc(0u32).collect();
    assert_eq!(&vals, &[33]);
}
|
||||
}
|
||||
|
||||
@@ -7,8 +7,9 @@ use columnar::{
|
||||
DynamicColumnHandle, HasAssociatedColumnType, StrColumn,
|
||||
};
|
||||
|
||||
use crate::core::json_utils::encode_column_name;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::schema::Schema;
|
||||
use crate::schema::{Field, FieldEntry, FieldType, Schema};
|
||||
use crate::space_usage::{FieldUsage, PerFieldSpaceUsage};
|
||||
|
||||
/// Provides access to all of the BitpackedFastFieldReader.
|
||||
@@ -18,16 +19,22 @@ use crate::space_usage::{FieldUsage, PerFieldSpaceUsage};
|
||||
#[derive(Clone)]
|
||||
pub struct FastFieldReaders {
|
||||
columnar: Arc<ColumnarReader>,
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
impl FastFieldReaders {
|
||||
pub(crate) fn open(fast_field_file: FileSlice) -> io::Result<FastFieldReaders> {
|
||||
pub(crate) fn open(fast_field_file: FileSlice, schema: Schema) -> io::Result<FastFieldReaders> {
|
||||
let columnar = Arc::new(ColumnarReader::open(fast_field_file)?);
|
||||
Ok(FastFieldReaders { columnar })
|
||||
Ok(FastFieldReaders { columnar, schema })
|
||||
}
|
||||
|
||||
pub(crate) fn columnar(&self) -> &ColumnarReader {
|
||||
self.columnar.as_ref()
|
||||
fn resolve_field(&self, column_name: &str) -> Option<String> {
|
||||
let default_field_opt: Option<Field> = if cfg!(feature = "quickwit") {
|
||||
self.schema.get_field("_dynamic").ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.resolve_column_name_given_default_field(column_name, default_field_opt)
|
||||
}
|
||||
|
||||
pub(crate) fn space_usage(&self, schema: &Schema) -> io::Result<PerFieldSpaceUsage> {
|
||||
@@ -46,6 +53,59 @@ impl FastFieldReaders {
|
||||
Ok(PerFieldSpaceUsage::new(per_field_usages))
|
||||
}
|
||||
|
||||
/// Borrows the underlying `ColumnarReader`.
pub(crate) fn columnar(&self) -> &ColumnarReader {
    &self.columnar
}
|
||||
|
||||
/// Transforms a user-supplied fast field name into a column name.
|
||||
///
|
||||
/// A user-supplied fast field name is not necessarily a schema field name
|
||||
/// because we handle fast fields.
|
||||
///
|
||||
/// For instance, if the documents look like `{.., "attributes": {"color": "red"}}` and
|
||||
/// `attributes` is a json fast field, a user could want to run a term aggregation over
|
||||
/// colors, by referring to the field as `attributes.color`.
|
||||
///
|
||||
/// This function transforms `attributes.color` into a column key to be used in the `columnar`.
|
||||
///
|
||||
/// The logic works as follows, first we identify which field is targetted by calling
|
||||
/// `schema.find_field(..)`. This method will attempt to split the user splied fast field
|
||||
/// name by non-escaped dots, and find the longest matching schema field name.
|
||||
/// In our case, it would return the (attribute_field, "color").
|
||||
///
|
||||
/// If no field is found, but a dynamic field is supplied, then we
|
||||
/// will simply assuem the user is targetting the dynamic field. (This feature is used in
|
||||
/// Quickwit.)
|
||||
///
|
||||
/// We then encode the `(field, path)` into the right `columnar_key`.
|
||||
fn resolve_column_name_given_default_field<'a>(
|
||||
&'a self,
|
||||
field_name: &'a str,
|
||||
default_field_opt: Option<Field>,
|
||||
) -> Option<String> {
|
||||
let (field, path): (Field, &str) = self
|
||||
.schema
|
||||
.find_field(field_name)
|
||||
.or_else(|| default_field_opt.map(|default_field| (default_field, field_name)))?;
|
||||
let field_name = self.schema.get_field_name(field);
|
||||
if path.is_empty() {
|
||||
return Some(field_name.to_string());
|
||||
}
|
||||
let field_entry: &FieldEntry = self.schema.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
match (field_type, path) {
|
||||
(FieldType::JsonObject(json_options), path) if !path.is_empty() => {
|
||||
Some(encode_column_name(
|
||||
field_entry.name(),
|
||||
path,
|
||||
json_options.is_expand_dots_enabled(),
|
||||
))
|
||||
}
|
||||
(_, "") => Some(field_entry.name().to_string()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a typed column associated to a given field name.
|
||||
///
|
||||
/// If no column associated with that field_name exists,
|
||||
@@ -53,11 +113,10 @@ impl FastFieldReaders {
|
||||
/// returns `None`.
|
||||
pub fn column_opt<T>(&self, field_name: &str) -> crate::Result<Option<Column<T>>>
|
||||
where
|
||||
T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + 'static,
|
||||
T: HasAssociatedColumnType,
|
||||
DynamicColumn: Into<Option<Column<T>>>,
|
||||
{
|
||||
let column_type = T::column_type();
|
||||
let Some(dynamic_column_handle) = self.dynamic_column_handle(field_name, column_type)?
|
||||
let Some(dynamic_column_handle) = self.dynamic_column_handle(field_name, T::column_type())?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
@@ -66,10 +125,15 @@ impl FastFieldReaders {
|
||||
}
|
||||
|
||||
/// Returns the number of `bytes` associated with a column.
|
||||
///
|
||||
/// Returns 0 if the column does not exist.
|
||||
pub fn column_num_bytes(&self, field: &str) -> crate::Result<usize> {
|
||||
let Some(resolved_field_name) = self.resolve_field(field) else {
|
||||
return Ok(0);
|
||||
};
|
||||
Ok(self
|
||||
.columnar
|
||||
.read_columns(field)?
|
||||
.read_columns(&resolved_field_name)?
|
||||
.into_iter()
|
||||
.map(|column_handle| column_handle.num_bytes())
|
||||
.sum())
|
||||
@@ -152,23 +216,38 @@ impl FastFieldReaders {
|
||||
field_name: &str,
|
||||
column_type: ColumnType,
|
||||
) -> crate::Result<Option<DynamicColumnHandle>> {
|
||||
let Some(resolved_field_name) = self.resolve_field(field_name) else {
|
||||
return Ok(None);
|
||||
};
|
||||
let dynamic_column_handle_opt = self
|
||||
.columnar
|
||||
.read_columns(field_name)?
|
||||
.read_columns(&resolved_field_name)?
|
||||
.into_iter()
|
||||
.find(|column| column.column_type() == column_type);
|
||||
Ok(dynamic_column_handle_opt)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub async fn list_dynamic_column_handles(
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<Vec<DynamicColumnHandle>> {
|
||||
let Some(resolved_field_name) = self.resolve_field(field_name) else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
let columns = self
|
||||
.columnar
|
||||
.read_columns_async(&resolved_field_name)
|
||||
.await?;
|
||||
Ok(columns)
|
||||
}
|
||||
|
||||
/// Returns the `u64` column used to represent any `u64`-mapped typed (i64, u64, f64, DateTime).
|
||||
#[doc(hidden)]
|
||||
pub fn u64_lenient(&self, field_name: &str) -> crate::Result<Option<Column<u64>>> {
|
||||
for col in self.columnar.read_columns(field_name)? {
|
||||
if let Some(col_u64) = col.open_u64_lenient()? {
|
||||
return Ok(Some(col_u64));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
Ok(self
|
||||
.u64_lenient_with_type(field_name)?
|
||||
.map(|(u64_column, _)| u64_column))
|
||||
}
|
||||
|
||||
/// Returns the `u64` column used to represent any `u64`-mapped typed (i64, u64, f64, DateTime).
|
||||
@@ -177,7 +256,10 @@ impl FastFieldReaders {
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<Option<(Column<u64>, ColumnType)>> {
|
||||
for col in self.columnar.read_columns(field_name)? {
|
||||
let Some(resolved_field_name) = self.resolve_field(field_name) else {
|
||||
return Ok(None);
|
||||
};
|
||||
for col in self.columnar.read_columns(&resolved_field_name)? {
|
||||
if let Some(col_u64) = col.open_u64_lenient()? {
|
||||
return Ok(Some((col_u64, col.column_type())));
|
||||
}
|
||||
@@ -206,3 +288,73 @@ impl FastFieldReaders {
|
||||
self.column(field_name)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::schema::{JsonObjectOptions, Schema, FAST};
    use crate::{Document, Index};

    /// Checks how user-supplied names are resolved into column names, with and
    /// without a fallback (dynamic) field.
    #[test]
    fn test_fast_field_reader_resolve_with_dynamic_internal() {
        let mut schema_builder = Schema::builder();
        schema_builder.add_i64_field("age", FAST);
        schema_builder.add_json_field("json_expand_dots_disabled", FAST);
        let expand_dots_options = JsonObjectOptions::default()
            .set_fast()
            .set_expand_dots_enabled();
        schema_builder.add_json_field("json_expand_dots_enabled", expand_dots_options);
        let dynamic_field = schema_builder.add_json_field("_dyna", FAST);
        let index = Index::create_in_ram(schema_builder.build());

        // A single empty document is enough to get a segment to read from.
        let mut index_writer = index.writer_for_tests().unwrap();
        index_writer.add_document(Document::default()).unwrap();
        index_writer.commit().unwrap();

        let searcher = index.reader().unwrap().searcher();
        let fast_field_readers = searcher.segment_reader(0u32).fast_fields();

        // (user-supplied name, fallback field, expected column name)
        let with_dynamic = Some(dynamic_field);
        let cases = [
            ("age", None, Some("age")),
            ("age", with_dynamic, Some("age")),
            (
                "json_expand_dots_disabled.attr.color",
                None,
                Some("json_expand_dots_disabled\u{1}attr\u{1}color"),
            ),
            (
                "json_expand_dots_disabled.attr\\.color",
                with_dynamic,
                Some("json_expand_dots_disabled\u{1}attr.color"),
            ),
            (
                "json_expand_dots_enabled.attr\\.color",
                with_dynamic,
                Some("json_expand_dots_enabled\u{1}attr\u{1}color"),
            ),
            ("notinschema.attr.color", None, None),
            (
                "notinschema.attr.color",
                with_dynamic,
                Some("_dyna\u{1}notinschema\u{1}attr\u{1}color"),
            ),
        ];
        for (user_supplied_name, default_field_opt, expected) in cases {
            let resolved = fast_field_readers
                .resolve_column_name_given_default_field(user_supplied_name, default_field_opt);
            assert_eq!(resolved.as_deref(), expected);
        }
    }
}
|
||||
|
||||
@@ -5,7 +5,6 @@ mod doc_opstamp_mapping;
|
||||
mod flat_map_with_buffer;
|
||||
pub mod index_writer;
|
||||
mod index_writer_status;
|
||||
mod json_term_writer;
|
||||
mod log_merge_policy;
|
||||
mod merge_operation;
|
||||
pub mod merge_policy;
|
||||
@@ -25,9 +24,6 @@ use crossbeam_channel as channel;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
pub use self::index_writer::IndexWriter;
|
||||
pub(crate) use self::json_term_writer::{
|
||||
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
|
||||
};
|
||||
pub use self::log_merge_policy::LogMergePolicy;
|
||||
pub use self::merge_operation::MergeOperation;
|
||||
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
|
||||
|
||||
@@ -3,10 +3,10 @@ use itertools::Itertools;
|
||||
|
||||
use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
|
||||
use super::operation::AddOperation;
|
||||
use crate::core::json_utils::index_json_values;
|
||||
use crate::core::Segment;
|
||||
use crate::fastfield::FastFieldsWriter;
|
||||
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
|
||||
use crate::indexer::json_term_writer::index_json_values;
|
||||
use crate::indexer::segment_serializer::SegmentSerializer;
|
||||
use crate::postings::{
|
||||
compute_table_size, serialize_postings, IndexingContext, IndexingPosition,
|
||||
@@ -435,8 +435,8 @@ mod tests {
|
||||
|
||||
use super::compute_initial_table_size;
|
||||
use crate::collector::Count;
|
||||
use crate::core::json_utils::JsonTermWriter;
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::indexer::json_term_writer::JsonTermWriter;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::query::PhraseQuery;
|
||||
use crate::schema::{IndexRecordOption, Schema, Type, STORED, STRING, TEXT};
|
||||
|
||||
@@ -9,10 +9,10 @@ use query_grammar::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLitera
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use super::logical_ast::*;
|
||||
use crate::core::Index;
|
||||
use crate::indexer::{
|
||||
use crate::core::json_utils::{
|
||||
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
|
||||
};
|
||||
use crate::core::Index;
|
||||
use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
|
||||
use crate::query::{
|
||||
AllQuery,
|
||||
|
||||
Reference in New Issue
Block a user