From 7fae4d98d749a565e6c998c322756c4e308be3cf Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 1 Mar 2023 16:27:46 +0900 Subject: [PATCH] Adapting for quickwit2 (#1912) * Adapting tantivy to make it possible to be plugged to quickwit. * Apply suggestions from code review Co-authored-by: PSeitz * Added unit test --------- Co-authored-by: PSeitz --- common/src/lib.rs | 1 - src/aggregation/mod.rs | 48 +++++ .../json_utils.rs} | 30 ++- src/core/mod.rs | 1 + src/core/segment_reader.rs | 2 +- src/fastfield/mod.rs | 152 ++++++++++++-- src/fastfield/readers.rs | 186 ++++++++++++++++-- src/indexer/mod.rs | 4 - src/indexer/segment_writer.rs | 4 +- src/query/query_parser/query_parser.rs | 4 +- 10 files changed, 387 insertions(+), 45 deletions(-) rename src/{indexer/json_term_writer.rs => core/json_utils.rs} (94%) diff --git a/common/src/lib.rs b/common/src/lib.rs index e9cf27b36..93e44a82b 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -113,7 +113,6 @@ pub fn u64_to_f64(val: u64) -> f64 { /// /// This function assumes that the needle is rarely contained in the bytes string /// and offers a fast path if the needle is not present. -#[inline(always)] pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) { if !bytes.contains(&needle) { return; diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index f3627499f..f3b327b61 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -344,6 +344,8 @@ mod tests { use super::agg_req::Aggregations; use super::*; + use crate::aggregation::agg_req::{Aggregation, BucketAggregation, BucketAggregationType}; + use crate::aggregation::bucket::TermsAggregation; use crate::indexer::NoMergePolicy; use crate::query::{AllQuery, TermQuery}; use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING}; @@ -591,4 +593,50 @@ mod tests { Ok(index) } + + #[test] + fn test_aggregation_on_json_object() { + let mut schema_builder = Schema::builder(); + let json = schema_builder.add_json_field("json", FAST); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(json => json!({"color": "red"}))) + .unwrap(); + index_writer + .add_document(doc!(json => json!({"color": "blue"}))) + .unwrap(); + index_writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let agg: Aggregations = vec![( + "jsonagg".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Terms(TermsAggregation { + field: "json.color".to_string(), + ..Default::default() + }), + sub_aggregation: Default::default(), + }), + )] + .into_iter() + .collect(); + let aggregation_collector = AggregationCollector::from_aggs(agg, None); + let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap(); + let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap(); + assert_eq!( + &aggregation_res_json, + &serde_json::json!({ + "jsonagg": { + "buckets": [ + {"doc_count": 1, "key": "blue"}, + {"doc_count": 1, "key": "red"} + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0 + } + }) + ); + } } diff --git a/src/indexer/json_term_writer.rs b/src/core/json_utils.rs similarity index 94% rename from src/indexer/json_term_writer.rs rename to src/core/json_utils.rs index 23dfcb251..b4459b066 100644 --- a/src/indexer/json_term_writer.rs +++ b/src/core/json_utils.rs @@ -5,7 +5,7 @@ use rustc_hash::FxHashMap; use crate::fastfield::FastValue; use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter}; -use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP}; +use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR}; use crate::schema::{Field, Type}; use crate::time::format_description::well_known::Rfc3339; use crate::time::{OffsetDateTime, UtcOffset}; @@ -200,7 +200,7 @@ fn infer_type_from_str(text: &str) -> TextOrDateTime { } } -// Tries to infer a JSON type from a string +// Tries to infer a JSON type from a string. pub(crate) fn convert_to_fast_value_and_get_term( json_term_writer: &mut JsonTermWriter, phrase: &str, @@ -296,6 +296,32 @@ fn split_json_path(json_path: &str) -> Vec { json_path_segments } +/// Takes a field name, a json path as supplied by a user, and whether we should expand dots, and +/// return a column key, as expected by the columnar crate. +/// +/// This function will detect unescaped dots in the path, and split over them. +/// If expand_dots is enabled, then even escaped dots will be split over. +/// +/// The resulting list of segment then gets stitched together, joined by \1 separator, +/// as defined in the columnar crate. +pub(crate) fn encode_column_name( + field_name: &str, + json_path: &str, + expand_dots_enabled: bool, +) -> String { + let mut column_key: String = String::with_capacity(field_name.len() + json_path.len() + 1); + column_key.push_str(field_name); + for mut segment in split_json_path(json_path) { + column_key.push_str(JSON_PATH_SEGMENT_SEP_STR); + if expand_dots_enabled { + // We need to replace `.` by JSON_PATH_SEGMENT_SEP. + unsafe { replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, segment.as_bytes_mut()) }; + } + column_key.push_str(&segment); + } + column_key +} + impl<'a> JsonTermWriter<'a> { pub fn from_field_and_json_path( field: Field, diff --git a/src/core/mod.rs b/src/core/mod.rs index 34832d97a..38976378d 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -2,6 +2,7 @@ mod executor; pub mod index; mod index_meta; mod inverted_index_reader; +pub mod json_utils; pub mod searcher; mod segment; mod segment_component; diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 1ed0a675b..698ea2f12 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -167,7 +167,7 @@ impl SegmentReader { let schema = segment.schema(); let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?; - let fast_fields_readers = FastFieldReaders::open(fast_fields_data)?; + let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?; let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 8a10788df..1b0d73e05 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -90,8 +90,8 @@ mod tests { use crate::directory::{Directory, RamDirectory, WritePtr}; use crate::merge_policy::NoMergePolicy; use crate::schema::{ - Document, Facet, FacetOptions, Field, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING, - TEXT, + Document, Facet, FacetOptions, Field, JsonObjectOptions, Schema, SchemaBuilder, FAST, + INDEXED, STORED, STRING, TEXT, }; use crate::time::OffsetDateTime; use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader}; @@ -131,7 +131,7 @@ mod tests { let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 161); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let column = fast_field_readers .u64("field") .unwrap() @@ -181,7 +181,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 189); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let col = fast_field_readers .u64("field") .unwrap() @@ -214,7 +214,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 162); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let fast_field_reader = fast_field_readers .u64("field") .unwrap() @@ -247,7 +247,7 @@ mod tests { let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 4557); { - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let col = fast_field_readers .u64("field") .unwrap() @@ -281,7 +281,7 @@ mod tests { assert_eq!(file.len(), 333_usize); { - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); let col = fast_field_readers .i64("field") .unwrap() @@ -318,7 +318,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); let col = fast_field_readers.i64("field").unwrap(); assert_eq!(col.first(0), None); @@ -351,7 +351,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); let col = fast_field_readers .date("date") .unwrap() @@ -387,7 +387,7 @@ mod tests { write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap(); let col = fast_field_readers .u64("field") .unwrap() @@ -773,7 +773,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 175); - let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_readers = FastFieldReaders::open(file, schema).unwrap(); let bool_col = fast_field_readers.bool("field_bool").unwrap(); assert_eq!(bool_col.first(0), Some(true)); assert_eq!(bool_col.first(1), Some(false)); @@ -805,7 +805,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 187); - let readers = FastFieldReaders::open(file).unwrap(); + let readers = FastFieldReaders::open(file, schema).unwrap(); let bool_col = readers.bool("field_bool").unwrap(); for i in 0..25 { assert_eq!(bool_col.first(i * 2), Some(true)); @@ -830,7 +830,7 @@ mod tests { } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 177); - let fastfield_readers = FastFieldReaders::open(file).unwrap(); + let fastfield_readers = FastFieldReaders::open(file, schema).unwrap(); let col = fastfield_readers.bool("field_bool").unwrap(); assert_eq!(col.first(0), None); let col = fastfield_readers @@ -892,7 +892,7 @@ mod tests { let directory = get_index(&docs[..], &schema).unwrap(); let path = Path::new("test"); let file = directory.open_read(path).unwrap(); - let readers = FastFieldReaders::open(file).unwrap(); + let readers = FastFieldReaders::open(file, schema).unwrap(); let col = readers.date("field").unwrap(); for (i, time) in times.iter().enumerate() { @@ -1068,13 +1068,133 @@ mod tests { let searcher = index.reader().unwrap().searcher(); let segment_reader = searcher.segment_reader(0u32); let fast_fields = segment_reader.fast_fields(); - let column_without_opt: Option = fast_fields.str("without\u{1}hello").unwrap(); + let column_without_opt: Option = fast_fields.str("without.hello").unwrap(); assert!(column_without_opt.is_none()); - let column_with_opt: Option = fast_fields.str("with\u{1}hello").unwrap(); + let column_with_opt: Option = fast_fields.str("with.hello").unwrap(); let column_with: StrColumn = column_with_opt.unwrap(); assert!(column_with.term_ords(0).next().is_none()); assert!(column_with.term_ords(1).eq([0])); assert!(column_with.term_ords(2).eq([2])); assert!(column_with.term_ords(3).eq([1])); } + + #[test] + fn test_fast_field_in_json_field_expand_dots_disabled() { + let mut schema_builder = Schema::builder(); + let json_option = JsonObjectOptions::default().set_fast(); + let json = schema_builder.add_json_field("json", json_option); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(json => json!({"attr.age": 32}))) + .unwrap(); + index_writer.commit().unwrap(); + let searcher = index.reader().unwrap().searcher(); + let fast_field_reader = searcher.segment_reader(0u32).fast_fields(); + assert!(fast_field_reader + .column_opt::("json.attr.age") + .unwrap() + .is_none()); + let column = fast_field_reader + .column_opt::(r#"json.attr\.age"#) + .unwrap() + .unwrap(); + let vals: Vec = column.values_for_doc(0u32).collect(); + assert_eq!(&vals, &[32]) + } + + #[test] + fn test_fast_field_in_json_field_expand_dots_enabled() { + let mut schema_builder = Schema::builder(); + let json_option = JsonObjectOptions::default() + .set_fast() + .set_expand_dots_enabled(); + let json = schema_builder.add_json_field("json", json_option); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(json => json!({"attr.age": 32}))) + .unwrap(); + index_writer.commit().unwrap(); + let searcher = index.reader().unwrap().searcher(); + let fast_field_reader = searcher.segment_reader(0u32).fast_fields(); + for test_column_name in &["json.attr.age", "json.attr\\.age"] { + let column = fast_field_reader + .column_opt::(test_column_name) + .unwrap() + .unwrap(); + let vals: Vec = column.values_for_doc(0u32).collect(); + assert_eq!(&vals, &[32]); + } + } + + #[test] + fn test_fast_field_dot_in_schema_field_name() { + let mut schema_builder = Schema::builder(); + let field_with_dot = schema_builder.add_i64_field("field.with.dot", FAST); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(field_with_dot => 32i64)) + .unwrap(); + index_writer.commit().unwrap(); + let searcher = index.reader().unwrap().searcher(); + let fast_field_reader = searcher.segment_reader(0u32).fast_fields(); + let column = fast_field_reader + .column_opt::("field.with.dot") + .unwrap() + .unwrap(); + let vals: Vec = column.values_for_doc(0u32).collect(); + assert_eq!(&vals, &[32]); + } + + #[test] + fn test_shadowing_fast_field() { + let mut schema_builder = Schema::builder(); + let json_field = schema_builder.add_json_field("jsonfield", FAST); + let shadowing_json_field = schema_builder.add_json_field("jsonfield.attr", FAST); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(json_field=> json!({"attr": {"age": 32}}), shadowing_json_field=>json!({"age": 33}))) + .unwrap(); + index_writer.commit().unwrap(); + let searcher = index.reader().unwrap().searcher(); + let fast_field_reader = searcher.segment_reader(0u32).fast_fields(); + let column = fast_field_reader + .column_opt::(&"jsonfield.attr.age") + .unwrap() + .unwrap(); + let vals: Vec = column.values_for_doc(0u32).collect(); + assert_eq!(&vals, &[33]); + } + + #[test] + fn test_shadowing_fast_field_with_expand_dots() { + let mut schema_builder = Schema::builder(); + let json_option = JsonObjectOptions::default() + .set_fast() + .set_expand_dots_enabled(); + let json_field = schema_builder.add_json_field("jsonfield", json_option.clone()); + let shadowing_json_field = schema_builder.add_json_field("jsonfield.attr", json_option); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer + .add_document(doc!(json_field=> json!({"attr.age": 32}), shadowing_json_field=>json!({"age": 33}))) + .unwrap(); + index_writer.commit().unwrap(); + let searcher = index.reader().unwrap().searcher(); + let fast_field_reader = searcher.segment_reader(0u32).fast_fields(); + let column = fast_field_reader + .column_opt::(&"jsonfield.attr.age") + .unwrap() + .unwrap(); + let vals: Vec = column.values_for_doc(0u32).collect(); + assert_eq!(&vals, &[33]); + } } diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 97523335f..f0b3d5f1b 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -7,8 +7,9 @@ use columnar::{ DynamicColumnHandle, HasAssociatedColumnType, StrColumn, }; +use crate::core::json_utils::encode_column_name; use crate::directory::FileSlice; -use crate::schema::Schema; +use crate::schema::{Field, FieldEntry, FieldType, Schema}; use crate::space_usage::{FieldUsage, PerFieldSpaceUsage}; /// Provides access to all of the BitpackedFastFieldReader. @@ -18,16 +19,22 @@ use crate::space_usage::{FieldUsage, PerFieldSpaceUsage}; #[derive(Clone)] pub struct FastFieldReaders { columnar: Arc, + schema: Schema, } impl FastFieldReaders { - pub(crate) fn open(fast_field_file: FileSlice) -> io::Result { + pub(crate) fn open(fast_field_file: FileSlice, schema: Schema) -> io::Result { let columnar = Arc::new(ColumnarReader::open(fast_field_file)?); - Ok(FastFieldReaders { columnar }) + Ok(FastFieldReaders { columnar, schema }) } - pub(crate) fn columnar(&self) -> &ColumnarReader { - self.columnar.as_ref() + fn resolve_field(&self, column_name: &str) -> Option { + let default_field_opt: Option = if cfg!(feature = "quickwit") { + self.schema.get_field("_dynamic").ok() + } else { + None + }; + self.resolve_column_name_given_default_field(column_name, default_field_opt) } pub(crate) fn space_usage(&self, schema: &Schema) -> io::Result { @@ -46,6 +53,59 @@ impl FastFieldReaders { Ok(PerFieldSpaceUsage::new(per_field_usages)) } + pub(crate) fn columnar(&self) -> &ColumnarReader { + self.columnar.as_ref() + } + + /// Transforms a user-supplied fast field name into a column name. + /// + /// A user-supplied fast field name is not necessarily a schema field name + /// because we handle fast fields. + /// + /// For instance, if the documents look like `{.., "attributes": {"color": "red"}}` and + /// `attributes` is a json fast field, a user could want to run a term aggregation over + /// colors, by referring to the field as `attributes.color`. + /// + /// This function transforms `attributes.color` into a column key to be used in the `columnar`. + /// + /// The logic works as follows, first we identify which field is targetted by calling + /// `schema.find_field(..)`. This method will attempt to split the user splied fast field + /// name by non-escaped dots, and find the longest matching schema field name. + /// In our case, it would return the (attribute_field, "color"). + /// + /// If no field is found, but a dynamic field is supplied, then we + /// will simply assuem the user is targetting the dynamic field. (This feature is used in + /// Quickwit.) + /// + /// We then encode the `(field, path)` into the right `columnar_key`. + fn resolve_column_name_given_default_field<'a>( + &'a self, + field_name: &'a str, + default_field_opt: Option, + ) -> Option { + let (field, path): (Field, &str) = self + .schema + .find_field(field_name) + .or_else(|| default_field_opt.map(|default_field| (default_field, field_name)))?; + let field_name = self.schema.get_field_name(field); + if path.is_empty() { + return Some(field_name.to_string()); + } + let field_entry: &FieldEntry = self.schema.get_field_entry(field); + let field_type = field_entry.field_type(); + match (field_type, path) { + (FieldType::JsonObject(json_options), path) if !path.is_empty() => { + Some(encode_column_name( + field_entry.name(), + path, + json_options.is_expand_dots_enabled(), + )) + } + (_, "") => Some(field_entry.name().to_string()), + _ => None, + } + } + /// Returns a typed column associated to a given field name. /// /// If no column associated with that field_name exists, @@ -53,11 +113,10 @@ impl FastFieldReaders { /// returns `None`. pub fn column_opt(&self, field_name: &str) -> crate::Result>> where - T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + 'static, + T: HasAssociatedColumnType, DynamicColumn: Into>>, { - let column_type = T::column_type(); - let Some(dynamic_column_handle) = self.dynamic_column_handle(field_name, column_type)? + let Some(dynamic_column_handle) = self.dynamic_column_handle(field_name, T::column_type())? else { return Ok(None); }; @@ -66,10 +125,15 @@ impl FastFieldReaders { } /// Returns the number of `bytes` associated with a column. + /// + /// Returns 0 if the column does not exist. pub fn column_num_bytes(&self, field: &str) -> crate::Result { + let Some(resolved_field_name) = self.resolve_field(field) else { + return Ok(0); + }; Ok(self .columnar - .read_columns(field)? + .read_columns(&resolved_field_name)? .into_iter() .map(|column_handle| column_handle.num_bytes()) .sum()) @@ -152,23 +216,38 @@ impl FastFieldReaders { field_name: &str, column_type: ColumnType, ) -> crate::Result> { + let Some(resolved_field_name) = self.resolve_field(field_name) else { + return Ok(None); + }; let dynamic_column_handle_opt = self .columnar - .read_columns(field_name)? + .read_columns(&resolved_field_name)? .into_iter() .find(|column| column.column_type() == column_type); Ok(dynamic_column_handle_opt) } + #[doc(hidden)] + pub async fn list_dynamic_column_handles( + &self, + field_name: &str, + ) -> crate::Result> { + let Some(resolved_field_name) = self.resolve_field(field_name) else { + return Ok(Vec::new()); + }; + let columns = self + .columnar + .read_columns_async(&resolved_field_name) + .await?; + Ok(columns) + } + /// Returns the `u64` column used to represent any `u64`-mapped typed (i64, u64, f64, DateTime). #[doc(hidden)] pub fn u64_lenient(&self, field_name: &str) -> crate::Result>> { - for col in self.columnar.read_columns(field_name)? { - if let Some(col_u64) = col.open_u64_lenient()? { - return Ok(Some(col_u64)); - } - } - Ok(None) + Ok(self + .u64_lenient_with_type(field_name)? + .map(|(u64_column, _)| u64_column)) } /// Returns the `u64` column used to represent any `u64`-mapped typed (i64, u64, f64, DateTime). @@ -177,7 +256,10 @@ impl FastFieldReaders { &self, field_name: &str, ) -> crate::Result, ColumnType)>> { - for col in self.columnar.read_columns(field_name)? { + let Some(resolved_field_name) = self.resolve_field(field_name) else { + return Ok(None); + }; + for col in self.columnar.read_columns(&resolved_field_name)? { if let Some(col_u64) = col.open_u64_lenient()? { return Ok(Some((col_u64, col.column_type()))); } @@ -206,3 +288,73 @@ impl FastFieldReaders { self.column(field_name) } } + +#[cfg(test)] +mod tests { + use crate::schema::{JsonObjectOptions, Schema, FAST}; + use crate::{Document, Index}; + + #[test] + fn test_fast_field_reader_resolve_with_dynamic_internal() { + let mut schema_builder = Schema::builder(); + schema_builder.add_i64_field("age", FAST); + schema_builder.add_json_field("json_expand_dots_disabled", FAST); + schema_builder.add_json_field( + "json_expand_dots_enabled", + JsonObjectOptions::default() + .set_fast() + .set_expand_dots_enabled(), + ); + let dynamic_field = schema_builder.add_json_field("_dyna", FAST); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer.add_document(Document::default()).unwrap(); + index_writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let reader = searcher.segment_reader(0u32); + let fast_field_readers = reader.fast_fields(); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field("age", None), + Some("age".to_string()) + ); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field("age", Some(dynamic_field)), + Some("age".to_string()) + ); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field( + "json_expand_dots_disabled.attr.color", + None + ), + Some("json_expand_dots_disabled\u{1}attr\u{1}color".to_string()) + ); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field( + "json_expand_dots_disabled.attr\\.color", + Some(dynamic_field) + ), + Some("json_expand_dots_disabled\u{1}attr.color".to_string()) + ); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field( + "json_expand_dots_enabled.attr\\.color", + Some(dynamic_field) + ), + Some("json_expand_dots_enabled\u{1}attr\u{1}color".to_string()) + ); + assert_eq!( + fast_field_readers + .resolve_column_name_given_default_field("notinschema.attr.color", None), + None + ); + assert_eq!( + fast_field_readers.resolve_column_name_given_default_field( + "notinschema.attr.color", + Some(dynamic_field) + ), + Some("_dyna\u{1}notinschema\u{1}attr\u{1}color".to_string()) + ); + } +} diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index 7152cbcb8..53cdcdaf5 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -5,7 +5,6 @@ mod doc_opstamp_mapping; mod flat_map_with_buffer; pub mod index_writer; mod index_writer_status; -mod json_term_writer; mod log_merge_policy; mod merge_operation; pub mod merge_policy; @@ -25,9 +24,6 @@ use crossbeam_channel as channel; use smallvec::SmallVec; pub use self::index_writer::IndexWriter; -pub(crate) use self::json_term_writer::{ - convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter, -}; pub use self::log_merge_policy::LogMergePolicy; pub use self::merge_operation::MergeOperation; pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy}; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 39f34f014..d60c640d9 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -3,10 +3,10 @@ use itertools::Itertools; use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping}; use super::operation::AddOperation; +use crate::core::json_utils::index_json_values; use crate::core::Segment; use crate::fastfield::FastFieldsWriter; use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter}; -use crate::indexer::json_term_writer::index_json_values; use crate::indexer::segment_serializer::SegmentSerializer; use crate::postings::{ compute_table_size, serialize_postings, IndexingContext, IndexingPosition, @@ -435,8 +435,8 @@ mod tests { use super::compute_initial_table_size; use crate::collector::Count; + use crate::core::json_utils::JsonTermWriter; use crate::directory::RamDirectory; - use crate::indexer::json_term_writer::JsonTermWriter; use crate::postings::TermInfo; use crate::query::PhraseQuery; use crate::schema::{IndexRecordOption, Schema, Type, STORED, STRING, TEXT}; diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 4a29ff3c6..88417c0de 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -9,10 +9,10 @@ use query_grammar::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLitera use rustc_hash::FxHashMap; use super::logical_ast::*; -use crate::core::Index; -use crate::indexer::{ +use crate::core::json_utils::{ convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter, }; +use crate::core::Index; use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery}; use crate::query::{ AllQuery,