From 29c1a76d5acaa5fa33d1eba634185e6c02c2b860 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 17 Jan 2023 16:34:56 +0900 Subject: [PATCH] Removed cardinality from fast field options. --- columnar/src/columnar/writer/mod.rs | 9 + examples/aggregation.rs | 4 +- examples/date_time_field.rs | 4 +- src/aggregation/metric/mod.rs | 4 +- src/aggregation/mod.rs | 10 +- src/core/index.rs | 2 +- src/fastfield/mod.rs | 4 +- src/fastfield/multivalued/mod.rs | 18 +- src/fastfield/multivalued/reader.rs | 6 +- src/fastfield/writer.rs | 320 ++++++------------ src/indexer/doc_id_mapping.rs | 59 ++-- src/indexer/index_writer.rs | 8 +- src/indexer/merger.rs | 47 ++- src/indexer/merger_sorted_index_test.rs | 8 +- src/indexer/segment_writer.rs | 44 +-- .../range_query/range_query_ip_fastfield.rs | 2 +- .../range_query/range_query_u64_fastfield.rs | 6 +- src/schema/date_time_options.rs | 39 +-- src/schema/ip_options.rs | 26 +- src/schema/numeric_options.rs | 12 +- src/schema/schema.rs | 21 +- 21 files changed, 253 insertions(+), 400 deletions(-) diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 962aaf8c0..e2915af63 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -85,6 +85,15 @@ fn mutate_or_create_column( } impl ColumnarWriter { + + pub fn mem_usage(&self) -> usize { + // TODO add dictionary builders. + self.arena.mem_usage() + + self.numerical_field_hash_map.mem_usage() + + self.bool_field_hash_map.mem_usage() + + self.bytes_field_hash_map.mem_usage() + } + pub fn force_numerical_type(&mut self, column_name: &str, numerical_type: NumericalType) { let (hash_map, _) = (&mut self.numerical_field_hash_map, &mut self.arena); mutate_or_create_column( diff --git a/examples/aggregation.rs b/examples/aggregation.rs index d7b763788..1cc7c0e96 100644 --- a/examples/aggregation.rs +++ b/examples/aggregation.rs @@ -13,7 +13,7 @@ use tantivy::aggregation::agg_result::AggregationResults; use tantivy::aggregation::metric::AverageAggregation; use tantivy::aggregation::AggregationCollector; use tantivy::query::TermQuery; -use tantivy::schema::{self, Cardinality, IndexRecordOption, Schema, TextFieldIndexing}; +use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing}; use tantivy::{doc, Index, Term}; fn main() -> tantivy::Result<()> { @@ -25,7 +25,7 @@ fn main() -> tantivy::Result<()> { .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let score_fieldtype = - crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + crate::schema::NumericOptions::default().set_fast(); let highscore_field = schema_builder.add_f64_field("highscore", score_fieldtype.clone()); let price_field = schema_builder.add_f64_field("price", score_fieldtype); diff --git a/examples/date_time_field.rs b/examples/date_time_field.rs index 4381ed34c..b42d4208f 100644 --- a/examples/date_time_field.rs +++ b/examples/date_time_field.rs @@ -4,7 +4,7 @@ use tantivy::collector::TopDocs; use tantivy::query::QueryParser; -use tantivy::schema::{Cardinality, DateOptions, Schema, Value, INDEXED, STORED, STRING}; +use tantivy::schema::{DateOptions, Schema, Value, INDEXED, STORED, STRING}; use tantivy::Index; fn main() -> tantivy::Result<()> { @@ -12,7 +12,7 @@ fn main() -> tantivy::Result<()> { let mut schema_builder = Schema::builder(); let opts = DateOptions::from(INDEXED) .set_stored() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_precision(tantivy::DatePrecision::Seconds); let occurred_at = schema_builder.add_date_field("occurred_at", opts); let event_type = schema_builder.add_text_field("event", STRING | STORED); diff --git a/src/aggregation/metric/mod.rs b/src/aggregation/metric/mod.rs index 0d1ad056d..a13994209 100644 --- a/src/aggregation/metric/mod.rs +++ b/src/aggregation/metric/mod.rs @@ -43,13 +43,13 @@ mod tests { use crate::aggregation::agg_result::AggregationResults; use crate::aggregation::AggregationCollector; use crate::query::AllQuery; - use crate::schema::{Cardinality, NumericOptions, Schema}; + use crate::schema::{NumericOptions, Schema}; use crate::Index; #[test] fn test_metric_aggregations() { let mut schema_builder = Schema::builder(); - let field_options = NumericOptions::default().set_fast(Cardinality::SingleValue); + let field_options = NumericOptions::default().set_fast(); let field = schema_builder.add_f64_field("price", field_options); let index = Index::create_in_ram(schema_builder.build()); let mut index_writer = index.writer_for_tests().unwrap(); diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index df1687161..9a81fb9ac 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -430,13 +430,13 @@ mod tests { let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype); let string_field_id = schema_builder.add_text_field("string_id", STRING | FAST); let score_fieldtype = - crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + crate::schema::NumericOptions::default().set_fast(); let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone()); let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype); let fraction_field = schema_builder.add_f64_field( "fraction_f64", - crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue), + crate::schema::NumericOptions::default().set_fast(), ); let index = Index::create_in_ram(schema_builder.build()); { @@ -654,12 +654,12 @@ mod tests { let date_field = schema_builder.add_date_field("date", FAST); schema_builder.add_text_field("dummy_text", STRING); let score_fieldtype = - crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + crate::schema::NumericOptions::default().set_fast(); let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone()); let multivalue = - crate::schema::NumericOptions::default().set_fast(Cardinality::MultiValues); + crate::schema::NumericOptions::default().set_fast(); let scores_field_i64 = schema_builder.add_i64_field("scores_i64", multivalue); let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype); @@ -1187,7 +1187,7 @@ mod tests { let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST); let score_fieldtype = - crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + crate::schema::NumericOptions::default().set_fast(); let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone()); diff --git a/src/core/index.rs b/src/core/index.rs index 2ee43b05e..46ccc895d 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -93,7 +93,7 @@ fn save_new_metas( /// let body_field = schema_builder.add_text_field("body", TEXT); /// let number_field = schema_builder.add_u64_field( /// "number", -/// NumericOptions::default().set_fast(Cardinality::SingleValue), +/// NumericOptions::default().set_fast(), /// ); /// /// let schema = schema_builder.build(); diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 5ba84ce92..94bd4e2ac 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -749,7 +749,7 @@ mod tests { "multi_date", DateOptions::default() .set_precision(DatePrecision::Microseconds) - .set_fast(Cardinality::MultiValues), + .set_fast(), ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -962,7 +962,7 @@ mod tests { .take(1_000) .collect(); let date_options = DateOptions::default() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_precision(precision); let mut schema_builder = SchemaBuilder::default(); let field = schema_builder.add_date_field("field", date_options); diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 108dd2db1..7cc67e339 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -38,7 +38,7 @@ mod tests { let mut schema_builder = Schema::builder(); let field = schema_builder.add_u64_field( "multifield", - NumericOptions::default().set_fast(Cardinality::MultiValues), + NumericOptions::default().set_fast(), ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -74,7 +74,7 @@ mod tests { let date_field = schema_builder.add_date_field( "multi_date_field", DateOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed() .set_fieldnorm() .set_stored(), @@ -215,7 +215,7 @@ mod tests { let mut schema_builder = Schema::builder(); let field = schema_builder.add_i64_field( "multifield", - NumericOptions::default().set_fast(Cardinality::MultiValues), + NumericOptions::default().set_fast(), ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -246,7 +246,7 @@ mod tests { let mut schema_builder = Schema::builder(); let bool_field = schema_builder.add_bool_field( "multifield", - NumericOptions::default().set_fast(Cardinality::MultiValues), + NumericOptions::default().set_fast(), ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -278,7 +278,7 @@ mod tests { let field = schema_builder.add_u64_field( "multifield", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); let schema = schema_builder.build(); @@ -424,7 +424,7 @@ mod bench { let mut builder = crate::schema::SchemaBuilder::new(); let fast_multi = - crate::schema::NumericOptions::default().set_fast(Cardinality::MultiValues); + crate::schema::NumericOptions::default().set_fast(); let multi_field = builder.add_f64_field("f64s", fast_multi); let index = crate::Index::create_in_ram(builder.build()); @@ -504,7 +504,7 @@ mod bench { let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); let field = { - let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + let options = NumericOptions::default().set_fast(); let mut schema_builder = Schema::builder(); let field = schema_builder.add_u64_field("field", options); let schema = schema_builder.build(); @@ -562,7 +562,7 @@ mod bench { b.iter(|| { let directory: RamDirectory = RamDirectory::create(); - let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + let options = NumericOptions::default().set_fast(); let mut schema_builder = Schema::builder(); let field = schema_builder.add_u64_field("field", options); let schema = schema_builder.build(); @@ -595,7 +595,7 @@ mod bench { b.iter(|| { let directory: RamDirectory = RamDirectory::create(); - let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + let options = NumericOptions::default().set_fast(); let mut schema_builder = Schema::builder(); let field = schema_builder.add_u64_field("field", options); let schema = schema_builder.build(); diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index fe7dcceb7..5f7ba2e39 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -137,7 +137,7 @@ mod tests { let date_field = schema_builder.add_date_field( "multi_date_field", DateOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed() .set_fieldnorm() .set_precision(DatePrecision::Microseconds) @@ -188,7 +188,7 @@ mod tests { let date_field = schema_builder.add_date_field( "multi_date_field", DateOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() // TODO: Test different precision after fixing https://github.com/quickwit-oss/tantivy/issues/1783 .set_precision(DatePrecision::Microseconds) .set_indexed() @@ -307,7 +307,7 @@ mod tests { let mut schema_builder = Schema::builder(); let field_options = NumericOptions::default() .set_indexed() - .set_fast(Cardinality::MultiValues); + .set_fast(); let item_field = schema_builder.add_i64_field("items", field_options); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 6b8f7a8c4..9d063df61 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::io; +use columnar::{ColumnarWriter, NumericalType}; use common; use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64}; use rustc_hash::FxHashMap; @@ -17,12 +18,14 @@ use crate::DatePrecision; /// The `FastFieldsWriter` groups all of the fast field writers. pub struct FastFieldsWriter { - term_id_writers: Vec, - single_value_writers: Vec, - u128_value_writers: Vec, - u128_multi_value_writers: Vec, - multi_values_writers: Vec, - bytes_value_writers: Vec, + columnar_writer: ColumnarWriter, + fast_fields: Vec>, + // term_id_writers: Vec, + // single_value_writers: Vec, + // u128_value_writers: Vec, + // u128_multi_value_writers: Vec, + // multi_values_writers: Vec, + // bytes_value_writers: Vec, } pub(crate) fn unexpected_value(expected: &str, actual: &Value) -> crate::TantivyError { @@ -40,214 +43,96 @@ fn fast_field_default_value(field_entry: &FieldEntry) -> u64 { } } +enum FastFieldTyp { + Numerical(NumericalType), + Other, +} + +fn fast_numerical_type(field_type: &FieldType) -> Option { + // TODO + match field_type { + FieldType::U64(numerical_option) => { + if numerical_option.is_fast() { + Some(FastFieldTyp::Numerical(NumericalType::U64)) + } else { + None + } + }, + FieldType::I64(numerical_option) => { + if numerical_option.is_fast() { + Some(FastFieldTyp::Numerical(NumericalType::I64)) + } else { + None + } + }, + FieldType::F64(numerical_option) => { + if numerical_option.is_fast() { + Some(FastFieldTyp::Numerical(NumericalType::F64)) + } else { + None + } + }, + FieldType::Str(str_option) => { + if str_option.is_fast() { + Some(FastFieldTyp::Other) + } else { + None + } + }, + FieldType::Bool(int_options) => { + if int_options.is_fast() { + Some(FastFieldTyp::Other) + } else { + None + } + }, + FieldType::Date(date_options) => { + if date_options.is_fast() { + Some(FastFieldTyp::Other) + } else { + None + } + }, + FieldType::Facet(_) => todo!(), + FieldType::Bytes(_) => todo!(), + FieldType::JsonObject(_) => todo!(), + FieldType::IpAddr(_) => todo!(), + + + } +} + impl FastFieldsWriter { /// Create all `FastFieldWriter` required by the schema. pub fn from_schema(schema: &Schema) -> FastFieldsWriter { - let mut u128_value_writers = Vec::new(); - let mut u128_multi_value_writers = Vec::new(); - let mut single_value_writers = Vec::new(); - let mut term_id_writers = Vec::new(); - let mut multi_values_writers = Vec::new(); - let mut bytes_value_writers = Vec::new(); - + let mut columnar_writer = ColumnarWriter::default(); + let mut fast_fields = vec![None; schema.num_fields()]; + // TODO see other types for (field, field_entry) in schema.fields() { - match field_entry.field_type() { - FieldType::I64(ref int_options) - | FieldType::U64(ref int_options) - | FieldType::F64(ref int_options) - | FieldType::Bool(ref int_options) => { - todo!(); - // match int_options.get_fastfield_cardinality() { - // Some(Cardinality::SingleValue) => { - // let mut fast_field_writer = IntFastFieldWriter::new(field, None); - // let default_value = fast_field_default_value(field_entry); - // fast_field_writer.set_val_if_missing(default_value); - // single_value_writers.push(fast_field_writer); - // } - // Some(Cardinality::MultiValues) => { - // let fast_field_writer = MultiValuedFastFieldWriter::new( - // field, - // FastFieldType::Numeric, - // None, - // ); - // multi_values_writers.push(fast_field_writer); - // } - // None => {} - // } + if let Some(fast_field_typ) =fast_numerical_type(field_entry.field_type()) { + match fast_field_typ { + FastFieldTyp::Numerical(numerical_type) => { + columnar_writer.force_numerical_type(field_entry.name(), numerical_type); + }, + FastFieldTyp::Other => {}, } - FieldType::Date(ref options) => match options.get_fastfield_cardinality() { - Some(Cardinality::SingleValue) => { - let mut fast_field_writer = - IntFastFieldWriter::new(field, Some(options.get_precision())); - let default_value = fast_field_default_value(field_entry); - fast_field_writer.set_val_if_missing(default_value); - single_value_writers.push(fast_field_writer); - } - Some(Cardinality::MultiValues) => { - let fast_field_writer = MultiValuedFastFieldWriter::new( - field, - FastFieldType::Numeric, - Some(options.get_precision()), - ); - multi_values_writers.push(fast_field_writer); - } - None => {} - }, - FieldType::Facet(_) => { - let fast_field_writer = - MultiValuedFastFieldWriter::new(field, FastFieldType::Facet, None); - term_id_writers.push(fast_field_writer); - } - FieldType::Str(_) if field_entry.is_fast() => { - let fast_field_writer = - MultiValuedFastFieldWriter::new(field, FastFieldType::String, None); - term_id_writers.push(fast_field_writer); - } - FieldType::Bytes(bytes_option) => { - if bytes_option.is_fast() { - let fast_field_writer = BytesFastFieldWriter::new(field); - bytes_value_writers.push(fast_field_writer); - } - } - FieldType::IpAddr(opt) => { - if opt.is_fast() { - match opt.get_fastfield_cardinality() { - Some(Cardinality::SingleValue) => { - let fast_field_writer = U128FastFieldWriter::new(field); - u128_value_writers.push(fast_field_writer); - } - Some(Cardinality::MultiValues) => { - let fast_field_writer = MultiValueU128FastFieldWriter::new(field); - u128_multi_value_writers.push(fast_field_writer); - } - None => {} - } - } - } - FieldType::Str(_) | FieldType::JsonObject(_) => {} + fast_fields[field.field_id() as usize] = Some(field_entry.name().to_string()); } } FastFieldsWriter { - u128_value_writers, - u128_multi_value_writers, - term_id_writers, - single_value_writers, - multi_values_writers, - bytes_value_writers, + columnar_writer, + fast_fields, } } /// The memory used (inclusive childs) pub fn mem_usage(&self) -> usize { - self.term_id_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() - + self - .single_value_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() - + self - .multi_values_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() - + self - .bytes_value_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() - + self - .u128_value_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() - + self - .u128_multi_value_writers - .iter() - .map(|w| w.mem_usage()) - .sum::() + self.columnar_writer.mem_usage() } - /// Get the `FastFieldWriter` associated with a field. - pub fn get_term_id_writer(&self, field: Field) -> Option<&MultiValuedFastFieldWriter> { - // TODO optimize - self.term_id_writers - .iter() - .find(|field_writer| field_writer.field() == field) - } - - /// Get the `FastFieldWriter` associated with a field. - pub fn get_field_writer(&self, field: Field) -> Option<&IntFastFieldWriter> { - // TODO optimize - self.single_value_writers - .iter() - .find(|field_writer| field_writer.field() == field) - } - - /// Get the `FastFieldWriter` associated with a field. - pub fn get_field_writer_mut(&mut self, field: Field) -> Option<&mut IntFastFieldWriter> { - // TODO optimize - self.single_value_writers - .iter_mut() - .find(|field_writer| field_writer.field() == field) - } - - /// Get the `FastFieldWriter` associated with a field. - pub fn get_term_id_writer_mut( - &mut self, - field: Field, - ) -> Option<&mut MultiValuedFastFieldWriter> { - // TODO optimize - self.term_id_writers - .iter_mut() - .find(|field_writer| field_writer.field() == field) - } - - /// Returns the fast field multi-value writer for the given field. - /// - /// Returns `None` if the field does not exist, or is not - /// configured as a multivalued fastfield in the schema. - pub fn get_multivalue_writer_mut( - &mut self, - field: Field, - ) -> Option<&mut MultiValuedFastFieldWriter> { - // TODO optimize - self.multi_values_writers - .iter_mut() - .find(|multivalue_writer| multivalue_writer.field() == field) - } - - /// Returns the bytes fast field writer for the given field. - /// - /// Returns `None` if the field does not exist, or is not - /// configured as a bytes fastfield in the schema. - pub fn get_bytes_writer_mut(&mut self, field: Field) -> Option<&mut BytesFastFieldWriter> { - // TODO optimize - self.bytes_value_writers - .iter_mut() - .find(|field_writer| field_writer.field() == field) - } /// Indexes all of the fastfields of a new document. pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { - for field_writer in &mut self.term_id_writers { - field_writer.add_document(doc)?; - } - for field_writer in &mut self.single_value_writers { - field_writer.add_document(doc)?; - } - for field_writer in &mut self.multi_values_writers { - field_writer.add_document(doc)?; - } - for field_writer in &mut self.bytes_value_writers { - field_writer.add_document(doc)?; - } - for field_writer in &mut self.u128_value_writers { - field_writer.add_document(doc)?; - } - for field_writer in &mut self.u128_multi_value_writers { - field_writer.add_document(doc)?; + for field_value in doc.field_values() { } Ok(()) } @@ -260,27 +145,28 @@ impl FastFieldsWriter { mapping: &HashMap>, doc_id_map: Option<&DocIdMapping>, ) -> io::Result<()> { - for field_writer in self.term_id_writers { - let field = field_writer.field(); - field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; - } - for field_writer in &self.single_value_writers { - field_writer.serialize(serializer, doc_id_map)?; - } + todo!(); + // for field_writer in self.term_id_writers { + // let field = field_writer.field(); + // field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; + // } + // for field_writer in &self.single_value_writers { + // field_writer.serialize(serializer, doc_id_map)?; + // } - for field_writer in self.multi_values_writers { - let field = field_writer.field(); - field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; - } - for field_writer in self.bytes_value_writers { - field_writer.serialize(serializer, doc_id_map)?; - } - for field_writer in self.u128_value_writers { - field_writer.serialize(serializer, doc_id_map)?; - } - for field_writer in self.u128_multi_value_writers { - field_writer.serialize(serializer, doc_id_map)?; - } + // for field_writer in self.multi_values_writers { + // let field = field_writer.field(); + // field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; + // } + // for field_writer in self.bytes_value_writers { + // field_writer.serialize(serializer, doc_id_map)?; + // } + // for field_writer in self.u128_value_writers { + // field_writer.serialize(serializer, doc_id_map)?; + // } + // for field_writer in self.u128_multi_value_writers { + // field_writer.serialize(serializer, doc_id_map)?; + // } Ok(()) } diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index e359bb2a2..1fd2a90cb 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -113,34 +113,35 @@ pub(crate) fn get_doc_id_mapping_from_field( sort_by_field: IndexSortByField, segment_writer: &SegmentWriter, ) -> crate::Result { - let schema = segment_writer.segment_serializer.segment().schema(); - let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required - let fast_field = segment_writer - .fast_field_writers - .get_field_writer(field_id) - .ok_or_else(|| { - TantivyError::InvalidArgument(format!( - "sort index by field is required to be a fast field {:?}", - sort_by_field.field - )) - })?; + todo!() + // let schema = segment_writer.segment_serializer.segment().schema(); + // let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required + // let fast_field = segment_writer + // .fast_field_writers + // .get_field_writer(field_id) + // .ok_or_else(|| { + // TantivyError::InvalidArgument(format!( + // "sort index by field is required to be a fast field {:?}", + // sort_by_field.field + // )) + // })?; - // create new doc_id to old doc_id index (used in fast_field_writers) - let mut doc_id_and_data = fast_field - .iter() - .enumerate() - .map(|el| (el.0 as DocId, el.1)) - .collect::>(); - if sort_by_field.order == Order::Desc { - doc_id_and_data.sort_by_key(|k| Reverse(k.1)); - } else { - doc_id_and_data.sort_by_key(|k| k.1); - } - let new_doc_id_to_old = doc_id_and_data - .into_iter() - .map(|el| el.0) - .collect::>(); - Ok(DocIdMapping::from_new_id_to_old_id(new_doc_id_to_old)) + // // create new doc_id to old doc_id index (used in fast_field_writers) + // let mut doc_id_and_data = fast_field + // .iter() + // .enumerate() + // .map(|el| (el.0 as DocId, el.1)) + // .collect::>(); + // if sort_by_field.order == Order::Desc { + // doc_id_and_data.sort_by_key(|k| Reverse(k.1)); + // } else { + // doc_id_and_data.sort_by_key(|k| k.1); + // } + // let new_doc_id_to_old = doc_id_and_data + // .into_iter() + // .map(|el| el.0) + // .collect::>(); + // Ok(DocIdMapping::from_new_id_to_old_id(new_doc_id_to_old)) } #[cfg(test)] @@ -161,12 +162,12 @@ mod tests_indexsorting { let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED); let my_number = schema_builder.add_u64_field( "my_number", - NumericOptions::default().set_fast(Cardinality::SingleValue), + NumericOptions::default().set_fast(), ); let multi_numbers = schema_builder.add_u64_field( "multi_numbers", - NumericOptions::default().set_fast(Cardinality::MultiValues), + NumericOptions::default().set_fast(), ); let schema = schema_builder.build(); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index abe97d58a..bb7156578 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1398,7 +1398,7 @@ mod tests { #[test] fn test_sort_by_multivalue_field_error() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); - let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + let options = NumericOptions::default().set_fast(); schema_builder.add_u64_field("id", options); let schema = schema_builder.build(); @@ -1616,7 +1616,7 @@ mod tests { let ips_field = schema_builder.add_ip_addr_field( "ips", IpAddrOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED); @@ -1641,13 +1641,13 @@ mod tests { let multi_numbers = schema_builder.add_u64_field( "multi_numbers", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_stored(), ); let multi_bools = schema_builder.add_bool_field( "multi_bools", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_stored(), ); let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 3ce2eb2a8..af7312aed 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -276,36 +276,27 @@ impl IndexMerger { | FieldType::Bool(ref options) => { todo!() } - FieldType::Date(ref options) => match options.get_fastfield_cardinality() { - Some(Cardinality::SingleValue) => { - self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?; + FieldType::Date(ref options) => { + if options.is_fast() { + todo!(); } - Some(Cardinality::MultiValues) => { - self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?; - } - None => {} + // Some(Cardinality::SingleValue) => { + // self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?; + // } + // Some(Cardinality::MultiValues) => { + // self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?; + // } + // None => {} }, FieldType::Bytes(byte_options) => { if byte_options.is_fast() { self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?; } } - FieldType::IpAddr(options) => match options.get_fastfield_cardinality() { - Some(Cardinality::SingleValue) => { - self.write_u128_single_fast_field( - field, - fast_field_serializer, - doc_id_mapping, - )?; + FieldType::IpAddr(options) => { + if options.is_fast() { + todo!(); } - Some(Cardinality::MultiValues) => { - self.write_u128_multi_fast_field( - field, - fast_field_serializer, - doc_id_mapping, - )?; - } - None => {} }, FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => { @@ -1094,7 +1085,7 @@ mod tests { .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let date_field = schema_builder.add_date_field("date", INDEXED); - let score_fieldtype = schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + let score_fieldtype = schema::NumericOptions::default().set_fast(); let score_field = schema_builder.add_u64_field("score", score_fieldtype); let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST); let index = Index::create_in_ram(schema_builder.build()); @@ -1249,7 +1240,7 @@ mod tests { ) .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); - let score_fieldtype = schema::NumericOptions::default().set_fast(Cardinality::SingleValue); + let score_fieldtype = schema::NumericOptions::default().set_fast(); let score_field = schema_builder.add_u64_field("score", score_fieldtype); let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST); let index = Index::create_in_ram(schema_builder.build()); @@ -1610,7 +1601,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let int_options = NumericOptions::default() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); let mut index_builder = Index::builder().schema(schema_builder.build()); @@ -1777,7 +1768,7 @@ mod tests { fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); let int_options = NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(); let int_field = schema_builder.add_u64_field("intvals", int_options); let index = Index::create_in_ram(schema_builder.build()); @@ -1814,7 +1805,7 @@ mod tests { fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); let int_options = NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(); let int_field = schema_builder.add_u64_field("intvals", int_options); let index = Index::create_in_ram(schema_builder.build()); @@ -1940,7 +1931,7 @@ mod tests { fn merges_f64_fast_fields_correctly() -> crate::Result<()> { let mut builder = schema::SchemaBuilder::new(); - let fast_multi = NumericOptions::default().set_fast(Cardinality::MultiValues); + let fast_multi = NumericOptions::default().set_fast(); let field = builder.add_f64_field("f64", schema::FAST); let multi_field = builder.add_f64_field("f64s", fast_multi); diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 0592a8c8b..26d3e9a24 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -13,7 +13,7 @@ mod tests { fn create_test_index_posting_list_issue(index_settings: Option) -> Index { let mut schema_builder = schema::Schema::builder(); let int_options = NumericOptions::default() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); @@ -62,7 +62,7 @@ mod tests { ) -> crate::Result { let mut schema_builder = schema::Schema::builder(); let int_options = NumericOptions::default() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_stored() .set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); @@ -73,7 +73,7 @@ mod tests { let multi_numbers = schema_builder.add_u64_field( "multi_numbers", - NumericOptions::default().set_fast(Cardinality::MultiValues), + NumericOptions::default().set_fast(), ); let text_field_options = TextOptions::default() .set_indexing_options( @@ -488,7 +488,7 @@ mod bench_sorted_index_merge { fn create_index(sort_by_field: Option) -> Index { let mut schema_builder = Schema::builder(); let int_options = NumericOptions::default() - .set_fast(Cardinality::SingleValue) + .set_fast() .set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); let schema = schema_builder.build(); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 3bf99dc30..5d7de662f 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -182,28 +182,31 @@ impl SegmentWriter { match field_entry.field_type() { FieldType::Facet(_) => { - for value in values { - let facet = value.as_facet().ok_or_else(make_schema_error)?; - let facet_str = facet.encoded_str(); - let mut unordered_term_id_opt = None; - FacetTokenizer - .token_stream(facet_str) - .process(&mut |token| { - term_buffer.set_text(&token.text); - let unordered_term_id = - postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); - // TODO pass indexing context directly in subscribe function - unordered_term_id_opt = Some(unordered_term_id); - }); - if let Some(unordered_term_id) = unordered_term_id_opt { - self.fast_field_writers - .get_term_id_writer_mut(field) - .expect("writer for facet missing") - .add_val(unordered_term_id); - } - } + todo!(); + // for value in values { + // let facet = value.as_facet().ok_or_else(make_schema_error)?; + // let facet_str = facet.encoded_str(); + // let mut unordered_term_id_opt = None; + // FacetTokenizer + // .token_stream(facet_str) + // .process(&mut |token| { + // term_buffer.set_text(&token.text); + // let unordered_term_id = + // postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); + // // TODO pass indexing context directly in subscribe function + // unordered_term_id_opt = Some(unordered_term_id); + // }); + // if let Some(unordered_term_id) = unordered_term_id_opt { + // self.fast_field_writers + // .get_term_id_writer_mut(field) + // .expect("writer for facet missing") + // .add_val(unordered_term_id); + // } + // } } FieldType::Str(_) => { + todo!() + /* let mut indexing_position = IndexingPosition::default(); for value in values { let mut token_stream = match value { @@ -234,6 +237,7 @@ impl SegmentWriter { self.fieldnorms_writer .record(doc_id, field, indexing_position.num_tokens); } + */ } FieldType::U64(_) => { let mut num_vals = 0; diff --git a/src/query/range_query/range_query_ip_fastfield.rs b/src/query/range_query/range_query_ip_fastfield.rs index e725ed84a..eb2bebc29 100644 --- a/src/query/range_query/range_query_ip_fastfield.rs +++ b/src/query/range_query/range_query_ip_fastfield.rs @@ -190,7 +190,7 @@ mod tests { let ips_field = schema_builder.add_ip_addr_field( "ips", IpAddrOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); let text_field = schema_builder.add_text_field("id", STRING | STORED); diff --git a/src/query/range_query/range_query_u64_fastfield.rs b/src/query/range_query/range_query_u64_fastfield.rs index aea18877b..5cdcf15ba 100644 --- a/src/query/range_query/range_query_u64_fastfield.rs +++ b/src/query/range_query/range_query_u64_fastfield.rs @@ -186,7 +186,7 @@ mod tests { let ids_u64_field = schema_builder.add_u64_field( "ids", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); @@ -194,7 +194,7 @@ mod tests { let ids_f64_field = schema_builder.add_f64_field( "ids_f64", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); @@ -202,7 +202,7 @@ mod tests { let ids_i64_field = schema_builder.add_i64_field( "ids_i64", NumericOptions::default() - .set_fast(Cardinality::MultiValues) + .set_fast() .set_indexed(), ); diff --git a/src/schema/date_time_options.rs b/src/schema/date_time_options.rs index f6d5b62ff..5e276a361 100644 --- a/src/schema/date_time_options.rs +++ b/src/schema/date_time_options.rs @@ -2,7 +2,6 @@ use std::ops::BitOr; use serde::{Deserialize, Serialize}; -use super::Cardinality; use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag}; /// DateTime Precision @@ -29,8 +28,7 @@ pub struct DateOptions { indexed: bool, // This boolean has no effect if the field is not marked as indexed true. fieldnorms: bool, - #[serde(skip_serializing_if = "Option::is_none")] - fast: Option, + fast: bool, stored: bool, // Internal storage precision, used to optimize storage // compression on fast fields. @@ -54,18 +52,9 @@ impl DateOptions { self.fieldnorms && self.indexed } - /// Returns true iff the value is a fast field and multivalue. - pub fn is_multivalue_fast(&self) -> bool { - if let Some(cardinality) = self.fast { - cardinality == Cardinality::MultiValues - } else { - false - } - } - /// Returns true iff the value is a fast field. pub fn is_fast(&self) -> bool { - self.fast.is_some() + self.fast } /// Set the field as stored. @@ -107,19 +96,11 @@ impl DateOptions { /// If more than one value is associated with a fast field, only the last one is /// kept. #[must_use] - pub fn set_fast(mut self, cardinality: Cardinality) -> DateOptions { - self.fast = Some(cardinality); + pub fn set_fast(mut self) -> DateOptions { + self.fast = true; self } - /// Returns the cardinality of the fastfield. - /// - /// If the field has not been declared as a fastfield, then - /// the method returns `None`. - pub fn get_fastfield_cardinality(&self) -> Option { - self.fast - } - /// Sets the precision for this DateTime field. /// /// Internal storage precision, used to optimize storage @@ -147,10 +128,7 @@ impl From<()> for DateOptions { impl From for DateOptions { fn from(_: FastFlag) -> Self { DateOptions { - indexed: false, - fieldnorms: false, - stored: false, - fast: Some(Cardinality::SingleValue), + fast: true, ..Default::default() } } @@ -159,10 +137,7 @@ impl From for DateOptions { impl From for DateOptions { fn from(_: StoredFlag) -> Self { DateOptions { - indexed: false, - fieldnorms: false, stored: true, - fast: None, ..Default::default() } } @@ -173,8 +148,6 @@ impl From for DateOptions { DateOptions { indexed: true, fieldnorms: true, - stored: false, - fast: None, ..Default::default() } } @@ -189,7 +162,7 @@ impl> BitOr for DateOptions { indexed: self.indexed | other.indexed, fieldnorms: self.fieldnorms | other.fieldnorms, stored: self.stored | other.stored, - fast: self.fast.or(other.fast), + fast: self.fast | other.fast, precision: self.precision, } } diff --git a/src/schema/ip_options.rs b/src/schema/ip_options.rs index 8738f75f3..4d5694c31 100644 --- a/src/schema/ip_options.rs +++ b/src/schema/ip_options.rs @@ -4,7 +4,6 @@ use std::ops::BitOr; use serde::{Deserialize, Serialize}; use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag}; -use super::Cardinality; /// Trait to convert into an Ipv6Addr. pub trait IntoIpv6Addr { @@ -24,8 +23,7 @@ impl IntoIpv6Addr for IpAddr { /// Define how an ip field should be handled by tantivy. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] pub struct IpAddrOptions { - #[serde(skip_serializing_if = "Option::is_none")] - fast: Option, + fast: bool, stored: bool, indexed: bool, fieldnorms: bool, @@ -34,7 +32,7 @@ pub struct IpAddrOptions { impl IpAddrOptions { /// Returns true iff the value is a fast field. pub fn is_fast(&self) -> bool { - self.fast.is_some() + self.fast } /// Returns `true` if the ip address should be stored in the doc store. @@ -52,14 +50,6 @@ impl IpAddrOptions { self.fieldnorms } - /// Returns the cardinality of the fastfield. - /// - /// If the field has not been declared as a fastfield, then - /// the method returns None. - pub fn get_fastfield_cardinality(&self) -> Option { - self.fast - } - /// Set the field as normed. /// /// Setting an integer as normed will generate @@ -97,8 +87,8 @@ impl IpAddrOptions { /// If more than one value is associated with a fast field, only the last one is /// kept. #[must_use] - pub fn set_fast(mut self, cardinality: Cardinality) -> Self { - self.fast = Some(cardinality); + pub fn set_fast(mut self,) -> Self { + self.fast = true; self } } @@ -115,7 +105,7 @@ impl From for IpAddrOptions { fieldnorms: false, indexed: false, stored: false, - fast: Some(Cardinality::SingleValue), + fast: true, } } } @@ -126,7 +116,7 @@ impl From for IpAddrOptions { fieldnorms: false, indexed: false, stored: true, - fast: None, + fast: false, } } } @@ -137,7 +127,7 @@ impl From for IpAddrOptions { fieldnorms: true, indexed: true, stored: false, - fast: None, + fast: false, } } } @@ -151,7 +141,7 @@ impl> BitOr for IpAddrOptions { fieldnorms: self.fieldnorms | other.fieldnorms, indexed: self.indexed | other.indexed, stored: self.stored | other.stored, - fast: self.fast.or(other.fast), + fast: self.fast | other.fast, } } } diff --git a/src/schema/numeric_options.rs b/src/schema/numeric_options.rs index 23e4b0515..d8d1947f8 100644 --- a/src/schema/numeric_options.rs +++ b/src/schema/numeric_options.rs @@ -116,8 +116,8 @@ impl NumericOptions { /// If more than one value is associated with a fast field, only the last one is /// kept. #[must_use] - pub fn set_fast(mut self, fast: bool) -> NumericOptions { - self.fast = fast; + pub fn set_fast(mut self) -> NumericOptions { + self.fast = true; self } } @@ -202,7 +202,7 @@ mod tests { &NumericOptions { indexed: true, fieldnorms: true, - fast: None, + fast: false, stored: false } ); @@ -220,7 +220,7 @@ mod tests { &NumericOptions { indexed: false, fieldnorms: false, - fast: None, + fast: false, stored: false } ); @@ -239,7 +239,7 @@ mod tests { &NumericOptions { indexed: true, fieldnorms: false, - fast: None, + fast: false, stored: false } ); @@ -259,7 +259,7 @@ mod tests { &NumericOptions { indexed: false, fieldnorms: true, - fast: None, + fast: false, stored: false } ); diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 4d3c75528..8f2c29972 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -484,7 +484,6 @@ mod tests { use serde_json; use crate::schema::field_type::ValueParsingError; - use crate::schema::numeric_options::Cardinality::SingleValue; use crate::schema::schema::DocParsingError::InvalidJson; use crate::schema::*; @@ -508,17 +507,17 @@ mod tests { let mut schema_builder = Schema::builder(); let count_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); let popularity_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); let score_options = NumericOptions::default() .set_indexed() .set_fieldnorm() - .set_fast(Cardinality::SingleValue); + .set_fast(); let is_read_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field( "author", @@ -645,10 +644,10 @@ mod tests { let mut schema_builder = Schema::builder(); let count_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); let is_read_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); schema_builder.add_u64_field("count", count_options); @@ -750,13 +749,13 @@ mod tests { let mut schema_builder = Schema::builder(); let count_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); let popularity_options = NumericOptions::default() .set_stored() - .set_fast(Cardinality::SingleValue); + .set_fast(); let score_options = NumericOptions::default() .set_indexed() - .set_fast(Cardinality::SingleValue); + .set_fast(); let title_field = schema_builder.add_text_field("title", TEXT); let author_field = schema_builder.add_text_field("author", STRING); let count_field = schema_builder.add_u64_field("count", count_options); @@ -907,7 +906,7 @@ mod tests { .set_stored() .set_indexed() .set_fieldnorm() - .set_fast(SingleValue); + .set_fast(); schema_builder.add_text_field("_id", id_options); schema_builder.add_date_field("_timestamp", timestamp_options);