Removed cardinality from fast field options.

This commit is contained in:
Paul Masurel
2023-01-17 16:34:56 +09:00
parent a0c1ba46c7
commit 29c1a76d5a
21 changed files with 253 additions and 400 deletions

View File

@@ -85,6 +85,15 @@ fn mutate_or_create_column<V, TMutator>(
}
impl ColumnarWriter {
/// Returns the total memory usage of this writer.
///
/// The total is the sum of the `mem_usage()` reported by the arena and by the
/// numerical, bool and bytes field hash maps.
pub fn mem_usage(&self) -> usize {
    // TODO add dictionary builders.
    let arena_usage = self.arena.mem_usage();
    let numerical_usage = self.numerical_field_hash_map.mem_usage();
    let bool_usage = self.bool_field_hash_map.mem_usage();
    let bytes_usage = self.bytes_field_hash_map.mem_usage();
    arena_usage + numerical_usage + bool_usage + bytes_usage
}
pub fn force_numerical_type(&mut self, column_name: &str, numerical_type: NumericalType) {
let (hash_map, _) = (&mut self.numerical_field_hash_map, &mut self.arena);
mutate_or_create_column(

View File

@@ -13,7 +13,7 @@ use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::metric::AverageAggregation;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::TermQuery;
use tantivy::schema::{self, Cardinality, IndexRecordOption, Schema, TextFieldIndexing};
use tantivy::schema::{self, IndexRecordOption, Schema, TextFieldIndexing};
use tantivy::{doc, Index, Term};
fn main() -> tantivy::Result<()> {
@@ -25,7 +25,7 @@ fn main() -> tantivy::Result<()> {
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let score_fieldtype =
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
crate::schema::NumericOptions::default().set_fast();
let highscore_field = schema_builder.add_f64_field("highscore", score_fieldtype.clone());
let price_field = schema_builder.add_f64_field("price", score_fieldtype);

View File

@@ -4,7 +4,7 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Cardinality, DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::schema::{DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::Index;
fn main() -> tantivy::Result<()> {
@@ -12,7 +12,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
let opts = DateOptions::from(INDEXED)
.set_stored()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_precision(tantivy::DatePrecision::Seconds);
let occurred_at = schema_builder.add_date_field("occurred_at", opts);
let event_type = schema_builder.add_text_field("event", STRING | STORED);

View File

@@ -43,13 +43,13 @@ mod tests {
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::AggregationCollector;
use crate::query::AllQuery;
use crate::schema::{Cardinality, NumericOptions, Schema};
use crate::schema::{NumericOptions, Schema};
use crate::Index;
#[test]
fn test_metric_aggregations() {
let mut schema_builder = Schema::builder();
let field_options = NumericOptions::default().set_fast(Cardinality::SingleValue);
let field_options = NumericOptions::default().set_fast();
let field = schema_builder.add_f64_field("price", field_options);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();

View File

@@ -430,13 +430,13 @@ mod tests {
let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
let string_field_id = schema_builder.add_text_field("string_id", STRING | FAST);
let score_fieldtype =
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
crate::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
let fraction_field = schema_builder.add_f64_field(
"fraction_f64",
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue),
crate::schema::NumericOptions::default().set_fast(),
);
let index = Index::create_in_ram(schema_builder.build());
{
@@ -654,12 +654,12 @@ mod tests {
let date_field = schema_builder.add_date_field("date", FAST);
schema_builder.add_text_field("dummy_text", STRING);
let score_fieldtype =
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
crate::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let multivalue =
crate::schema::NumericOptions::default().set_fast(Cardinality::MultiValues);
crate::schema::NumericOptions::default().set_fast();
let scores_field_i64 = schema_builder.add_i64_field("scores_i64", multivalue);
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
@@ -1187,7 +1187,7 @@ mod tests {
let text_field_few_terms =
schema_builder.add_text_field("text_few_terms", STRING | FAST);
let score_fieldtype =
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
crate::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 =
schema_builder.add_f64_field("score_f64", score_fieldtype.clone());

View File

@@ -93,7 +93,7 @@ fn save_new_metas(
/// let body_field = schema_builder.add_text_field("body", TEXT);
/// let number_field = schema_builder.add_u64_field(
/// "number",
/// NumericOptions::default().set_fast(Cardinality::SingleValue),
/// NumericOptions::default().set_fast(),
/// );
///
/// let schema = schema_builder.build();

View File

@@ -749,7 +749,7 @@ mod tests {
"multi_date",
DateOptions::default()
.set_precision(DatePrecision::Microseconds)
.set_fast(Cardinality::MultiValues),
.set_fast(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -962,7 +962,7 @@ mod tests {
.take(1_000)
.collect();
let date_options = DateOptions::default()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_precision(precision);
let mut schema_builder = SchemaBuilder::default();
let field = schema_builder.add_date_field("field", date_options);

View File

@@ -38,7 +38,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
NumericOptions::default().set_fast(Cardinality::MultiValues),
NumericOptions::default().set_fast(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -74,7 +74,7 @@ mod tests {
let date_field = schema_builder.add_date_field(
"multi_date_field",
DateOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed()
.set_fieldnorm()
.set_stored(),
@@ -215,7 +215,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_i64_field(
"multifield",
NumericOptions::default().set_fast(Cardinality::MultiValues),
NumericOptions::default().set_fast(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -246,7 +246,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let bool_field = schema_builder.add_bool_field(
"multifield",
NumericOptions::default().set_fast(Cardinality::MultiValues),
NumericOptions::default().set_fast(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -278,7 +278,7 @@ mod tests {
let field = schema_builder.add_u64_field(
"multifield",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);
let schema = schema_builder.build();
@@ -424,7 +424,7 @@ mod bench {
let mut builder = crate::schema::SchemaBuilder::new();
let fast_multi =
crate::schema::NumericOptions::default().set_fast(Cardinality::MultiValues);
crate::schema::NumericOptions::default().set_fast();
let multi_field = builder.add_f64_field("f64s", fast_multi);
let index = crate::Index::create_in_ram(builder.build());
@@ -504,7 +504,7 @@ mod bench {
let path = Path::new("test");
let directory: RamDirectory = RamDirectory::create();
let field = {
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
let options = NumericOptions::default().set_fast();
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field("field", options);
let schema = schema_builder.build();
@@ -562,7 +562,7 @@ mod bench {
b.iter(|| {
let directory: RamDirectory = RamDirectory::create();
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
let options = NumericOptions::default().set_fast();
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field("field", options);
let schema = schema_builder.build();
@@ -595,7 +595,7 @@ mod bench {
b.iter(|| {
let directory: RamDirectory = RamDirectory::create();
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
let options = NumericOptions::default().set_fast();
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field("field", options);
let schema = schema_builder.build();

View File

@@ -137,7 +137,7 @@ mod tests {
let date_field = schema_builder.add_date_field(
"multi_date_field",
DateOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed()
.set_fieldnorm()
.set_precision(DatePrecision::Microseconds)
@@ -188,7 +188,7 @@ mod tests {
let date_field = schema_builder.add_date_field(
"multi_date_field",
DateOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
// TODO: Test different precision after fixing https://github.com/quickwit-oss/tantivy/issues/1783
.set_precision(DatePrecision::Microseconds)
.set_indexed()
@@ -307,7 +307,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let field_options = NumericOptions::default()
.set_indexed()
.set_fast(Cardinality::MultiValues);
.set_fast();
let item_field = schema_builder.add_i64_field("items", field_options);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

View File

@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::io;
use columnar::{ColumnarWriter, NumericalType};
use common;
use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
use rustc_hash::FxHashMap;
@@ -17,12 +18,14 @@ use crate::DatePrecision;
/// The `FastFieldsWriter` groups all of the fast field writers.
pub struct FastFieldsWriter {
term_id_writers: Vec<MultiValuedFastFieldWriter>,
single_value_writers: Vec<IntFastFieldWriter>,
u128_value_writers: Vec<U128FastFieldWriter>,
u128_multi_value_writers: Vec<MultiValueU128FastFieldWriter>,
multi_values_writers: Vec<MultiValuedFastFieldWriter>,
bytes_value_writers: Vec<BytesFastFieldWriter>,
columnar_writer: ColumnarWriter,
fast_fields: Vec<Option<String>>,
// term_id_writers: Vec<MultiValuedFastFieldWriter>,
// single_value_writers: Vec<IntFastFieldWriter>,
// u128_value_writers: Vec<U128FastFieldWriter>,
// u128_multi_value_writers: Vec<MultiValueU128FastFieldWriter>,
// multi_values_writers: Vec<MultiValuedFastFieldWriter>,
// bytes_value_writers: Vec<BytesFastFieldWriter>,
}
pub(crate) fn unexpected_value(expected: &str, actual: &Value) -> crate::TantivyError {
@@ -40,214 +43,96 @@ fn fast_field_default_value(field_entry: &FieldEntry) -> u64 {
}
}
/// Kind of fast field, as produced by `fast_numerical_type`.
enum FastFieldTyp {
/// A numerical fast field, carrying its `NumericalType`
/// (`fast_numerical_type` returns this for fast `U64`, `I64` and `F64` fields).
Numerical(NumericalType),
/// Any other fast field
/// (`fast_numerical_type` returns this for fast `Str`, `Bool` and `Date` fields).
Other,
}
/// Classifies a field type as a fast field kind.
///
/// Returns `None` when the field's options report that it is not a fast field.
/// Fast `U64`, `I64` and `F64` fields map to `FastFieldTyp::Numerical` with the
/// matching `NumericalType`; fast `Str`, `Bool` and `Date` fields map to
/// `FastFieldTyp::Other`.
///
/// # Panics
///
/// `Facet`, `Bytes`, `JsonObject` and `IpAddr` fields are not handled yet and
/// hit `todo!()`.
fn fast_numerical_type(field_type: &FieldType) -> Option<FastFieldTyp> {
    // TODO
    // Pair the "is fast" flag with the kind the field would have if it is fast,
    // then decide once at the end whether to return it.
    let (is_fast, fast_field_typ) = match field_type {
        FieldType::U64(options) => (
            options.is_fast(),
            FastFieldTyp::Numerical(NumericalType::U64),
        ),
        FieldType::I64(options) => (
            options.is_fast(),
            FastFieldTyp::Numerical(NumericalType::I64),
        ),
        FieldType::F64(options) => (
            options.is_fast(),
            FastFieldTyp::Numerical(NumericalType::F64),
        ),
        FieldType::Str(options) => (options.is_fast(), FastFieldTyp::Other),
        FieldType::Bool(options) => (options.is_fast(), FastFieldTyp::Other),
        FieldType::Date(options) => (options.is_fast(), FastFieldTyp::Other),
        FieldType::Facet(_)
        | FieldType::Bytes(_)
        | FieldType::JsonObject(_)
        | FieldType::IpAddr(_) => todo!(),
    };
    if is_fast {
        Some(fast_field_typ)
    } else {
        None
    }
}
impl FastFieldsWriter {
/// Create all `FastFieldWriter` required by the schema.
pub fn from_schema(schema: &Schema) -> FastFieldsWriter {
let mut u128_value_writers = Vec::new();
let mut u128_multi_value_writers = Vec::new();
let mut single_value_writers = Vec::new();
let mut term_id_writers = Vec::new();
let mut multi_values_writers = Vec::new();
let mut bytes_value_writers = Vec::new();
let mut columnar_writer = ColumnarWriter::default();
let mut fast_fields = vec![None; schema.num_fields()];
// TODO see other types
for (field, field_entry) in schema.fields() {
match field_entry.field_type() {
FieldType::I64(ref int_options)
| FieldType::U64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => {
todo!();
// match int_options.get_fastfield_cardinality() {
// Some(Cardinality::SingleValue) => {
// let mut fast_field_writer = IntFastFieldWriter::new(field, None);
// let default_value = fast_field_default_value(field_entry);
// fast_field_writer.set_val_if_missing(default_value);
// single_value_writers.push(fast_field_writer);
// }
// Some(Cardinality::MultiValues) => {
// let fast_field_writer = MultiValuedFastFieldWriter::new(
// field,
// FastFieldType::Numeric,
// None,
// );
// multi_values_writers.push(fast_field_writer);
// }
// None => {}
// }
if let Some(fast_field_typ) =fast_numerical_type(field_entry.field_type()) {
match fast_field_typ {
FastFieldTyp::Numerical(numerical_type) => {
columnar_writer.force_numerical_type(field_entry.name(), numerical_type);
},
FastFieldTyp::Other => {},
}
FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
let mut fast_field_writer =
IntFastFieldWriter::new(field, Some(options.get_precision()));
let default_value = fast_field_default_value(field_entry);
fast_field_writer.set_val_if_missing(default_value);
single_value_writers.push(fast_field_writer);
}
Some(Cardinality::MultiValues) => {
let fast_field_writer = MultiValuedFastFieldWriter::new(
field,
FastFieldType::Numeric,
Some(options.get_precision()),
);
multi_values_writers.push(fast_field_writer);
}
None => {}
},
FieldType::Facet(_) => {
let fast_field_writer =
MultiValuedFastFieldWriter::new(field, FastFieldType::Facet, None);
term_id_writers.push(fast_field_writer);
}
FieldType::Str(_) if field_entry.is_fast() => {
let fast_field_writer =
MultiValuedFastFieldWriter::new(field, FastFieldType::String, None);
term_id_writers.push(fast_field_writer);
}
FieldType::Bytes(bytes_option) => {
if bytes_option.is_fast() {
let fast_field_writer = BytesFastFieldWriter::new(field);
bytes_value_writers.push(fast_field_writer);
}
}
FieldType::IpAddr(opt) => {
if opt.is_fast() {
match opt.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
let fast_field_writer = U128FastFieldWriter::new(field);
u128_value_writers.push(fast_field_writer);
}
Some(Cardinality::MultiValues) => {
let fast_field_writer = MultiValueU128FastFieldWriter::new(field);
u128_multi_value_writers.push(fast_field_writer);
}
None => {}
}
}
}
FieldType::Str(_) | FieldType::JsonObject(_) => {}
fast_fields[field.field_id() as usize] = Some(field_entry.name().to_string());
}
}
FastFieldsWriter {
u128_value_writers,
u128_multi_value_writers,
term_id_writers,
single_value_writers,
multi_values_writers,
bytes_value_writers,
columnar_writer,
fast_fields,
}
}
/// The memory used (inclusive childs)
pub fn mem_usage(&self) -> usize {
self.term_id_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
+ self
.single_value_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
+ self
.multi_values_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
+ self
.bytes_value_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
+ self
.u128_value_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
+ self
.u128_multi_value_writers
.iter()
.map(|w| w.mem_usage())
.sum::<usize>()
self.columnar_writer.mem_usage()
}
/// Get the `FastFieldWriter` associated with a field.
pub fn get_term_id_writer(&self, field: Field) -> Option<&MultiValuedFastFieldWriter> {
// TODO optimize
self.term_id_writers
.iter()
.find(|field_writer| field_writer.field() == field)
}
/// Get the `FastFieldWriter` associated with a field.
pub fn get_field_writer(&self, field: Field) -> Option<&IntFastFieldWriter> {
// TODO optimize
self.single_value_writers
.iter()
.find(|field_writer| field_writer.field() == field)
}
/// Get the `FastFieldWriter` associated with a field.
pub fn get_field_writer_mut(&mut self, field: Field) -> Option<&mut IntFastFieldWriter> {
// TODO optimize
self.single_value_writers
.iter_mut()
.find(|field_writer| field_writer.field() == field)
}
/// Get the `FastFieldWriter` associated with a field.
pub fn get_term_id_writer_mut(
&mut self,
field: Field,
) -> Option<&mut MultiValuedFastFieldWriter> {
// TODO optimize
self.term_id_writers
.iter_mut()
.find(|field_writer| field_writer.field() == field)
}
/// Returns the fast field multi-value writer for the given field.
///
/// Returns `None` if the field does not exist, or is not
/// configured as a multivalued fastfield in the schema.
pub fn get_multivalue_writer_mut(
&mut self,
field: Field,
) -> Option<&mut MultiValuedFastFieldWriter> {
// TODO optimize
self.multi_values_writers
.iter_mut()
.find(|multivalue_writer| multivalue_writer.field() == field)
}
/// Returns the bytes fast field writer for the given field.
///
/// Returns `None` if the field does not exist, or is not
/// configured as a bytes fastfield in the schema.
pub fn get_bytes_writer_mut(&mut self, field: Field) -> Option<&mut BytesFastFieldWriter> {
// TODO optimize
self.bytes_value_writers
.iter_mut()
.find(|field_writer| field_writer.field() == field)
}
/// Indexes all of the fastfields of a new document.
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
for field_writer in &mut self.term_id_writers {
field_writer.add_document(doc)?;
}
for field_writer in &mut self.single_value_writers {
field_writer.add_document(doc)?;
}
for field_writer in &mut self.multi_values_writers {
field_writer.add_document(doc)?;
}
for field_writer in &mut self.bytes_value_writers {
field_writer.add_document(doc)?;
}
for field_writer in &mut self.u128_value_writers {
field_writer.add_document(doc)?;
}
for field_writer in &mut self.u128_multi_value_writers {
field_writer.add_document(doc)?;
for field_value in doc.field_values() {
}
Ok(())
}
@@ -260,27 +145,28 @@ impl FastFieldsWriter {
mapping: &HashMap<Field, FxHashMap<UnorderedTermId, TermOrdinal>>,
doc_id_map: Option<&DocIdMapping>,
) -> io::Result<()> {
for field_writer in self.term_id_writers {
let field = field_writer.field();
field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
}
for field_writer in &self.single_value_writers {
field_writer.serialize(serializer, doc_id_map)?;
}
todo!();
// for field_writer in self.term_id_writers {
// let field = field_writer.field();
// field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
// }
// for field_writer in &self.single_value_writers {
// field_writer.serialize(serializer, doc_id_map)?;
// }
for field_writer in self.multi_values_writers {
let field = field_writer.field();
field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
}
for field_writer in self.bytes_value_writers {
field_writer.serialize(serializer, doc_id_map)?;
}
for field_writer in self.u128_value_writers {
field_writer.serialize(serializer, doc_id_map)?;
}
for field_writer in self.u128_multi_value_writers {
field_writer.serialize(serializer, doc_id_map)?;
}
// for field_writer in self.multi_values_writers {
// let field = field_writer.field();
// field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
// }
// for field_writer in self.bytes_value_writers {
// field_writer.serialize(serializer, doc_id_map)?;
// }
// for field_writer in self.u128_value_writers {
// field_writer.serialize(serializer, doc_id_map)?;
// }
// for field_writer in self.u128_multi_value_writers {
// field_writer.serialize(serializer, doc_id_map)?;
// }
Ok(())
}

View File

@@ -113,34 +113,35 @@ pub(crate) fn get_doc_id_mapping_from_field(
sort_by_field: IndexSortByField,
segment_writer: &SegmentWriter,
) -> crate::Result<DocIdMapping> {
let schema = segment_writer.segment_serializer.segment().schema();
let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required
let fast_field = segment_writer
.fast_field_writers
.get_field_writer(field_id)
.ok_or_else(|| {
TantivyError::InvalidArgument(format!(
"sort index by field is required to be a fast field {:?}",
sort_by_field.field
))
})?;
todo!()
// let schema = segment_writer.segment_serializer.segment().schema();
// let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required
// let fast_field = segment_writer
// .fast_field_writers
// .get_field_writer(field_id)
// .ok_or_else(|| {
// TantivyError::InvalidArgument(format!(
// "sort index by field is required to be a fast field {:?}",
// sort_by_field.field
// ))
// })?;
// create new doc_id to old doc_id index (used in fast_field_writers)
let mut doc_id_and_data = fast_field
.iter()
.enumerate()
.map(|el| (el.0 as DocId, el.1))
.collect::<Vec<_>>();
if sort_by_field.order == Order::Desc {
doc_id_and_data.sort_by_key(|k| Reverse(k.1));
} else {
doc_id_and_data.sort_by_key(|k| k.1);
}
let new_doc_id_to_old = doc_id_and_data
.into_iter()
.map(|el| el.0)
.collect::<Vec<_>>();
Ok(DocIdMapping::from_new_id_to_old_id(new_doc_id_to_old))
// // create new doc_id to old doc_id index (used in fast_field_writers)
// let mut doc_id_and_data = fast_field
// .iter()
// .enumerate()
// .map(|el| (el.0 as DocId, el.1))
// .collect::<Vec<_>>();
// if sort_by_field.order == Order::Desc {
// doc_id_and_data.sort_by_key(|k| Reverse(k.1));
// } else {
// doc_id_and_data.sort_by_key(|k| k.1);
// }
// let new_doc_id_to_old = doc_id_and_data
// .into_iter()
// .map(|el| el.0)
// .collect::<Vec<_>>();
// Ok(DocIdMapping::from_new_id_to_old_id(new_doc_id_to_old))
}
#[cfg(test)]
@@ -161,12 +162,12 @@ mod tests_indexsorting {
let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED);
let my_number = schema_builder.add_u64_field(
"my_number",
NumericOptions::default().set_fast(Cardinality::SingleValue),
NumericOptions::default().set_fast(),
);
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
NumericOptions::default().set_fast(Cardinality::MultiValues),
NumericOptions::default().set_fast(),
);
let schema = schema_builder.build();

View File

@@ -1398,7 +1398,7 @@ mod tests {
#[test]
fn test_sort_by_multivalue_field_error() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
let options = NumericOptions::default().set_fast();
schema_builder.add_u64_field("id", options);
let schema = schema_builder.build();
@@ -1616,7 +1616,7 @@ mod tests {
let ips_field = schema_builder.add_ip_addr_field(
"ips",
IpAddrOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);
let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED);
@@ -1641,13 +1641,13 @@ mod tests {
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_stored(),
);
let multi_bools = schema_builder.add_bool_field(
"multi_bools",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_stored(),
);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());

View File

@@ -276,36 +276,27 @@ impl IndexMerger {
| FieldType::Bool(ref options) => {
todo!()
}
FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
FieldType::Date(ref options) => {
if options.is_fast() {
todo!();
}
Some(Cardinality::MultiValues) => {
self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
}
None => {}
// Some(Cardinality::SingleValue) => {
// self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// Some(Cardinality::MultiValues) => {
// self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
// }
// None => {}
},
FieldType::Bytes(byte_options) => {
if byte_options.is_fast() {
self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?;
}
}
FieldType::IpAddr(options) => match options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
self.write_u128_single_fast_field(
field,
fast_field_serializer,
doc_id_mapping,
)?;
FieldType::IpAddr(options) => {
if options.is_fast() {
todo!();
}
Some(Cardinality::MultiValues) => {
self.write_u128_multi_fast_field(
field,
fast_field_serializer,
doc_id_mapping,
)?;
}
None => {}
},
FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => {
@@ -1094,7 +1085,7 @@ mod tests {
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let date_field = schema_builder.add_date_field("date", INDEXED);
let score_fieldtype = schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
let score_fieldtype = schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype);
let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST);
let index = Index::create_in_ram(schema_builder.build());
@@ -1249,7 +1240,7 @@ mod tests {
)
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let score_fieldtype = schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
let score_fieldtype = schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype);
let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST);
let index = Index::create_in_ram(schema_builder.build());
@@ -1610,7 +1601,7 @@ mod tests {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let int_options = NumericOptions::default()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
let mut index_builder = Index::builder().schema(schema_builder.build());
@@ -1777,7 +1768,7 @@ mod tests {
fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build());
@@ -1814,7 +1805,7 @@ mod tests {
fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build());
@@ -1940,7 +1931,7 @@ mod tests {
fn merges_f64_fast_fields_correctly() -> crate::Result<()> {
let mut builder = schema::SchemaBuilder::new();
let fast_multi = NumericOptions::default().set_fast(Cardinality::MultiValues);
let fast_multi = NumericOptions::default().set_fast();
let field = builder.add_f64_field("f64", schema::FAST);
let multi_field = builder.add_f64_field("f64s", fast_multi);

View File

@@ -13,7 +13,7 @@ mod tests {
fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
@@ -62,7 +62,7 @@ mod tests {
) -> crate::Result<Index> {
let mut schema_builder = schema::Schema::builder();
let int_options = NumericOptions::default()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_stored()
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
@@ -73,7 +73,7 @@ mod tests {
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
NumericOptions::default().set_fast(Cardinality::MultiValues),
NumericOptions::default().set_fast(),
);
let text_field_options = TextOptions::default()
.set_indexing_options(
@@ -488,7 +488,7 @@ mod bench_sorted_index_merge {
fn create_index(sort_by_field: Option<IndexSortByField>) -> Index {
let mut schema_builder = Schema::builder();
let int_options = NumericOptions::default()
.set_fast(Cardinality::SingleValue)
.set_fast()
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
let schema = schema_builder.build();

View File

@@ -182,28 +182,31 @@ impl SegmentWriter {
match field_entry.field_type() {
FieldType::Facet(_) => {
for value in values {
let facet = value.as_facet().ok_or_else(make_schema_error)?;
let facet_str = facet.encoded_str();
let mut unordered_term_id_opt = None;
FacetTokenizer
.token_stream(facet_str)
.process(&mut |token| {
term_buffer.set_text(&token.text);
let unordered_term_id =
postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
// TODO pass indexing context directly in subscribe function
unordered_term_id_opt = Some(unordered_term_id);
});
if let Some(unordered_term_id) = unordered_term_id_opt {
self.fast_field_writers
.get_term_id_writer_mut(field)
.expect("writer for facet missing")
.add_val(unordered_term_id);
}
}
todo!();
// for value in values {
// let facet = value.as_facet().ok_or_else(make_schema_error)?;
// let facet_str = facet.encoded_str();
// let mut unordered_term_id_opt = None;
// FacetTokenizer
// .token_stream(facet_str)
// .process(&mut |token| {
// term_buffer.set_text(&token.text);
// let unordered_term_id =
// postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
// // TODO pass indexing context directly in subscribe function
// unordered_term_id_opt = Some(unordered_term_id);
// });
// if let Some(unordered_term_id) = unordered_term_id_opt {
// self.fast_field_writers
// .get_term_id_writer_mut(field)
// .expect("writer for facet missing")
// .add_val(unordered_term_id);
// }
// }
}
FieldType::Str(_) => {
todo!()
/*
let mut indexing_position = IndexingPosition::default();
for value in values {
let mut token_stream = match value {
@@ -234,6 +237,7 @@ impl SegmentWriter {
self.fieldnorms_writer
.record(doc_id, field, indexing_position.num_tokens);
}
*/
}
FieldType::U64(_) => {
let mut num_vals = 0;

View File

@@ -190,7 +190,7 @@ mod tests {
let ips_field = schema_builder.add_ip_addr_field(
"ips",
IpAddrOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);
let text_field = schema_builder.add_text_field("id", STRING | STORED);

View File

@@ -186,7 +186,7 @@ mod tests {
let ids_u64_field = schema_builder.add_u64_field(
"ids",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);
@@ -194,7 +194,7 @@ mod tests {
let ids_f64_field = schema_builder.add_f64_field(
"ids_f64",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);
@@ -202,7 +202,7 @@ mod tests {
let ids_i64_field = schema_builder.add_i64_field(
"ids_i64",
NumericOptions::default()
.set_fast(Cardinality::MultiValues)
.set_fast()
.set_indexed(),
);

View File

@@ -2,7 +2,6 @@ use std::ops::BitOr;
use serde::{Deserialize, Serialize};
use super::Cardinality;
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
/// DateTime Precision
@@ -29,8 +28,7 @@ pub struct DateOptions {
indexed: bool,
// This boolean has no effect if the field is not marked as indexed true.
fieldnorms: bool,
#[serde(skip_serializing_if = "Option::is_none")]
fast: Option<Cardinality>,
fast: bool,
stored: bool,
// Internal storage precision, used to optimize storage
// compression on fast fields.
@@ -54,18 +52,9 @@ impl DateOptions {
self.fieldnorms && self.indexed
}
/// Returns true iff the value is a fast field and multivalue.
pub fn is_multivalue_fast(&self) -> bool {
if let Some(cardinality) = self.fast {
cardinality == Cardinality::MultiValues
} else {
false
}
}
/// Returns true iff the value is a fast field.
pub fn is_fast(&self) -> bool {
self.fast.is_some()
self.fast
}
/// Set the field as stored.
@@ -107,19 +96,11 @@ impl DateOptions {
/// If more than one value is associated with a fast field, only the last one is
/// kept.
#[must_use]
pub fn set_fast(mut self, cardinality: Cardinality) -> DateOptions {
self.fast = Some(cardinality);
pub fn set_fast(mut self) -> DateOptions {
self.fast = true;
self
}
/// Returns the cardinality of the fastfield.
///
/// If the field has not been declared as a fastfield, then
/// the method returns `None`.
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
self.fast
}
/// Sets the precision for this DateTime field.
///
/// Internal storage precision, used to optimize storage
@@ -147,10 +128,7 @@ impl From<()> for DateOptions {
impl From<FastFlag> for DateOptions {
fn from(_: FastFlag) -> Self {
DateOptions {
indexed: false,
fieldnorms: false,
stored: false,
fast: Some(Cardinality::SingleValue),
fast: true,
..Default::default()
}
}
@@ -159,10 +137,7 @@ impl From<FastFlag> for DateOptions {
impl From<StoredFlag> for DateOptions {
fn from(_: StoredFlag) -> Self {
DateOptions {
indexed: false,
fieldnorms: false,
stored: true,
fast: None,
..Default::default()
}
}
@@ -173,8 +148,6 @@ impl From<IndexedFlag> for DateOptions {
DateOptions {
indexed: true,
fieldnorms: true,
stored: false,
fast: None,
..Default::default()
}
}
@@ -189,7 +162,7 @@ impl<T: Into<DateOptions>> BitOr<T> for DateOptions {
indexed: self.indexed | other.indexed,
fieldnorms: self.fieldnorms | other.fieldnorms,
stored: self.stored | other.stored,
fast: self.fast.or(other.fast),
fast: self.fast | other.fast,
precision: self.precision,
}
}

View File

@@ -4,7 +4,6 @@ use std::ops::BitOr;
use serde::{Deserialize, Serialize};
use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
use super::Cardinality;
/// Trait to convert into an Ipv6Addr.
pub trait IntoIpv6Addr {
@@ -24,8 +23,7 @@ impl IntoIpv6Addr for IpAddr {
/// Define how an ip field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct IpAddrOptions {
#[serde(skip_serializing_if = "Option::is_none")]
fast: Option<Cardinality>,
fast: bool,
stored: bool,
indexed: bool,
fieldnorms: bool,
@@ -34,7 +32,7 @@ pub struct IpAddrOptions {
impl IpAddrOptions {
/// Returns true iff the value is a fast field.
pub fn is_fast(&self) -> bool {
self.fast.is_some()
self.fast
}
/// Returns `true` if the ip address should be stored in the doc store.
@@ -52,14 +50,6 @@ impl IpAddrOptions {
self.fieldnorms
}
/// Returns the cardinality of the fastfield.
///
/// If the field has not been declared as a fastfield, then
/// the method returns None.
pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
self.fast
}
/// Set the field as normed.
///
/// Setting an integer as normed will generate
@@ -97,8 +87,8 @@ impl IpAddrOptions {
/// If more than one value is associated with a fast field, only the last one is
/// kept.
#[must_use]
pub fn set_fast(mut self, cardinality: Cardinality) -> Self {
self.fast = Some(cardinality);
pub fn set_fast(mut self,) -> Self {
self.fast = true;
self
}
}
@@ -115,7 +105,7 @@ impl From<FastFlag> for IpAddrOptions {
fieldnorms: false,
indexed: false,
stored: false,
fast: Some(Cardinality::SingleValue),
fast: true,
}
}
}
@@ -126,7 +116,7 @@ impl From<StoredFlag> for IpAddrOptions {
fieldnorms: false,
indexed: false,
stored: true,
fast: None,
fast: false,
}
}
}
@@ -137,7 +127,7 @@ impl From<IndexedFlag> for IpAddrOptions {
fieldnorms: true,
indexed: true,
stored: false,
fast: None,
fast: false,
}
}
}
@@ -151,7 +141,7 @@ impl<T: Into<IpAddrOptions>> BitOr<T> for IpAddrOptions {
fieldnorms: self.fieldnorms | other.fieldnorms,
indexed: self.indexed | other.indexed,
stored: self.stored | other.stored,
fast: self.fast.or(other.fast),
fast: self.fast | other.fast,
}
}
}

View File

@@ -116,8 +116,8 @@ impl NumericOptions {
/// If more than one value is associated with a fast field, only the last one is
/// kept.
#[must_use]
pub fn set_fast(mut self, fast: bool) -> NumericOptions {
self.fast = fast;
pub fn set_fast(mut self) -> NumericOptions {
self.fast = true;
self
}
}
@@ -202,7 +202,7 @@ mod tests {
&NumericOptions {
indexed: true,
fieldnorms: true,
fast: None,
fast: false,
stored: false
}
);
@@ -220,7 +220,7 @@ mod tests {
&NumericOptions {
indexed: false,
fieldnorms: false,
fast: None,
fast: false,
stored: false
}
);
@@ -239,7 +239,7 @@ mod tests {
&NumericOptions {
indexed: true,
fieldnorms: false,
fast: None,
fast: false,
stored: false
}
);
@@ -259,7 +259,7 @@ mod tests {
&NumericOptions {
indexed: false,
fieldnorms: true,
fast: None,
fast: false,
stored: false
}
);

View File

@@ -484,7 +484,6 @@ mod tests {
use serde_json;
use crate::schema::field_type::ValueParsingError;
use crate::schema::numeric_options::Cardinality::SingleValue;
use crate::schema::schema::DocParsingError::InvalidJson;
use crate::schema::*;
@@ -508,17 +507,17 @@ mod tests {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
let popularity_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
let score_options = NumericOptions::default()
.set_indexed()
.set_fieldnorm()
.set_fast(Cardinality::SingleValue);
.set_fast();
let is_read_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field(
"author",
@@ -645,10 +644,10 @@ mod tests {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
let is_read_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u64_field("count", count_options);
@@ -750,13 +749,13 @@ mod tests {
let mut schema_builder = Schema::builder();
let count_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
let popularity_options = NumericOptions::default()
.set_stored()
.set_fast(Cardinality::SingleValue);
.set_fast();
let score_options = NumericOptions::default()
.set_indexed()
.set_fast(Cardinality::SingleValue);
.set_fast();
let title_field = schema_builder.add_text_field("title", TEXT);
let author_field = schema_builder.add_text_field("author", STRING);
let count_field = schema_builder.add_u64_field("count", count_options);
@@ -907,7 +906,7 @@ mod tests {
.set_stored()
.set_indexed()
.set_fieldnorm()
.set_fast(SingleValue);
.set_fast();
schema_builder.add_text_field("_id", id_options);
schema_builder.add_date_field("_timestamp", timestamp_options);