always serialize, use enum as param

This commit is contained in:
Pascal Seitz
2022-04-04 13:36:28 +08:00
parent ec9478830a
commit bb5254de12
7 changed files with 46 additions and 25 deletions

View File

@@ -401,7 +401,7 @@ mod tests {
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
assert_eq!(
json,
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false}}],"opstamp":0}"#
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
);
let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();

View File

@@ -196,6 +196,26 @@ fn value_to_u64(value: &Value) -> u64 {
}
}
/// The fast field type
///
/// Passed to fast field writers to select how a field's values are handled.
/// The common value-type traits are derived so the kind can be copied freely,
/// compared, and printed in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FastFieldType {
    /// Numeric type, e.g. f64.
    Numeric,
    /// Fast field stores string ids.
    String,
    /// Fast field stores string ids for facets.
    Facet,
}
impl FastFieldType {
fn is_storing_term_ids(&self) -> bool {
matches!(self, FastFieldType::String | FastFieldType::Facet)
}
fn is_facet(&self) -> bool {
matches!(self, FastFieldType::Facet)
}
}
#[cfg(test)]
mod tests {

View File

@@ -4,7 +4,7 @@ use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;
use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer};
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
@@ -38,20 +38,17 @@ pub struct MultiValuedFastFieldWriter {
field: Field,
vals: Vec<UnorderedTermId>,
doc_index: Vec<u64>,
is_storing_term_ids: bool,
is_facet: bool,
fast_field_type: FastFieldType,
}
impl MultiValuedFastFieldWriter {
/// Creates a new `MultiValuedFastFieldWriter`
pub(crate) fn new(field: Field, mut is_storing_term_ids: bool, is_facet: bool) -> Self {
is_storing_term_ids |= is_facet;
pub(crate) fn new(field: Field, fast_field_type: FastFieldType) -> Self {
MultiValuedFastFieldWriter {
field,
vals: Vec::new(),
doc_index: Vec::new(),
is_storing_term_ids,
is_facet,
fast_field_type,
}
}
@@ -81,7 +78,7 @@ impl MultiValuedFastFieldWriter {
pub fn add_document(&mut self, doc: &Document) {
self.next_doc();
// facets/texts are indexed in the `SegmentWriter` as we encode their unordered id.
if self.is_storing_term_ids {
if self.fast_field_type.is_storing_term_ids() {
return;
}
for field_value in doc.field_values() {
@@ -170,7 +167,7 @@ impl MultiValuedFastFieldWriter {
1,
)?;
if self.is_facet {
if self.fast_field_type.is_facet() {
let mut doc_vals: Vec<u64> = Vec::with_capacity(100);
for vals in self.get_ordered_values(doc_id_map) {
doc_vals.clear();

View File

@@ -7,7 +7,7 @@ use tantivy_bitpacker::BlockedBitpacker;
use super::multivalued::MultiValuedFastFieldWriter;
use super::serializer::FastFieldStats;
use super::FastFieldDataAccess;
use super::{FastFieldDataAccess, FastFieldType};
use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
@@ -53,18 +53,20 @@ impl FastFieldsWriter {
}
Some(Cardinality::MultiValues) => {
let fast_field_writer =
MultiValuedFastFieldWriter::new(field, false, false);
MultiValuedFastFieldWriter::new(field, FastFieldType::Numeric);
multi_values_writers.push(fast_field_writer);
}
None => {}
}
}
FieldType::Facet(_) => {
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, true);
let fast_field_writer =
MultiValuedFastFieldWriter::new(field, FastFieldType::Facet);
term_id_writers.push(fast_field_writer);
}
FieldType::Str(_) if field_entry.is_fast() => {
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, false);
let fast_field_writer =
MultiValuedFastFieldWriter::new(field, FastFieldType::String);
term_id_writers.push(fast_field_writer);
}
FieldType::Bytes(bytes_option) => {

View File

@@ -138,7 +138,8 @@ mod tests {
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false
"stored": false,
"fast": false
}
}"#;
let field_value_json = serde_json::to_string_pretty(&field_value).unwrap();

View File

@@ -417,6 +417,7 @@ mod tests {
use std::collections::BTreeMap;
use matches::{assert_matches, matches};
use pretty_assertions::assert_eq;
use serde_json;
use crate::schema::field_type::ValueParsingError;
@@ -469,7 +470,8 @@ mod tests {
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false
"stored": false,
"fast": false
}
},
{
@@ -481,7 +483,8 @@ mod tests {
"fieldnorms": false,
"tokenizer": "raw"
},
"stored": false
"stored": false,
"fast": false
}
},
{
@@ -784,7 +787,8 @@ mod tests {
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false
"stored": false,
"fast": false
}
},
{
@@ -816,7 +820,8 @@ mod tests {
"fieldnorms": true,
"tokenizer": "raw"
},
"stored": true
"stored": true,
"fast": false
}
},
{
@@ -838,7 +843,8 @@ mod tests {
"fieldnorms": true,
"tokenizer": "default"
},
"stored": false
"stored": false,
"fast": false
}
},
{

View File

@@ -16,14 +16,9 @@ pub struct TextOptions {
#[serde(default)]
stored: bool,
#[serde(default)]
#[serde(skip_serializing_if = "is_false")]
fast: bool,
}
/// Predicate referenced by `#[serde(skip_serializing_if = "is_false")]`:
/// returns `true` when the referenced flag is `false`.
fn is_false(val: &bool) -> bool {
    let flag = *val;
    !flag
}
impl TextOptions {
/// Returns the indexing options.
pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> {