mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-08 18:12:55 +00:00
always serialize, use enum as param
This commit is contained in:
@@ -401,7 +401,7 @@ mod tests {
|
||||
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
|
||||
assert_eq!(
|
||||
json,
|
||||
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false}}],"opstamp":0}"#
|
||||
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#
|
||||
);
|
||||
|
||||
let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
|
||||
|
||||
@@ -196,6 +196,26 @@ fn value_to_u64(value: &Value) -> u64 {
|
||||
}
|
||||
}
|
||||
|
||||
/// The fast field type.
///
/// Describes what kind of values a fast field holds. The variant is a plain,
/// data-free tag, so it is cheap to copy, compare and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FastFieldType {
    /// Numeric type, e.g. f64.
    Numeric,
    /// Fast field stores string ids.
    String,
    /// Fast field stores string ids for facets.
    Facet,
}
|
||||
|
||||
impl FastFieldType {
|
||||
fn is_storing_term_ids(&self) -> bool {
|
||||
matches!(self, FastFieldType::String | FastFieldType::Facet)
|
||||
}
|
||||
|
||||
fn is_facet(&self) -> bool {
|
||||
matches!(self, FastFieldType::Facet)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use fnv::FnvHashMap;
|
||||
use tantivy_bitpacker::minmax;
|
||||
|
||||
use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
|
||||
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer};
|
||||
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
use crate::schema::{Document, Field};
|
||||
@@ -38,20 +38,17 @@ pub struct MultiValuedFastFieldWriter {
|
||||
field: Field,
|
||||
vals: Vec<UnorderedTermId>,
|
||||
doc_index: Vec<u64>,
|
||||
is_storing_term_ids: bool,
|
||||
is_facet: bool,
|
||||
fast_field_type: FastFieldType,
|
||||
}
|
||||
|
||||
impl MultiValuedFastFieldWriter {
|
||||
/// Creates a new `MultiValuedFastFieldWriter`
|
||||
pub(crate) fn new(field: Field, mut is_storing_term_ids: bool, is_facet: bool) -> Self {
|
||||
is_storing_term_ids |= is_facet;
|
||||
pub(crate) fn new(field: Field, fast_field_type: FastFieldType) -> Self {
|
||||
MultiValuedFastFieldWriter {
|
||||
field,
|
||||
vals: Vec::new(),
|
||||
doc_index: Vec::new(),
|
||||
is_storing_term_ids,
|
||||
is_facet,
|
||||
fast_field_type,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,7 +78,7 @@ impl MultiValuedFastFieldWriter {
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
self.next_doc();
|
||||
// facets/texts are indexed in the `SegmentWriter` as we encode their unordered id.
|
||||
if self.is_storing_term_ids {
|
||||
if self.fast_field_type.is_storing_term_ids() {
|
||||
return;
|
||||
}
|
||||
for field_value in doc.field_values() {
|
||||
@@ -170,7 +167,7 @@ impl MultiValuedFastFieldWriter {
|
||||
1,
|
||||
)?;
|
||||
|
||||
if self.is_facet {
|
||||
if self.fast_field_type.is_facet() {
|
||||
let mut doc_vals: Vec<u64> = Vec::with_capacity(100);
|
||||
for vals in self.get_ordered_values(doc_id_map) {
|
||||
doc_vals.clear();
|
||||
|
||||
@@ -7,7 +7,7 @@ use tantivy_bitpacker::BlockedBitpacker;
|
||||
|
||||
use super::multivalued::MultiValuedFastFieldWriter;
|
||||
use super::serializer::FastFieldStats;
|
||||
use super::FastFieldDataAccess;
|
||||
use super::{FastFieldDataAccess, FastFieldType};
|
||||
use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
@@ -53,18 +53,20 @@ impl FastFieldsWriter {
|
||||
}
|
||||
Some(Cardinality::MultiValues) => {
|
||||
let fast_field_writer =
|
||||
MultiValuedFastFieldWriter::new(field, false, false);
|
||||
MultiValuedFastFieldWriter::new(field, FastFieldType::Numeric);
|
||||
multi_values_writers.push(fast_field_writer);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
FieldType::Facet(_) => {
|
||||
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, true);
|
||||
let fast_field_writer =
|
||||
MultiValuedFastFieldWriter::new(field, FastFieldType::Facet);
|
||||
term_id_writers.push(fast_field_writer);
|
||||
}
|
||||
FieldType::Str(_) if field_entry.is_fast() => {
|
||||
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, false);
|
||||
let fast_field_writer =
|
||||
MultiValuedFastFieldWriter::new(field, FastFieldType::String);
|
||||
term_id_writers.push(fast_field_writer);
|
||||
}
|
||||
FieldType::Bytes(bytes_option) => {
|
||||
|
||||
@@ -138,7 +138,8 @@ mod tests {
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "default"
|
||||
},
|
||||
"stored": false
|
||||
"stored": false,
|
||||
"fast": false
|
||||
}
|
||||
}"#;
|
||||
let field_value_json = serde_json::to_string_pretty(&field_value).unwrap();
|
||||
|
||||
@@ -417,6 +417,7 @@ mod tests {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use matches::{assert_matches, matches};
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json;
|
||||
|
||||
use crate::schema::field_type::ValueParsingError;
|
||||
@@ -469,7 +470,8 @@ mod tests {
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "default"
|
||||
},
|
||||
"stored": false
|
||||
"stored": false,
|
||||
"fast": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -481,7 +483,8 @@ mod tests {
|
||||
"fieldnorms": false,
|
||||
"tokenizer": "raw"
|
||||
},
|
||||
"stored": false
|
||||
"stored": false,
|
||||
"fast": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -784,7 +787,8 @@ mod tests {
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "default"
|
||||
},
|
||||
"stored": false
|
||||
"stored": false,
|
||||
"fast": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -816,7 +820,8 @@ mod tests {
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "raw"
|
||||
},
|
||||
"stored": true
|
||||
"stored": true,
|
||||
"fast": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -838,7 +843,8 @@ mod tests {
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "default"
|
||||
},
|
||||
"stored": false
|
||||
"stored": false,
|
||||
"fast": false
|
||||
}
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,14 +16,9 @@ pub struct TextOptions {
|
||||
#[serde(default)]
|
||||
stored: bool,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "is_false")]
|
||||
fast: bool,
|
||||
}
|
||||
|
||||
fn is_false(val: &bool) -> bool {
|
||||
!(*val)
|
||||
}
|
||||
|
||||
impl TextOptions {
|
||||
/// Returns the indexing options.
|
||||
pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> {
|
||||
|
||||
Reference in New Issue
Block a user