mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
tokenizer option on text fastfield (#1945)
* tokenizer option on text fastfield allow to set tokenizer option on text fastfield (fixes #1901) handle PreTokenized strings in fast field * change visibility * remove custom de/serialization
This commit is contained in:
@@ -42,7 +42,7 @@ fn main() -> tantivy::Result<()> {
|
||||
.set_index_option(IndexRecordOption::WithFreqs)
|
||||
.set_tokenizer("raw"),
|
||||
)
|
||||
.set_fast()
|
||||
.set_fast(None)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("category", text_fieldtype);
|
||||
schema_builder.add_f64_field("stock", FAST);
|
||||
|
||||
@@ -445,7 +445,7 @@ mod tests {
|
||||
.set_index_option(IndexRecordOption::Basic)
|
||||
.set_fieldnorms(false),
|
||||
)
|
||||
.set_fast()
|
||||
.set_fast(None)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
|
||||
let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
|
||||
@@ -500,7 +500,7 @@ mod tests {
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_fast()
|
||||
.set_fast(None)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
let date_field = schema_builder.add_date_field("date", FAST);
|
||||
|
||||
@@ -115,7 +115,7 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>13u64))
|
||||
.unwrap();
|
||||
@@ -148,7 +148,7 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>4u64))
|
||||
.unwrap();
|
||||
@@ -203,7 +203,7 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
||||
for _ in 0..10_000 {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>100_000u64))
|
||||
@@ -231,7 +231,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
||||
// forcing the amplitude to be high
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>0u64))
|
||||
@@ -268,7 +268,7 @@ mod tests {
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
for i in -100i64..10_000i64 {
|
||||
let mut doc = Document::default();
|
||||
doc.add_i64(i64_field, i);
|
||||
@@ -310,7 +310,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
@@ -343,7 +343,7 @@ mod tests {
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
@@ -379,7 +379,7 @@ mod tests {
|
||||
let directory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA).unwrap();
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
|
||||
}
|
||||
@@ -759,7 +759,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(field=>false))
|
||||
@@ -793,7 +793,7 @@ mod tests {
|
||||
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
for _ in 0..50 {
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
@@ -822,7 +822,7 @@ mod tests {
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema).unwrap();
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
@@ -849,7 +849,7 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(schema).unwrap();
|
||||
for doc in docs {
|
||||
fast_field_writers.add_document(doc).unwrap();
|
||||
}
|
||||
@@ -1173,6 +1173,45 @@ mod tests {
|
||||
assert_eq!(&vals, &[33]);
|
||||
}
|
||||
|
||||
/// Checks that the tokenizer configured on a text fast field is applied independently of the
/// tokenizer used for indexing: the field is indexed with "raw" but its fast field uses
/// "default".
#[test]
|
||||
fn test_text_fast_field_tokenizer() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
// Indexing tokenizer is "raw"; the fast field tokenizer is set separately to "default".
let text_fieldtype = crate::schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
crate::schema::TextFieldIndexing::default()
|
||||
.set_index_option(crate::schema::IndexRecordOption::WithFreqs)
|
||||
.set_tokenizer("raw"),
|
||||
)
|
||||
.set_fast(Some("default"))
|
||||
.set_stored();
|
||||
|
||||
let log_field = schema_builder.add_text_field("log_level", text_fieldtype);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// Two documents whose values differ only by case.
index_writer
|
||||
.add_document(doc!(log_field => "info"))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(log_field => "INFO"))
|
||||
.unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
|
||||
|
||||
let text_fast_field = fast_field_reader.str("log_level").unwrap().unwrap();
|
||||
let mut buffer = String::new();
|
||||
// The dictionary is expected to hold exactly one term, "info": ordinal 0 resolves to it
// and ordinal 1 does not exist (presumably because the "default" tokenizer lower-cases).
assert!(text_fast_field.ord_to_str(0, &mut buffer).unwrap());
|
||||
assert_eq!(buffer, "info");
|
||||
assert!(!text_fast_field.ord_to_str(1, &mut buffer).unwrap());
|
||||
|
||||
// Both documents must point at that single term ordinal.
assert!(text_fast_field.term_ords(0).eq([0].into_iter()));
|
||||
assert!(text_fast_field.term_ords(1).eq([0].into_iter()));
|
||||
assert!(text_fast_field.ords().values_for_doc(0u32).eq([0]));
|
||||
assert!(text_fast_field.ords().values_for_doc(1u32).eq([0]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shadowing_fast_field_with_expand_dots() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
@@ -2,11 +2,13 @@ use std::io;
|
||||
|
||||
use columnar::{ColumnarWriter, NumericalValue};
|
||||
use common::replace_in_place;
|
||||
use tokenizer_api::Token;
|
||||
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
|
||||
use crate::schema::{value_type_to_column_type, Document, FieldType, Schema, Type, Value};
|
||||
use crate::{DatePrecision, DocId};
|
||||
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
|
||||
use crate::{DatePrecision, DocId, TantivyError};
|
||||
|
||||
/// Only index JSON down to a depth of 20.
|
||||
/// This is mostly to guard us from a stack overflow triggered by malicious input.
|
||||
@@ -15,7 +17,8 @@ const JSON_DEPTH_LIMIT: usize = 20;
|
||||
/// The `FastFieldsWriter` groups all of the fast field writers.
|
||||
pub struct FastFieldsWriter {
|
||||
columnar_writer: ColumnarWriter,
|
||||
fast_field_names: Vec<Option<String>>, //< TODO see if we can cash the field name hash too.
|
||||
fast_field_names: Vec<Option<String>>, //< TODO see if we can cache the field name hash too.
|
||||
per_field_tokenizer: Vec<Option<TextAnalyzer>>,
|
||||
date_precisions: Vec<DatePrecision>,
|
||||
expand_dots: Vec<bool>,
|
||||
num_docs: DocId,
|
||||
@@ -25,14 +28,25 @@ pub struct FastFieldsWriter {
|
||||
|
||||
impl FastFieldsWriter {
|
||||
/// Create all `FastFieldWriter` required by the schema.
|
||||
pub fn from_schema(schema: &Schema) -> FastFieldsWriter {
|
||||
#[cfg(test)]
|
||||
pub fn from_schema(schema: &Schema) -> crate::Result<FastFieldsWriter> {
|
||||
FastFieldsWriter::from_schema_and_tokenizer_manager(&schema, TokenizerManager::new())
|
||||
}
|
||||
|
||||
/// Create all `FastFieldWriter` required by the schema.
|
||||
pub fn from_schema_and_tokenizer_manager(
|
||||
schema: &Schema,
|
||||
tokenizer_manager: TokenizerManager,
|
||||
) -> crate::Result<FastFieldsWriter> {
|
||||
let mut columnar_writer = ColumnarWriter::default();
|
||||
|
||||
let mut fast_field_names: Vec<Option<String>> = vec![None; schema.num_fields()];
|
||||
let mut date_precisions: Vec<DatePrecision> =
|
||||
std::iter::repeat_with(DatePrecision::default)
|
||||
.take(schema.num_fields())
|
||||
.collect();
|
||||
let mut expand_dots = vec![false; schema.num_fields()];
|
||||
let mut per_field_tokenizer = vec![None; schema.num_fields()];
|
||||
// TODO see other types
|
||||
for (field_id, field_entry) in schema.fields() {
|
||||
if !field_entry.field_type().is_fast() {
|
||||
@@ -47,6 +61,18 @@ impl FastFieldsWriter {
|
||||
expand_dots[field_id.field_id() as usize] =
|
||||
json_object_options.is_expand_dots_enabled();
|
||||
}
|
||||
if let FieldType::Str(text_options) = field_entry.field_type() {
|
||||
if let Some(tokenizer_name) = text_options.get_fast_field_tokenizer_name() {
|
||||
let text_analyzer = tokenizer_manager.get(tokenizer_name).ok_or_else(|| {
|
||||
TantivyError::InvalidArgument(format!(
|
||||
"Tokenizer {:?} not found",
|
||||
tokenizer_name
|
||||
))
|
||||
})?;
|
||||
per_field_tokenizer[field_id.field_id() as usize] = Some(text_analyzer);
|
||||
}
|
||||
}
|
||||
|
||||
let sort_values_within_row = value_type == Type::Facet;
|
||||
if let Some(column_type) = value_type_to_column_type(value_type) {
|
||||
columnar_writer.record_column_type(
|
||||
@@ -56,14 +82,15 @@ impl FastFieldsWriter {
|
||||
);
|
||||
}
|
||||
}
|
||||
FastFieldsWriter {
|
||||
Ok(FastFieldsWriter {
|
||||
columnar_writer,
|
||||
fast_field_names,
|
||||
per_field_tokenizer,
|
||||
num_docs: 0u32,
|
||||
date_precisions,
|
||||
expand_dots,
|
||||
json_path_buffer: String::new(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// The memory used (including children)
|
||||
@@ -111,14 +138,35 @@ impl FastFieldsWriter {
|
||||
);
|
||||
}
|
||||
Value::Str(text_val) => {
|
||||
self.columnar_writer
|
||||
.record_str(doc_id, field_name.as_str(), text_val);
|
||||
if let Some(text_analyzer) =
|
||||
&self.per_field_tokenizer[field_value.field().field_id() as usize]
|
||||
{
|
||||
let mut token_stream = text_analyzer.token_stream(text_val);
|
||||
token_stream.process(&mut |token: &Token| {
|
||||
self.columnar_writer.record_str(
|
||||
doc_id,
|
||||
field_name.as_str(),
|
||||
&token.text,
|
||||
);
|
||||
})
|
||||
} else {
|
||||
self.columnar_writer
|
||||
.record_str(doc_id, field_name.as_str(), text_val);
|
||||
}
|
||||
}
|
||||
Value::Bytes(bytes_val) => {
|
||||
self.columnar_writer
|
||||
.record_bytes(doc_id, field_name.as_str(), bytes_val);
|
||||
}
|
||||
Value::PreTokStr(_) => todo!(),
|
||||
Value::PreTokStr(pre_tok) => {
|
||||
for token in &pre_tok.tokens {
|
||||
self.columnar_writer.record_str(
|
||||
doc_id,
|
||||
field_name.as_str(),
|
||||
&token.text,
|
||||
);
|
||||
}
|
||||
}
|
||||
Value::Bool(bool_val) => {
|
||||
self.columnar_writer
|
||||
.record_bool(doc_id, field_name.as_str(), *bool_val);
|
||||
|
||||
@@ -111,7 +111,10 @@ impl SegmentWriter {
|
||||
per_field_postings_writers,
|
||||
fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
|
||||
segment_serializer,
|
||||
fast_field_writers: FastFieldsWriter::from_schema(&schema),
|
||||
fast_field_writers: FastFieldsWriter::from_schema_and_tokenizer_manager(
|
||||
&schema,
|
||||
tokenizer_manager,
|
||||
)?,
|
||||
doc_opstamps: Vec::with_capacity(1_000),
|
||||
per_field_text_analyzers,
|
||||
term_buffer: Term::with_capacity(16),
|
||||
|
||||
@@ -16,13 +16,53 @@ pub struct TextOptions {
|
||||
#[serde(default)]
|
||||
stored: bool,
|
||||
#[serde(default)]
|
||||
fast: bool,
|
||||
fast: FastFieldOptions,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "is_false")]
|
||||
/// coerce values if they are not of type string
|
||||
/// coerce values into string if they are not of type string
|
||||
coerce: bool,
|
||||
}
|
||||
|
||||
/// Fast field configuration of a text field.
///
/// Because the enum is `#[serde(untagged)]`, it is (de)serialized either as a bare boolean
/// (`"fast": true`) or as an object naming a tokenizer
/// (`"fast": { "with_tokenizer": "default" }`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum FastFieldOptions {
|
||||
/// Fast field enabled (`true`) or disabled (`false`), with no tokenizer configured.
IsEnabled(bool),
|
||||
/// Fast field enabled, with values run through the named tokenizer.
EnabledWithTokenizer { with_tokenizer: TokenizerName },
|
||||
}
|
||||
|
||||
/// The fast field is disabled by default.
impl Default for FastFieldOptions {
|
||||
fn default() -> Self {
|
||||
FastFieldOptions::IsEnabled(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Merges two fast field configurations, keeping the "most enabled" one.
///
/// Precedence, from highest to lowest:
/// 1. a configuration carrying a tokenizer (if both operands carry one, the left operand's
///    pattern is listed first, so its tokenizer is the one kept),
/// 2. `IsEnabled(true)`,
/// 3. `IsEnabled(false)`.
impl BitOr<FastFieldOptions> for FastFieldOptions {
|
||||
type Output = FastFieldOptions;
|
||||
|
||||
fn bitor(self, other: FastFieldOptions) -> FastFieldOptions {
|
||||
match (self, other) {
|
||||
// Either side names a tokenizer: the result is enabled with that tokenizer.
(
|
||||
FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: tokenizer,
|
||||
},
|
||||
_,
|
||||
)
|
||||
| (
|
||||
_,
|
||||
FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: tokenizer,
|
||||
},
|
||||
) => FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: tokenizer,
|
||||
},
|
||||
// No tokenizer on either side, but at least one side is enabled.
(FastFieldOptions::IsEnabled(true), _) | (_, FastFieldOptions::IsEnabled(true)) => {
|
||||
FastFieldOptions::IsEnabled(true)
|
||||
}
|
||||
// Only remaining case: both sides are `IsEnabled(false)`.
(_, FastFieldOptions::IsEnabled(false)) => FastFieldOptions::IsEnabled(false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_false(val: &bool) -> bool {
|
||||
!val
|
||||
}
|
||||
@@ -40,7 +80,21 @@ impl TextOptions {
|
||||
|
||||
/// Returns true if and only if the value is a fast field.
|
||||
pub fn is_fast(&self) -> bool {
|
||||
self.fast
|
||||
matches!(self.fast, FastFieldOptions::IsEnabled(true))
|
||||
|| matches!(
|
||||
&self.fast,
|
||||
FastFieldOptions::EnabledWithTokenizer { with_tokenizer: _ }
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the name of the tokenizer configured for the fast field, or `None` if the fast
/// field is disabled or is enabled without a tokenizer.
|
||||
pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
|
||||
match &self.fast {
|
||||
FastFieldOptions::IsEnabled(true) | FastFieldOptions::IsEnabled(false) => None,
|
||||
FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: tokenizer,
|
||||
} => Some(tokenizer.name()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if values should be coerced to strings (numbers, null).
|
||||
@@ -53,19 +107,24 @@ impl TextOptions {
|
||||
/// Fast fields are designed for random access.
|
||||
/// Access times are similar to a random lookup in an array.
|
||||
/// Text fast fields will have the term ids stored in the fast field.
|
||||
/// The fast field will be a multivalued fast field.
|
||||
///
|
||||
/// The effective cardinality depends on the tokenizer. When creating fast fields on text
|
||||
/// fields it is recommended to use the "raw" tokenizer, since it will store the original text
|
||||
/// unchanged. The "default" tokenizer will store the terms as lower case and this will be
|
||||
/// reflected in the dictionary.
|
||||
/// The effective cardinality depends on the tokenizer. Without a tokenizer, the text will be
|
||||
/// stored as is, which is equivalent to the "raw" tokenizer. The tokenizer can be used to apply
|
||||
/// normalization like lower case.
|
||||
///
|
||||
/// The original text can be retrieved via
|
||||
/// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
|
||||
/// from the dictionary.
|
||||
#[must_use]
|
||||
pub fn set_fast(mut self) -> TextOptions {
|
||||
self.fast = true;
|
||||
pub fn set_fast(mut self, tokenizer_name: Option<&str>) -> TextOptions {
|
||||
if let Some(tokenizer) = tokenizer_name {
|
||||
let tokenizer = TokenizerName::from_name(tokenizer);
|
||||
self.fast = FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: tokenizer,
|
||||
}
|
||||
} else {
|
||||
self.fast = FastFieldOptions::IsEnabled(true);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
@@ -92,7 +151,7 @@ impl TextOptions {
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Debug, Eq, Serialize, Deserialize)]
|
||||
struct TokenizerName(Cow<'static, str>);
|
||||
pub(crate) struct TokenizerName(Cow<'static, str>);
|
||||
|
||||
const DEFAULT_TOKENIZER_NAME: &str = "default";
|
||||
|
||||
@@ -105,7 +164,7 @@ impl Default for TokenizerName {
|
||||
}
|
||||
|
||||
impl TokenizerName {
|
||||
const fn from_static(name: &'static str) -> Self {
|
||||
pub const fn from_static(name: &'static str) -> Self {
|
||||
TokenizerName(Cow::Borrowed(name))
|
||||
}
|
||||
fn from_name(name: &str) -> Self {
|
||||
@@ -199,7 +258,7 @@ pub const STRING: TextOptions = TextOptions {
|
||||
record: IndexRecordOption::Basic,
|
||||
}),
|
||||
stored: false,
|
||||
fast: false,
|
||||
fast: FastFieldOptions::IsEnabled(false),
|
||||
coerce: false,
|
||||
};
|
||||
|
||||
@@ -212,7 +271,7 @@ pub const TEXT: TextOptions = TextOptions {
|
||||
}),
|
||||
stored: false,
|
||||
coerce: false,
|
||||
fast: false,
|
||||
fast: FastFieldOptions::IsEnabled(false),
|
||||
};
|
||||
|
||||
impl<T: Into<TextOptions>> BitOr<T> for TextOptions {
|
||||
@@ -240,7 +299,7 @@ impl From<StoredFlag> for TextOptions {
|
||||
TextOptions {
|
||||
indexing: None,
|
||||
stored: true,
|
||||
fast: false,
|
||||
fast: FastFieldOptions::IsEnabled(false),
|
||||
coerce: false,
|
||||
}
|
||||
}
|
||||
@@ -251,7 +310,7 @@ impl From<CoerceFlag> for TextOptions {
|
||||
TextOptions {
|
||||
indexing: None,
|
||||
stored: false,
|
||||
fast: false,
|
||||
fast: FastFieldOptions::IsEnabled(false),
|
||||
coerce: true,
|
||||
}
|
||||
}
|
||||
@@ -262,7 +321,7 @@ impl From<FastFlag> for TextOptions {
|
||||
TextOptions {
|
||||
indexing: None,
|
||||
stored: false,
|
||||
fast: true,
|
||||
fast: FastFieldOptions::IsEnabled(true),
|
||||
coerce: false,
|
||||
}
|
||||
}
|
||||
@@ -281,6 +340,7 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::schema::text_options::{FastFieldOptions, TokenizerName};
|
||||
use crate::schema::*;
|
||||
|
||||
#[test]
|
||||
@@ -323,4 +383,44 @@ mod tests {
|
||||
let options3: TextOptions = serde_json::from_str("{}").unwrap();
|
||||
assert_eq!(options3.indexing, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serde_fast_field_tokenizer() {
|
||||
let json = r#" {
|
||||
"fast": { "with_tokenizer": "default" }
|
||||
} "#;
|
||||
let options: TextOptions = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(
|
||||
options.fast,
|
||||
FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: TokenizerName::from_static("default")
|
||||
}
|
||||
);
|
||||
let options: TextOptions =
|
||||
serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
|
||||
assert_eq!(
|
||||
options.fast,
|
||||
FastFieldOptions::EnabledWithTokenizer {
|
||||
with_tokenizer: TokenizerName::from_static("default")
|
||||
}
|
||||
);
|
||||
|
||||
let json = r#" {
|
||||
"fast": true
|
||||
} "#;
|
||||
let options: TextOptions = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(options.fast, FastFieldOptions::IsEnabled(true));
|
||||
let options: TextOptions =
|
||||
serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
|
||||
assert_eq!(options.fast, FastFieldOptions::IsEnabled(true));
|
||||
|
||||
let json = r#" {
|
||||
"fast": false
|
||||
} "#;
|
||||
let options: TextOptions = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(options.fast, FastFieldOptions::IsEnabled(false));
|
||||
let options: TextOptions =
|
||||
serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
|
||||
assert_eq!(options.fast, FastFieldOptions::IsEnabled(false));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user