mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
use defaults in meta.json (#1310)
This change allows fields in meta.json to be left unset, in which case they fall back to their defaults. Currently it is required to set them explicitly, e.g. fieldnorms: false
This commit is contained in:
@@ -20,9 +20,7 @@ fn main() -> tantivy::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_fieldtype = schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
|
||||
@@ -314,9 +314,7 @@ mod tests {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_fieldtype = crate::schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
@@ -461,9 +459,7 @@ mod tests {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_fieldtype = crate::schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
@@ -957,9 +953,7 @@ mod tests {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_fieldtype = crate::schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
|
||||
@@ -239,7 +239,7 @@ impl InnerSegmentMeta {
|
||||
///
|
||||
/// Contains settings which are applied on the whole
|
||||
/// index, like presort documents.
|
||||
#[derive(Clone, Default, Serialize, Deserialize, Eq, PartialEq)]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct IndexSettings {
|
||||
/// Sorts the documents by information
|
||||
/// provided in `IndexSortByField`
|
||||
@@ -254,7 +254,7 @@ pub struct IndexSettings {
|
||||
/// Presorting documents can greatly improve performance
|
||||
/// in some scenarios, by applying top n
|
||||
/// optimizations.
|
||||
#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct IndexSortByField {
|
||||
/// The field to sort the documents by
|
||||
pub field: String,
|
||||
@@ -262,7 +262,7 @@ pub struct IndexSortByField {
|
||||
pub order: Order,
|
||||
}
|
||||
/// The order to sort by
|
||||
#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub enum Order {
|
||||
/// Ascending Order
|
||||
Asc,
|
||||
@@ -298,12 +298,12 @@ pub struct IndexMeta {
|
||||
pub schema: Schema,
|
||||
/// Opstamp associated to the last `commit` operation.
|
||||
pub opstamp: Opstamp,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Payload associated to the last commit.
|
||||
///
|
||||
/// Upon commit, clients can optionally add a small `String` payload to their commit
|
||||
/// to help identify this commit.
|
||||
/// This payload is entirely unused by tantivy.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub payload: Option<String>,
|
||||
}
|
||||
|
||||
@@ -374,6 +374,7 @@ impl fmt::Debug for IndexMeta {
|
||||
mod tests {
|
||||
|
||||
use super::IndexMeta;
|
||||
use crate::core::index_meta::UntrackedIndexMeta;
|
||||
use crate::schema::{Schema, TEXT};
|
||||
use crate::{IndexSettings, IndexSortByField, Order};
|
||||
|
||||
@@ -402,5 +403,10 @@ mod tests {
|
||||
json,
|
||||
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false}}],"opstamp":0}"#
|
||||
);
|
||||
|
||||
let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(index_metas.index_settings, deser_meta.index_settings);
|
||||
assert_eq!(index_metas.schema, deser_meta.schema);
|
||||
assert_eq!(index_metas.opstamp, deser_meta.opstamp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1157,9 +1157,7 @@ mod tests {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let text_fieldtype = schema::TextOptions::default()
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
|
||||
@@ -126,9 +126,7 @@ pub mod tests {
|
||||
let mut schema_builder = Schema::builder();
|
||||
use crate::schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
|
||||
let no_positions = TextOptions::default().set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqs),
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
);
|
||||
|
||||
let text_field = schema_builder.add_text_field("text", no_positions);
|
||||
|
||||
@@ -183,7 +183,7 @@ impl FieldType {
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if the field is normed.
|
||||
/// returns true if the field is normed (see [fieldnorms](crate::fieldnorm)).
|
||||
pub fn has_fieldnorms(&self) -> bool {
|
||||
match *self {
|
||||
FieldType::Str(ref text_options) => text_options
|
||||
|
||||
@@ -29,6 +29,12 @@ pub enum IndexRecordOption {
|
||||
WithFreqsAndPositions,
|
||||
}
|
||||
|
||||
impl Default for IndexRecordOption {
|
||||
fn default() -> Self {
|
||||
IndexRecordOption::Basic
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexRecordOption {
|
||||
/// Returns true if this option includes encoding
|
||||
/// term frequencies.
|
||||
|
||||
@@ -9,7 +9,10 @@ use crate::schema::IndexRecordOption;
|
||||
/// Define how a text field should be handled by tantivy.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
|
||||
pub struct TextOptions {
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
indexing: Option<TextFieldIndexing>,
|
||||
#[serde(default)]
|
||||
stored: bool,
|
||||
}
|
||||
|
||||
@@ -39,26 +42,56 @@ impl TextOptions {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Debug, Eq, Serialize, Deserialize)]
|
||||
struct TokenizerName(Cow<'static, str>);
|
||||
|
||||
impl Default for TokenizerName {
|
||||
fn default() -> Self {
|
||||
TokenizerName::from_static("default")
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenizerName {
|
||||
const fn from_static(name: &'static str) -> Self {
|
||||
TokenizerName(Cow::Borrowed(name))
|
||||
}
|
||||
fn from_name(name: &str) -> Self {
|
||||
TokenizerName(Cow::Owned(name.to_string()))
|
||||
}
|
||||
fn name(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration defining indexing for a text field.
|
||||
///
|
||||
/// It defines
|
||||
/// - the amount of information that should be stored about the presence of a term in a document.
|
||||
/// - The amount of information that should be stored about the presence of a term in a document.
|
||||
/// Essentially, should we store the term frequency and/or the positions (See
|
||||
/// [`IndexRecordOption`](./enum.IndexRecordOption.html)).
|
||||
/// - the name of the `Tokenizer` that should be used to process the field.
|
||||
/// - The name of the `Tokenizer` that should be used to process the field.
|
||||
/// - Flag indicating whether fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
|
||||
/// to `true`.
|
||||
#[derive(Clone, PartialEq, Debug, Eq, Serialize, Deserialize)]
|
||||
pub struct TextFieldIndexing {
|
||||
#[serde(default)]
|
||||
record: IndexRecordOption,
|
||||
#[serde(default = "default_fieldnorms")]
|
||||
fieldnorms: bool,
|
||||
tokenizer: Cow<'static, str>,
|
||||
#[serde(default)]
|
||||
tokenizer: TokenizerName,
|
||||
}
|
||||
|
||||
pub(crate) fn default_fieldnorms() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
impl Default for TextFieldIndexing {
|
||||
fn default() -> TextFieldIndexing {
|
||||
TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("default"),
|
||||
record: IndexRecordOption::Basic,
|
||||
fieldnorms: true,
|
||||
tokenizer: TokenizerName::default(),
|
||||
record: IndexRecordOption::default(),
|
||||
fieldnorms: default_fieldnorms(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -67,13 +100,13 @@ impl TextFieldIndexing {
|
||||
/// Sets the tokenizer to be used for a given field.
|
||||
#[must_use]
|
||||
pub fn set_tokenizer(mut self, tokenizer_name: &str) -> TextFieldIndexing {
|
||||
self.tokenizer = Cow::Owned(tokenizer_name.to_string());
|
||||
self.tokenizer = TokenizerName::from_name(tokenizer_name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns the tokenizer that will be used for this field.
|
||||
pub fn tokenizer(&self) -> &str {
|
||||
&self.tokenizer
|
||||
&self.tokenizer.name()
|
||||
}
|
||||
|
||||
/// Sets fieldnorms
|
||||
@@ -83,7 +116,7 @@ impl TextFieldIndexing {
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns true if and only if fieldnorms are stored.
|
||||
/// Returns true if and only if [fieldnorms](crate::fieldnorm) are stored.
|
||||
pub fn fieldnorms(&self) -> bool {
|
||||
self.fieldnorms
|
||||
}
|
||||
@@ -108,7 +141,7 @@ impl TextFieldIndexing {
|
||||
/// The field will be untokenized and indexed.
|
||||
pub const STRING: TextOptions = TextOptions {
|
||||
indexing: Some(TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("raw"),
|
||||
tokenizer: TokenizerName::from_static("raw"),
|
||||
fieldnorms: true,
|
||||
record: IndexRecordOption::Basic,
|
||||
}),
|
||||
@@ -118,7 +151,7 @@ pub const STRING: TextOptions = TextOptions {
|
||||
/// The field will be tokenized and indexed.
|
||||
pub const TEXT: TextOptions = TextOptions {
|
||||
indexing: Some(TextFieldIndexing {
|
||||
tokenizer: Cow::Borrowed("default"),
|
||||
tokenizer: TokenizerName::from_static("default"),
|
||||
fieldnorms: true,
|
||||
record: IndexRecordOption::WithFreqsAndPositions,
|
||||
}),
|
||||
@@ -187,4 +220,24 @@ mod tests {
|
||||
assert!(IndexRecordOption::WithFreqsAndPositions > IndexRecordOption::WithFreqs);
|
||||
assert!(IndexRecordOption::WithFreqs > IndexRecordOption::Basic);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serde_default_test() {
|
||||
let json = r#"
|
||||
{
|
||||
"indexing": {
|
||||
"record": "basic",
|
||||
"fieldnorms": true,
|
||||
"tokenizer": "default"
|
||||
},
|
||||
"stored": false
|
||||
}
|
||||
"#;
|
||||
let options: TextOptions = serde_json::from_str(json).unwrap();
|
||||
let options2: TextOptions = serde_json::from_str("{\"indexing\": {}}").unwrap();
|
||||
assert_eq!(options, options2);
|
||||
assert_eq!(options.indexing.unwrap().record, IndexRecordOption::Basic);
|
||||
let options3: TextOptions = serde_json::from_str("{}").unwrap();
|
||||
assert_eq!(options3.indexing, None);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user