Removes the indexed option for facets.
Facets are now always indexed.

Closes #1195
This commit is contained in:
Paul Masurel
2021-12-02 14:37:19 +09:00
committed by GitHub
parent c3cc93406d
commit dde49ac8e2
16 changed files with 96 additions and 175 deletions

View File

@@ -1,6 +1,7 @@
Tantivy 0.17
================================
- Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
- Facets are necessarily indexed. Existing index with indexed facets should work out of the box. Index without facets that are marked with index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195 .
Tantivy 0.16.2
================================

View File

@@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {
let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification", INDEXED);
let classification = schema_builder.add_facet_field("classification", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

View File

@@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);
let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

View File

@@ -83,7 +83,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ```rust
/// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT};
/// use tantivy::schema::{Facet, Schema, FacetOptions, TEXT};
/// use tantivy::{doc, Index};
///
/// fn example() -> tantivy::Result<()> {
@@ -92,7 +92,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// // Facet have their own specific type.
/// // It is not a bad practise to put all of your
/// // facet information in the same field.
/// let facet = schema_builder.add_facet_field("facet", INDEXED);
/// let facet = schema_builder.add_facet_field("facet", FacetOptions::default());
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
@@ -462,7 +462,7 @@ mod tests {
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::schema::{Document, Facet, FacetOptions, Field, IndexRecordOption, Schema};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
@@ -472,7 +472,7 @@ mod tests {
#[test]
fn test_facet_collector_drilldown() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -533,7 +533,7 @@ mod tests {
#[test]
fn test_doc_unsorted_multifacet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
@@ -558,7 +558,7 @@ mod tests {
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
@@ -615,7 +615,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -664,7 +664,7 @@ mod tests {
#[test]
fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

View File

@@ -84,81 +84,12 @@ impl FacetReader {
mod tests {
use crate::Index;
use crate::{
schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED},
schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED},
DocAddress, Document,
};
#[test]
fn test_facet_only_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
Ok(())
}
#[test]
fn test_facet_only_stored() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}
#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}
#[test]
fn test_facet_neither_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
@@ -173,17 +104,40 @@ mod tests {
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
let value = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, None);
Ok(())
}
#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value: Option<&Facet> = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
Ok(())
}
#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
@@ -206,7 +160,7 @@ mod tests {
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;

View File

@@ -12,9 +12,9 @@ mod tests {
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
@@ -334,7 +334,7 @@ mod tests {
#[ignore]
fn test_many_facets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let field = schema_builder.add_facet_field("facetfield", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;

View File

@@ -91,12 +91,12 @@ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
mod tests {
use crate::core::Index;
use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};
use crate::schema::{Cardinality, Facet, FacetOptions, IntOptions, Schema};
#[test]
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;

View File

@@ -796,6 +796,7 @@ mod tests {
use crate::query::TermQuery;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::TextFieldIndexing;
use crate::schema::TextOptions;
@@ -1417,7 +1418,7 @@ mod tests {
.set_fast(Cardinality::MultiValues)
.set_stored(),
);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let settings = if sort_index {
IndexSettings {

View File

@@ -1118,13 +1118,13 @@ mod tests {
use crate::query::BooleanQuery;
use crate::query::Scorer;
use crate::query::TermQuery;
use crate::schema::Document;
use crate::schema::Facet;
use crate::schema::IndexRecordOption;
use crate::schema::IntOptions;
use crate::schema::Term;
use crate::schema::TextFieldIndexing;
use crate::schema::{Cardinality, TEXT};
use crate::schema::{Document, FacetOptions};
use crate::DocAddress;
use crate::IndexSettings;
use crate::IndexSortByField;
@@ -1650,7 +1650,7 @@ mod tests {
// ranges between segments so that merge algorithm can't apply certain optimizations
fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let int_options = IntOptions::default()
.set_fast(Cardinality::SingleValue)
.set_indexed();

View File

@@ -1,22 +1,17 @@
#[cfg(test)]
mod tests {
use crate::collector::TopDocs;
use crate::core::Index;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fastfield::{AliveBitSet, FastFieldReader};
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
schema::{Cardinality, TextFieldIndexing},
};
use crate::{core::Index, fastfield::MultiValuedFastFieldReader};
use crate::{
query::QueryParser,
schema::{IntOptions, TextOptions},
};
use crate::{schema::Facet, IndexSortByField};
use crate::{schema::INDEXED, Order};
use crate::{
schema::{self, BytesOptions},
DocAddress,
use crate::query::QueryParser;
use crate::schema::{
self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, TextFieldIndexing,
};
use crate::schema::{IntOptions, TextOptions};
use crate::DocAddress;
use crate::IndexSortByField;
use crate::Order;
use crate::{DocSet, IndexSettings, Postings, Term};
use futures::executor::block_on;
@@ -27,7 +22,7 @@ mod tests {
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
@@ -79,7 +74,7 @@ mod tests {
let bytes_options = BytesOptions::default().set_fast().set_indexed();
let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",

View File

@@ -175,16 +175,9 @@ impl SegmentWriter {
match *field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
term_buffer.set_field(field);
let facets =
field_values
.iter()
.flat_map(|field_value| match *field_value.value() {
Value::Facet(ref facet) => Some(facet.encoded_str()),
_ => {
panic!("Expected hierarchical facet");
}
});
for facet_str in facets {
for field_value in field_values {
let facet = field_value.value().facet().ok_or_else(make_schema_error)?;
let facet_str = facet.encoded_str();
let mut unordered_term_id_opt = None;
FacetTokenizer
.token_stream(facet_str)

View File

@@ -47,7 +47,6 @@ pub mod tests {
use crate::fieldnorm::FieldNormReader;
use crate::indexer::operation::AddOperation;
use crate::indexer::SegmentWriter;
use crate::merge_policy::NoMergePolicy;
use crate::query::Scorer;
use crate::schema::{Field, TextOptions};
use crate::schema::{IndexRecordOption, TextFieldIndexing};

View File

@@ -587,6 +587,7 @@ mod test {
use super::QueryParser;
use super::QueryParserError;
use crate::query::Query;
use crate::schema::FacetOptions;
use crate::schema::Field;
use crate::schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use crate::schema::{Schema, Term, INDEXED, STORED, STRING, TEXT};
@@ -615,8 +616,7 @@ mod test {
schema_builder.add_text_field("with_stop_words", text_options);
schema_builder.add_date_field("date", INDEXED);
schema_builder.add_f64_field("float", INDEXED);
schema_builder.add_facet_field("facet", INDEXED);
schema_builder.add_facet_field("facet_not_indexed", STORED);
schema_builder.add_facet_field("facet", FacetOptions::default());
schema_builder.add_bytes_field("bytes", INDEXED);
schema_builder.add_bytes_field("bytes_not_indexed", STORED);
schema_builder.build()
@@ -669,13 +669,6 @@ mod test {
);
}
#[test]
fn test_parse_query_facet_not_indexed() {
let error =
parse_query_to_logical_ast("facet_not_indexed:/root/branch/leaf", false).unwrap_err();
assert!(matches!(error, QueryParserError::FieldNotIndexed(_)));
}
#[test]
pub fn test_parse_query_with_boost() {
let mut query_parser = make_query_parser();
@@ -817,7 +810,7 @@ mod test {
fn test_parse_bytes() {
test_parse_query_to_logical_ast_helper(
"bytes:YnVidQ==",
"Term(field=13,bytes=[98, 117, 98, 117])",
"Term(field=12,bytes=[98, 117, 98, 117])",
false,
);
}
@@ -832,7 +825,7 @@ mod test {
fn test_parse_bytes_phrase() {
test_parse_query_to_logical_ast_helper(
"bytes:\"YnVidQ==\"",
"Term(field=13,bytes=[98, 117, 98, 117])",
"Term(field=12,bytes=[98, 117, 98, 117])",
false,
);
}

View File

@@ -1,11 +1,12 @@
use crate::schema::flags::{IndexedFlag, SchemaFlagList, StoredFlag};
use crate::schema::flags::{SchemaFlagList, StoredFlag, IndexedFlag};
use serde::{Deserialize, Serialize};
use std::ops::BitOr;
/// Define how a facet field should be handled by tantivy.
///
/// Note that a Facet is always indexed and stored as a fastfield.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct FacetOptions {
indexed: bool,
stored: bool,
}
@@ -15,11 +16,6 @@ impl FacetOptions {
self.stored
}
/// Returns true iff the value is indexed.
pub fn is_indexed(&self) -> bool {
self.indexed
}
/// Set the field as stored.
///
/// Only the fields that are set as *stored* are
@@ -28,15 +24,6 @@ impl FacetOptions {
self.stored = true;
self
}
/// Set the field as indexed.
///
/// Setting a facet as indexed will generate
/// a walkable path.
pub fn set_indexed(mut self) -> FacetOptions {
self.indexed = true;
self
}
}
impl From<()> for FacetOptions {
@@ -47,19 +34,7 @@ impl From<()> for FacetOptions {
impl From<StoredFlag> for FacetOptions {
fn from(_: StoredFlag) -> Self {
FacetOptions {
indexed: false,
stored: true,
}
}
}
impl From<IndexedFlag> for FacetOptions {
fn from(_: IndexedFlag) -> Self {
FacetOptions {
indexed: true,
stored: false,
}
FacetOptions { stored: true }
}
}
@@ -69,7 +44,6 @@ impl<T: Into<FacetOptions>> BitOr<T> for FacetOptions {
fn bitor(self, other: T) -> FacetOptions {
let other = other.into();
FacetOptions {
indexed: self.indexed | other.indexed,
stored: self.stored | other.stored,
}
}
@@ -85,3 +59,22 @@ where
Self::from(head_tail.head) | Self::from(head_tail.tail)
}
}
impl From<IndexedFlag> for FacetOptions {
fn from(_: IndexedFlag) -> Self {
FacetOptions {
stored: false,
}
}
}
#[cfg(test)]
mod tests {
use crate::schema::{FacetOptions, INDEXED};
#[test]
fn test_from_index_flag() {
let facet_option = FacetOptions::from(INDEXED);
assert_eq!(facet_option, FacetOptions::default());
}
}

View File

@@ -92,7 +92,7 @@ impl FieldType {
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options) => int_options.is_indexed(),
FieldType::Date(ref date_options) => date_options.is_indexed(),
FieldType::HierarchicalFacet(ref facet_options) => facet_options.is_indexed(),
FieldType::HierarchicalFacet(ref _facet_options) => true,
FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(),
}
}
@@ -116,13 +116,7 @@ impl FieldType {
None
}
}
FieldType::HierarchicalFacet(ref facet_options) => {
if facet_options.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
FieldType::HierarchicalFacet(ref _facet_options) => Some(IndexRecordOption::Basic),
FieldType::Bytes(ref bytes_options) => {
if bytes_options.is_indexed() {
Some(IndexRecordOption::Basic)

View File

@@ -137,19 +137,18 @@ impl Value {
}
}
/// Returns the path value, provided the value is of the `Facet` type.
/// Returns the facet value, provided the value is of the `Facet` type.
/// (Returns None if the value is not of the `Facet` type).
pub fn path(&self) -> Option<String> {
pub fn facet(&self) -> Option<&Facet> {
if let Value::Facet(facet) = self {
Some(facet.to_path_string())
Some(facet)
} else {
None
}
}
/// Returns the tokenized text, provided the value is of the `PreTokStr` type.
///
/// Returns None if the value is not of the `PreTokStr` type.
/// (Returns None if the value is not of the `PreTokStr` type.)
pub fn tokenized_text(&self) -> Option<&PreTokenizedString> {
if let Value::PreTokStr(tokenized_text) = self {
Some(tokenized_text)
@@ -159,8 +158,7 @@ impl Value {
}
/// Returns the u64-value, provided the value is of the `U64` type.
///
/// Returns None if the value is not of the `U64` type.
/// (Returns None if the value is not of the `U64` type)
pub fn u64_value(&self) -> Option<u64> {
if let Value::U64(val) = self {
Some(*val)