From c1400f25a7bcaee448ec5f0e80c7ab1841361887 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 25 Dec 2019 17:43:33 +0900 Subject: [PATCH] Handle facet search in the QueryParser. (#741) Closes #738 --- examples/faceted_search.rs | 107 +++++++++++++++++-------- src/collector/facet_collector.rs | 56 ++++++++++++- src/query/query_parser/query_parser.rs | 16 +++- 3 files changed, 138 insertions(+), 41 deletions(-) diff --git a/examples/faceted_search.rs b/examples/faceted_search.rs index 7ac67c315..2769af5c9 100644 --- a/examples/faceted_search.rs +++ b/examples/faceted_search.rs @@ -13,63 +13,100 @@ // --- // Importing tantivy... use tantivy::collector::FacetCollector; -use tantivy::query::AllQuery; +use tantivy::query::{AllQuery, TermQuery}; use tantivy::schema::*; use tantivy::{doc, Index}; -use tempfile::TempDir; fn main() -> tantivy::Result<()> { - // Let's create a temporary directory for the - // sake of this example - let index_path = TempDir::new()?; + // Let's create an in-memory index for the sake of this example let mut schema_builder = Schema::builder(); - schema_builder.add_text_field("name", TEXT | STORED); - - // this is our faceted field - schema_builder.add_facet_field("tags"); + let name = schema_builder.add_text_field("felin_name", TEXT | STORED); + // this is our faceted field: its scientific classification + let classification = schema_builder.add_facet_field("classification"); let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); - let index = Index::create_in_dir(&index_path, schema.clone())?; - - let mut index_writer = index.writer(50_000_000)?; - - let name = schema.get_field("name").unwrap(); - let tags = schema.get_field("tags").unwrap(); + let mut index_writer = index.writer(30_000_000)?; // For convenience, tantivy also comes with a macro to // reduce the boilerplate above. 
index_writer.add_document(doc!( - name => "the ditch", - tags => Facet::from("/pools/north") + name => "Cat", + classification => Facet::from("/Felidae/Felinae/Felis") )); - index_writer.add_document(doc!( - name => "little stacey", - tags => Facet::from("/pools/south") + name => "Canada lynx", + classification => Facet::from("/Felidae/Felinae/Lynx") + )); + index_writer.add_document(doc!( + name => "Cheetah", + classification => Facet::from("/Felidae/Felinae/Acinonyx") + )); + index_writer.add_document(doc!( + name => "Tiger", + classification => Facet::from("/Felidae/Pantherinae/Panthera") + )); + index_writer.add_document(doc!( + name => "Lion", + classification => Facet::from("/Felidae/Pantherinae/Panthera") + )); + index_writer.add_document(doc!( + name => "Jaguar", + classification => Facet::from("/Felidae/Pantherinae/Panthera") + )); + index_writer.add_document(doc!( + name => "Sunda clouded leopard", + classification => Facet::from("/Felidae/Pantherinae/Neofelis") + )); + index_writer.add_document(doc!( + name => "Fossa", + classification => Facet::from("/Eupleridae/Cryptoprocta") )); - index_writer.commit()?; let reader = index.reader()?; - let searcher = reader.searcher(); + { + let mut facet_collector = FacetCollector::for_field(classification); + facet_collector.add_facet("/Felidae"); + let facet_counts = searcher.search(&AllQuery, &facet_collector)?; + // This lists all of the facet counts, right below "/Felidae". + let facets: Vec<(&Facet, u64)> = facet_counts.get("/Felidae").collect(); + assert_eq!( + facets, + vec![ + (&Facet::from("/Felidae/Felinae"), 3), + (&Facet::from("/Felidae/Pantherinae"), 4), + ] + ); + } - let mut facet_collector = FacetCollector::for_field(tags); - facet_collector.add_facet("/pools"); + // Facets are also searchable. + // + // For instance a common UI pattern is to allow the user to click on a facet link + // (e.g: `Pantherinae`) to drill down and filter the current result set with this subfacet. 
+ // + // The search would then look as follows. - let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap(); - - // This lists all of the facet counts - let facets: Vec<(&Facet, u64)> = facet_counts.get("/pools").collect(); - assert_eq!( - facets, - vec![ - (&Facet::from("/pools/north"), 1), - (&Facet::from("/pools/south"), 1), - ] - ); + // Check the reference doc for different ways to create a `Facet` object. + { + let facet = Facet::from_text("/Felidae/Pantherinae"); + let facet_term = Term::from_facet(classification, &facet); + let facet_term_query = TermQuery::new(facet_term, IndexRecordOption::Basic); + let mut facet_collector = FacetCollector::for_field(classification); + facet_collector.add_facet("/Felidae/Pantherinae"); + let facet_counts = searcher.search(&facet_term_query, &facet_collector)?; + let facets: Vec<(&Facet, u64)> = facet_counts.get("/Felidae/Pantherinae").collect(); + assert_eq!( + facets, + vec![ + (&Facet::from("/Felidae/Pantherinae/Neofelis"), 1), + (&Facet::from("/Felidae/Pantherinae/Panthera"), 3), + ] + ); + } Ok(()) } diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 132f35e6c..2cbdf023d 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -452,9 +452,11 @@ impl FacetCounts { #[cfg(test)] mod tests { use super::{FacetCollector, FacetCounts}; + use crate::collector::Count; use crate::core::Index; - use crate::query::AllQuery; - use crate::schema::{Document, Facet, Field, Schema}; + use crate::query::{AllQuery, QueryParser, TermQuery}; + use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema}; + use crate::Term; use rand::distributions::Uniform; use rand::prelude::SliceRandom; use rand::{thread_rng, Rng}; @@ -544,6 +546,56 @@ mod tests { assert_eq!(facets[0].1, 1); } + #[test] + fn test_doc_search_by_facet() { + let mut schema_builder = Schema::builder(); + let facet_field = schema_builder.add_facet_field("facet"); + let schema = 
schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + index_writer.add_document(doc!( + facet_field => Facet::from_text(&"/A/A"), + )); + index_writer.add_document(doc!( + facet_field => Facet::from_text(&"/A/B"), + )); + index_writer.add_document(doc!( + facet_field => Facet::from_text(&"/A/C/A"), + )); + index_writer.add_document(doc!( + facet_field => Facet::from_text(&"/D/C/A"), + )); + index_writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + assert_eq!(searcher.num_docs(), 4); + + let count_facet = |facet_str: &str| { + let term = Term::from_facet(facet_field, &Facet::from_text(facet_str)); + searcher + .search(&TermQuery::new(term, IndexRecordOption::Basic), &Count) + .unwrap() + }; + + assert_eq!(count_facet("/"), 4); + assert_eq!(count_facet("/A"), 3); + assert_eq!(count_facet("/A/B"), 1); + assert_eq!(count_facet("/A/C"), 1); + assert_eq!(count_facet("/A/C/A"), 1); + assert_eq!(count_facet("/C/A"), 0); + { + let query_parser = QueryParser::for_index(&index, vec![]); + { + let query = query_parser.parse_query("facet:/A/B").unwrap(); + assert_eq!(1, searcher.search(&query, &Count).unwrap()); + } + { + let query = query_parser.parse_query("facet:/A").unwrap(); + assert_eq!(3, searcher.search(&query, &Count).unwrap()); + } + } + } + #[test] fn test_non_used_facet_collector() { let mut facet_collector = FacetCollector::for_field(Field::from_field_id(0)); diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index aad1de652..b9b6d2462 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -8,7 +8,7 @@ use crate::query::PhraseQuery; use crate::query::Query; use crate::query::RangeQuery; use crate::query::TermQuery; -use crate::schema::IndexRecordOption; +use crate::schema::{Facet, IndexRecordOption}; use crate::schema::{Field, Schema}; 
use crate::schema::{FieldType, Term}; use crate::tokenizer::TokenizerManager; @@ -319,7 +319,10 @@ impl QueryParser { )) } } - FieldType::HierarchicalFacet => Ok(vec![(0, Term::from_field_text(field, phrase))]), + FieldType::HierarchicalFacet => { + let facet = Facet::from_text(phrase); + Ok(vec![(0, Term::from_field_text(field, facet.encoded_str()))]) + } FieldType::Bytes => { let field_name = self.schema.get_field_name(field).to_string(); Err(QueryParserError::FieldNotIndexed(field_name)) @@ -554,6 +557,7 @@ mod test { schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_date_field("date", INDEXED); schema_builder.add_f64_field("float", INDEXED); + schema_builder.add_facet_field("facet"); let schema = schema_builder.build(); let default_fields = vec![title, text]; let tokenizer_manager = TokenizerManager::default(); @@ -588,9 +592,13 @@ mod test { } #[test] - pub fn test_parse_query_simple() { + pub fn test_parse_query_facet() { let query_parser = make_query_parser(); - assert!(query_parser.parse_query("toto").is_ok()); + let query = query_parser.parse_query("facet:/root/branch/leaf").unwrap(); + assert_eq!( + format!("{:?}", query), + "TermQuery(Term(field=11,bytes=[114, 111, 111, 116, 0, 98, 114, 97, 110, 99, 104, 0, 108, 101, 97, 102]))" + ); } #[test]