Added documentation / test and change the contract of .add_facet()

This commit is contained in:
Paul Masurel
2018-02-03 00:14:54 +09:00
parent 930010aa88
commit dd028841e8
2 changed files with 78 additions and 42 deletions

View File

@@ -14,10 +14,8 @@ use std::collections::BTreeSet;
use termdict::TermMerger;
use postings::SkipResult;
use std::{u64, usize};
use schema::FACET_SEP_BYTE;
use std::iter::Peekable;
use DocId;
use Result;
use Score;
@@ -93,7 +91,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
///
/// Once collection is finished, you can harvest its results in the form
/// of a `FacetCounts` object, and extract your facet counts from it.
/// of a `FacetCounts` object, and extract your face t counts from it.
///
/// This implementation assumes you are working with a number of facets that
/// is much hundreds of time lower than your number of documents.
@@ -204,7 +202,6 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// Ok(())
/// }
/// ```
pub struct FacetCollector {
facet_ords: Vec<u64>,
field: Field,
@@ -218,14 +215,14 @@ pub struct FacetCollector {
// collapse facet_id -> facet_ord
current_collapse_facet_ords: Vec<u64>,
collapse: BTreeSet<Vec<u8>>,
facets: BTreeSet<Facet>
}
fn skip<'a, I: Iterator<Item=&'a Vec<u8>>>(target: &[u8], collapse_it: &mut Peekable<I>) -> SkipResult {
fn skip<'a, I: Iterator<Item=&'a Facet>>(target: &[u8], collapse_it: &mut Peekable<I>) -> SkipResult {
loop {
match collapse_it.peek() {
Some(facet_bytes) => {
match facet_bytes[..].cmp(&target) {
match facet_bytes.encoded_bytes().cmp(&target) {
Ordering::Less => {}
Ordering::Greater => {
return SkipResult::OverStep;
@@ -251,51 +248,49 @@ impl FacetCollector {
/// This function does not check whether the field
/// is of the proper type.
pub fn for_field(field: Field) -> FacetCollector {
let mut facet_collector = FacetCollector {
FacetCollector {
facet_ords: Vec::with_capacity(255),
field: field,
ff_reader: None,
segment_counters: Vec::new(),
collapse: BTreeSet::new(),
field,
ff_reader: None,
facets: BTreeSet::new(),
current_segment_collapse_mapping: Vec::new(),
current_collapse_facet_ords: Vec::new(),
current_segment_counts: Vec::new(),
};
facet_collector.add_facet(Facet::from("/"));
facet_collector
}
}
/// Adds a facet that we want to record counts
///
/// Adding facet `Facet::from("/country")` for instance,
/// will record the counts of all of the direct children of the facet country
/// (e.g. `/country/FR`, `/country/UK`).
///
/// Adding two facets within which one is the prefix of the other is forbidden.
/// If you need the correct number of unique documents for two such facets,
/// just add them in separate `FacetCollector`.
pub fn add_facet<T>(&mut self, facet_from: T)
where Facet: From<T> {
let facet = Facet::from(facet_from);
let facet_bytes: &[u8] = facet.encoded_bytes();
self.collapse.remove(&facet_bytes[..0]);
for pos in facet_bytes.iter()
.cloned()
.position(|b| b == FACET_SEP_BYTE) {
self.collapse.remove(&facet_bytes[..pos]);
}
self.collapse.insert(facet_bytes.to_owned());
}
fn finalize_segment(&mut self) {
if self.ff_reader.is_some() {
self.segment_counters.push(
SegmentFacetCounter {
facet_reader: unsafe { self.ff_reader.take().unwrap().into_inner() },
facet_ords: mem::replace(&mut self.current_collapse_facet_ords, Vec::new()),
facet_counts: mem::replace(&mut self.current_segment_counts, Vec::new()),
}
for old_facet in &self.facets {
assert!(
!old_facet.is_prefix_of(&facet),
"Tried to add a facet which is a descendant of an already added facet.");
assert!(
!facet.is_prefix_of(&old_facet),
"Tried to add a facet which is an ancestor of an already added facet."
);
}
self.facets.insert(facet);
}
fn set_collapse_mapping(&mut self, facet_reader: &FacetReader) {
self.current_segment_collapse_mapping.clear();
self.current_collapse_facet_ords.clear();
self.current_segment_counts.clear();
let mut collapse_facet_it = self.collapse.iter().peekable();
let mut collapse_facet_it = self.facets.iter().peekable();
self.current_collapse_facet_ords.push(0);
let mut facet_streamer = facet_reader
.facet_dict()
@@ -338,6 +333,18 @@ impl FacetCollector {
}
}
fn finalize_segment(&mut self) {
if self.ff_reader.is_some() {
self.segment_counters.push(
SegmentFacetCounter {
facet_reader: unsafe { self.ff_reader.take().unwrap().into_inner() },
facet_ords: mem::replace(&mut self.current_collapse_facet_ords, Vec::new()),
facet_counts: mem::replace(&mut self.current_segment_counts, Vec::new()),
}
);
}
}
/// Returns the results of the collection.
///
/// This method does not just return the counters,
@@ -392,7 +399,7 @@ impl FacetCollector {
}
}
FacetCounts {
facet_counts: facet_counts
facet_counts
}
}
}
@@ -470,10 +477,7 @@ impl FacetCounts {
let mut it = self.get(facet);
for (ref facet, count) in (&mut it).take(k) {
heap.push(Hit {
count: count,
facet: facet
});
heap.push(Hit { count, facet });
}
let mut lowest_count: u64 = heap.peek()
@@ -483,10 +487,7 @@ impl FacetCounts {
if count > lowest_count {
lowest_count = count;
if let Some(mut head) = heap.peek_mut() {
*head = Hit {
count: count,
facet: facet
};
*head = Hit { count, facet };
}
}
}
@@ -508,6 +509,7 @@ mod tests {
use query::AllQuery;
use super::{FacetCollector, FacetCounts};
use std::iter;
use schema::Field;
use rand::{thread_rng, Rng};
#[test]
@@ -561,6 +563,21 @@ mod tests {
}
}
#[test]
#[should_panic(expected="Tried to add a facet which is a descendant of an already added facet.")]
fn test_misused_facet_collector() {
let mut facet_collector = FacetCollector::for_field(Field(0));
facet_collector.add_facet(Facet::from("/country"));
facet_collector.add_facet(Facet::from("/country/europe"));
}
#[test]
fn test_non_used_facet_collector() {
let mut facet_collector = FacetCollector::for_field(Field(0));
facet_collector.add_facet(Facet::from("/country"));
facet_collector.add_facet(Facet::from("/countryeurope"));
}
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = SchemaBuilder::new();

View File

@@ -2,6 +2,7 @@ use std::fmt::{self, Display, Debug, Formatter};
use std::str;
use std::io::{self, Read, Write};
use regex::Regex;
use std::borrow::Borrow;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Cow;
use common::BinarySerializable;
@@ -93,8 +94,26 @@ impl Facet {
pub(crate) fn inner_buffer_mut(&mut self) -> &mut Vec<u8> {
&mut self.0
}
/// Returns `true` iff other is a subfacet of `self`.
pub fn is_prefix_of(&self, other: &Facet) -> bool {
let self_bytes: &[u8] = self.encoded_bytes();
let other_bytes: &[u8] = other.encoded_bytes();
if self_bytes.len() < other_bytes.len() {
if other_bytes.starts_with(self_bytes) {
return other_bytes[self_bytes.len()] == 0u8;
}
}
false
}
}
impl Borrow<[u8]> for Facet {
fn borrow(&self) -> &[u8] {
self.encoded_bytes()
}
}
impl<'a, T: ?Sized + AsRef<str>> From<&'a T> for Facet {