use crate::common::BinarySerializable; use once_cell::sync::Lazy; use regex::Regex; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Borrow; use std::borrow::Cow; use std::fmt::{self, Debug, Display, Formatter}; use std::io::{self, Read, Write}; use std::str; use std::string::FromUtf8Error; const SLASH_BYTE: u8 = b'/'; const ESCAPE_BYTE: u8 = b'\\'; /// BYTE used as a level separation in the binary /// representation of facets. pub const FACET_SEP_BYTE: u8 = 0u8; /// `char` used as a level separation in the binary /// representation of facets. (It is the null codepoint.) pub const FACET_SEP_CHAR: char = '\u{0}'; /// An error enum for facet parser. #[derive(Debug, PartialEq, Eq, Error)] pub enum FacetParseError { /// The facet text representation is unparsable. #[error("Failed to parse the facet string: '{0}'")] FacetParseError(String), } /// A Facet represent a point in a given hierarchy. /// /// They are typically represented similarly to a filepath. /// For instance, an e-commerce website could /// have a `Facet` for `/electronics/tv_and_video/led_tv`. /// /// A document can be associated to any number of facets. /// The hierarchy implicitely imply that a document /// belonging to a facet also belongs to the ancestor of /// its facet. In the example above, `/electronics/tv_and_video/` /// and `/electronics`. #[derive(Clone, Default, Eq, Hash, PartialEq, Ord, PartialOrd)] pub struct Facet(String); impl Facet { /// Returns a new instance of the "root facet" /// Equivalent to `/`. pub fn root() -> Facet { Facet("".to_string()) } /// Returns true iff the facet is the root facet `/`. pub fn is_root(&self) -> bool { self.encoded_str().is_empty() } /// Returns a binary representation of the facet. /// /// In this representation, `0u8` is used as a separator /// and the string parts of the facet are unescaped. /// (The first `/` is not encoded at all). /// /// This representation has the benefit of making it possible to /// express "being a child of a given facet" as a range over /// the term ordinals. pub fn encoded_str(&self) -> &str { &self.0 } pub(crate) fn from_encoded_string(facet_string: String) -> Facet { Facet(facet_string) } /// Creates a `Facet` from its binary representation. pub fn from_encoded(encoded_bytes: Vec) -> Result { // facet bytes validation. `0u8` is used a separator but that is still legal utf-8 //Ok(Facet(String::from_utf8(encoded_bytes)?)) String::from_utf8(encoded_bytes).map(Facet) } /// Parse a text representation of a facet. /// /// It is conceptually, if one of the steps of this path /// contains a `/` or a `\`, it should be escaped /// using an anti-slash `/`. pub fn from_text(path: &T) -> Result where T: ?Sized + AsRef, { #[derive(Copy, Clone)] enum State { Escaped, Idle, } let path_ref = path.as_ref(); if path_ref.is_empty() { return Err(FacetParseError::FacetParseError(path_ref.to_string())); } if !path_ref.starts_with('/') { return Err(FacetParseError::FacetParseError(path_ref.to_string())); } let mut facet_encoded = String::new(); let mut state = State::Idle; let path_bytes = path_ref.as_bytes(); let mut last_offset = 1; for i in 1..path_bytes.len() { let c = path_bytes[i]; match (state, c) { (State::Idle, ESCAPE_BYTE) => { facet_encoded.push_str(&path_ref[last_offset..i]); last_offset = i + 1; state = State::Escaped } (State::Idle, SLASH_BYTE) => { facet_encoded.push_str(&path_ref[last_offset..i]); facet_encoded.push(FACET_SEP_CHAR); last_offset = i + 1; } (State::Escaped, _escaped_char) => { state = State::Idle; } (State::Idle, _any_char) => {} } } facet_encoded.push_str(&path_ref[last_offset..]); Ok(Facet(facet_encoded)) } /// Returns a `Facet` from an iterator over the different /// steps of the facet path. /// /// The steps are expected to be unescaped. pub fn from_path(path: Path) -> Facet where Path: IntoIterator, Path::Item: ToString, { let mut facet_string: String = String::with_capacity(100); let mut step_it = path.into_iter(); if let Some(step) = step_it.next() { facet_string.push_str(&step.to_string()); } for step in step_it { facet_string.push(FACET_SEP_CHAR); facet_string.push_str(&step.to_string()); } Facet(facet_string) } /// Accessor for the inner buffer of the `Facet`. pub(crate) fn set_facet_str(&mut self, facet_str: &str) { self.0.clear(); self.0.push_str(facet_str); } /// Returns `true` if other is a subfacet of `self`. pub fn is_prefix_of(&self, other: &Facet) -> bool { let self_str = self.encoded_str(); let other_str = other.encoded_str(); self_str.len() < other_str.len() && other_str.starts_with(self_str) && other_str.as_bytes()[self_str.len()] == FACET_SEP_BYTE } /// Extract path from the `Facet`. pub fn to_path(&self) -> Vec<&str> { self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect() } /// This function is the inverse of Facet::from(&str). pub fn to_path_string(&self) -> String { format!("{}", self) } } impl Borrow for Facet { fn borrow(&self) -> &str { self.encoded_str() } } impl<'a, T: ?Sized + AsRef> From<&'a T> for Facet { fn from(path_asref: &'a T) -> Facet { Facet::from_text(path_asref).unwrap() } } impl BinarySerializable for Facet { fn serialize(&self, writer: &mut W) -> io::Result<()> { ::serialize(&self.0, writer) } fn deserialize(reader: &mut R) -> io::Result { Ok(Facet(::deserialize(reader)?)) } } impl Display for Facet { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { for step in self.0.split(FACET_SEP_CHAR) { write!(f, "/")?; write!(f, "{}", escape_slashes(step))?; } Ok(()) } } fn escape_slashes(s: &str) -> Cow<'_, str> { static SLASH_PTN: Lazy = Lazy::new(|| Regex::new(r"[\\/]").unwrap()); SLASH_PTN.replace_all(s, "\\/") } impl Serialize for Facet { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(&self.to_string()) } } impl<'de> Deserialize<'de> for Facet { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { <&'de str as Deserialize<'de>>::deserialize(deserializer).map(Facet::from) } } impl Debug for Facet { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "Facet({})", self)?; Ok(()) } } #[cfg(test)] mod tests { use super::{Facet, FacetParseError}; #[test] fn test_root() { assert_eq!(Facet::root(), Facet::from("/")); assert_eq!(format!("{}", Facet::root()), "/"); assert!(Facet::root().is_root()); assert_eq!(Facet::root().encoded_str(), ""); } #[test] fn test_from_path() { assert_eq!( Facet::from_path(vec!["top", "a", "firstdoc"]), Facet::from("/top/a/firstdoc") ); } #[test] fn test_facet_display() { { let v = ["first", "second", "third"]; let facet = Facet::from_path(v.iter()); assert_eq!(format!("{}", facet), "/first/second/third"); } { let v = ["first", "sec/ond", "third"]; let facet = Facet::from_path(v.iter()); assert_eq!(format!("{}", facet), "/first/sec\\/ond/third"); } } #[test] fn test_facet_debug() { let v = ["first", "second", "third"]; let facet = Facet::from_path(v.iter()); assert_eq!(format!("{:?}", facet), "Facet(/first/second/third)"); } #[test] fn test_to_path() { let v = ["first", "second", "third\\/not_fourth"]; let facet = Facet::from_path(v.iter()); assert_eq!(facet.to_path(), v); } #[test] fn test_to_path_string() { let v = ["first", "second", "third/not_fourth"]; let facet = Facet::from_path(v.iter()); assert_eq!( facet.to_path_string(), String::from("/first/second/third\\/not_fourth") ); } #[test] fn test_to_path_string_empty() { let v: Vec<&str> = vec![]; let facet = Facet::from_path(v.iter()); assert_eq!(facet.to_path_string(), "/"); } #[test] fn test_from_text() { assert_eq!( Err(FacetParseError::FacetParseError("INVALID".to_string())), Facet::from_text("INVALID") ); } }