diff --git a/src/common/base/src/bytes.rs b/src/common/base/src/bytes.rs
index 5a934fad64..59cc46fee0 100644
--- a/src/common/base/src/bytes.rs
+++ b/src/common/base/src/bytes.rs
@@ -1,10 +1,158 @@
-use serde::Serialize;
-/// Bytes buffer.
-#[derive(Debug, Default, Clone, PartialEq, Serialize)]
-//TODO: impl From and Deref to remove pub declaration
-pub struct Bytes(pub Vec<u8>);
+use std::ops::Deref;
 
-/// String buffer with arbitrary encoding.
-#[derive(Debug, Default, Clone, PartialEq, Serialize)]
-//TODO: impl From and Deref to remove pub declaration
-pub struct StringBytes(pub Vec<u8>);
+use serde::Serialize;
+
+/// Bytes buffer.
+#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
+pub struct Bytes(Vec<u8>);
+
+impl From<Vec<u8>> for Bytes {
+    fn from(bytes: Vec<u8>) -> Bytes {
+        Bytes(bytes)
+    }
+}
+
+impl From<&[u8]> for Bytes {
+    fn from(bytes: &[u8]) -> Bytes {
+        Bytes(bytes.to_vec())
+    }
+}
+
+impl Deref for Bytes {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl PartialEq<Vec<u8>> for Bytes {
+    fn eq(&self, other: &Vec<u8>) -> bool {
+        &self.0 == other
+    }
+}
+
+impl PartialEq<Bytes> for Vec<u8> {
+    fn eq(&self, other: &Bytes) -> bool {
+        *self == other.0
+    }
+}
+
+impl PartialEq<[u8]> for Bytes {
+    fn eq(&self, other: &[u8]) -> bool {
+        self.0 == other
+    }
+}
+
+impl PartialEq<Bytes> for [u8] {
+    fn eq(&self, other: &Bytes) -> bool {
+        self == other.0
+    }
+}
+
+/// String buffer that can hold arbitrary encoding string (only support UTF-8 now).
+///
+/// Now this buffer is restricted to only hold valid UTF-8 string (only allow constructing `StringBytes`
+/// from String or str). We may support other encoding in the future.
+#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
+pub struct StringBytes(Vec<u8>);
+
+impl StringBytes {
+    /// View this string as UTF-8 string slice.
+    ///
+    /// # Safety
+    /// We only allow constructing `StringBytes` from String/str, so the inner
+    /// buffer must holds valid UTF-8.
+    pub fn as_utf8(&self) -> &str {
+        unsafe { std::str::from_utf8_unchecked(&self.0) }
+    }
+}
+
+impl From<String> for StringBytes {
+    fn from(string: String) -> StringBytes {
+        StringBytes(string.into_bytes())
+    }
+}
+
+impl From<&str> for StringBytes {
+    fn from(string: &str) -> StringBytes {
+        StringBytes(string.as_bytes().to_vec())
+    }
+}
+
+impl PartialEq<String> for StringBytes {
+    fn eq(&self, other: &String) -> bool {
+        self.0 == other.as_bytes()
+    }
+}
+
+impl PartialEq<StringBytes> for String {
+    fn eq(&self, other: &StringBytes) -> bool {
+        self.as_bytes() == other.0
+    }
+}
+
+impl PartialEq<str> for StringBytes {
+    fn eq(&self, other: &str) -> bool {
+        self.0 == other.as_bytes()
+    }
+}
+
+impl PartialEq<StringBytes> for str {
+    fn eq(&self, other: &StringBytes) -> bool {
+        self.as_bytes() == other.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn check_bytes_deref(expect: &[u8], given: &[u8]) {
+        assert_eq!(expect, given);
+    }
+
+    #[test]
+    fn test_bytes_deref() {
+        let hello = b"hello";
+        let bytes = Bytes::from(hello.to_vec());
+        check_bytes_deref(hello, &bytes);
+    }
+
+    #[test]
+    fn test_bytes_from() {
+        let hello = b"hello".to_vec();
+        let bytes = Bytes::from(hello.clone());
+        assert_eq!(hello, bytes);
+        assert_eq!(bytes, hello);
+
+        let world: &[u8] = b"world";
+        let bytes = Bytes::from(world);
+        assert_eq!(&bytes, world);
+        assert_eq!(world, &bytes);
+    }
+
+    #[test]
+    fn test_string_bytes_from() {
+        let hello = "hello".to_string();
+        let bytes = StringBytes::from(hello.clone());
+        assert_eq!(hello, bytes);
+        assert_eq!(bytes, hello);
+
+        let world = "world";
+        let bytes = StringBytes::from(world);
+        assert_eq!(world, &bytes);
+        assert_eq!(&bytes, world);
+    }
+
+    fn check_str(expect: &str, given: &str) {
+        assert_eq!(expect, given);
+    }
+
+    #[test]
+    fn test_as_utf8() {
+        let hello = "hello";
+        let bytes = StringBytes::from(hello);
+        check_str(hello, bytes.as_utf8());
+    }
+}
diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs
index 3a236c5699..f81c54af8f 100644
--- a/src/datatypes/src/value.rs
+++ b/src/datatypes/src/value.rs
@@ -61,15 +61,27 @@ impl_from!(Float64, f64);
 impl_from!(String, StringBytes);
 impl_from!(Binary, Bytes);
 
-impl From<&[u8]> for Value {
-    fn from(s: &[u8]) -> Self {
-        Value::Binary(Bytes(s.to_vec()))
+impl From<String> for Value {
+    fn from(string: String) -> Value {
+        Value::String(string.into())
     }
 }
 
 impl From<&str> for Value {
-    fn from(s: &str) -> Self {
-        Value::String(StringBytes(s.to_string().into_bytes()))
+    fn from(string: &str) -> Value {
+        Value::String(string.into())
+    }
+}
+
+impl From<Vec<u8>> for Value {
+    fn from(bytes: Vec<u8>) -> Value {
+        Value::Binary(bytes.into())
+    }
+}
+
+impl From<&[u8]> for Value {
+    fn from(bytes: &[u8]) -> Value {
+        Value::Binary(bytes.into())
     }
 }
 
diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs
index 9d7695eee2..0f205fa3b9 100644
--- a/src/datatypes/src/vectors/binary.rs
+++ b/src/datatypes/src/vectors/binary.rs
@@ -186,7 +186,10 @@ mod tests {
 
         for i in 0..2 {
             assert!(!v.is_null(i));
-            assert_eq!(Value::Binary(Bytes(vec![1, 2, 3])), v.get_unchecked(i));
+            assert_eq!(
+                Value::Binary(Bytes::from(vec![1, 2, 3])),
+                v.get_unchecked(i)
+            );
         }
 
         let arrow_arr = v.to_arrow_array();