mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-08 14:22:58 +00:00
feat: add StringVector datatype (#28)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#![feature(generic_associated_types)]
|
||||
|
||||
use arrow::array::{BinaryArray, MutableBinaryArray};
|
||||
use arrow::array;
|
||||
use arrow::array::{BinaryArray, MutableBinaryArray, Utf8Array};
|
||||
|
||||
mod data_type;
|
||||
pub mod prelude;
|
||||
@@ -12,6 +13,10 @@ pub mod vectors;
|
||||
|
||||
pub type LargeBinaryArray = BinaryArray<i64>;
|
||||
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;
|
||||
|
||||
pub type StringArray = Utf8Array<i32>;
|
||||
pub type MutableStringArray = array::MutableUtf8Array<i32>;
|
||||
|
||||
pub mod schema;
|
||||
|
||||
pub mod deserialize;
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
mod binary_type;
|
||||
mod primitive_traits;
|
||||
mod primitive_type;
|
||||
mod string_type;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use primitive_traits::Primitive;
|
||||
pub use primitive_type::{DataTypeBuilder, PrimitiveType};
|
||||
pub use string_type::StringType;
|
||||
|
||||
34
src/datatypes/src/types/string_type.rs
Normal file
34
src/datatypes/src/types/string_type.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct StringType;
|
||||
|
||||
impl StringType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for StringType {
|
||||
fn name(&self) -> &str {
|
||||
"String"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::String
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
StringBytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Utf8
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod binary;
|
||||
pub mod primitive;
|
||||
mod string;
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
119
src/datatypes/src/vectors/string.rs
Normal file
119
src/datatypes/src/vectors/string.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, Utf8ValuesIter};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::error::SerializeSnafu;
|
||||
use crate::prelude::{ScalarVectorBuilder, Vector};
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
use crate::{MutableStringArray, StringArray};
|
||||
|
||||
/// String array wrapper
|
||||
#[derive(Clone)]
|
||||
pub struct StringVector {
|
||||
array: StringArray,
|
||||
}
|
||||
|
||||
impl Vector for StringVector {
|
||||
fn data_type(&self) -> DataTypeRef {
|
||||
StringType::arc()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for StringVector {
|
||||
type RefItem<'a> = &'a str;
|
||||
type Iter<'a> = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, i32>>;
|
||||
type Builder = StringVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
match idx < self.array.len() {
|
||||
true => Some(self.array.value(idx)),
|
||||
false => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringVectorBuilder {
|
||||
buffer: MutableStringArray,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
type VectorType = StringVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: MutableStringArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value)
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for StringVector {
|
||||
fn serialize_to_json(&self) -> crate::error::Result<Vec<Value>> {
|
||||
self.array
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null),
|
||||
Some(s) => serde_json::to_value(s),
|
||||
})
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a three-slot vector (value, null, value) and checks its compact JSON rendering.
    #[test]
    pub fn test_serialize_string_vector() {
        let mut builder = StringVectorBuilder::with_capacity(3);
        for item in [Some("hello"), None, Some("world")] {
            builder.push(item);
        }

        let json_values = builder.finish().serialize_to_json().unwrap();
        let rendered = serialize_to_json_string(json_values);

        assert_eq!(r#"["hello",null,"world"]"#, rendered);
    }

    /// Serializes `val` with a `serde_json::Serializer` into an in-memory byte buffer
    /// and returns the bytes as a `String` (lossy UTF-8 conversion).
    ///
    /// Panics if serialization fails.
    pub fn serialize_to_json_string<T>(val: T) -> String
    where
        T: serde::Serialize,
    {
        let mut bytes = vec![];
        {
            // Scope the serializer so its mutable borrow of `bytes` ends before we read it.
            let mut json_serializer = serde_json::Serializer::new(&mut bytes);
            val.serialize(&mut json_serializer).unwrap();
        }
        String::from_utf8_lossy(&bytes).into_owned()
    }
}
|
||||
Reference in New Issue
Block a user