mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-23 16:30:39 +00:00
feat: supports large string (#7097)
* feat: supports large string
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: add doc for extract_string_vector_values
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: refactor by cr comments
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: changes by cr comments
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* refactor: extract_string_vector_values
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* feat: remove large string type and refactor string vector
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: revert some changes
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* feat: adds large string type
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: impl default for StringSizeType
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* fix: tests and test compatibility
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* test: update sqlness tests
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* chore: remove panic
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
---------
Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray};
|
||||
use arrow_schema::{DataType, Field};
|
||||
use datafusion::logical_expr::{Signature, TypeSignature, Volatility};
|
||||
use datafusion_common::{Result, ScalarValue};
|
||||
@@ -63,7 +63,7 @@ impl VectorProduct {
|
||||
}
|
||||
|
||||
let t = args.schema.field(0).data_type();
|
||||
if !matches!(t, DataType::Utf8 | DataType::Binary) {
|
||||
if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"unexpected input datatype {t} when creating `VEC_PRODUCT`"
|
||||
)));
|
||||
@@ -91,6 +91,13 @@ impl VectorProduct {
|
||||
.map(|x| x.map(Cow::Owned))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
DataType::LargeUtf8 => {
|
||||
let arr: &LargeStringArray = values[0].as_string();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into)))
|
||||
.map(|x: Result<Vec<f32>>| x.map(Cow::Owned))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
DataType::Binary => {
|
||||
let arr: &BinaryArray = values[0].as_binary();
|
||||
arr.iter()
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray};
|
||||
use arrow_schema::{DataType, Field};
|
||||
use datafusion_common::{Result, ScalarValue};
|
||||
use datafusion_expr::{
|
||||
@@ -63,7 +63,7 @@ impl VectorSum {
|
||||
}
|
||||
|
||||
let t = args.schema.field(0).data_type();
|
||||
if !matches!(t, DataType::Utf8 | DataType::Binary) {
|
||||
if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"unexpected input datatype {t} when creating `VEC_SUM`"
|
||||
)));
|
||||
@@ -98,6 +98,21 @@ impl VectorSum {
|
||||
*self.inner(vec_column.len()) += vec_column;
|
||||
}
|
||||
}
|
||||
DataType::LargeUtf8 => {
|
||||
let arr: &LargeStringArray = values[0].as_string();
|
||||
for s in arr.iter() {
|
||||
let Some(s) = s else {
|
||||
if is_update {
|
||||
self.has_null = true;
|
||||
self.sum = None;
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
let values = parse_veclit_from_strlit(s)?;
|
||||
let vec_column = DVectorView::from_slice(&values, values.len());
|
||||
*self.inner(vec_column.len()) += vec_column;
|
||||
}
|
||||
}
|
||||
DataType::Binary => {
|
||||
let arr: &BinaryArray = values[0].as_binary();
|
||||
for b in arr.iter() {
|
||||
|
||||
Reference in New Issue
Block a user