feat(vector): add conversion between vector and string (#5029)

* feat(vector): add conversion between vector and string

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix sqlness

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-11-20 16:42:00 +08:00
committed by GitHub
parent 027284ed1b
commit 0aab68c23b
17 changed files with 655 additions and 169 deletions

View File

@@ -21,7 +21,7 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::{debug, error};
use datatypes::prelude::{ConcreteDataType, Value};
use datatypes::schema::SchemaRef;
use datatypes::types::{json_type_value_to_string, vector_type_value_to_string};
use datatypes::types::json_type_value_to_string;
use futures::StreamExt;
use opensrv_mysql::{
Column, ColumnFlags, ColumnType, ErrorKind, OkResponse, QueryResultWriter, RowWriter,
@@ -217,11 +217,6 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
.context(ConvertSqlValueSnafu)?;
row_writer.write_col(s)?;
}
ConcreteDataType::Vector(d) => {
let s = vector_type_value_to_string(&v, d.dim)
.context(ConvertSqlValueSnafu)?;
row_writer.write_col(s)?;
}
_ => {
row_writer.write_col(v.deref())?;
}
@@ -303,7 +298,7 @@ pub(crate) fn create_mysql_column(
ConcreteDataType::Duration(_) => Ok(ColumnType::MYSQL_TYPE_TIME),
ConcreteDataType::Decimal128(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL),
ConcreteDataType::Json(_) => Ok(ColumnType::MYSQL_TYPE_JSON),
ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_STRING),
ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_BLOB),
_ => error::UnsupportedDataTypeSnafu {
data_type,
reason: "not implemented",

View File

@@ -27,9 +27,7 @@ use datafusion_expr::LogicalPlan;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::{ConcreteDataType, Value};
use datatypes::schema::Schema;
use datatypes::types::{
json_type_value_to_string, vector_type_value_to_string, IntervalType, TimestampType,
};
use datatypes::types::{json_type_value_to_string, IntervalType, TimestampType};
use datatypes::value::ListValue;
use pgwire::api::portal::{Format, Portal};
use pgwire::api::results::{DataRowEncoder, FieldInfo};
@@ -178,7 +176,7 @@ fn encode_array(
.collect::<PgWireResult<Vec<Option<f64>>>>()?;
builder.encode_field(&array)
}
&ConcreteDataType::Binary(_) => {
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => {
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
match *bytea_output {
@@ -370,24 +368,6 @@ fn encode_array(
.collect::<PgWireResult<Vec<Option<String>>>>()?;
builder.encode_field(&array)
}
&ConcreteDataType::Vector(d) => {
let array = value_list
.items()
.iter()
.map(|v| match v {
Value::Null => Ok(None),
Value::Binary(v) => {
let s = vector_type_value_to_string(v, d.dim)
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
Ok(Some(s))
}
_ => Err(PgWireError::ApiError(Box::new(Error::Internal {
err_msg: format!("Invalid list item type, find {v:?}, expected vector",),
}))),
})
.collect::<PgWireResult<Vec<Option<String>>>>()?;
builder.encode_field(&array)
}
_ => Err(PgWireError::ApiError(Box::new(Error::Internal {
err_msg: format!(
"cannot write array type {:?} in postgres protocol: unimplemented",
@@ -423,11 +403,6 @@ pub(super) fn encode_value(
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
builder.encode_field(&s)
}
ConcreteDataType::Vector(d) => {
let s = vector_type_value_to_string(v, d.dim)
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
builder.encode_field(&s)
}
_ => {
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
match *bytea_output {
@@ -503,7 +478,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result<Type> {
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => Ok(Type::INT8),
&ConcreteDataType::Float32(_) => Ok(Type::FLOAT4),
&ConcreteDataType::Float64(_) => Ok(Type::FLOAT8),
&ConcreteDataType::Binary(_) => Ok(Type::BYTEA),
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => Ok(Type::BYTEA),
&ConcreteDataType::String(_) => Ok(Type::VARCHAR),
&ConcreteDataType::Date(_) => Ok(Type::DATE),
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => Ok(Type::TIMESTAMP),
@@ -546,7 +521,6 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result<Type> {
}
.fail()
}
&ConcreteDataType::Vector(_) => Ok(Type::FLOAT4_ARRAY),
}
}