feat: introduce vector type (#4964)

* feat: introduce vector type

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: fix prepared stmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add grpc test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: parse vector value

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: column to row

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: sqlness

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: merge issue

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: add check for bytes size

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update tests/cases/standalone/common/types/vector/vector.sql

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update proto

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: simplify cargo

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: address comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Zhenchi
2024-11-12 16:28:44 +08:00
committed by GitHub
parent 84aa5b7b22
commit d616bd92ef
32 changed files with 1109 additions and 120 deletions

View File

@@ -369,16 +369,23 @@ pub(crate) fn validate_proto_value(
column_schema: &ColumnSchema,
) -> Result<()> {
if let Some(value_type) = proto_value_type(value) {
let column_type = ColumnDataType::try_from(column_schema.datatype).map_err(|_| {
InvalidRequestSnafu {
region_id,
reason: format!(
"column {} has unknown type {}",
column_schema.column_name, column_schema.datatype
),
}
.build()
})?;
ensure!(
value_type as i32 == column_schema.datatype,
proto_value_type_match(column_type, value_type),
InvalidRequestSnafu {
region_id,
reason: format!(
"value has type {:?}, but column {} has type {:?}({})",
value_type,
column_schema.column_name,
ColumnDataType::try_from(column_schema.datatype),
column_schema.datatype,
value_type, column_schema.column_name, column_type, column_schema.datatype,
),
}
);
@@ -387,6 +394,14 @@ pub(crate) fn validate_proto_value(
Ok(())
}
/// Returns whether a proto value of type `value_type` is acceptable for a
/// column declared with type `column_type`.
///
/// Two cases are accepted:
/// - the two types are equal;
/// - the column is `Vector` and the value is `Binary`.
fn proto_value_type_match(column_type: ColumnDataType, value_type: ColumnDataType) -> bool {
    // Exact match is checked first; the only cross-type pairing allowed is
    // a `Binary` value for a `Vector` column.
    column_type == value_type
        || matches!(
            (column_type, value_type),
            (ColumnDataType::Vector, ColumnDataType::Binary)
        )
}
/// Oneshot output result sender.
///
/// Newtype wrapping a `Sender` whose payload is a `Result<AffectedRows>`.
#[derive(Debug)]
pub struct OutputTx(Sender<Result<AffectedRows>>);

View File

@@ -69,7 +69,9 @@ impl SortField {
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
ConcreteDataType::Float32(_) => 5,
ConcreteDataType::Float64(_) => 9,
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => 11,
ConcreteDataType::Binary(_)
| ConcreteDataType::Json(_)
| ConcreteDataType::Vector(_) => 11,
ConcreteDataType::String(_) => 11, // a non-empty string takes at least 11 bytes.
ConcreteDataType::Date(_) => 5,
ConcreteDataType::DateTime(_) => 9,
@@ -165,7 +167,8 @@ impl SortField {
Time, time,
Duration, duration,
Decimal128, decimal128,
Json, binary
Json, binary,
Vector, binary
);
Ok(())
@@ -188,7 +191,7 @@ impl SortField {
Ok(Value::from(Option::<$f>::deserialize(deserializer).context(error::DeserializeFieldSnafu)?))
}
)*
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => Ok(Value::from(
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) | ConcreteDataType::Vector(_) => Ok(Value::from(
Option::<Vec<u8>>::deserialize(deserializer)
.context(error::DeserializeFieldSnafu)?
.map(Bytes::from),
@@ -273,7 +276,9 @@ impl SortField {
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
ConcreteDataType::Float32(_) => 5,
ConcreteDataType::Float64(_) => 9,
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => {
ConcreteDataType::Binary(_)
| ConcreteDataType::Json(_)
| ConcreteDataType::Vector(_) => {
// Now the encoder encodes binary as a list of bytes so we can't use
// skip bytes.
let pos_before = deserializer.position();
@@ -606,6 +611,7 @@ mod tests {
ConcreteDataType::interval_day_time_datatype(),
ConcreteDataType::interval_month_day_nano_datatype(),
ConcreteDataType::decimal128_default_datatype(),
ConcreteDataType::vector_datatype(3),
],
vec![
Value::Boolean(true),
@@ -630,6 +636,7 @@ mod tests {
Value::IntervalDayTime(IntervalDayTime::new(1, 15)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 1, 15)),
Value::Decimal128(Decimal128::from(16)),
Value::Binary(Bytes::from(vec![0; 12])),
],
);
}