feat: introduce vector type (#4964)

* feat: introduce vector type

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: fix prepared stmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add grpc test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: parse vector value

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: column to row

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: sqlness

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: merge issue

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: add check for bytes size

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update tests/cases/standalone/common/types/vector/vector.sql

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update proto

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: simplify cargo

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: address comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Zhenchi
2024-11-12 16:28:44 +08:00
committed by GitHub
parent 84aa5b7b22
commit d616bd92ef
32 changed files with 1109 additions and 120 deletions

View File

@@ -325,6 +325,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Datatype error: {}", source))]
Datatype {
source: datatypes::error::Error,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for Error {
@@ -363,6 +370,7 @@ impl ErrorExt for Error {
SerializeColumnDefaultConstraint { source, .. } => source.status_code(),
ConvertToGrpcDataType { source, .. } => source.status_code(),
Datatype { source, .. } => source.status_code(),
ConvertToDfStatement { .. } => StatusCode::Internal,
ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported,

View File

@@ -39,7 +39,7 @@ use crate::parser::{ParserContext, FLOW};
use crate::parsers::utils::validate_column_fulltext_create_option;
use crate::statements::create::{
Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable,
CreateTableLike, CreateView, Partitions, TableConstraint,
CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM,
};
use crate::statements::statement::Statement;
use crate::statements::{
@@ -668,6 +668,31 @@ impl<'a> ParserContext<'a> {
column_type: &DataType,
column_extensions: &mut ColumnExtensions,
) -> Result<bool> {
if let DataType::Custom(name, tokens) = column_type
&& name.0.len() == 1
&& &name.0[0].value.to_uppercase() == "VECTOR"
{
ensure!(
tokens.len() == 1,
InvalidColumnOptionSnafu {
name: column_name.to_string(),
msg: "VECTOR type should have dimension",
}
);
let dimension =
tokens[0]
.parse::<u32>()
.ok()
.with_context(|| InvalidColumnOptionSnafu {
name: column_name.to_string(),
msg: "dimension should be a positive integer",
})?;
let options = HashMap::from_iter([(VECTOR_OPT_DIM.to_string(), dimension.to_string())]);
column_extensions.vector_options = Some(options.into());
}
if parser.parse_keyword(Keyword::FULLTEXT) {
ensure!(
column_extensions.fulltext_options.is_none(),

View File

@@ -42,10 +42,10 @@ use common_time::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY};
use datatypes::types::{cast, TimestampType};
use datatypes::types::{cast, parse_string_to_vector_type_value, TimestampType};
use datatypes::value::{OrderedF32, OrderedF64, Value};
use snafu::{ensure, OptionExt, ResultExt};
use sqlparser::ast::{ExactNumberInfo, UnaryOperator};
use sqlparser::ast::{ExactNumberInfo, Ident, ObjectName, UnaryOperator};
use crate::ast::{
ColumnDef, ColumnOption, ColumnOptionDef, DataType as SqlDataType, Expr, TimezoneInfo,
@@ -53,7 +53,7 @@ use crate::ast::{
};
use crate::error::{
self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu,
ConvertValueSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu,
ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu,
ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu,
TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu,
};
@@ -61,6 +61,8 @@ use crate::statements::create::Column;
pub use crate::statements::option_map::OptionMap;
pub use crate::statements::transform::{get_data_type_by_alias_name, transform_statements};
/// Name of the custom SQL data type that denotes a vector column. It is compared
/// (upper-cased) against custom type names when converting SQL types, and is used
/// as the type name when converting a vector type back into a SQL data type.
const VECTOR_TYPE_NAME: &str = "VECTOR";
fn parse_string_to_value(
column_name: &str,
s: String,
@@ -134,6 +136,10 @@ fn parse_string_to_value(
.fail()
}
}
ConcreteDataType::Vector(d) => {
let v = parse_string_to_vector_type_value(&s, d.dim).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
}
_ => {
unreachable!()
}
@@ -614,6 +620,20 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
}
},
SqlDataType::JSON => Ok(ConcreteDataType::json_datatype()),
// Vector type
SqlDataType::Custom(name, d)
if name.0.as_slice().len() == 1
&& name.0.as_slice()[0].value.to_ascii_uppercase() == VECTOR_TYPE_NAME
&& d.len() == 1 =>
{
let dim = d[0].parse().map_err(|e| {
error::ParseSqlValueSnafu {
msg: format!("Failed to parse vector dimension: {}", e),
}
.build()
})?;
Ok(ConcreteDataType::vector_datatype(dim))
}
_ => error::SqlTypeNotSupportedSnafu {
t: data_type.clone(),
}
@@ -651,6 +671,10 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu
ExactNumberInfo::PrecisionAndScale(d.precision() as u64, d.scale() as u64),
)),
ConcreteDataType::Json(_) => Ok(SqlDataType::JSON),
ConcreteDataType::Vector(v) => Ok(SqlDataType::Custom(
ObjectName(vec![Ident::new(VECTOR_TYPE_NAME)]),
vec![v.dim.to_string()],
)),
ConcreteDataType::Duration(_)
| ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
@@ -766,6 +790,14 @@ mod tests {
SqlDataType::Interval,
ConcreteDataType::interval_month_day_nano_datatype(),
);
check_type(SqlDataType::JSON, ConcreteDataType::json_datatype());
check_type(
SqlDataType::Custom(
ObjectName(vec![Ident::new(VECTOR_TYPE_NAME)]),
vec!["3".to_string()],
),
ConcreteDataType::vector_datatype(3),
);
}
#[test]
@@ -1489,6 +1521,7 @@ mod tests {
])
.into(),
),
vector_options: None,
},
};
@@ -1501,7 +1534,7 @@ mod tests {
}
#[test]
pub fn test_parse_placeholder_value() {
fn test_parse_placeholder_value() {
assert!(sql_value_to_value(
"test",
&ConcreteDataType::string_datatype(),

View File

@@ -30,6 +30,7 @@ use crate::statements::OptionMap;
const LINE_SEP: &str = ",\n";
const COMMA_SEP: &str = ", ";
const INDENT: usize = 2;
/// Key under which a vector column's dimension is stored in its vector options.
pub const VECTOR_OPT_DIM: &str = "dim";
macro_rules! format_indent {
($fmt: expr, $arg: expr) => {
@@ -112,6 +113,8 @@ pub struct Column {
pub struct ColumnExtensions {
/// Fulltext options.
pub fulltext_options: Option<OptionMap>,
/// Vector options. When present, the entry keyed by `VECTOR_OPT_DIM` holds the
/// vector dimension, which is used to render the column as `VECTOR(<dim>)`.
pub vector_options: Option<OptionMap>,
}
impl Column {
@@ -138,6 +141,13 @@ impl Column {
impl Display for Column {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
if let Some(vector_options) = &self.extensions.vector_options {
if let Some(dim) = vector_options.get(VECTOR_OPT_DIM) {
write!(f, "{} VECTOR({})", self.column_def.name, dim)?;
return Ok(());
}
}
write!(f, "{}", self.column_def)?;
if let Some(fulltext_options) = &self.extensions.fulltext_options {
if !fulltext_options.is_empty() {