mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-13 16:52:56 +00:00
feat: introduce vector type (#4964)
* feat: introduce vector type
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* test: fix prepared stmt
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* test: add grpc test
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* test: parse vector value
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* test: column to row
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* test: sqlness
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* fix: merge issue
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* refactor: add check for bytes size
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* Update tests/cases/standalone/common/types/vector/vector.sql
  Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
* chore: update proto
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* chore: simplify cargo
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* chore: address comment
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
---------
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -325,6 +325,13 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Datatype error: {}", source))]
|
||||
Datatype {
|
||||
source: datatypes::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -363,6 +370,7 @@ impl ErrorExt for Error {
|
||||
|
||||
SerializeColumnDefaultConstraint { source, .. } => source.status_code(),
|
||||
ConvertToGrpcDataType { source, .. } => source.status_code(),
|
||||
Datatype { source, .. } => source.status_code(),
|
||||
ConvertToDfStatement { .. } => StatusCode::Internal,
|
||||
ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported,
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ use crate::parser::{ParserContext, FLOW};
|
||||
use crate::parsers::utils::validate_column_fulltext_create_option;
|
||||
use crate::statements::create::{
|
||||
Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable,
|
||||
CreateTableLike, CreateView, Partitions, TableConstraint,
|
||||
CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM,
|
||||
};
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::statements::{
|
||||
@@ -668,6 +668,31 @@ impl<'a> ParserContext<'a> {
|
||||
column_type: &DataType,
|
||||
column_extensions: &mut ColumnExtensions,
|
||||
) -> Result<bool> {
|
||||
if let DataType::Custom(name, tokens) = column_type
|
||||
&& name.0.len() == 1
|
||||
&& &name.0[0].value.to_uppercase() == "VECTOR"
|
||||
{
|
||||
ensure!(
|
||||
tokens.len() == 1,
|
||||
InvalidColumnOptionSnafu {
|
||||
name: column_name.to_string(),
|
||||
msg: "VECTOR type should have dimension",
|
||||
}
|
||||
);
|
||||
|
||||
let dimension =
|
||||
tokens[0]
|
||||
.parse::<u32>()
|
||||
.ok()
|
||||
.with_context(|| InvalidColumnOptionSnafu {
|
||||
name: column_name.to_string(),
|
||||
msg: "dimension should be a positive integer",
|
||||
})?;
|
||||
|
||||
let options = HashMap::from_iter([(VECTOR_OPT_DIM.to_string(), dimension.to_string())]);
|
||||
column_extensions.vector_options = Some(options.into());
|
||||
}
|
||||
|
||||
if parser.parse_keyword(Keyword::FULLTEXT) {
|
||||
ensure!(
|
||||
column_extensions.fulltext_options.is_none(),
|
||||
|
||||
@@ -42,10 +42,10 @@ use common_time::Timestamp;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY};
|
||||
use datatypes::types::{cast, TimestampType};
|
||||
use datatypes::types::{cast, parse_string_to_vector_type_value, TimestampType};
|
||||
use datatypes::value::{OrderedF32, OrderedF64, Value};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sqlparser::ast::{ExactNumberInfo, UnaryOperator};
|
||||
use sqlparser::ast::{ExactNumberInfo, Ident, ObjectName, UnaryOperator};
|
||||
|
||||
use crate::ast::{
|
||||
ColumnDef, ColumnOption, ColumnOptionDef, DataType as SqlDataType, Expr, TimezoneInfo,
|
||||
@@ -53,7 +53,7 @@ use crate::ast::{
|
||||
};
|
||||
use crate::error::{
|
||||
self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu,
|
||||
ConvertValueSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu,
|
||||
ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu,
|
||||
ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu,
|
||||
TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu,
|
||||
};
|
||||
@@ -61,6 +61,8 @@ use crate::statements::create::Column;
|
||||
pub use crate::statements::option_map::OptionMap;
|
||||
pub use crate::statements::transform::{get_data_type_by_alias_name, transform_statements};
|
||||
|
||||
const VECTOR_TYPE_NAME: &str = "VECTOR";
|
||||
|
||||
fn parse_string_to_value(
|
||||
column_name: &str,
|
||||
s: String,
|
||||
@@ -134,6 +136,10 @@ fn parse_string_to_value(
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
ConcreteDataType::Vector(d) => {
|
||||
let v = parse_string_to_vector_type_value(&s, d.dim).context(DatatypeSnafu)?;
|
||||
Ok(Value::Binary(v.into()))
|
||||
}
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
@@ -614,6 +620,20 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
|
||||
}
|
||||
},
|
||||
SqlDataType::JSON => Ok(ConcreteDataType::json_datatype()),
|
||||
// Vector type
|
||||
SqlDataType::Custom(name, d)
|
||||
if name.0.as_slice().len() == 1
|
||||
&& name.0.as_slice()[0].value.to_ascii_uppercase() == VECTOR_TYPE_NAME
|
||||
&& d.len() == 1 =>
|
||||
{
|
||||
let dim = d[0].parse().map_err(|e| {
|
||||
error::ParseSqlValueSnafu {
|
||||
msg: format!("Failed to parse vector dimension: {}", e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Ok(ConcreteDataType::vector_datatype(dim))
|
||||
}
|
||||
_ => error::SqlTypeNotSupportedSnafu {
|
||||
t: data_type.clone(),
|
||||
}
|
||||
@@ -651,6 +671,10 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu
|
||||
ExactNumberInfo::PrecisionAndScale(d.precision() as u64, d.scale() as u64),
|
||||
)),
|
||||
ConcreteDataType::Json(_) => Ok(SqlDataType::JSON),
|
||||
ConcreteDataType::Vector(v) => Ok(SqlDataType::Custom(
|
||||
ObjectName(vec![Ident::new(VECTOR_TYPE_NAME)]),
|
||||
vec![v.dim.to_string()],
|
||||
)),
|
||||
ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
@@ -766,6 +790,14 @@ mod tests {
|
||||
SqlDataType::Interval,
|
||||
ConcreteDataType::interval_month_day_nano_datatype(),
|
||||
);
|
||||
check_type(SqlDataType::JSON, ConcreteDataType::json_datatype());
|
||||
check_type(
|
||||
SqlDataType::Custom(
|
||||
ObjectName(vec![Ident::new(VECTOR_TYPE_NAME)]),
|
||||
vec!["3".to_string()],
|
||||
),
|
||||
ConcreteDataType::vector_datatype(3),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1489,6 +1521,7 @@ mod tests {
|
||||
])
|
||||
.into(),
|
||||
),
|
||||
vector_options: None,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1501,7 +1534,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_parse_placeholder_value() {
|
||||
fn test_parse_placeholder_value() {
|
||||
assert!(sql_value_to_value(
|
||||
"test",
|
||||
&ConcreteDataType::string_datatype(),
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::statements::OptionMap;
|
||||
const LINE_SEP: &str = ",\n";
|
||||
const COMMA_SEP: &str = ", ";
|
||||
const INDENT: usize = 2;
|
||||
pub const VECTOR_OPT_DIM: &str = "dim";
|
||||
|
||||
macro_rules! format_indent {
|
||||
($fmt: expr, $arg: expr) => {
|
||||
@@ -112,6 +113,8 @@ pub struct Column {
|
||||
pub struct ColumnExtensions {
|
||||
/// Fulltext options.
|
||||
pub fulltext_options: Option<OptionMap>,
|
||||
/// Vector options.
|
||||
pub vector_options: Option<OptionMap>,
|
||||
}
|
||||
|
||||
impl Column {
|
||||
@@ -138,6 +141,13 @@ impl Column {
|
||||
|
||||
impl Display for Column {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(vector_options) = &self.extensions.vector_options {
|
||||
if let Some(dim) = vector_options.get(VECTOR_OPT_DIM) {
|
||||
write!(f, "{} VECTOR({})", self.column_def.name, dim)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
write!(f, "{}", self.column_def)?;
|
||||
if let Some(fulltext_options) = &self.extensions.fulltext_options {
|
||||
if !fulltext_options.is_empty() {
|
||||
|
||||
Reference in New Issue
Block a user