mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 20:10:37 +00:00
feat(vector): add conversion between vector and string (#5029)
* feat(vector): add conversion between vector and string Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix sqlness Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * address comments Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -12,20 +12,24 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod convert;
|
||||
mod distance;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use distance::{CosDistanceFunction, DotProductFunction, L2SqDistanceFunction};
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// Entry point for registering the vector scalar functions; see the
/// `register` associated function in the `impl` block below.
pub(crate) struct VectorFunction;
|
||||
|
||||
impl VectorFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(CosDistanceFunction));
|
||||
registry.register(Arc::new(DotProductFunction));
|
||||
registry.register(Arc::new(L2SqDistanceFunction));
|
||||
// conversion
|
||||
registry.register(Arc::new(convert::ParseVectorFunction));
|
||||
registry.register(Arc::new(convert::VectorToStringFunction));
|
||||
|
||||
// distance
|
||||
registry.register(Arc::new(distance::CosDistanceFunction));
|
||||
registry.register(Arc::new(distance::DotProductFunction));
|
||||
registry.register(Arc::new(distance::L2SqDistanceFunction));
|
||||
}
|
||||
}
|
||||
|
||||
19
src/common/function/src/scalars/vector/convert.rs
Normal file
19
src/common/function/src/scalars/vector/convert.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod parse_vector;
|
||||
mod vector_to_string;
|
||||
|
||||
pub use parse_vector::ParseVectorFunction;
|
||||
pub use vector_to_string::VectorToStringFunction;
|
||||
160
src/common/function/src/scalars/vector/convert/parse_vector.rs
Normal file
160
src/common/function/src/scalars/vector/convert/parse_vector.rs
Normal file
@@ -0,0 +1,160 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, InvalidVectorStringSnafu, Result};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::types::parse_string_to_vector_type_value;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
const NAME: &str = "parse_vec";
|
||||
|
||||
/// Scalar function registered under the name held in `NAME` (`"parse_vec"`).
///
/// Its `Function` implementation takes one string column and produces a
/// binary column by passing each string to `parse_string_to_vector_type_value`.
#[derive(Debug, Clone, Default)]
|
||||
pub struct ParseVectorFunction;
|
||||
|
||||
impl Function for ParseVectorFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let column = &columns[0];
|
||||
let size = column.len();
|
||||
|
||||
let mut result = BinaryVectorBuilder::with_capacity(size);
|
||||
for i in 0..size {
|
||||
let value = column.get(i).as_string();
|
||||
if let Some(value) = value {
|
||||
let res = parse_string_to_vector_type_value(&value, None)
|
||||
.context(InvalidVectorStringSnafu { vec_str: &value })?;
|
||||
result.push(Some(&res));
|
||||
} else {
|
||||
result.push_null();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ParseVectorFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_base::bytes::Bytes;
    use datatypes::value::Value;
    use datatypes::vectors::StringVector;

    use super::*;

    /// Builds the binary value holding the little-endian bytes of `elems`.
    fn binary_value(elems: &[f32]) -> Value {
        let raw = elems
            .iter()
            .flat_map(|e| e.to_le_bytes())
            .collect::<Vec<u8>>();
        Value::Binary(Bytes::from(raw))
    }

    #[test]
    fn test_parse_vector() {
        let func = ParseVectorFunction;

        let rows = vec![
            Some("[1.0,2.0,3.0]".to_string()),
            Some("[4.0,5.0,6.0]".to_string()),
            None,
        ];
        let input = Arc::new(StringVector::from(rows));

        let output = func.eval(FunctionContext::default(), &[input]).unwrap();
        let output = output.as_ref();

        assert_eq!(output.len(), 3);
        assert_eq!(output.get(0), binary_value(&[1.0f32, 2.0, 3.0]));
        assert_eq!(output.get(1), binary_value(&[4.0f32, 5.0, 6.0]));
        assert!(output.get(2).is_null());
    }

    #[test]
    fn test_parse_vector_error() {
        let func = ParseVectorFunction;

        // Missing closing bracket, missing opening bracket, non-numeric element.
        let malformed_rows = ["[7.0,8.0,9.0", "7.0,8.0,9.0]", "[7.0,hello,9.0]"];

        for malformed in malformed_rows.iter() {
            // Two well-formed rows precede the malformed one.
            let input = Arc::new(StringVector::from(vec![
                Some("[1.0,2.0,3.0]".to_string()),
                Some("[4.0,5.0,6.0]".to_string()),
                Some(malformed.to_string()),
            ]));

            let result = func.eval(FunctionContext::default(), &[input]);
            assert!(result.is_err());
        }
    }
}
|
||||
@@ -0,0 +1,139 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::types::vector_type_value_to_string;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
const NAME: &str = "vec_to_string";
|
||||
|
||||
/// Scalar function registered under the name held in `NAME` (`"vec_to_string"`).
///
/// Its `Function` implementation takes one binary column and produces a
/// string column by passing each value to `vector_type_value_to_string`.
#[derive(Debug, Clone, Default)]
|
||||
pub struct VectorToStringFunction;
|
||||
|
||||
impl Function for VectorToStringFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::binary_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let column = &columns[0];
|
||||
let size = column.len();
|
||||
|
||||
let mut result = StringVectorBuilder::with_capacity(size);
|
||||
for i in 0..size {
|
||||
let value = column.get(i);
|
||||
match value {
|
||||
Value::Binary(bytes) => {
|
||||
let len = bytes.len();
|
||||
if len % std::mem::size_of::<f32>() != 0 {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Invalid binary length of vector: {}", len),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
let dim = len / std::mem::size_of::<f32>();
|
||||
// Safety: `dim` is calculated from the length of `bytes` and is guaranteed to be valid
|
||||
let res = vector_type_value_to_string(&bytes, dim as _).unwrap();
|
||||
result.push(Some(&res));
|
||||
}
|
||||
Value::Null => {
|
||||
result.push_null();
|
||||
}
|
||||
_ => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Invalid value type: {:?}", value.data_type()),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for VectorToStringFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use datatypes::value::Value;
    use datatypes::vectors::BinaryVectorBuilder;

    use super::*;

    #[test]
    fn test_vector_to_string() {
        let func = VectorToStringFunction;

        // Two rows of little-endian f32 bytes followed by one null row.
        let rows = [[1.0f32, 2.0, 3.0], [4.0f32, 5.0, 6.0]];
        let mut input = BinaryVectorBuilder::with_capacity(3);
        for elems in rows.iter() {
            let raw = elems
                .iter()
                .flat_map(|e| e.to_le_bytes())
                .collect::<Vec<_>>();
            input.push(Some(raw.as_slice()));
        }
        input.push_null();
        let input = input.to_vector();

        let output = func.eval(FunctionContext::default(), &[input]).unwrap();

        assert_eq!(output.len(), 3);
        assert_eq!(output.get(0), Value::String("[1,2,3]".to_string().into()));
        assert_eq!(output.get(1), Value::String("[4,5,6]".to_string().into()));
        assert_eq!(output.get(2), Value::Null);
    }
}
|
||||
@@ -245,6 +245,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid vector string: {}", vec_str))]
|
||||
InvalidVectorString {
|
||||
vec_str: String,
|
||||
source: DataTypeError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -273,7 +281,8 @@ impl ErrorExt for Error {
|
||||
| Error::IntoVector { source, .. }
|
||||
| Error::FromScalarValue { source, .. }
|
||||
| Error::ConvertArrowSchema { source, .. }
|
||||
| Error::FromArrowArray { source, .. } => source.status_code(),
|
||||
| Error::FromArrowArray { source, .. }
|
||||
| Error::InvalidVectorString { source, .. } => source.status_code(),
|
||||
|
||||
Error::MissingTableMutationHandler { .. }
|
||||
| Error::MissingProcedureServiceHandler { .. }
|
||||
|
||||
@@ -102,7 +102,7 @@ pub fn vector_type_value_to_string(val: &[u8], dim: u32) -> Result<String> {
|
||||
|
||||
/// Parses a string to a vector type value
|
||||
/// Valid input format: "[1.0,2.0,3.0]", "[1.0, 2.0, 3.0]"
|
||||
pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result<Vec<u8>> {
|
||||
pub fn parse_string_to_vector_type_value(s: &str, dim: Option<u32>) -> Result<Vec<u8>> {
|
||||
// Trim the brackets
|
||||
let trimmed = s.trim();
|
||||
if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
|
||||
@@ -115,7 +115,7 @@ pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result<Vec<u8>> {
|
||||
let content = trimmed[1..trimmed.len() - 1].trim();
|
||||
|
||||
if content.is_empty() {
|
||||
if dim != 0 {
|
||||
if dim.map_or(false, |d| d != 0) {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!("Failed to parse {s} to Vector value: wrong dimension"),
|
||||
}
|
||||
@@ -139,7 +139,7 @@ pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result<Vec<u8>> {
|
||||
.collect::<Result<Vec<f32>>>()?;
|
||||
|
||||
// Check dimension
|
||||
if elements.len() != dim as usize {
|
||||
if dim.map_or(false, |d| d as usize != elements.len()) {
|
||||
return InvalidVectorSnafu {
|
||||
msg: format!("Failed to parse {s} to Vector value: wrong dimension"),
|
||||
}
|
||||
@@ -180,7 +180,7 @@ mod tests {
|
||||
];
|
||||
|
||||
for (s, expected) in cases.iter() {
|
||||
let val = parse_string_to_vector_type_value(s, dim).unwrap();
|
||||
let val = parse_string_to_vector_type_value(s, Some(dim)).unwrap();
|
||||
let s = vector_type_value_to_string(&val, dim).unwrap();
|
||||
assert_eq!(s, *expected);
|
||||
}
|
||||
@@ -188,7 +188,7 @@ mod tests {
|
||||
let dim = 0;
|
||||
let cases = [("[]", "[]"), ("[ ]", "[]"), ("[ ]", "[]")];
|
||||
for (s, expected) in cases.iter() {
|
||||
let val = parse_string_to_vector_type_value(s, dim).unwrap();
|
||||
let val = parse_string_to_vector_type_value(s, Some(dim)).unwrap();
|
||||
let s = vector_type_value_to_string(&val, dim).unwrap();
|
||||
assert_eq!(s, *expected);
|
||||
}
|
||||
@@ -211,15 +211,15 @@ mod tests {
|
||||
fn test_parse_string_to_vector_type_value_not_properly_enclosed_in_brackets() {
|
||||
let dim = 3;
|
||||
let s = "1.0,2.0,3.0";
|
||||
let res = parse_string_to_vector_type_value(s, dim);
|
||||
let res = parse_string_to_vector_type_value(s, Some(dim));
|
||||
assert!(res.is_err());
|
||||
|
||||
let s = "[1.0,2.0,3.0";
|
||||
let res = parse_string_to_vector_type_value(s, dim);
|
||||
let res = parse_string_to_vector_type_value(s, Some(dim));
|
||||
assert!(res.is_err());
|
||||
|
||||
let s = "1.0,2.0,3.0]";
|
||||
let res = parse_string_to_vector_type_value(s, dim);
|
||||
let res = parse_string_to_vector_type_value(s, Some(dim));
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ mod tests {
|
||||
fn test_parse_string_to_vector_type_value_wrong_dimension() {
|
||||
let dim = 3;
|
||||
let s = "[1.0,2.0]";
|
||||
let res = parse_string_to_vector_type_value(s, dim);
|
||||
let res = parse_string_to_vector_type_value(s, Some(dim));
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
@@ -235,7 +235,7 @@ mod tests {
|
||||
fn test_parse_string_to_vector_type_value_elements_are_not_all_float32() {
|
||||
let dim = 3;
|
||||
let s = "[1.0,2.0,ah]";
|
||||
let res = parse_string_to_vector_type_value(s, dim);
|
||||
let res = parse_string_to_vector_type_value(s, Some(dim));
|
||||
assert!(res.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ impl BinaryVector {
|
||||
let v = if let Some(binary) = binary {
|
||||
let bytes_size = dim as usize * std::mem::size_of::<f32>();
|
||||
if let Ok(s) = String::from_utf8(binary.to_vec()) {
|
||||
let v = parse_string_to_vector_type_value(&s, dim)?;
|
||||
let v = parse_string_to_vector_type_value(&s, Some(dim))?;
|
||||
Some(v)
|
||||
} else if binary.len() == dim as usize * std::mem::size_of::<f32>() {
|
||||
Some(binary.to_vec())
|
||||
|
||||
@@ -21,7 +21,7 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_telemetry::{debug, error};
|
||||
use datatypes::prelude::{ConcreteDataType, Value};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::types::{json_type_value_to_string, vector_type_value_to_string};
|
||||
use datatypes::types::json_type_value_to_string;
|
||||
use futures::StreamExt;
|
||||
use opensrv_mysql::{
|
||||
Column, ColumnFlags, ColumnType, ErrorKind, OkResponse, QueryResultWriter, RowWriter,
|
||||
@@ -217,11 +217,6 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
.context(ConvertSqlValueSnafu)?;
|
||||
row_writer.write_col(s)?;
|
||||
}
|
||||
ConcreteDataType::Vector(d) => {
|
||||
let s = vector_type_value_to_string(&v, d.dim)
|
||||
.context(ConvertSqlValueSnafu)?;
|
||||
row_writer.write_col(s)?;
|
||||
}
|
||||
_ => {
|
||||
row_writer.write_col(v.deref())?;
|
||||
}
|
||||
@@ -303,7 +298,7 @@ pub(crate) fn create_mysql_column(
|
||||
ConcreteDataType::Duration(_) => Ok(ColumnType::MYSQL_TYPE_TIME),
|
||||
ConcreteDataType::Decimal128(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL),
|
||||
ConcreteDataType::Json(_) => Ok(ColumnType::MYSQL_TYPE_JSON),
|
||||
ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_STRING),
|
||||
ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_BLOB),
|
||||
_ => error::UnsupportedDataTypeSnafu {
|
||||
data_type,
|
||||
reason: "not implemented",
|
||||
|
||||
@@ -27,9 +27,7 @@ use datafusion_expr::LogicalPlan;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::prelude::{ConcreteDataType, Value};
|
||||
use datatypes::schema::Schema;
|
||||
use datatypes::types::{
|
||||
json_type_value_to_string, vector_type_value_to_string, IntervalType, TimestampType,
|
||||
};
|
||||
use datatypes::types::{json_type_value_to_string, IntervalType, TimestampType};
|
||||
use datatypes::value::ListValue;
|
||||
use pgwire::api::portal::{Format, Portal};
|
||||
use pgwire::api::results::{DataRowEncoder, FieldInfo};
|
||||
@@ -178,7 +176,7 @@ fn encode_array(
|
||||
.collect::<PgWireResult<Vec<Option<f64>>>>()?;
|
||||
builder.encode_field(&array)
|
||||
}
|
||||
&ConcreteDataType::Binary(_) => {
|
||||
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => {
|
||||
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
|
||||
|
||||
match *bytea_output {
|
||||
@@ -370,24 +368,6 @@ fn encode_array(
|
||||
.collect::<PgWireResult<Vec<Option<String>>>>()?;
|
||||
builder.encode_field(&array)
|
||||
}
|
||||
&ConcreteDataType::Vector(d) => {
|
||||
let array = value_list
|
||||
.items()
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value::Null => Ok(None),
|
||||
Value::Binary(v) => {
|
||||
let s = vector_type_value_to_string(v, d.dim)
|
||||
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
|
||||
Ok(Some(s))
|
||||
}
|
||||
_ => Err(PgWireError::ApiError(Box::new(Error::Internal {
|
||||
err_msg: format!("Invalid list item type, find {v:?}, expected vector",),
|
||||
}))),
|
||||
})
|
||||
.collect::<PgWireResult<Vec<Option<String>>>>()?;
|
||||
builder.encode_field(&array)
|
||||
}
|
||||
_ => Err(PgWireError::ApiError(Box::new(Error::Internal {
|
||||
err_msg: format!(
|
||||
"cannot write array type {:?} in postgres protocol: unimplemented",
|
||||
@@ -423,11 +403,6 @@ pub(super) fn encode_value(
|
||||
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
|
||||
builder.encode_field(&s)
|
||||
}
|
||||
ConcreteDataType::Vector(d) => {
|
||||
let s = vector_type_value_to_string(v, d.dim)
|
||||
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
|
||||
builder.encode_field(&s)
|
||||
}
|
||||
_ => {
|
||||
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
|
||||
match *bytea_output {
|
||||
@@ -503,7 +478,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result<Type> {
|
||||
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => Ok(Type::INT8),
|
||||
&ConcreteDataType::Float32(_) => Ok(Type::FLOAT4),
|
||||
&ConcreteDataType::Float64(_) => Ok(Type::FLOAT8),
|
||||
&ConcreteDataType::Binary(_) => Ok(Type::BYTEA),
|
||||
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => Ok(Type::BYTEA),
|
||||
&ConcreteDataType::String(_) => Ok(Type::VARCHAR),
|
||||
&ConcreteDataType::Date(_) => Ok(Type::DATE),
|
||||
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => Ok(Type::TIMESTAMP),
|
||||
@@ -546,7 +521,6 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result<Type> {
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
&ConcreteDataType::Vector(_) => Ok(Type::FLOAT4_ARRAY),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ fn parse_string_to_value(
|
||||
Ok(Value::Binary(v.into()))
|
||||
}
|
||||
ConcreteDataType::Vector(d) => {
|
||||
let v = parse_string_to_vector_type_value(&s, d.dim).context(DatatypeSnafu)?;
|
||||
let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?;
|
||||
Ok(Value::Binary(v.into()))
|
||||
}
|
||||
_ => {
|
||||
|
||||
@@ -215,7 +215,7 @@ pub async fn test_mysql_crud(store_type: StorageType) {
|
||||
let dt: DateTime<Utc> = row.get("dt");
|
||||
let bytes: Vec<u8> = row.get("b");
|
||||
let json: serde_json::Value = row.get("j");
|
||||
let vector: String = row.get("v");
|
||||
let vector: Vec<u8> = row.get("v");
|
||||
assert_eq!(ret, i as i64);
|
||||
let expected_d = NaiveDate::from_yo_opt(2015, 100).unwrap();
|
||||
assert_eq!(expected_d, d);
|
||||
@@ -242,7 +242,13 @@ pub async fn test_mysql_crud(store_type: StorageType) {
|
||||
}
|
||||
});
|
||||
assert_eq!(json, expected_j);
|
||||
assert_eq!(vector, "[1,2,3]");
|
||||
assert_eq!(
|
||||
vector,
|
||||
[1.0f32, 2.0, 3.0]
|
||||
.iter()
|
||||
.flat_map(|x| x.to_le_bytes())
|
||||
.collect::<Vec<u8>>()
|
||||
);
|
||||
}
|
||||
|
||||
let rows = sqlx::query("select i from demo where i=?")
|
||||
|
||||
24
tests/cases/standalone/common/function/vector/vector.result
Normal file
24
tests/cases/standalone/common/function/vector/vector.result
Normal file
@@ -0,0 +1,24 @@
|
||||
SELECT vec_to_string(parse_vec('[1.0, 2.0]'));
|
||||
|
||||
+----------------------------------------------+
|
||||
| vec_to_string(parse_vec(Utf8("[1.0, 2.0]"))) |
|
||||
+----------------------------------------------+
|
||||
| [1,2] |
|
||||
+----------------------------------------------+
|
||||
|
||||
SELECT vec_to_string(parse_vec('[1.0, 2.0, 3.0]'));
|
||||
|
||||
+---------------------------------------------------+
|
||||
| vec_to_string(parse_vec(Utf8("[1.0, 2.0, 3.0]"))) |
|
||||
+---------------------------------------------------+
|
||||
| [1,2,3] |
|
||||
+---------------------------------------------------+
|
||||
|
||||
SELECT vec_to_string(parse_vec('[]'));
|
||||
|
||||
+--------------------------------------+
|
||||
| vec_to_string(parse_vec(Utf8("[]"))) |
|
||||
+--------------------------------------+
|
||||
| [] |
|
||||
+--------------------------------------+
|
||||
|
||||
5
tests/cases/standalone/common/function/vector/vector.sql
Normal file
5
tests/cases/standalone/common/function/vector/vector.sql
Normal file
@@ -0,0 +1,5 @@
|
||||
SELECT vec_to_string(parse_vec('[1.0, 2.0]'));
|
||||
|
||||
SELECT vec_to_string(parse_vec('[1.0, 2.0, 3.0]'));
|
||||
|
||||
SELECT vec_to_string(parse_vec('[]'));
|
||||
@@ -0,0 +1,96 @@
|
||||
SELECT vec_cos_distance('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
+---------------------------------------------------------+
|
||||
| vec_cos_distance(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) |
|
||||
+---------------------------------------------------------+
|
||||
| 1.0 |
|
||||
+---------------------------------------------------------+
|
||||
|
||||
SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
+--------------------------------------------------------------------+
|
||||
| vec_cos_distance(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) |
|
||||
+--------------------------------------------------------------------+
|
||||
| 1.0 |
|
||||
+--------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_cos_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+--------------------------------------------------------------------+
|
||||
| vec_cos_distance(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+--------------------------------------------------------------------+
|
||||
| 1.0 |
|
||||
+--------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+-------------------------------------------------------------------------------+
|
||||
| vec_cos_distance(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+-------------------------------------------------------------------------------+
|
||||
| 1.0 |
|
||||
+-------------------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_l2sq_distance('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
+----------------------------------------------------------+
|
||||
| vec_l2sq_distance(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) |
|
||||
+----------------------------------------------------------+
|
||||
| 5.0 |
|
||||
+----------------------------------------------------------+
|
||||
|
||||
SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
+---------------------------------------------------------------------+
|
||||
| vec_l2sq_distance(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) |
|
||||
+---------------------------------------------------------------------+
|
||||
| 5.0 |
|
||||
+---------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_l2sq_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+---------------------------------------------------------------------+
|
||||
| vec_l2sq_distance(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+---------------------------------------------------------------------+
|
||||
| 5.0 |
|
||||
+---------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+--------------------------------------------------------------------------------+
|
||||
| vec_l2sq_distance(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+--------------------------------------------------------------------------------+
|
||||
| 5.0 |
|
||||
+--------------------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_dot_product('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
+--------------------------------------------------------+
|
||||
| vec_dot_product(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) |
|
||||
+--------------------------------------------------------+
|
||||
| 0.0 |
|
||||
+--------------------------------------------------------+
|
||||
|
||||
SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
+-------------------------------------------------------------------+
|
||||
| vec_dot_product(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) |
|
||||
+-------------------------------------------------------------------+
|
||||
| 0.0 |
|
||||
+-------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_dot_product('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+-------------------------------------------------------------------+
|
||||
| vec_dot_product(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+-------------------------------------------------------------------+
|
||||
| 0.0 |
|
||||
+-------------------------------------------------------------------+
|
||||
|
||||
SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
|
||||
+------------------------------------------------------------------------------+
|
||||
| vec_dot_product(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) |
|
||||
+------------------------------------------------------------------------------+
|
||||
| 0.0 |
|
||||
+------------------------------------------------------------------------------+
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
SELECT vec_cos_distance('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_cos_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
|
||||
SELECT vec_l2sq_distance('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_l2sq_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
|
||||
SELECT vec_dot_product('[1.0, 2.0]', '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]');
|
||||
|
||||
SELECT vec_dot_product('[1.0, 2.0]', parse_vec('[0.0, 0.0]'));
|
||||
|
||||
SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]'));
|
||||
@@ -2,34 +2,34 @@ CREATE TABLE t (ts TIMESTAMP TIME INDEX, v VECTOR(3));
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert string
|
||||
INSERT INTO t VALUES
|
||||
(1, "[1.0, 2.0, 3.0]"),
|
||||
(2, "[4.0, 5.0, 6.0]"),
|
||||
(3, "[7.0, 8.0, 9.0]");
|
||||
(1, '[1.0, 2.0, 3.0]'),
|
||||
(2, '[4.0, 5.0, 6.0]'),
|
||||
(3, '[7.0, 8.0, 9.0]');
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
-- SQLNESS PROTOCOL MYSQL
|
||||
SELECT * FROM t;
|
||||
-- Insert vector value
|
||||
INSERT INTO t VALUES
|
||||
(4, parse_vec('[1.0, 2.0, 3.0]')),
|
||||
(5, parse_vec('[4.0, 5.0, 6.0]')),
|
||||
(6, parse_vec('[7.0, 8.0, 9.0]'));
|
||||
|
||||
+----------------------------+---------+
|
||||
| ts | v |
|
||||
+----------------------------+---------+
|
||||
| 1970-01-01 00:00:00.001000 | [1,2,3] |
|
||||
| 1970-01-01 00:00:00.002000 | [4,5,6] |
|
||||
| 1970-01-01 00:00:00.003000 | [7,8,9] |
|
||||
+----------------------------+---------+
|
||||
Affected Rows: 3
|
||||
|
||||
-- SQLNESS PROTOCOL POSTGRES
|
||||
SELECT * FROM t;
|
||||
SELECT ts, v, vec_to_string(v) FROM t;
|
||||
|
||||
+----------------------------+-----------+
|
||||
| ts | v |
|
||||
+----------------------------+-----------+
|
||||
| 1970-01-01 00:00:00.001000 | "[1,2,3]" |
|
||||
| 1970-01-01 00:00:00.002000 | "[4,5,6]" |
|
||||
| 1970-01-01 00:00:00.003000 | "[7,8,9]" |
|
||||
+----------------------------+-----------+
|
||||
+-------------------------+--------------------------+--------------------+
|
||||
| ts | v | vec_to_string(t.v) |
|
||||
+-------------------------+--------------------------+--------------------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] |
|
||||
+-------------------------+--------------------------+--------------------+
|
||||
|
||||
SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
|
||||
@@ -39,17 +39,23 @@ SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
| 1.0 |
|
||||
| 1.0 |
|
||||
| 1.0 |
|
||||
| 1.0 |
|
||||
| 1.0 |
|
||||
| 1.0 |
|
||||
+---------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+-----+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+-----+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 1.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 1.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 1.0 |
|
||||
+-------------------------+--------------------------+-----+
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 1.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 1.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 1.0 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 1.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 1.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 1.0 |
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
|
||||
SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
@@ -59,17 +65,23 @@ SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
| 0.04 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.04 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
+---------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+------+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+------+
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 0.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 |
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 0.04 |
|
||||
+-------------------------+--------------------------+------+
|
||||
+-------------------------+--------------------------+--------------------+------+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+------+
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 0.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 0.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 0.04 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 0.04 |
|
||||
+-------------------------+--------------------------+--------------------+------+
|
||||
|
||||
SELECT round(vec_cos_distance(v, v), 2) FROM t;
|
||||
|
||||
@@ -79,6 +91,9 @@ SELECT round(vec_cos_distance(v, v), 2) FROM t;
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
+-------------------------------------------+
|
||||
|
||||
-- Unexpected dimension --
|
||||
@@ -99,17 +114,23 @@ SELECT round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
| 14.0 |
|
||||
| 77.0 |
|
||||
| 194.0 |
|
||||
| 14.0 |
|
||||
| 77.0 |
|
||||
| 194.0 |
|
||||
+----------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+-------+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+-------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 14.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 77.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 194.0 |
|
||||
+-------------------------+--------------------------+-------+
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 14.0 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 14.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 77.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 77.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 194.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 194.0 |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
|
||||
SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
@@ -119,17 +140,23 @@ SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
| 108.0 |
|
||||
| 27.0 |
|
||||
| 0.0 |
|
||||
| 108.0 |
|
||||
| 27.0 |
|
||||
| 0.0 |
|
||||
+----------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+-------+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+-------+
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 27.0 |
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 108.0 |
|
||||
+-------------------------+--------------------------+-------+
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 27.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 27.0 |
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 108.0 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 108.0 |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
|
||||
SELECT round(vec_l2sq_distance(v, v), 2) FROM t;
|
||||
|
||||
@@ -139,6 +166,9 @@ SELECT round(vec_l2sq_distance(v, v), 2) FROM t;
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
+--------------------------------------------+
|
||||
|
||||
-- Unexpected dimension --
|
||||
@@ -159,17 +189,23 @@ SELECT round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
| 0.0 |
|
||||
+--------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+-----+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+-----+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 0.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 0.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 |
|
||||
+-------------------------+--------------------------+-----+
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 0.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 0.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 0.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 0.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 |
|
||||
+-------------------------+--------------------------+--------------------+-----+
|
||||
|
||||
SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
@@ -179,17 +215,23 @@ SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
| 50.0 |
|
||||
| 122.0 |
|
||||
| 194.0 |
|
||||
| 50.0 |
|
||||
| 122.0 |
|
||||
| 194.0 |
|
||||
+--------------------------------------------------------------+
|
||||
|
||||
SELECT *, round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
+-------------------------+--------------------------+-------+
|
||||
| ts | v | d |
|
||||
+-------------------------+--------------------------+-------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 50.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 122.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 194.0 |
|
||||
+-------------------------+--------------------------+-------+
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| ts | v | vec_to_string(t.v) | d |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 50.0 |
|
||||
| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 50.0 |
|
||||
| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 122.0 |
|
||||
| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 122.0 |
|
||||
| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 194.0 |
|
||||
| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 194.0 |
|
||||
+-------------------------+--------------------------+--------------------+-------+
|
||||
|
||||
SELECT round(vec_dot_product(v, v), 2) FROM t;
|
||||
|
||||
@@ -199,6 +241,9 @@ SELECT round(vec_dot_product(v, v), 2) FROM t;
|
||||
| 14.0 |
|
||||
| 77.0 |
|
||||
| 194.0 |
|
||||
| 14.0 |
|
||||
| 77.0 |
|
||||
| 194.0 |
|
||||
+------------------------------------------+
|
||||
|
||||
-- Unexpected dimension --
|
||||
@@ -213,19 +258,19 @@ Error: 3001(EngineExecuteQuery), Invalid argument error: Encountered non UTF-8 d
|
||||
|
||||
-- Unexpected dimension --
|
||||
INSERT INTO t VALUES
|
||||
(4, "[1.0]");
|
||||
(4, '[1.0]');
|
||||
|
||||
Error: 1004(InvalidArguments), Invalid Vector: Failed to parse [1.0] to Vector value: wrong dimension
|
||||
|
||||
-- Invalid vector value --
|
||||
INSERT INTO t VALUES
|
||||
(5, "1.0,2.0,3.0");
|
||||
(5, '1.0,2.0,3.0');
|
||||
|
||||
Error: 1004(InvalidArguments), Invalid Vector: Failed to parse 1.0,2.0,3.0 to Vector value: not properly enclosed in brackets
|
||||
|
||||
-- Invalid vector value --
|
||||
INSERT INTO t VALUES
|
||||
(6, "[30h, 40s, 50m]");
|
||||
(6, '[30h, 40s, 50m]');
|
||||
|
||||
Error: 1004(InvalidArguments), Invalid Vector: Failed to parse [30h, 40s, 50m] to Vector value: elements are not all float32
|
||||
|
||||
@@ -240,27 +285,15 @@ INSERT INTO t2 (ts) VALUES
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
-- SQLNESS PROTOCOL MYSQL
|
||||
SELECT * FROM t2;
|
||||
SELECT ts, v, vec_to_string(v) FROM t2;
|
||||
|
||||
+----------------------------+---------+
|
||||
| ts | v |
|
||||
+----------------------------+---------+
|
||||
| 1970-01-01 00:00:00.001000 | [1,2,3] |
|
||||
| 1970-01-01 00:00:00.002000 | [1,2,3] |
|
||||
| 1970-01-01 00:00:00.003000 | [1,2,3] |
|
||||
+----------------------------+---------+
|
||||
|
||||
-- SQLNESS PROTOCOL POSTGRES
|
||||
SELECT * FROM t2;
|
||||
|
||||
+----------------------------+-----------+
|
||||
| ts | v |
|
||||
+----------------------------+-----------+
|
||||
| 1970-01-01 00:00:00.001000 | "[1,2,3]" |
|
||||
| 1970-01-01 00:00:00.002000 | "[1,2,3]" |
|
||||
| 1970-01-01 00:00:00.003000 | "[1,2,3]" |
|
||||
+----------------------------+-----------+
|
||||
+-------------------------+--------------------------+---------------------+
|
||||
| ts | v | vec_to_string(t2.v) |
|
||||
+-------------------------+--------------------------+---------------------+
|
||||
| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] |
|
||||
| 1970-01-01T00:00:00.002 | 0000803f0000004000004040 | [1,2,3] |
|
||||
| 1970-01-01T00:00:00.003 | 0000803f0000004000004040 | [1,2,3] |
|
||||
+-------------------------+--------------------------+---------------------+
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
|
||||
@@ -1,23 +1,26 @@
|
||||
CREATE TABLE t (ts TIMESTAMP TIME INDEX, v VECTOR(3));
|
||||
|
||||
-- Invert string
|
||||
INSERT INTO t VALUES
|
||||
(1, "[1.0, 2.0, 3.0]"),
|
||||
(2, "[4.0, 5.0, 6.0]"),
|
||||
(3, "[7.0, 8.0, 9.0]");
|
||||
(1, '[1.0, 2.0, 3.0]'),
|
||||
(2, '[4.0, 5.0, 6.0]'),
|
||||
(3, '[7.0, 8.0, 9.0]');
|
||||
|
||||
-- SQLNESS PROTOCOL MYSQL
|
||||
SELECT * FROM t;
|
||||
-- Invert vector value
|
||||
INSERT INTO t VALUES
|
||||
(4, parse_vec('[1.0, 2.0, 3.0]')),
|
||||
(5, parse_vec('[4.0, 5.0, 6.0]')),
|
||||
(6, parse_vec('[7.0, 8.0, 9.0]'));
|
||||
|
||||
-- SQLNESS PROTOCOL POSTGRES
|
||||
SELECT * FROM t;
|
||||
SELECT ts, v, vec_to_string(v) FROM t;
|
||||
|
||||
SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_cos_distance(v, v), 2) FROM t;
|
||||
|
||||
@@ -29,11 +32,11 @@ SELECT vec_cos_distance(v, 1.0) FROM t;
|
||||
|
||||
SELECT round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_l2sq_distance(v, v), 2) FROM t;
|
||||
|
||||
@@ -46,11 +49,11 @@ SELECT vec_l2sq_distance(v, 1.0) FROM t;
|
||||
|
||||
SELECT round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t;
|
||||
|
||||
SELECT *, round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d;
|
||||
SELECT ts, v, vec_to_string(v), round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts;
|
||||
|
||||
SELECT round(vec_dot_product(v, v), 2) FROM t;
|
||||
|
||||
@@ -62,15 +65,15 @@ SELECT vec_dot_product(v, 1.0) FROM t;
|
||||
|
||||
-- Unexpected dimension --
|
||||
INSERT INTO t VALUES
|
||||
(4, "[1.0]");
|
||||
(4, '[1.0]');
|
||||
|
||||
-- Invalid vector value --
|
||||
INSERT INTO t VALUES
|
||||
(5, "1.0,2.0,3.0");
|
||||
(5, '1.0,2.0,3.0');
|
||||
|
||||
-- Invalid vector value --
|
||||
INSERT INTO t VALUES
|
||||
(6, "[30h, 40s, 50m]");
|
||||
(6, '[30h, 40s, 50m]');
|
||||
|
||||
CREATE TABLE t2 (ts TIMESTAMP TIME INDEX, v VECTOR(3) DEFAULT '[1.0, 2.0, 3.0]');
|
||||
|
||||
@@ -79,11 +82,7 @@ INSERT INTO t2 (ts) VALUES
|
||||
(2),
|
||||
(3);
|
||||
|
||||
-- SQLNESS PROTOCOL MYSQL
|
||||
SELECT * FROM t2;
|
||||
|
||||
-- SQLNESS PROTOCOL POSTGRES
|
||||
SELECT * FROM t2;
|
||||
SELECT ts, v, vec_to_string(v) FROM t2;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user