fix: Fix deleting table with non null column (#849)

If the table has a non-null column, we need to use the default value instead
of null to fill the value columns in the record batch for deletion.
Otherwise, we can't create the record batch, since the schema check
doesn't allow nulls in non-null columns.
This commit is contained in:
Yingwen
2023-01-11 20:06:46 +08:00
committed by GitHub
parent 0e8411c2ff
commit b39dbcbda9
3 changed files with 69 additions and 16 deletions

View File

@@ -21,6 +21,7 @@ use snafu::{ensure, ResultExt};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, Result};
use crate::schema::constraint::ColumnDefaultConstraint;
use crate::value::Value;
use crate::vectors::VectorRef;
pub type Metadata = HashMap<String, String>;
@@ -106,6 +107,9 @@ impl ColumnSchema {
self
}
/// Creates a vector with default value for this column.
///
/// If the column is `NOT NULL` but doesn't have a `DEFAULT` value supplied, returns `Ok(None)`.
pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
match &self.default_constraint {
Some(c) => c
@@ -124,6 +128,28 @@ impl ColumnSchema {
}
}
}
/// Creates a vector for padding.
///
/// Unlike [ColumnSchema::create_default_vector], this always succeeds because it
/// falls back to [DataType::default_value] to fill the vector. Callers should
/// treat the returned vector purely as a placeholder and never read its content.
pub fn create_default_vector_for_padding(&self, num_rows: usize) -> VectorRef {
    // Nullable columns are padded with nulls; non-null columns borrow the
    // data type's default value, which is cheaper to acquire than evaluating
    // a default constraint.
    let fill = match self.is_nullable {
        true => Value::Null,
        false => self.data_type.default_value(),
    };
    let fill_ref = fill.as_value_ref();
    let mut builder = self.data_type.create_mutable_vector(num_rows);
    for _ in 0..num_rows {
        // Safety: both the builder and the fill value come from the same data
        // type, so pushing can never fail.
        builder.push_value_ref(fill_ref).unwrap();
    }
    builder.to_vector()
}
}
impl TryFrom<&Field> for ColumnSchema {
@@ -182,10 +208,13 @@ impl TryFrom<&ColumnSchema> for Field {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use super::*;
use crate::value::Value;
use crate::vectors::Int32Vector;
#[test]
fn test_column_schema() {
@@ -294,4 +323,18 @@ mod tests {
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
assert!(column_schema.create_default_vector(5).unwrap().is_none());
}
#[test]
fn test_create_default_vector_for_padding() {
    // Nullable column: padding is all nulls.
    let nullable = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
    let padded = nullable.create_default_vector_for_padding(4);
    assert_eq!(4, padded.len());
    assert!(padded.only_null());

    // Non-null column: padding uses the data type's default value (0 for i32).
    let non_null = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
    let padded = non_null.create_default_vector_for_padding(4);
    assert_eq!(4, padded.len());
    let expected: VectorRef = Arc::new(Int32Vector::from_slice(&[0, 0, 0, 0]));
    assert_eq!(expected, padded);
}
}

View File

@@ -53,8 +53,10 @@ pub fn new_insert_request(
pub fn schema_for_test() -> Schema {
let column_schemas = vec![
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
// Nullable value column: cpu
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
// Non-null value column: memory
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), false),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_datatype(common_time::timestamp::TimeUnit::Millisecond),

View File

@@ -18,9 +18,7 @@ mod compat;
use std::collections::HashMap;
use common_recordbatch::RecordBatch;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::schema::{ColumnSchema, SchemaRef};
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{OpType, WriteRequest};
@@ -218,9 +216,9 @@ impl WriteBatch {
columns.push(col.clone());
}
None => {
// Fills value columns by null, these columns are just placeholders to ensure
// Fills value columns by default value, these columns are just placeholders to ensure
// the schema of the record batch is correct.
let col = new_column_with_null(&column_schema.data_type, num_rows);
let col = column_schema.create_default_vector_for_padding(num_rows);
columns.push(col);
}
}
@@ -298,17 +296,6 @@ pub(crate) fn new_column_with_default_value(
Ok(vector)
}
/// Creates a new column of `num_rows` nulls with the given logical type.
// TODO(yingwen): Use `NullVector` once it supports setting logical type.
fn new_column_with_null(data_type: &ConcreteDataType, num_rows: usize) -> VectorRef {
    let mut builder = data_type.create_mutable_vector(num_rows);
    (0..num_rows).for_each(|_| {
        // Safety: pushing null never fails.
        builder.push_value_ref(ValueRef::Null).unwrap();
    });
    builder.to_vector()
}
/// Vectors in [NameToVector] have same length.
///
/// MUST construct it via [`NameToVector::new()`] to ensure the vector lengths are validated.
@@ -602,4 +589,25 @@ mod tests {
let err = batch.delete(keys).unwrap_err();
check_err(err, "Type of column ts does not match");
}
#[test]
fn test_delete_non_null_value() {
    // Key columns for three rows to delete.
    let k1 = Arc::new(UInt64Vector::from_slice(&[1, 2, 3])) as VectorRef;
    let ts = Arc::new(TimestampMillisecondVector::from_slice(&[0, 0, 0])) as VectorRef;
    let mut keys = HashMap::with_capacity(2);
    // NOTE: no `.clone()` needed — the vectors aren't used after insertion.
    keys.insert("k1".to_string(), k1);
    keys.insert("ts".to_string(), ts);

    // Schema with a non-null value column `v1`. Deletion must still succeed:
    // the padding column is filled with default values rather than nulls, so
    // the record batch's schema check passes.
    let mut batch = write_batch_util::new_write_batch(
        &[
            ("k1", LogicalTypeId::UInt64, false),
            ("ts", LogicalTypeId::TimestampMillisecond, false),
            ("v1", LogicalTypeId::Boolean, false),
        ],
        Some(1),
        2,
    );
    batch.delete(keys).unwrap();
}
}