change dictionary type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-04-11 17:07:55 +08:00
parent d97a76c312
commit 2799d67212
4 changed files with 25 additions and 49 deletions

View File

@@ -16,8 +16,8 @@ use std::any::Any;
use std::sync::Arc;
use arrow::array::Array;
use arrow::datatypes::Int32Type;
use arrow_array::{ArrayRef, DictionaryArray, Int32Array};
use arrow::datatypes::{Int32Type, Int64Type};
use arrow_array::{ArrayRef, DictionaryArray, Int32Array, Int64Array};
use serde_json::Value as JsonValue;
use snafu::ResultExt;
@@ -32,7 +32,7 @@ use crate::vectors::{self, Helper, Validity, Vector, VectorRef};
/// Vector of dictionaries, basically backed by Arrow's `DictionaryArray`.
#[derive(Debug, PartialEq)]
pub struct DictionaryVector {
array: DictionaryArray<Int32Type>,
array: DictionaryArray<Int64Type>,
/// The datatype of the items in the dictionary.
item_type: ConcreteDataType,
/// The vector of items in the dictionary.
@@ -41,7 +41,7 @@ pub struct DictionaryVector {
impl DictionaryVector {
/// Create a new instance of `DictionaryVector` from a dictionary array and item type
pub fn new(array: DictionaryArray<Int32Type>, item_type: ConcreteDataType) -> Result<Self> {
pub fn new(array: DictionaryArray<Int64Type>, item_type: ConcreteDataType) -> Result<Self> {
let item_vector = Helper::try_into_vector(array.values())?;
Ok(Self {
@@ -52,12 +52,12 @@ impl DictionaryVector {
}
/// Returns the underlying Arrow dictionary array
pub fn array(&self) -> &DictionaryArray<Int32Type> {
pub fn array(&self) -> &DictionaryArray<Int64Type> {
&self.array
}
/// Returns the keys array of this dictionary
pub fn keys(&self) -> &arrow_array::PrimitiveArray<Int32Type> {
pub fn keys(&self) -> &arrow_array::PrimitiveArray<Int64Type> {
self.array.keys()
}
@@ -74,7 +74,7 @@ impl DictionaryVector {
impl Vector for DictionaryVector {
fn data_type(&self) -> ConcreteDataType {
ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
self.item_type.clone(),
))
}
@@ -163,10 +163,10 @@ impl Serializable for DictionaryVector {
}
}
impl TryFrom<DictionaryArray<Int32Type>> for DictionaryVector {
impl TryFrom<DictionaryArray<Int64Type>> for DictionaryVector {
type Error = crate::error::Error;
fn try_from(array: DictionaryArray<Int32Type>) -> Result<Self> {
fn try_from(array: DictionaryArray<Int64Type>) -> Result<Self> {
let item_type = ConcreteDataType::from_arrow_type(array.values().data_type());
let item_vector = Helper::try_into_vector(array.values())?;
@@ -243,7 +243,7 @@ impl VectorOp for DictionaryVector {
previous_offset = offset;
}
let new_keys = Int32Array::from(replicated_keys);
let new_keys = Int64Array::from(replicated_keys);
let new_array = DictionaryArray::try_new(new_keys, self.values().clone())
.expect("Failed to create replicated dictionary array");
@@ -261,7 +261,7 @@ impl VectorOp for DictionaryVector {
let filtered_key_array = filtered_key_vector.to_arrow_array();
let filtered_key_array = filtered_key_array
.as_any()
.downcast_ref::<Int32Array>()
.downcast_ref::<Int64Array>()
.unwrap();
let new_array = DictionaryArray::try_new(filtered_key_array.clone(), self.values().clone())
@@ -291,7 +291,7 @@ impl VectorOp for DictionaryVector {
let key_vector = Helper::try_into_vector(&key_array)?;
let new_key_vector = key_vector.take(indices)?;
let new_key_array = new_key_vector.to_arrow_array();
let new_key_array = new_key_array.as_any().downcast_ref::<Int32Array>().unwrap();
let new_key_array = new_key_array.as_any().downcast_ref::<Int64Array>().unwrap();
let new_array = DictionaryArray::try_new(new_key_array.clone(), self.values().clone())
.expect("Failed to create filtered dictionary array");
@@ -318,7 +318,7 @@ mod tests {
// Keys: [0, 1, 2, null, 1, 3]
// Resulting in: ["a", "b", "c", null, "b", "d"]
let values = StringArray::from(vec!["a", "b", "c", "d"]);
let keys = Int32Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let keys = Int64Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let dict_array = DictionaryArray::new(keys, Arc::new(values));
DictionaryVector::try_from(dict_array).unwrap()
}

View File

@@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::compute;
use arrow::compute::kernels::comparison;
use arrow::datatypes::{DataType as ArrowDataType, Int32Type, TimeUnit};
use arrow::datatypes::{DataType as ArrowDataType, Int32Type, Int64Type, TimeUnit};
use arrow_array::DictionaryArray;
use arrow_schema::IntervalUnit;
use datafusion_common::ScalarValue;
@@ -348,11 +348,11 @@ impl Helper {
ArrowDataType::Decimal128(_, _) => {
Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
}
ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int32) => {
ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => {
let array = array
.as_ref()
.as_any()
.downcast_ref::<DictionaryArray<Int32Type>>()
.downcast_ref::<DictionaryArray<Int64Type>>()
.unwrap(); // Safety: the type is guarded by match arm condition
Arc::new(DictionaryVector::new(
array.clone(),

View File

@@ -130,8 +130,7 @@ tql eval (3000, 3000, '1s') label_replace(histogram_quantile(0.8, histogram_buck
-- quantile with rate is covered in other cases
tql eval (3000, 3000, '1s') histogram_quantile(0.2, rate(histogram_bucket[5m]));
++
++
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
drop table histogram_bucket;
@@ -228,27 +227,15 @@ tql eval (420, 420, '1s') histogram_quantile(0.833, histogram2_bucket);
tql eval (2820, 2820, '1s') histogram_quantile(0.166, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 0.996 |
+---------------------+----------------------------+
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
tql eval (2820, 2820, '1s') histogram_quantile(0.5, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 3.0 |
+---------------------+----------------------------+
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
tql eval (2820, 2820, '1s') histogram_quantile(0.833, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 4.998 |
+---------------------+----------------------------+
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
drop table histogram2_bucket;
@@ -284,12 +271,7 @@ Affected Rows: 12
tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m])));
+---+---------------------+---------------------------------+
| s | ts | sum(prom_rate(ts_range,val,ts)) |
+---+---------------------+---------------------------------+
| a | 1970-01-01T00:50:00 | 0.55 |
| a | 1970-01-01T00:50:03 | 0.5500000000000002 |
+---+---------------------+---------------------------------+
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
drop table histogram3_bucket;

View File

@@ -45,19 +45,13 @@ tql eval (359, 359, '1s') sum_over_time(metric_total[60s:10s]);
tql eval (10, 10, '1s') rate(metric_total[20s:10s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:10 | 0.1 |
+---------------------+----------------------------+
++
++
tql eval (20, 20, '1s') rate(metric_total[20s:5s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:20 | 0.06666666666666667 |
+---------------------+----------------------------+
++
++
drop table metric_total;