perf: avoid bounds checks when accessing array items (#7570)

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Author: Ruihang Xia
Date: 2026-01-14 20:56:39 +08:00
Committed by: GitHub
Parent: 170f94fc08
Commit: a5cb0116a2
6 changed files with 18 additions and 26 deletions
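
The change applies one pattern throughout: instead of calling `value(i)` inside a loop, where every call re-checks that the index is in bounds, borrow the array's underlying values buffer once and iterate it as a plain slice. A minimal standalone sketch of the idea, assuming the `arrow` crate's `UInt64Array` (illustrative only, not code from this commit):

    use arrow::array::{Array, UInt64Array};

    fn sum_indexed(array: &UInt64Array) -> u64 {
        // `value(i)` asserts `i < array.len()` on every iteration.
        let mut sum = 0;
        for i in 0..array.len() {
            sum += array.value(i);
        }
        sum
    }

    fn sum_sliced(array: &UInt64Array) -> u64 {
        // `values()` exposes the native buffer as a `&[u64]` slice; iterating
        // the slice needs no per-element index check.
        array.values().iter().copied().sum()
    }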

@@ -453,8 +453,8 @@ impl Accumulator for CountHashAccumulator {
                 );
             };
             let hash_array = inner_array.as_any().downcast_ref::<UInt64Array>().unwrap();
-            for i in 0..hash_array.len() {
-                self.values.insert(hash_array.value(i));
+            for &hash in hash_array.values().iter().take(hash_array.len()) {
+                self.values.insert(hash);
             }
         }
         Ok(())

@@ -152,9 +152,9 @@ impl DfAccumulator for JsonEncodePathAccumulator {
         let lng_array = lng_array.as_primitive::<Float64Type>();
         let mut coords = Vec::with_capacity(len);
-        for i in 0..len {
-            let lng = lng_array.value(i);
-            let lat = lat_array.value(i);
+        let lng_values = lng_array.values();
+        let lat_values = lat_array.values();
+        for (&lng, &lat) in lng_values.iter().zip(lat_values.iter()).take(len) {
             coords.push(vec![lng, lat]);
         }
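
In the path encoder, two arrays are walked in lockstep. Zipping the two value slices replaces both indexed `value(i)` calls, and `take(len)` caps the walk at the caller's row count. A hedged sketch of the same shape on plain `Float64Array`s (the function name is made up, not part of the commit):

    use arrow::array::Float64Array;

    // Pair longitudes and latitudes without indexed access; `zip` stops at the
    // shorter of the two slices, so no per-element bounds check is needed.
    fn pair_coords(lng: &Float64Array, lat: &Float64Array, len: usize) -> Vec<Vec<f64>> {
        lng.values()
            .iter()
            .zip(lat.values().iter())
            .take(len)
            .map(|(&lng, &lat)| vec![lng, lat])
            .collect()
    }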

@@ -208,9 +208,9 @@ fn decode_dictionary(
     let mut rows = Vec::with_capacity(number_rows);
     let keys = dict.keys();
-    for i in 0..number_rows {
-        let dict_index = keys.value(i) as usize;
-        rows.push(decoded_values[dict_index].clone());
+    let dict_indices = keys.values();
+    for &dict_index in dict_indices[..number_rows].iter() {
+        rows.push(decoded_values[dict_index as usize].clone());
     }
     Ok(rows)
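
Here the per-iteration check is traded for a single up-front one: slicing `dict_indices[..number_rows]` verifies the range once (and panics early if `number_rows` is too large), after which the key reads need no further checks. A small sketch of that trade-off on plain slices, with made-up names; indexing `decoded` itself is still checked, exactly as `decoded_values[dict_index as usize]` is above:

    fn take_rows<T: Clone>(decoded: &[T], indices: &[u32], n: usize) -> Vec<T> {
        let mut rows = Vec::with_capacity(n);
        // One range check on the slice here...
        for &idx in &indices[..n] {
            // ...so the loop only pays for the lookup into `decoded`.
            rows.push(decoded[idx as usize].clone());
        }
        rows
    }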

@@ -170,11 +170,12 @@ impl<K: ArrowDictionaryKeyType> Serializable for DictionaryVector<K> {
         // the value it refers to in the dictionary
         let mut result = Vec::with_capacity(self.len());
-        for i in 0..self.len() {
+        let keys = self.array.keys();
+        let key_values = &keys.values()[..self.len()];
+        for (i, &key) in key_values.iter().enumerate() {
             if self.is_null(i) {
                 result.push(JsonValue::Null);
             } else {
-                let key = self.array.keys().value(i);
                 let value = self.item_vector.get(key.as_usize());
                 let json_value = serde_json::to_value(value).context(error::SerializeSnafu)?;
                 result.push(json_value);
@@ -247,16 +248,9 @@ impl<K: ArrowDictionaryKeyType> VectorOp for DictionaryVector<K> {
         let mut replicated_keys = PrimitiveBuilder::new();
         let mut previous_offset = 0;
-        for (i, &offset) in offsets.iter().enumerate() {
-            let key = if i < self.len() {
-                if keys.is_valid(i) {
-                    Some(keys.value(i))
-                } else {
-                    None
-                }
-            } else {
-                None
-            };
+        let mut key_iter = keys.iter().chain(std::iter::repeat(None));
+        for &offset in offsets {
+            let key = key_iter.next().unwrap();
             // repeat this key (offset - previous_offset) times
             let repeat_count = offset - previous_offset;
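
The replicate hunk drops both the `i < self.len()` guard and the `is_valid(i)` check by leaning on the key array's own iterator: `PrimitiveArray::iter()` already yields `Option<_>` with `None` for null slots, and chaining `std::iter::repeat(None)` makes positions past the end read as `None` too. A self-contained sketch of that pattern, assuming the `arrow` crate and a made-up function name:

    use arrow::array::Int32Array;

    // Read keys out to `total` positions: nulls and out-of-range positions both
    // come back as `None`, with no explicit index or validity checks.
    fn keys_padded_to(keys: &Int32Array, total: usize) -> Vec<Option<i32>> {
        keys.iter().chain(std::iter::repeat(None)).take(total).collect()
    }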

@@ -1174,9 +1174,8 @@ pub(crate) fn decode_primary_keys_with_counts(
     let mut result: Vec<(CompositeValues, usize)> = Vec::new();
     let mut prev_key: Option<u32> = None;
-    for i in 0..keys.len() {
-        let current_key = keys.value(i);
+    let pk_indices = keys.values();
+    for &current_key in pk_indices.iter().take(keys.len()) {
         // Checks if current key is the same as previous key
         if let Some(prev) = prev_key
             && prev == current_key

@@ -563,9 +563,8 @@ pub(crate) fn decode_primary_keys(
     // The parquet reader may read the whole dictionary page into the dictionary values, so
     // we may decode many primary keys not in this batch if we decode the values array directly.
-    for i in 0..keys.len() {
-        let current_key = keys.value(i);
+    let pk_indices = keys.values();
+    for &current_key in pk_indices.iter().take(keys.len()) {
         // Check if current key is the same as previous key
         if let Some(prev) = prev_key
             && prev == current_key