perf: remove some fast fields loading overhead (#22)

This removes some overhead that the profiler exposed.  In the case I was testing, fast fields no longer show up in the profile at all.

I also renamed `BlockWithLength` to `BlockWithData`.
This commit is contained in:
Eric Ridge
2025-01-21 13:01:41 -05:00
committed by Stu Hood
parent 95661fba30
commit 658b9b22e0
2 changed files with 18 additions and 23 deletions

View File

@@ -1,7 +1,7 @@
use std::io;
use std::io::Write;
use std::ops::{Deref, DerefMut};
use std::sync::{Arc, OnceLock};
use std::{io, iter};
use common::file_slice::FileSlice;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, HasLen, OwnedBytes};
@@ -190,28 +190,23 @@ impl ColumnCodec<u64> for BlockwiseLinearCodec {
let mut footer = footer.read_bytes()?;
let num_blocks = compute_num_blocks(stats.num_rows);
let mut blocks: Vec<BlockWithLength> =
iter::repeat_with(|| Block::deserialize(&mut footer))
.take(num_blocks as usize)
.map(|block| {
block.map(|block| BlockWithLength {
block,
file_slice: FileSlice::from(Vec::new()),
data: OnceLock::default(),
})
})
.collect::<io::Result<_>>()?;
let mut start_offset = 0;
for block in &mut blocks {
let mut blocks = Vec::with_capacity(num_blocks as usize);
for _ in 0..num_blocks {
let mut block = Block::deserialize(&mut footer)?;
let len = (block.bit_unpacker.bit_width() as usize) * BLOCK_SIZE as usize / 8;
block.data_start_offset = start_offset;
block.file_slice = data
.clone()
.slice(start_offset..(start_offset + len).min(data.len()));
blocks.push(BlockWithData {
block,
file_slice: data.slice(start_offset..(start_offset + len).min(data.len())),
data: Default::default(),
});
start_offset += len;
}
Ok(BlockwiseLinearReader {
blocks: blocks.into_boxed_slice().into(),
stats,
@@ -219,13 +214,13 @@ impl ColumnCodec<u64> for BlockwiseLinearCodec {
}
}
struct BlockWithLength {
struct BlockWithData {
block: Block,
file_slice: FileSlice,
data: OnceLock<OwnedBytes>,
}
impl Deref for BlockWithLength {
impl Deref for BlockWithData {
type Target = Block;
fn deref(&self) -> &Self::Target {
@@ -233,7 +228,7 @@ impl Deref for BlockWithLength {
}
}
impl DerefMut for BlockWithLength {
impl DerefMut for BlockWithData {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.block
}
@@ -241,7 +236,7 @@ impl DerefMut for BlockWithLength {
#[derive(Clone)]
pub struct BlockwiseLinearReader {
blocks: Arc<[BlockWithLength]>,
blocks: Arc<[BlockWithData]>,
stats: ColumnStats,
}

View File

@@ -9,7 +9,7 @@ use fnv::FnvHashMap;
use itertools::Itertools;
use crate::directory::error::OpenReadError;
use crate::directory::{CompositeFile, FileSlice, ManagedDirectory};
use crate::directory::{CompositeFile, FileSlice};
use crate::error::DataCorruption;
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
@@ -19,7 +19,7 @@ use crate::schema::{Field, IndexRecordOption, Schema, Type};
use crate::space_usage::SegmentSpaceUsage;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::{Directory, DocId, Index, Opstamp, SegmentMeta};
use crate::{Directory, DocId, Index, Opstamp};
/// Entry point to access all of the datastructures of the `Segment`
///