mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
* handle ip addresses in term aggregation Stores IpAddresses during the segment term aggregation via u64 representation and converts to u128 (IpV6Address) via downcast when converting to intermediate results. Enable Downcasting on `ColumnValues` Expose u64 variant for u128 encoded data via `open_u64_lenient` method. Remove lifetime in VecColumn, to avoid 'static lifetime requirement coming from downcast trait. * rename method
115 lines
4.1 KiB
Rust
115 lines
4.1 KiB
Rust
use std::io;
|
|
use std::io::Write;
|
|
use std::sync::Arc;
|
|
|
|
use common::OwnedBytes;
|
|
use sstable::Dictionary;
|
|
|
|
use crate::column::{BytesColumn, Column};
|
|
use crate::column_index::{serialize_column_index, SerializableColumnIndex};
|
|
use crate::column_values::{
|
|
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
|
|
CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
|
};
|
|
use crate::iterable::Iterable;
|
|
use crate::StrColumn;
|
|
|
|
pub fn serialize_column_mappable_to_u128<T: MonotonicallyMappableToU128>(
|
|
column_index: SerializableColumnIndex<'_>,
|
|
iterable: &dyn Iterable<T>,
|
|
output: &mut impl Write,
|
|
) -> io::Result<()> {
|
|
let column_index_num_bytes = serialize_column_index(column_index, output)?;
|
|
serialize_column_values_u128(iterable, output)?;
|
|
output.write_all(&column_index_num_bytes.to_le_bytes())?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
|
|
column_index: SerializableColumnIndex<'_>,
|
|
column_values: &impl Iterable<T>,
|
|
output: &mut impl Write,
|
|
) -> io::Result<()> {
|
|
let column_index_num_bytes = serialize_column_index(column_index, output)?;
|
|
serialize_u64_based_column_values(
|
|
column_values,
|
|
&[CodecType::Bitpacked, CodecType::BlockwiseLinear],
|
|
output,
|
|
)?;
|
|
output.write_all(&column_index_num_bytes.to_le_bytes())?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn open_column_u64<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Column<T>> {
|
|
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
|
|
let column_index_num_bytes = u32::from_le_bytes(
|
|
column_index_num_bytes_payload
|
|
.as_slice()
|
|
.try_into()
|
|
.unwrap(),
|
|
);
|
|
let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
|
|
let column_index = crate::column_index::open_column_index(column_index_data)?;
|
|
let column_values = load_u64_based_column_values(column_values_data)?;
|
|
Ok(Column {
|
|
index: column_index,
|
|
values: column_values,
|
|
})
|
|
}
|
|
|
|
pub fn open_column_u128<T: MonotonicallyMappableToU128>(
|
|
bytes: OwnedBytes,
|
|
) -> io::Result<Column<T>> {
|
|
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
|
|
let column_index_num_bytes = u32::from_le_bytes(
|
|
column_index_num_bytes_payload
|
|
.as_slice()
|
|
.try_into()
|
|
.unwrap(),
|
|
);
|
|
let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
|
|
let column_index = crate::column_index::open_column_index(column_index_data)?;
|
|
let column_values = crate::column_values::open_u128_mapped(column_values_data)?;
|
|
Ok(Column {
|
|
index: column_index,
|
|
values: column_values,
|
|
})
|
|
}
|
|
|
|
/// Open the column as u64.
|
|
///
|
|
/// See [`open_u128_as_compact_u64`] for more details.
|
|
pub fn open_column_u128_as_compact_u64(bytes: OwnedBytes) -> io::Result<Column<u64>> {
|
|
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
|
|
let column_index_num_bytes = u32::from_le_bytes(
|
|
column_index_num_bytes_payload
|
|
.as_slice()
|
|
.try_into()
|
|
.unwrap(),
|
|
);
|
|
let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
|
|
let column_index = crate::column_index::open_column_index(column_index_data)?;
|
|
let column_values = crate::column_values::open_u128_as_compact_u64(column_values_data)?;
|
|
Ok(Column {
|
|
index: column_index,
|
|
values: column_values,
|
|
})
|
|
}
|
|
|
|
pub fn open_column_bytes(data: OwnedBytes) -> io::Result<BytesColumn> {
|
|
let (body, dictionary_len_bytes) = data.rsplit(4);
|
|
let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());
|
|
let (dictionary_bytes, column_bytes) = body.split(dictionary_len as usize);
|
|
let dictionary = Arc::new(Dictionary::from_bytes(dictionary_bytes)?);
|
|
let term_ord_column = crate::column::open_column_u64::<u64>(column_bytes)?;
|
|
Ok(BytesColumn {
|
|
dictionary,
|
|
term_ord_column,
|
|
})
|
|
}
|
|
|
|
pub fn open_column_str(data: OwnedBytes) -> io::Result<StrColumn> {
|
|
let bytes_column = open_column_bytes(data)?;
|
|
Ok(StrColumn::wrap(bytes_column))
|
|
}
|