mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
@@ -44,7 +44,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
|
||||
pub fn num_docs(&self) -> RowId {
|
||||
match &self.idx {
|
||||
ColumnIndex::Full => self.values.num_vals() as u32,
|
||||
ColumnIndex::Full => self.values.num_vals(),
|
||||
ColumnIndex::Optional(optional_index) => optional_index.num_docs(),
|
||||
ColumnIndex::Multivalued(col_index) => {
|
||||
// The multivalued index contains all value start row_id,
|
||||
|
||||
@@ -83,13 +83,13 @@ impl MultiValueIndex {
|
||||
let mut cur_doc = docid_start;
|
||||
let mut last_doc = None;
|
||||
|
||||
assert!(self.start_index_column.get_val(docid_start) as u32 <= ranks[0]);
|
||||
assert!(self.start_index_column.get_val(docid_start) <= ranks[0]);
|
||||
|
||||
let mut write_doc_pos = 0;
|
||||
for i in 0..ranks.len() {
|
||||
let pos = ranks[i];
|
||||
loop {
|
||||
let end = self.start_index_column.get_val(cur_doc + 1) as u32;
|
||||
let end = self.start_index_column.get_val(cur_doc + 1);
|
||||
if end > pos {
|
||||
ranks[write_doc_pos] = cur_doc;
|
||||
write_doc_pos += if last_doc == Some(cur_doc) { 0 } else { 1 };
|
||||
|
||||
@@ -440,7 +440,7 @@ impl SerializedBlockMeta {
|
||||
|
||||
#[inline]
|
||||
fn is_sparse(num_rows_in_block: u32) -> bool {
|
||||
num_rows_in_block < DENSE_BLOCK_THRESHOLD as u32
|
||||
num_rows_in_block < DENSE_BLOCK_THRESHOLD
|
||||
}
|
||||
|
||||
fn deserialize_optional_index_block_metadatas(
|
||||
@@ -448,7 +448,7 @@ fn deserialize_optional_index_block_metadatas(
|
||||
num_rows: u32,
|
||||
) -> (Box<[BlockMeta]>, u32) {
|
||||
let num_blocks = data.len() / SERIALIZED_BLOCK_META_NUM_BYTES;
|
||||
let mut block_metas = Vec::with_capacity(num_blocks as usize + 1);
|
||||
let mut block_metas = Vec::with_capacity(num_blocks + 1);
|
||||
let mut start_byte_offset = 0;
|
||||
let mut non_null_rows_before_block = 0;
|
||||
for block_meta_bytes in data.chunks_exact(SERIALIZED_BLOCK_META_NUM_BYTES) {
|
||||
@@ -479,7 +479,7 @@ fn deserialize_optional_index_block_metadatas(
|
||||
block_variant,
|
||||
});
|
||||
start_byte_offset += block_variant.num_bytes_in_block();
|
||||
non_null_rows_before_block += num_non_null_rows as u32;
|
||||
non_null_rows_before_block += num_non_null_rows;
|
||||
}
|
||||
block_metas.resize(
|
||||
((num_rows + BLOCK_SIZE - 1) / BLOCK_SIZE) as usize,
|
||||
|
||||
@@ -32,7 +32,7 @@ pub const MINI_BLOCK_NUM_BYTES: usize = MINI_BLOCK_BITVEC_NUM_BYTES + MINI_BLOCK
|
||||
|
||||
/// Number of bytes in a dense block.
|
||||
pub const DENSE_BLOCK_NUM_BYTES: u32 =
|
||||
(ELEMENTS_PER_BLOCK as u32 / ELEMENTS_PER_MINI_BLOCK as u32) * MINI_BLOCK_NUM_BYTES as u32;
|
||||
(ELEMENTS_PER_BLOCK / ELEMENTS_PER_MINI_BLOCK as u32) * MINI_BLOCK_NUM_BYTES as u32;
|
||||
|
||||
pub struct DenseBlockCodec;
|
||||
|
||||
@@ -229,7 +229,7 @@ pub fn serialize_dense_codec(
|
||||
while block_id > current_block_id {
|
||||
let dense_mini_block = DenseMiniBlock {
|
||||
bitvec: block,
|
||||
rank: non_null_rows_before as u16,
|
||||
rank: non_null_rows_before,
|
||||
};
|
||||
output.write_all(&dense_mini_block.to_bytes())?;
|
||||
non_null_rows_before += block.count_ones() as u16;
|
||||
|
||||
@@ -37,7 +37,7 @@ proptest! {
|
||||
fn test_with_random_sets_simple() {
|
||||
let vals = 10..BLOCK_SIZE * 2;
|
||||
let mut out: Vec<u8> = Vec::new();
|
||||
serialize_optional_index(&vals.clone(), 100, &mut out).unwrap();
|
||||
serialize_optional_index(&vals, 100, &mut out).unwrap();
|
||||
let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
|
||||
let ranks: Vec<u32> = (65_472u32..65_473u32).collect();
|
||||
let els: Vec<u32> = ranks.iter().copied().map(|rank| rank + 10).collect();
|
||||
|
||||
@@ -305,23 +305,9 @@ where
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::column_values::monotonic_mapping::{
|
||||
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternalBaseval,
|
||||
StrictlyMonotonicMappingToInternalGCDBaseval,
|
||||
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_monotonic_mapping() {
|
||||
let vals = &[3u64, 5u64][..];
|
||||
let col = VecColumn::from(vals);
|
||||
let mapped = monotonic_map_column(col, StrictlyMonotonicMappingToInternalBaseval::new(2));
|
||||
assert_eq!(mapped.min_value(), 1u64);
|
||||
assert_eq!(mapped.max_value(), 3u64);
|
||||
assert_eq!(mapped.num_vals(), 2);
|
||||
assert_eq!(mapped.num_vals(), 2);
|
||||
assert_eq!(mapped.get_val(0), 1);
|
||||
assert_eq!(mapped.get_val(1), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_range_as_col() {
|
||||
let col = IterColumn::from(10..100);
|
||||
@@ -331,42 +317,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_monotonic_mapping_iter() {
|
||||
let vals: Vec<u64> = (10..110u64).map(|el| el * 10).collect();
|
||||
let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect();
|
||||
let col = VecColumn::from(&vals);
|
||||
let mapped = monotonic_map_column(
|
||||
col,
|
||||
StrictlyMonotonicMappingInverter::from(
|
||||
StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 100),
|
||||
),
|
||||
StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<i64>::new()),
|
||||
);
|
||||
let val_i64s: Vec<u64> = mapped.iter().collect();
|
||||
for i in 0..100 {
|
||||
assert_eq!(val_i64s[i as usize], mapped.get_val(i));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_monotonic_mapping_get_range() {
|
||||
let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect();
|
||||
let col = VecColumn::from(&vals);
|
||||
let mapped = monotonic_map_column(
|
||||
col,
|
||||
StrictlyMonotonicMappingInverter::from(
|
||||
StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 0),
|
||||
),
|
||||
);
|
||||
|
||||
assert_eq!(mapped.min_value(), 0u64);
|
||||
assert_eq!(mapped.max_value(), 9900u64);
|
||||
assert_eq!(mapped.num_vals(), 100);
|
||||
let val_u64s: Vec<u64> = mapped.iter().collect();
|
||||
assert_eq!(val_u64s.len(), 100);
|
||||
for i in 0..100 {
|
||||
assert_eq!(val_u64s[i as usize], mapped.get_val(i));
|
||||
assert_eq!(val_u64s[i as usize], vals[i as usize] * 10);
|
||||
}
|
||||
let mut buf = [0u64; 20];
|
||||
mapped.get_range(7, &mut buf[..]);
|
||||
assert_eq!(&val_u64s[7..][..20], &buf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use common::DateTime;
|
||||
use fastdivide::DividerU64;
|
||||
|
||||
use super::MonotonicallyMappableToU128;
|
||||
use crate::RowId;
|
||||
@@ -113,68 +112,6 @@ where T: MonotonicallyMappableToU64
|
||||
}
|
||||
}
|
||||
|
||||
/// Mapping dividing by gcd and a base value.
|
||||
///
|
||||
/// The function is assumed to be only called on values divided by passed
|
||||
/// gcd value. (It is necessary for the function to be monotonic.)
|
||||
pub(crate) struct StrictlyMonotonicMappingToInternalGCDBaseval {
|
||||
gcd_divider: DividerU64,
|
||||
gcd: u64,
|
||||
min_value: u64,
|
||||
}
|
||||
impl StrictlyMonotonicMappingToInternalGCDBaseval {
|
||||
/// Creates a linear mapping `x -> gcd*x + min_value`.
|
||||
pub(crate) fn new(gcd: u64, min_value: u64) -> Self {
|
||||
let gcd_divider = DividerU64::divide_by(gcd);
|
||||
Self {
|
||||
gcd_divider,
|
||||
gcd,
|
||||
min_value,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
|
||||
for StrictlyMonotonicMappingToInternalGCDBaseval
|
||||
{
|
||||
#[inline(always)]
|
||||
fn mapping(&self, inp: External) -> u64 {
|
||||
self.gcd_divider
|
||||
.divide(External::to_u64(inp) - self.min_value)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn inverse(&self, out: u64) -> External {
|
||||
External::from_u64(self.min_value + out * self.gcd)
|
||||
}
|
||||
}
|
||||
|
||||
/// Strictly monotonic mapping with a base value.
|
||||
pub(crate) struct StrictlyMonotonicMappingToInternalBaseval {
|
||||
min_value: u64,
|
||||
}
|
||||
|
||||
impl StrictlyMonotonicMappingToInternalBaseval {
|
||||
/// Creates a linear mapping `x -> x + min_value`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn new(min_value: u64) -> Self {
|
||||
Self { min_value }
|
||||
}
|
||||
}
|
||||
|
||||
impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
|
||||
for StrictlyMonotonicMappingToInternalBaseval
|
||||
{
|
||||
#[inline(always)]
|
||||
fn mapping(&self, val: External) -> u64 {
|
||||
External::to_u64(val) - self.min_value
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn inverse(&self, val: u64) -> External {
|
||||
External::from_u64(self.min_value + val)
|
||||
}
|
||||
}
|
||||
|
||||
impl MonotonicallyMappableToU64 for u64 {
|
||||
#[inline(always)]
|
||||
fn to_u64(self) -> u64 {
|
||||
@@ -263,13 +200,6 @@ mod tests {
|
||||
// TODO
|
||||
// identity mapping
|
||||
// test_round_trip(&StrictlyMonotonicMappingToInternal::<u128>::new(), 100u128);
|
||||
|
||||
// base value to i64 round trip
|
||||
let mapping = StrictlyMonotonicMappingToInternalBaseval::new(100);
|
||||
test_round_trip::<_, _, u64>(&mapping, 100i64);
|
||||
// base value and gcd to u64 round trip
|
||||
let mapping = StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 100);
|
||||
test_round_trip::<_, _, u64>(&mapping, 100u64);
|
||||
}
|
||||
|
||||
fn test_round_trip<T: StrictlyMonotonicFn<K, L>, K: std::fmt::Debug + Eq + Copy, L>(
|
||||
|
||||
@@ -201,8 +201,8 @@ pub struct BlockwiseLinearReader {
|
||||
impl ColumnValues for BlockwiseLinearReader {
|
||||
#[inline(always)]
|
||||
fn get_val(&self, idx: u32) -> u64 {
|
||||
let block_id = (idx / BLOCK_SIZE as u32) as usize;
|
||||
let idx_within_block = idx % (BLOCK_SIZE as u32);
|
||||
let block_id = (idx / BLOCK_SIZE) as usize;
|
||||
let idx_within_block = idx % BLOCK_SIZE;
|
||||
let block = &self.blocks[block_id];
|
||||
let interpoled_val: u64 = block.line.eval(idx_within_block);
|
||||
let block_bytes = &self.data[block.data_start_offset..];
|
||||
|
||||
@@ -143,7 +143,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
for code in COLUMN_TYPES.len() as u8..=u8::MAX {
|
||||
assert!(ColumnType::try_from_code(code as u8).is_err());
|
||||
assert!(ColumnType::try_from_code(code).is_err());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -191,7 +191,7 @@ struct TermOrdinalMapping {
|
||||
impl TermOrdinalMapping {
|
||||
fn add_segment(&mut self, max_term_ord: usize) {
|
||||
self.per_segment_new_term_ordinals
|
||||
.push(vec![TermOrdinal::default(); max_term_ord as usize]);
|
||||
.push(vec![TermOrdinal::default(); max_term_ord]);
|
||||
}
|
||||
|
||||
fn register_from_to(&mut self, segment_ord: usize, from_ord: TermOrdinal, to_ord: TermOrdinal) {
|
||||
|
||||
@@ -101,7 +101,7 @@ fn make_byte_columnar_multiple_columns(columns: &[(&str, &[&[&[u8]]])]) -> Colum
|
||||
for (column_name, column_values) in columns {
|
||||
for (row_id, vals) in column_values.iter().enumerate() {
|
||||
for val in vals.iter() {
|
||||
dataframe_writer.record_bytes(row_id as u32, column_name, *val);
|
||||
dataframe_writer.record_bytes(row_id as u32, column_name, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -122,7 +122,7 @@ fn make_text_columnar_multiple_columns(columns: &[(&str, &[&[&str]])]) -> Column
|
||||
for (column_name, column_values) in columns {
|
||||
for (row_id, vals) in column_values.iter().enumerate() {
|
||||
for val in vals.iter() {
|
||||
dataframe_writer.record_str(row_id as u32, column_name, *val);
|
||||
dataframe_writer.record_str(row_id as u32, column_name, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,7 +310,7 @@ mod tests {
|
||||
buffer.extend_from_slice(b"234234");
|
||||
let mut bytes = &buffer[..];
|
||||
let serdeser_symbol = ColumnOperation::deserialize(&mut bytes).unwrap();
|
||||
assert_eq!(bytes.len() + buf.as_ref().len() as usize, buffer.len());
|
||||
assert_eq!(bytes.len() + buf.as_ref().len(), buffer.len());
|
||||
assert_eq!(column_op, serdeser_symbol);
|
||||
}
|
||||
|
||||
@@ -341,7 +341,7 @@ mod tests {
|
||||
fn test_column_operation_unordered_aux(val: u32, expected_len: usize) {
|
||||
let column_op = ColumnOperation::Value(UnorderedId(val));
|
||||
let minibuf = column_op.serialize();
|
||||
assert_eq!(minibuf.as_ref().len() as usize, expected_len);
|
||||
assert_eq!({ minibuf.as_ref().len() }, expected_len);
|
||||
let mut buf = minibuf.as_ref().to_vec();
|
||||
buf.extend_from_slice(&[2, 2, 2, 2, 2, 2]);
|
||||
let mut cursor = &buf[..];
|
||||
|
||||
@@ -761,7 +761,7 @@ mod tests {
|
||||
assert_eq!(column_writer.get_cardinality(3), Cardinality::Full);
|
||||
let mut buffer = Vec::new();
|
||||
let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
|
||||
.operation_iterator(&mut arena, None, &mut buffer)
|
||||
.operation_iterator(&arena, None, &mut buffer)
|
||||
.collect();
|
||||
assert_eq!(symbols.len(), 6);
|
||||
assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
|
||||
@@ -790,7 +790,7 @@ mod tests {
|
||||
assert_eq!(column_writer.get_cardinality(3), Cardinality::Optional);
|
||||
let mut buffer = Vec::new();
|
||||
let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
|
||||
.operation_iterator(&mut arena, None, &mut buffer)
|
||||
.operation_iterator(&arena, None, &mut buffer)
|
||||
.collect();
|
||||
assert_eq!(symbols.len(), 4);
|
||||
assert!(matches!(symbols[0], ColumnOperation::NewDoc(1u32)));
|
||||
@@ -813,7 +813,7 @@ mod tests {
|
||||
assert_eq!(column_writer.get_cardinality(2), Cardinality::Optional);
|
||||
let mut buffer = Vec::new();
|
||||
let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
|
||||
.operation_iterator(&mut arena, None, &mut buffer)
|
||||
.operation_iterator(&arena, None, &mut buffer)
|
||||
.collect();
|
||||
assert_eq!(symbols.len(), 2);
|
||||
assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
|
||||
@@ -832,7 +832,7 @@ mod tests {
|
||||
assert_eq!(column_writer.get_cardinality(1), Cardinality::Multivalued);
|
||||
let mut buffer = Vec::new();
|
||||
let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
|
||||
.operation_iterator(&mut arena, None, &mut buffer)
|
||||
.operation_iterator(&arena, None, &mut buffer)
|
||||
.collect();
|
||||
assert_eq!(symbols.len(), 3);
|
||||
assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
|
||||
|
||||
@@ -150,11 +150,7 @@ mod tests {
|
||||
multivalued_value_index_builder.record_row(2u32);
|
||||
multivalued_value_index_builder.record_value();
|
||||
assert_eq!(
|
||||
multivalued_value_index_builder
|
||||
.finish(4u32)
|
||||
.iter()
|
||||
.copied()
|
||||
.collect::<Vec<u32>>(),
|
||||
multivalued_value_index_builder.finish(4u32).to_vec(),
|
||||
vec![0, 0, 2, 3, 3]
|
||||
);
|
||||
multivalued_value_index_builder.reset();
|
||||
@@ -162,11 +158,7 @@ mod tests {
|
||||
multivalued_value_index_builder.record_value();
|
||||
multivalued_value_index_builder.record_value();
|
||||
assert_eq!(
|
||||
multivalued_value_index_builder
|
||||
.finish(4u32)
|
||||
.iter()
|
||||
.copied()
|
||||
.collect::<Vec<u32>>(),
|
||||
multivalued_value_index_builder.finish(4u32).to_vec(),
|
||||
vec![0, 0, 0, 2, 2]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -21,28 +21,25 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
|
||||
pub use term_agg::*;
|
||||
|
||||
/// Order for buckets in a bucket aggregation.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, Default)]
|
||||
pub enum Order {
|
||||
/// Asc order
|
||||
#[serde(rename = "asc")]
|
||||
Asc,
|
||||
/// Desc order
|
||||
#[serde(rename = "desc")]
|
||||
#[default]
|
||||
Desc,
|
||||
}
|
||||
|
||||
impl Default for Order {
|
||||
fn default() -> Self {
|
||||
Order::Desc
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
/// Order property by which to apply the order
|
||||
#[derive(Default)]
|
||||
pub enum OrderTarget {
|
||||
/// The key of the bucket
|
||||
Key,
|
||||
/// The doc count of the bucket
|
||||
#[default]
|
||||
Count,
|
||||
/// Order by value of the sub aggregation metric with identified by given `String`.
|
||||
///
|
||||
@@ -50,11 +47,6 @@ pub enum OrderTarget {
|
||||
SubAggregation(String),
|
||||
}
|
||||
|
||||
impl Default for OrderTarget {
|
||||
fn default() -> Self {
|
||||
OrderTarget::Count
|
||||
}
|
||||
}
|
||||
impl From<&str> for OrderTarget {
|
||||
fn from(val: &str) -> Self {
|
||||
match val {
|
||||
|
||||
@@ -124,20 +124,12 @@ pub(crate) fn build_segment_agg_collector(
|
||||
/// The GenericSegmentAggregationResultsCollector is the generic version of the collector, which
|
||||
/// can handle arbitrary complexity of sub-aggregations. Ideally we never have to pick this one
|
||||
/// and can provide specialized versions instead, that remove some of its overhead.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct GenericSegmentAggregationResultsCollector {
|
||||
pub(crate) metrics: Option<VecWithNames<SegmentMetricResultCollector>>,
|
||||
pub(crate) buckets: Option<VecWithNames<SegmentBucketResultCollector>>,
|
||||
}
|
||||
|
||||
impl Default for GenericSegmentAggregationResultsCollector {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
metrics: Default::default(),
|
||||
buckets: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for GenericSegmentAggregationResultsCollector {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SegmentAggregationResultsCollector")
|
||||
@@ -186,7 +178,7 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
|
||||
for (collector, agg_with_accessor) in
|
||||
metrics.values_mut().zip(agg_with_accessor.metrics.values())
|
||||
{
|
||||
collector.collect_block(&docs, agg_with_accessor);
|
||||
collector.collect_block(docs, agg_with_accessor);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,7 +186,7 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
|
||||
for (collector, agg_with_accessor) in
|
||||
buckets.values_mut().zip(agg_with_accessor.buckets.values())
|
||||
{
|
||||
collector.collect_block(&docs, agg_with_accessor)?;
|
||||
collector.collect_block(docs, agg_with_accessor)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -515,8 +515,7 @@ mod tests {
|
||||
expected_compressed_collapsed_mapping: &[usize],
|
||||
expected_unique_facet_ords: &[(u64, usize)],
|
||||
) {
|
||||
let (compressed_collapsed_mapping, unique_facet_ords) =
|
||||
compress_mapping(&collapsed_mapping);
|
||||
let (compressed_collapsed_mapping, unique_facet_ords) = compress_mapping(collapsed_mapping);
|
||||
assert_eq!(
|
||||
compressed_collapsed_mapping,
|
||||
expected_compressed_collapsed_mapping
|
||||
|
||||
@@ -56,9 +56,8 @@ pub fn test_filter_collector() -> crate::Result<()> {
|
||||
assert_eq!(filtered_top_docs.len(), 0);
|
||||
|
||||
fn date_filter(value: DateTime) -> bool {
|
||||
(crate::DateTime::from(value).into_utc()
|
||||
- OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
|
||||
.whole_weeks()
|
||||
(value.into_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
|
||||
.whole_weeks()
|
||||
> 0
|
||||
}
|
||||
|
||||
|
||||
@@ -267,7 +267,7 @@ mod tests {
|
||||
.unwrap();
|
||||
for doc_id in 1u64..10_000u64 {
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64))
|
||||
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers.serialize(&mut write, None).unwrap();
|
||||
@@ -558,7 +558,7 @@ mod tests {
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.str("text").unwrap().unwrap();
|
||||
|
||||
assert_eq!(&get_vals_for_docs(&text_fast_field.ords(), 0..2), &[0, 1]);
|
||||
assert_eq!(&get_vals_for_docs(text_fast_field.ords(), 0..2), &[0, 1]);
|
||||
}
|
||||
|
||||
// TODO uncomment once merging is available
|
||||
@@ -654,7 +654,7 @@ mod tests {
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_col = fast_fields.str("text").unwrap().unwrap();
|
||||
|
||||
assert_eq!(get_vals_for_docs(&text_col.ords(), 0..6), vec![1, 0, 0, 2]);
|
||||
assert_eq!(get_vals_for_docs(text_col.ords(), 0..6), vec![1, 0, 0, 2]);
|
||||
|
||||
let inverted_index = segment_reader.inverted_index(text_field)?;
|
||||
assert_eq!(inverted_index.terms().num_terms(), 3);
|
||||
@@ -702,7 +702,7 @@ mod tests {
|
||||
let text_fast_field = fast_fields.str("text").unwrap().unwrap();
|
||||
|
||||
assert_eq!(
|
||||
get_vals_for_docs(&text_fast_field.ords(), 0..9),
|
||||
get_vals_for_docs(text_fast_field.ords(), 0..9),
|
||||
vec![1, 0, 0, 3 /* next segment */, 0, 2]
|
||||
);
|
||||
|
||||
@@ -925,7 +925,7 @@ mod tests {
|
||||
let col = readers.date("field").unwrap();
|
||||
|
||||
for (i, time) in times.iter().enumerate() {
|
||||
let dt: DateTime = col.first(i as u32).unwrap().into();
|
||||
let dt: DateTime = col.first(i as u32).unwrap();
|
||||
assert_eq!(dt, time.truncate(precision));
|
||||
}
|
||||
readers.column_num_bytes("field").unwrap()
|
||||
|
||||
@@ -261,10 +261,10 @@ fn record_json_value_to_columnar_writer(
|
||||
// TODO handle null
|
||||
}
|
||||
serde_json::Value::Bool(bool_val) => {
|
||||
columnar_writer.record_bool(doc, &json_path_writer, *bool_val);
|
||||
columnar_writer.record_bool(doc, json_path_writer, *bool_val);
|
||||
}
|
||||
serde_json::Value::Number(json_number) => {
|
||||
if let Some(numerical_value) = columnar_numerical_value(&json_number) {
|
||||
if let Some(numerical_value) = columnar_numerical_value(json_number) {
|
||||
columnar_writer.record_numerical(doc, json_path_writer.as_str(), numerical_value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,11 +227,7 @@ pub mod tests {
|
||||
};
|
||||
|
||||
let gen_query_inclusive = |field: &str, ip_range: &RangeInclusive<Ipv6Addr>| {
|
||||
format!(
|
||||
"{field}:[{} TO {}]",
|
||||
ip_range.start().to_string(),
|
||||
ip_range.end().to_string()
|
||||
)
|
||||
format!("{field}:[{} TO {}]", ip_range.start(), ip_range.end())
|
||||
};
|
||||
|
||||
let test_sample = |sample_docs: &[Doc]| {
|
||||
|
||||
@@ -9,10 +9,13 @@ use serde::{Deserialize, Serialize};
|
||||
/// [`TextFieldIndexing::set_index_option()`](crate::schema::TextFieldIndexing::set_index_option))
|
||||
/// * request that a given amount of information to be decoded as one goes through a posting list.
|
||||
/// (See [`InvertedIndexReader::read_postings()`](crate::InvertedIndexReader::read_postings))
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
|
||||
#[derive(
|
||||
Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize, Default,
|
||||
)]
|
||||
pub enum IndexRecordOption {
|
||||
/// records only the `DocId`s
|
||||
#[serde(rename = "basic")]
|
||||
#[default]
|
||||
Basic,
|
||||
/// records the document ids as well as the term frequency.
|
||||
/// The term frequency can help giving better scoring of the documents.
|
||||
@@ -25,12 +28,6 @@ pub enum IndexRecordOption {
|
||||
WithFreqsAndPositions,
|
||||
}
|
||||
|
||||
impl Default for IndexRecordOption {
|
||||
fn default() -> Self {
|
||||
IndexRecordOption::Basic
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexRecordOption {
|
||||
/// Returns true if this option includes encoding
|
||||
/// term frequencies.
|
||||
|
||||
@@ -431,8 +431,8 @@ mod tests {
|
||||
let block = dic.sstable_index.get_block_with_ord(0);
|
||||
slice.restrict(block.byte_range);
|
||||
|
||||
assert!(dic.get(&b"$$$").unwrap().is_none());
|
||||
assert!(dic.term_ord(&b"$$$").unwrap().is_none());
|
||||
assert!(dic.get(b"$$$").unwrap().is_none());
|
||||
assert!(dic.term_ord(b"$$$").unwrap().is_none());
|
||||
|
||||
// after last block
|
||||
// last block must be loaded for ord related operations
|
||||
@@ -444,8 +444,8 @@ mod tests {
|
||||
|
||||
// last block isn't required to be loaded for key related operations
|
||||
slice.restrict(0..0);
|
||||
assert!(dic.get(&b"~~~").unwrap().is_none());
|
||||
assert!(dic.term_ord(&b"~~~").unwrap().is_none());
|
||||
assert!(dic.get(b"~~~").unwrap().is_none());
|
||||
assert!(dic.term_ord(b"~~~").unwrap().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -344,7 +344,7 @@ mod test {
|
||||
{
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.insert(&long_key[..], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[0, 3, 4], &()).is_ok());
|
||||
assert!(sstable_writer.insert([0, 3, 4], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&long_key2[..], &()).is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
@@ -363,9 +363,9 @@ mod test {
|
||||
let mut buffer = vec![];
|
||||
{
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.insert(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8, 18u8, 19u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8, 20u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert([17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert([17u8, 18u8, 19u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert([17u8, 20u8], &()).is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
assert_eq!(
|
||||
@@ -401,8 +401,8 @@ mod test {
|
||||
fn test_simple_sstable_non_increasing_key() {
|
||||
let mut buffer = vec![];
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.insert(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[16u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert([17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert([16u8], &()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -472,8 +472,8 @@ mod test {
|
||||
fn bound_strategy() -> impl Strategy<Value = Bound<String>> {
|
||||
prop_oneof![
|
||||
Just(Bound::<String>::Unbounded),
|
||||
"[a-c]{0,5}".prop_map(|key| Bound::Included(key)),
|
||||
"[a-c]{0,5}".prop_map(|key| Bound::Excluded(key)),
|
||||
"[a-c]{0,5}".prop_map(Bound::Included),
|
||||
"[a-c]{0,5}".prop_map(Bound::Excluded),
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user