mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Introducing a column trait
This commit is contained in:
committed by
Pascal Seitz
parent
c73b425bc1
commit
5331be800b
@@ -7,10 +7,11 @@
|
|||||||
// Of course, you can have a look at tantivy's built-in collectors
|
// Of course, you can have a look at tantivy's built-in collectors
|
||||||
// such as the `CountCollector` for more examples.
|
// such as the `CountCollector` for more examples.
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
// ---
|
// ---
|
||||||
// Importing tantivy...
|
// Importing tantivy...
|
||||||
use tantivy::collector::{Collector, SegmentCollector};
|
use tantivy::collector::{Collector, SegmentCollector};
|
||||||
use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
|
use tantivy::fastfield::DynamicFastFieldReader;
|
||||||
use tantivy::query::QueryParser;
|
use tantivy::query::QueryParser;
|
||||||
use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
|
use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
|
||||||
use tantivy::{doc, Index, Score, SegmentReader};
|
use tantivy::{doc, Index, Score, SegmentReader};
|
||||||
@@ -103,7 +104,7 @@ impl SegmentCollector for StatsSegmentCollector {
|
|||||||
type Fruit = Option<Stats>;
|
type Fruit = Option<Stats>;
|
||||||
|
|
||||||
fn collect(&mut self, doc: u32, _score: Score) {
|
fn collect(&mut self, doc: u32, _score: Score) {
|
||||||
let value = self.fast_field_reader.get(doc) as f64;
|
let value = self.fast_field_reader.get_val(doc as u64) as f64;
|
||||||
self.stats.count += 1;
|
self.stats.count += 1;
|
||||||
self.stats.sum += value;
|
self.stats.sum += value;
|
||||||
self.stats.squared_sum += value * value;
|
self.stats.squared_sum += value * value;
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ use std::cmp::Reverse;
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::sync::{Arc, RwLock, Weak};
|
use std::sync::{Arc, RwLock, Weak};
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use tantivy::collector::TopDocs;
|
use tantivy::collector::TopDocs;
|
||||||
use tantivy::fastfield::FastFieldReader;
|
|
||||||
use tantivy::query::QueryParser;
|
use tantivy::query::QueryParser;
|
||||||
use tantivy::schema::{Field, Schema, FAST, TEXT};
|
use tantivy::schema::{Field, Schema, FAST, TEXT};
|
||||||
use tantivy::{
|
use tantivy::{
|
||||||
@@ -52,7 +52,7 @@ impl Warmer for DynamicPriceColumn {
|
|||||||
let product_id_reader = segment.fast_fields().u64(self.field)?;
|
let product_id_reader = segment.fast_fields().u64(self.field)?;
|
||||||
let product_ids: Vec<ProductId> = segment
|
let product_ids: Vec<ProductId> = segment
|
||||||
.doc_ids_alive()
|
.doc_ids_alive()
|
||||||
.map(|doc| product_id_reader.get(doc))
|
.map(|doc| product_id_reader.get_val(doc as u64))
|
||||||
.collect();
|
.collect();
|
||||||
let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
|
let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
|
||||||
let mut price_vals: Vec<Price> = Vec::new();
|
let mut price_vals: Vec<Price> = Vec::new();
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use common::BinarySerializable;
|
|||||||
use ownedbytes::OwnedBytes;
|
use ownedbytes::OwnedBytes;
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||||
|
|
||||||
use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess};
|
use crate::{Column, FastFieldCodec, FastFieldCodecType};
|
||||||
|
|
||||||
/// Depending on the field type, a different
|
/// Depending on the field type, a different
|
||||||
/// fast field is required.
|
/// fast field is required.
|
||||||
@@ -17,7 +17,7 @@ pub struct BitpackedReader {
|
|||||||
num_vals: u64,
|
num_vals: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FastFieldDataAccess for BitpackedReader {
|
impl Column for BitpackedReader {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_val(&self, doc: u64) -> u64 {
|
fn get_val(&self, doc: u64) -> u64 {
|
||||||
self.min_value_u64 + self.bit_unpacker.get(doc, &self.data)
|
self.min_value_u64 + self.bit_unpacker.get(doc, &self.data)
|
||||||
@@ -124,10 +124,7 @@ impl FastFieldCodec for BitpackedCodec {
|
|||||||
/// It requires a `min_value` and a `max_value` to compute
|
/// It requires a `min_value` and a `max_value` to compute
|
||||||
/// the minimum number of bits required to encode
|
/// the minimum number of bits required to encode
|
||||||
/// values.
|
/// values.
|
||||||
fn serialize(
|
fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> {
|
||||||
write: &mut impl Write,
|
|
||||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
let mut serializer = BitpackedSerializerLegacy::open(
|
let mut serializer = BitpackedSerializerLegacy::open(
|
||||||
write,
|
write,
|
||||||
fastfield_accessor.min_value(),
|
fastfield_accessor.min_value(),
|
||||||
@@ -142,7 +139,7 @@ impl FastFieldCodec for BitpackedCodec {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option<f32> {
|
fn estimate(fastfield_accessor: &impl Column) -> Option<f32> {
|
||||||
let amplitude = fastfield_accessor.max_value() - fastfield_accessor.min_value();
|
let amplitude = fastfield_accessor.max_value() - fastfield_accessor.min_value();
|
||||||
let num_bits = compute_num_bits(amplitude);
|
let num_bits = compute_num_bits(amplitude);
|
||||||
let num_bits_uncompressed = 64;
|
let num_bits_uncompressed = 64;
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ use ownedbytes::OwnedBytes;
|
|||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||||
|
|
||||||
use crate::linear::{get_calculated_value, get_slope};
|
use crate::linear::{get_calculated_value, get_slope};
|
||||||
use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess};
|
use crate::{Column, FastFieldCodec, FastFieldCodecType};
|
||||||
|
|
||||||
const CHUNK_SIZE: u64 = 512;
|
const CHUNK_SIZE: u64 = 512;
|
||||||
|
|
||||||
@@ -146,7 +146,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
|
|||||||
&interpolations[get_interpolation_position(doc)]
|
&interpolations[get_interpolation_position(doc)]
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FastFieldDataAccess for BlockwiseLinearReader {
|
impl Column for BlockwiseLinearReader {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_val(&self, idx: u64) -> u64 {
|
fn get_val(&self, idx: u64) -> u64 {
|
||||||
let interpolation = get_interpolation_function(idx, &self.footer.interpolations);
|
let interpolation = get_interpolation_function(idx, &self.footer.interpolations);
|
||||||
@@ -195,10 +195,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new fast field serializer.
|
/// Creates a new fast field serializer.
|
||||||
fn serialize(
|
fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> {
|
||||||
write: &mut impl Write,
|
|
||||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
|
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
|
||||||
|
|
||||||
let first_val = fastfield_accessor.get_val(0);
|
let first_val = fastfield_accessor.get_val(0);
|
||||||
@@ -292,7 +289,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
|
|||||||
/// estimation for linear interpolation is hard because, you don't know
|
/// estimation for linear interpolation is hard because, you don't know
|
||||||
/// where the local maxima are for the deviation of the calculated value and
|
/// where the local maxima are for the deviation of the calculated value and
|
||||||
/// the offset is also unknown.
|
/// the offset is also unknown.
|
||||||
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option<f32> {
|
fn estimate(fastfield_accessor: &impl Column) -> Option<f32> {
|
||||||
if fastfield_accessor.num_vals() < 10 * CHUNK_SIZE {
|
if fastfield_accessor.num_vals() < 10 * CHUNK_SIZE {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|||||||
49
fastfield_codecs/src/column.rs
Normal file
49
fastfield_codecs/src/column.rs
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
pub trait Column<T = u64> {
|
||||||
|
/// Return the value associated to the given idx.
|
||||||
|
///
|
||||||
|
/// This accessor should return as fast as possible.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// May panic if `idx` is greater than or equal to the column length.
|
||||||
|
fn get_val(&self, idx: u64) -> T;
|
||||||
|
|
||||||
|
/// Fills an output buffer with the fast field values
|
||||||
|
/// associated with the `DocId` going from
|
||||||
|
/// `start` to `start + output.len()`.
|
||||||
|
///
|
||||||
|
/// Regardless of the type of `Item`, this method works
|
||||||
|
/// - transmuting the output array
|
||||||
|
/// - extracting the `Item`s as if they were `u64`
|
||||||
|
/// - possibly converting the `u64` value to the right type.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// May panic if `start + output.len()` is greater than
|
||||||
|
/// the segment's `maxdoc`.
|
||||||
|
fn get_range(&self, start: u64, output: &mut [T]) {
|
||||||
|
for (out, idx) in output.iter_mut().zip(start..) {
|
||||||
|
*out = self.get_val(idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the minimum value for this fast field.
|
||||||
|
///
|
||||||
|
/// The min value does not take into account possibly
|
||||||
|
/// deleted documents, and should be considered as a lower bound
|
||||||
|
/// of the actual minimum value.
|
||||||
|
fn min_value(&self) -> T;
|
||||||
|
|
||||||
|
/// Returns the maximum value for this fast field.
|
||||||
|
///
|
||||||
|
/// The max value does not take into account possibly
|
||||||
|
/// deleted documents, and should be considered as an upper bound
|
||||||
|
/// of the actual maximum value.
|
||||||
|
fn max_value(&self) -> T;
|
||||||
|
|
||||||
|
fn num_vals(&self) -> u64;
|
||||||
|
/// Returns an iterator over the data
|
||||||
|
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
|
||||||
|
Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -12,16 +12,9 @@ pub mod bitpacked;
|
|||||||
pub mod blockwise_linear;
|
pub mod blockwise_linear;
|
||||||
pub mod linear;
|
pub mod linear;
|
||||||
|
|
||||||
pub trait FastFieldDataAccess {
|
mod column;
|
||||||
fn get_val(&self, doc: u64) -> u64;
|
|
||||||
fn min_value(&self) -> u64;
|
pub use self::column::Column;
|
||||||
fn max_value(&self) -> u64;
|
|
||||||
fn num_vals(&self) -> u64;
|
|
||||||
/// Returns a iterator over the data
|
|
||||||
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = u64> + 'a> {
|
|
||||||
Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
|
||||||
#[repr(u8)]
|
#[repr(u8)]
|
||||||
@@ -68,7 +61,7 @@ pub trait FastFieldCodec {
|
|||||||
/// used for debugging and de/serialization.
|
/// used for debugging and de/serialization.
|
||||||
const CODEC_TYPE: FastFieldCodecType;
|
const CODEC_TYPE: FastFieldCodecType;
|
||||||
|
|
||||||
type Reader: FastFieldDataAccess;
|
type Reader: Column<u64>;
|
||||||
|
|
||||||
/// Reads the metadata and returns the CodecReader
|
/// Reads the metadata and returns the CodecReader
|
||||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader>;
|
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader>;
|
||||||
@@ -77,10 +70,7 @@ pub trait FastFieldCodec {
|
|||||||
///
|
///
|
||||||
/// The fastfield_accessor iterator should be preferred over using fastfield_accessor for
|
/// The fastfield_accessor iterator should be preferred over using fastfield_accessor for
|
||||||
/// performance reasons.
|
/// performance reasons.
|
||||||
fn serialize(
|
fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column<u64>) -> io::Result<()>;
|
||||||
write: &mut impl Write,
|
|
||||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
|
||||||
) -> io::Result<()>;
|
|
||||||
|
|
||||||
/// Returns an estimate of the compression ratio.
|
/// Returns an estimate of the compression ratio.
|
||||||
/// If the codec is not applicable, returns `None`.
|
/// If the codec is not applicable, returns `None`.
|
||||||
@@ -89,7 +79,7 @@ pub trait FastFieldCodec {
|
|||||||
///
|
///
|
||||||
/// It could make sense to also return a value representing
|
/// It could make sense to also return a value representing
|
||||||
/// computational complexity.
|
/// computational complexity.
|
||||||
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option<f32>;
|
fn estimate(fastfield_accessor: &impl Column) -> Option<f32>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -100,7 +90,7 @@ pub struct FastFieldStats {
|
|||||||
pub num_vals: u64,
|
pub num_vals: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FastFieldDataAccess for &'a [u64] {
|
impl<'a> Column for &'a [u64] {
|
||||||
fn get_val(&self, position: u64) -> u64 {
|
fn get_val(&self, position: u64) -> u64 {
|
||||||
self[position as usize]
|
self[position as usize]
|
||||||
}
|
}
|
||||||
@@ -122,7 +112,7 @@ impl<'a> FastFieldDataAccess for &'a [u64] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FastFieldDataAccess for Vec<u64> {
|
impl Column for Vec<u64> {
|
||||||
fn get_val(&self, position: u64) -> u64 {
|
fn get_val(&self, position: u64) -> u64 {
|
||||||
self[position as usize]
|
self[position as usize]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize};
|
|||||||
use ownedbytes::OwnedBytes;
|
use ownedbytes::OwnedBytes;
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||||
|
|
||||||
use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess};
|
use crate::{Column, FastFieldCodec, FastFieldCodecType};
|
||||||
|
|
||||||
/// Depending on the field type, a different
|
/// Depending on the field type, a different
|
||||||
/// fast field is required.
|
/// fast field is required.
|
||||||
@@ -57,7 +57,7 @@ impl FixedSize for LinearFooter {
|
|||||||
const SIZE_IN_BYTES: usize = 56;
|
const SIZE_IN_BYTES: usize = 56;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FastFieldDataAccess for LinearReader {
|
impl Column for LinearReader {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_val(&self, doc: u64) -> u64 {
|
fn get_val(&self, doc: u64) -> u64 {
|
||||||
let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
|
let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
|
||||||
@@ -143,10 +143,7 @@ impl FastFieldCodec for LinearCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new fast field serializer.
|
/// Creates a new fast field serializer.
|
||||||
fn serialize(
|
fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> {
|
||||||
write: &mut impl Write,
|
|
||||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
|
||||||
) -> io::Result<()> {
|
|
||||||
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
|
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
|
||||||
|
|
||||||
let first_val = fastfield_accessor.get_val(0);
|
let first_val = fastfield_accessor.get_val(0);
|
||||||
@@ -196,7 +193,7 @@ impl FastFieldCodec for LinearCodec {
|
|||||||
/// estimation for linear interpolation is hard because, you don't know
|
/// estimation for linear interpolation is hard because, you don't know
|
||||||
/// where the local maxima for the deviation of the calculated value are and
|
/// where the local maxima for the deviation of the calculated value are and
|
||||||
/// the offset to shift all values to >=0 is also unknown.
|
/// the offset to shift all values to >=0 is also unknown.
|
||||||
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option<f32> {
|
fn estimate(fastfield_accessor: &impl Column) -> Option<f32> {
|
||||||
if fastfield_accessor.num_vals() < 3 {
|
if fastfield_accessor.num_vals() < 3 {
|
||||||
return None; // disable compressor for this case
|
return None; // disable compressor for this case
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -14,7 +15,7 @@ use crate::aggregation::intermediate_agg_result::{
|
|||||||
IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry,
|
IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry,
|
||||||
};
|
};
|
||||||
use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector;
|
use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector;
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
|
use crate::fastfield::DynamicFastFieldReader;
|
||||||
use crate::schema::Type;
|
use crate::schema::Type;
|
||||||
use crate::{DocId, TantivyError};
|
use crate::{DocId, TantivyError};
|
||||||
|
|
||||||
@@ -331,10 +332,10 @@ impl SegmentHistogramCollector {
|
|||||||
.expect("unexpected fast field cardinatility");
|
.expect("unexpected fast field cardinatility");
|
||||||
let mut iter = doc.chunks_exact(4);
|
let mut iter = doc.chunks_exact(4);
|
||||||
for docs in iter.by_ref() {
|
for docs in iter.by_ref() {
|
||||||
let val0 = self.f64_from_fastfield_u64(accessor.get(docs[0]));
|
let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0] as u64));
|
||||||
let val1 = self.f64_from_fastfield_u64(accessor.get(docs[1]));
|
let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1] as u64));
|
||||||
let val2 = self.f64_from_fastfield_u64(accessor.get(docs[2]));
|
let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2] as u64));
|
||||||
let val3 = self.f64_from_fastfield_u64(accessor.get(docs[3]));
|
let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3] as u64));
|
||||||
|
|
||||||
let bucket_pos0 = get_bucket_num(val0);
|
let bucket_pos0 = get_bucket_num(val0);
|
||||||
let bucket_pos1 = get_bucket_num(val1);
|
let bucket_pos1 = get_bucket_num(val1);
|
||||||
@@ -370,8 +371,8 @@ impl SegmentHistogramCollector {
|
|||||||
&bucket_with_accessor.sub_aggregation,
|
&bucket_with_accessor.sub_aggregation,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
for doc in iter.remainder() {
|
for &doc in iter.remainder() {
|
||||||
let val = f64_from_fastfield_u64(accessor.get(*doc), &self.field_type);
|
let val = f64_from_fastfield_u64(accessor.get_val(doc as u64), &self.field_type);
|
||||||
if !bounds.contains(val) {
|
if !bounds.contains(val) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -382,7 +383,7 @@ impl SegmentHistogramCollector {
|
|||||||
self.buckets[bucket_pos].key,
|
self.buckets[bucket_pos].key,
|
||||||
get_bucket_val(val, self.interval, self.offset) as f64
|
get_bucket_val(val, self.interval, self.offset) as f64
|
||||||
);
|
);
|
||||||
self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?;
|
self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
|
||||||
}
|
}
|
||||||
if force_flush {
|
if force_flush {
|
||||||
if let Some(sub_aggregations) = self.sub_aggregations.as_mut() {
|
if let Some(sub_aggregations) = self.sub_aggregations.as_mut() {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -12,7 +13,6 @@ use crate::aggregation::intermediate_agg_result::{
|
|||||||
};
|
};
|
||||||
use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
|
use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
|
||||||
use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey};
|
use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey};
|
||||||
use crate::fastfield::FastFieldReader;
|
|
||||||
use crate::schema::Type;
|
use crate::schema::Type;
|
||||||
use crate::{DocId, TantivyError};
|
use crate::{DocId, TantivyError};
|
||||||
|
|
||||||
@@ -264,10 +264,10 @@ impl SegmentRangeCollector {
|
|||||||
.as_single()
|
.as_single()
|
||||||
.expect("unexpected fast field cardinatility");
|
.expect("unexpected fast field cardinatility");
|
||||||
for docs in iter.by_ref() {
|
for docs in iter.by_ref() {
|
||||||
let val1 = accessor.get(docs[0]);
|
let val1 = accessor.get_val(docs[0] as u64);
|
||||||
let val2 = accessor.get(docs[1]);
|
let val2 = accessor.get_val(docs[1] as u64);
|
||||||
let val3 = accessor.get(docs[2]);
|
let val3 = accessor.get_val(docs[2] as u64);
|
||||||
let val4 = accessor.get(docs[3]);
|
let val4 = accessor.get_val(docs[3] as u64);
|
||||||
let bucket_pos1 = self.get_bucket_pos(val1);
|
let bucket_pos1 = self.get_bucket_pos(val1);
|
||||||
let bucket_pos2 = self.get_bucket_pos(val2);
|
let bucket_pos2 = self.get_bucket_pos(val2);
|
||||||
let bucket_pos3 = self.get_bucket_pos(val3);
|
let bucket_pos3 = self.get_bucket_pos(val3);
|
||||||
@@ -278,10 +278,10 @@ impl SegmentRangeCollector {
|
|||||||
self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?;
|
self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?;
|
||||||
self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?;
|
self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?;
|
||||||
}
|
}
|
||||||
for doc in iter.remainder() {
|
for &doc in iter.remainder() {
|
||||||
let val = accessor.get(*doc);
|
let val = accessor.get_val(doc as u64);
|
||||||
let bucket_pos = self.get_bucket_pos(val);
|
let bucket_pos = self.get_bucket_pos(val);
|
||||||
self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?;
|
self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
|
||||||
}
|
}
|
||||||
if force_flush {
|
if force_flush {
|
||||||
for bucket in &mut self.buckets {
|
for bucket in &mut self.buckets {
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::aggregation::f64_from_fastfield_u64;
|
use crate::aggregation::f64_from_fastfield_u64;
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
|
use crate::fastfield::DynamicFastFieldReader;
|
||||||
use crate::schema::Type;
|
use crate::schema::Type;
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
|
|
||||||
@@ -60,10 +61,10 @@ impl SegmentAverageCollector {
|
|||||||
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
|
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
|
||||||
let mut iter = doc.chunks_exact(4);
|
let mut iter = doc.chunks_exact(4);
|
||||||
for docs in iter.by_ref() {
|
for docs in iter.by_ref() {
|
||||||
let val1 = field.get(docs[0]);
|
let val1 = field.get_val(docs[0] as u64);
|
||||||
let val2 = field.get(docs[1]);
|
let val2 = field.get_val(docs[1] as u64);
|
||||||
let val3 = field.get(docs[2]);
|
let val3 = field.get_val(docs[2] as u64);
|
||||||
let val4 = field.get(docs[3]);
|
let val4 = field.get_val(docs[3] as u64);
|
||||||
let val1 = f64_from_fastfield_u64(val1, &self.field_type);
|
let val1 = f64_from_fastfield_u64(val1, &self.field_type);
|
||||||
let val2 = f64_from_fastfield_u64(val2, &self.field_type);
|
let val2 = f64_from_fastfield_u64(val2, &self.field_type);
|
||||||
let val3 = f64_from_fastfield_u64(val3, &self.field_type);
|
let val3 = f64_from_fastfield_u64(val3, &self.field_type);
|
||||||
@@ -73,8 +74,8 @@ impl SegmentAverageCollector {
|
|||||||
self.data.collect(val3);
|
self.data.collect(val3);
|
||||||
self.data.collect(val4);
|
self.data.collect(val4);
|
||||||
}
|
}
|
||||||
for doc in iter.remainder() {
|
for &doc in iter.remainder() {
|
||||||
let val = field.get(*doc);
|
let val = field.get_val(doc as u64);
|
||||||
let val = f64_from_fastfield_u64(val, &self.field_type);
|
let val = f64_from_fastfield_u64(val, &self.field_type);
|
||||||
self.data.collect(val);
|
self.data.collect(val);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
|
use fastfield_codecs::Column;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::aggregation::f64_from_fastfield_u64;
|
use crate::aggregation::f64_from_fastfield_u64;
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
|
use crate::fastfield::DynamicFastFieldReader;
|
||||||
use crate::schema::Type;
|
use crate::schema::Type;
|
||||||
use crate::{DocId, TantivyError};
|
use crate::{DocId, TantivyError};
|
||||||
|
|
||||||
@@ -166,10 +167,10 @@ impl SegmentStatsCollector {
|
|||||||
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
|
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
|
||||||
let mut iter = doc.chunks_exact(4);
|
let mut iter = doc.chunks_exact(4);
|
||||||
for docs in iter.by_ref() {
|
for docs in iter.by_ref() {
|
||||||
let val1 = field.get(docs[0]);
|
let val1 = field.get_val(docs[0] as u64);
|
||||||
let val2 = field.get(docs[1]);
|
let val2 = field.get_val(docs[1] as u64);
|
||||||
let val3 = field.get(docs[2]);
|
let val3 = field.get_val(docs[2] as u64);
|
||||||
let val4 = field.get(docs[3]);
|
let val4 = field.get_val(docs[3] as u64);
|
||||||
let val1 = f64_from_fastfield_u64(val1, &self.field_type);
|
let val1 = f64_from_fastfield_u64(val1, &self.field_type);
|
||||||
let val2 = f64_from_fastfield_u64(val2, &self.field_type);
|
let val2 = f64_from_fastfield_u64(val2, &self.field_type);
|
||||||
let val3 = f64_from_fastfield_u64(val3, &self.field_type);
|
let val3 = f64_from_fastfield_u64(val3, &self.field_type);
|
||||||
@@ -179,8 +180,8 @@ impl SegmentStatsCollector {
|
|||||||
self.stats.collect(val3);
|
self.stats.collect(val3);
|
||||||
self.stats.collect(val4);
|
self.stats.collect(val4);
|
||||||
}
|
}
|
||||||
for doc in iter.remainder() {
|
for &doc in iter.remainder() {
|
||||||
let val = field.get(*doc);
|
let val = field.get_val(doc as u64);
|
||||||
let val = f64_from_fastfield_u64(val, &self.field_type);
|
let val = f64_from_fastfield_u64(val, &self.field_type);
|
||||||
self.stats.collect(val);
|
self.stats.collect(val);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,8 +11,10 @@
|
|||||||
// Importing tantivy...
|
// Importing tantivy...
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::collector::{Collector, SegmentCollector};
|
use crate::collector::{Collector, SegmentCollector};
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
|
use crate::fastfield::{DynamicFastFieldReader, FastValue};
|
||||||
use crate::schema::Field;
|
use crate::schema::Field;
|
||||||
use crate::{Score, SegmentReader, TantivyError};
|
use crate::{Score, SegmentReader, TantivyError};
|
||||||
|
|
||||||
@@ -174,7 +176,7 @@ where
|
|||||||
type Fruit = TSegmentCollector::Fruit;
|
type Fruit = TSegmentCollector::Fruit;
|
||||||
|
|
||||||
fn collect(&mut self, doc: u32, score: Score) {
|
fn collect(&mut self, doc: u32, score: Score) {
|
||||||
let value = self.fast_field_reader.get(doc);
|
let value = self.fast_field_reader.get_val(doc as u64);
|
||||||
if (self.predicate)(value) {
|
if (self.predicate)(value) {
|
||||||
self.segment_collector.collect(doc, score)
|
self.segment_collector.collect(doc, score)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::collector::{Collector, SegmentCollector};
|
use crate::collector::{Collector, SegmentCollector};
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
|
use crate::fastfield::{DynamicFastFieldReader, FastValue};
|
||||||
use crate::schema::{Field, Type};
|
use crate::schema::{Field, Type};
|
||||||
use crate::{DocId, Score};
|
use crate::{DocId, Score};
|
||||||
|
|
||||||
@@ -91,7 +92,7 @@ impl SegmentCollector for SegmentHistogramCollector {
|
|||||||
type Fruit = Vec<u64>;
|
type Fruit = Vec<u64>;
|
||||||
|
|
||||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||||
let value = self.ff_reader.get(doc);
|
let value = self.ff_reader.get_val(doc as u64);
|
||||||
self.histogram_computer.add_value(value);
|
self.histogram_computer.add_value(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::collector::{Count, FilterCollector, TopDocs};
|
use crate::collector::{Count, FilterCollector, TopDocs};
|
||||||
use crate::core::SegmentReader;
|
use crate::core::SegmentReader;
|
||||||
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
|
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader};
|
||||||
use crate::query::{AllQuery, QueryParser};
|
use crate::query::{AllQuery, QueryParser};
|
||||||
use crate::schema::{Field, Schema, FAST, TEXT};
|
use crate::schema::{Field, Schema, FAST, TEXT};
|
||||||
use crate::time::format_description::well_known::Rfc3339;
|
use crate::time::format_description::well_known::Rfc3339;
|
||||||
@@ -197,7 +199,7 @@ impl SegmentCollector for FastFieldSegmentCollector {
|
|||||||
type Fruit = Vec<u64>;
|
type Fruit = Vec<u64>;
|
||||||
|
|
||||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||||
let val = self.reader.get(doc);
|
let val = self.reader.get_val(doc as u64);
|
||||||
self.vals.push(val);
|
self.vals.push(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ use std::collections::BinaryHeap;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use super::Collector;
|
use super::Collector;
|
||||||
use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
|
use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
|
||||||
use crate::collector::top_collector::{ComparableDoc, TopCollector, TopSegmentCollector};
|
use crate::collector::top_collector::{ComparableDoc, TopCollector, TopSegmentCollector};
|
||||||
@@ -9,7 +11,7 @@ use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector;
|
|||||||
use crate::collector::{
|
use crate::collector::{
|
||||||
CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
|
CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
|
||||||
};
|
};
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
|
use crate::fastfield::{DynamicFastFieldReader, FastValue};
|
||||||
use crate::query::Weight;
|
use crate::query::Weight;
|
||||||
use crate::schema::Field;
|
use crate::schema::Field;
|
||||||
use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
|
use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
|
||||||
@@ -134,7 +136,7 @@ struct ScorerByFastFieldReader {
|
|||||||
|
|
||||||
impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
|
impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
|
||||||
fn score(&mut self, doc: DocId) -> u64 {
|
fn score(&mut self, doc: DocId) -> u64 {
|
||||||
self.ff_reader.get(doc)
|
self.ff_reader.get_val(doc as u64)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::directory::{FileSlice, OwnedBytes};
|
use crate::directory::{FileSlice, OwnedBytes};
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, MultiValueLength};
|
use crate::fastfield::{DynamicFastFieldReader, MultiValueLength};
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
|
|
||||||
/// Reader for byte array fast fields
|
/// Reader for byte array fast fields
|
||||||
@@ -28,8 +30,9 @@ impl BytesFastFieldReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn range(&self, doc: DocId) -> (usize, usize) {
|
fn range(&self, doc: DocId) -> (usize, usize) {
|
||||||
let start = self.idx_reader.get(doc) as usize;
|
let idx = doc as u64;
|
||||||
let stop = self.idx_reader.get(doc + 1) as usize;
|
let start = self.idx_reader.get_val(idx) as usize;
|
||||||
|
let stop = self.idx_reader.get_val(idx + 1) as usize;
|
||||||
(start, stop)
|
(start, stop)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use std::num::NonZeroU64;
|
|||||||
|
|
||||||
use common::BinarySerializable;
|
use common::BinarySerializable;
|
||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess};
|
use fastfield_codecs::{Column, FastFieldCodec};
|
||||||
use ownedbytes::OwnedBytes;
|
use ownedbytes::OwnedBytes;
|
||||||
|
|
||||||
pub const GCD_DEFAULT: u64 = 1;
|
pub const GCD_DEFAULT: u64 = 1;
|
||||||
@@ -12,7 +12,7 @@ pub const GCD_DEFAULT: u64 = 1;
|
|||||||
///
|
///
|
||||||
/// Holds the data and the codec to the read the data.
|
/// Holds the data and the codec to the read the data.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct GCDReader<CodecReader: FastFieldDataAccess> {
|
pub struct GCDReader<CodecReader: Column> {
|
||||||
gcd_params: GCDParams,
|
gcd_params: GCDParams,
|
||||||
reader: CodecReader,
|
reader: CodecReader,
|
||||||
}
|
}
|
||||||
@@ -60,7 +60,7 @@ pub fn open_gcd_from_bytes<WrappedCodec: FastFieldCodec>(
|
|||||||
Ok(GCDReader { gcd_params, reader })
|
Ok(GCDReader { gcd_params, reader })
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: FastFieldDataAccess + Clone> FastFieldDataAccess for GCDReader<C> {
|
impl<C: Column + Clone> Column for GCDReader<C> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_val(&self, doc: u64) -> u64 {
|
fn get_val(&self, doc: u64) -> u64 {
|
||||||
let val = self.reader.get_val(doc);
|
let val = self.reader.get_val(doc);
|
||||||
@@ -137,6 +137,7 @@ mod tests {
|
|||||||
use std::time::{Duration, SystemTime};
|
use std::time::{Duration, SystemTime};
|
||||||
|
|
||||||
use common::HasLen;
|
use common::HasLen;
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
|
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
|
||||||
use crate::fastfield::gcd::compute_gcd;
|
use crate::fastfield::gcd::compute_gcd;
|
||||||
@@ -144,7 +145,7 @@ mod tests {
|
|||||||
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
|
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
|
||||||
use crate::fastfield::{
|
use crate::fastfield::{
|
||||||
find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecType,
|
find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecType,
|
||||||
FastFieldReader, FastFieldsWriter, ALL_CODECS,
|
FastFieldsWriter, ALL_CODECS,
|
||||||
};
|
};
|
||||||
use crate::schema::{Cardinality, Schema};
|
use crate::schema::{Cardinality, Schema};
|
||||||
use crate::{DateOptions, DatePrecision, DateTime, Directory};
|
use crate::{DateOptions, DatePrecision, DateTime, Directory};
|
||||||
@@ -188,9 +189,9 @@ mod tests {
|
|||||||
let file = composite_file.open_read(*FIELD).unwrap();
|
let file = composite_file.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<i64>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<i64>::open(file)?;
|
||||||
|
|
||||||
assert_eq!(fast_field_reader.get(0), -4000i64);
|
assert_eq!(fast_field_reader.get_val(0), -4000i64);
|
||||||
assert_eq!(fast_field_reader.get(1), -3000i64);
|
assert_eq!(fast_field_reader.get_val(1), -3000i64);
|
||||||
assert_eq!(fast_field_reader.get(2), -2000i64);
|
assert_eq!(fast_field_reader.get_val(2), -2000i64);
|
||||||
assert_eq!(fast_field_reader.max_value(), (num_vals as i64 - 5) * 1000);
|
assert_eq!(fast_field_reader.max_value(), (num_vals as i64 - 5) * 1000);
|
||||||
assert_eq!(fast_field_reader.min_value(), -4000i64);
|
assert_eq!(fast_field_reader.min_value(), -4000i64);
|
||||||
let file = directory.open_read(path).unwrap();
|
let file = directory.open_read(path).unwrap();
|
||||||
@@ -229,9 +230,9 @@ mod tests {
|
|||||||
let composite_file = CompositeFile::open(&file)?;
|
let composite_file = CompositeFile::open(&file)?;
|
||||||
let file = composite_file.open_read(*FIELD).unwrap();
|
let file = composite_file.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
||||||
assert_eq!(fast_field_reader.get(0), 1000u64);
|
assert_eq!(fast_field_reader.get_val(0), 1000u64);
|
||||||
assert_eq!(fast_field_reader.get(1), 2000u64);
|
assert_eq!(fast_field_reader.get_val(1), 2000u64);
|
||||||
assert_eq!(fast_field_reader.get(2), 3000u64);
|
assert_eq!(fast_field_reader.get_val(2), 3000u64);
|
||||||
assert_eq!(fast_field_reader.max_value(), num_vals as u64 * 1000);
|
assert_eq!(fast_field_reader.max_value(), num_vals as u64 * 1000);
|
||||||
assert_eq!(fast_field_reader.min_value(), 1000u64);
|
assert_eq!(fast_field_reader.min_value(), 1000u64);
|
||||||
let file = directory.open_read(path).unwrap();
|
let file = directory.open_read(path).unwrap();
|
||||||
@@ -258,9 +259,9 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
pub fn test_fastfield2() {
|
pub fn test_fastfield2() {
|
||||||
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||||
assert_eq!(test_fastfield.get(0), 100);
|
assert_eq!(test_fastfield.get_val(0), 100);
|
||||||
assert_eq!(test_fastfield.get(1), 200);
|
assert_eq!(test_fastfield.get_val(1), 200);
|
||||||
assert_eq!(test_fastfield.get(2), 300);
|
assert_eq!(test_fastfield.get_val(2), 300);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -325,9 +326,9 @@ mod tests {
|
|||||||
let len = file.len();
|
let len = file.len();
|
||||||
let test_fastfield = DynamicFastFieldReader::<DateTime>::open(file)?;
|
let test_fastfield = DynamicFastFieldReader::<DateTime>::open(file)?;
|
||||||
|
|
||||||
assert_eq!(test_fastfield.get(0), time1.truncate(precision));
|
assert_eq!(test_fastfield.get_val(0), time1.truncate(precision));
|
||||||
assert_eq!(test_fastfield.get(1), time2.truncate(precision));
|
assert_eq!(test_fastfield.get_val(1), time2.truncate(precision));
|
||||||
assert_eq!(test_fastfield.get(2), time3.truncate(precision));
|
assert_eq!(test_fastfield.get_val(2), time3.truncate(precision));
|
||||||
Ok(len)
|
Ok(len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -28,10 +28,10 @@ pub use self::error::{FastFieldNotAvailableError, Result};
|
|||||||
pub use self::facet_reader::FacetReader;
|
pub use self::facet_reader::FacetReader;
|
||||||
pub(crate) use self::gcd::{find_gcd, GCDReader, GCD_DEFAULT};
|
pub(crate) use self::gcd::{find_gcd, GCDReader, GCD_DEFAULT};
|
||||||
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
|
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
|
||||||
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
|
pub use self::reader::DynamicFastFieldReader;
|
||||||
pub use self::readers::FastFieldReaders;
|
pub use self::readers::FastFieldReaders;
|
||||||
pub(crate) use self::readers::{type_and_cardinality, FastType};
|
pub(crate) use self::readers::{type_and_cardinality, FastType};
|
||||||
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
|
pub use self::serializer::{Column, CompositeFastFieldSerializer, FastFieldStats};
|
||||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||||
use crate::schema::{Cardinality, FieldType, Type, Value};
|
use crate::schema::{Cardinality, FieldType, Type, Value};
|
||||||
use crate::{DateTime, DocId};
|
use crate::{DateTime, DocId};
|
||||||
@@ -298,9 +298,9 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
pub fn test_fastfield() {
|
pub fn test_fastfield() {
|
||||||
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||||
assert_eq!(test_fastfield.get(0), 100);
|
assert_eq!(test_fastfield.get_val(0u64), 100);
|
||||||
assert_eq!(test_fastfield.get(1), 200);
|
assert_eq!(test_fastfield.get_val(1u64), 200);
|
||||||
assert_eq!(test_fastfield.get(2), 300);
|
assert_eq!(test_fastfield.get_val(2u64), 300);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -330,9 +330,9 @@ mod tests {
|
|||||||
let composite_file = CompositeFile::open(&file)?;
|
let composite_file = CompositeFile::open(&file)?;
|
||||||
let file = composite_file.open_read(*FIELD).unwrap();
|
let file = composite_file.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
||||||
assert_eq!(fast_field_reader.get(0), 13u64);
|
assert_eq!(fast_field_reader.get_val(0), 13u64);
|
||||||
assert_eq!(fast_field_reader.get(1), 14u64);
|
assert_eq!(fast_field_reader.get_val(1), 14u64);
|
||||||
assert_eq!(fast_field_reader.get(2), 2u64);
|
assert_eq!(fast_field_reader.get_val(2), 2u64);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,15 +362,15 @@ mod tests {
|
|||||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
||||||
assert_eq!(fast_field_reader.get(0), 4u64);
|
assert_eq!(fast_field_reader.get_val(0), 4u64);
|
||||||
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
|
assert_eq!(fast_field_reader.get_val(1), 14_082_001u64);
|
||||||
assert_eq!(fast_field_reader.get(2), 3_052u64);
|
assert_eq!(fast_field_reader.get_val(2), 3_052u64);
|
||||||
assert_eq!(fast_field_reader.get(3), 9002u64);
|
assert_eq!(fast_field_reader.get_val(3), 9002u64);
|
||||||
assert_eq!(fast_field_reader.get(4), 15_001u64);
|
assert_eq!(fast_field_reader.get_val(4), 15_001u64);
|
||||||
assert_eq!(fast_field_reader.get(5), 777u64);
|
assert_eq!(fast_field_reader.get_val(5), 777u64);
|
||||||
assert_eq!(fast_field_reader.get(6), 1_002u64);
|
assert_eq!(fast_field_reader.get_val(6), 1_002u64);
|
||||||
assert_eq!(fast_field_reader.get(7), 1_501u64);
|
assert_eq!(fast_field_reader.get_val(7), 1_501u64);
|
||||||
assert_eq!(fast_field_reader.get(8), 215u64);
|
assert_eq!(fast_field_reader.get_val(8), 215u64);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -399,7 +399,7 @@ mod tests {
|
|||||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
||||||
for doc in 0..10_000 {
|
for doc in 0..10_000 {
|
||||||
assert_eq!(fast_field_reader.get(doc), 100_000u64);
|
assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -430,10 +430,10 @@ mod tests {
|
|||||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
||||||
assert_eq!(fast_field_reader.get(0), 0u64);
|
assert_eq!(fast_field_reader.get_val(0), 0u64);
|
||||||
for doc in 1..10_001 {
|
for doc in 1..10_001 {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
fast_field_reader.get(doc),
|
fast_field_reader.get_val(doc),
|
||||||
5_000_000_000_000_000_000u64 + doc as u64 - 1u64
|
5_000_000_000_000_000_000u64 + doc as u64 - 1u64
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -475,7 +475,7 @@ mod tests {
|
|||||||
assert_eq!(fast_field_reader.min_value(), -100i64);
|
assert_eq!(fast_field_reader.min_value(), -100i64);
|
||||||
assert_eq!(fast_field_reader.max_value(), 9_999i64);
|
assert_eq!(fast_field_reader.max_value(), 9_999i64);
|
||||||
for (doc, i) in (-100i64..10_000i64).enumerate() {
|
for (doc, i) in (-100i64..10_000i64).enumerate() {
|
||||||
assert_eq!(fast_field_reader.get(doc as u32), i);
|
assert_eq!(fast_field_reader.get_val(doc as u64), i);
|
||||||
}
|
}
|
||||||
let mut buffer = vec![0i64; 100];
|
let mut buffer = vec![0i64; 100];
|
||||||
fast_field_reader.get_range(53, &mut buffer[..]);
|
fast_field_reader.get_range(53, &mut buffer[..]);
|
||||||
@@ -511,7 +511,7 @@ mod tests {
|
|||||||
let fast_fields_composite = CompositeFile::open(&file).unwrap();
|
let fast_fields_composite = CompositeFile::open(&file).unwrap();
|
||||||
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
let data = fast_fields_composite.open_read(i64_field).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<i64>::open(data)?;
|
let fast_field_reader = DynamicFastFieldReader::<i64>::open(data)?;
|
||||||
assert_eq!(fast_field_reader.get(0u32), 0i64);
|
assert_eq!(fast_field_reader.get_val(0), 0i64);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -551,7 +551,7 @@ mod tests {
|
|||||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
let fast_field_reader = DynamicFastFieldReader::<u64>::open(data)?;
|
||||||
|
|
||||||
for a in 0..n {
|
for a in 0..n {
|
||||||
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
|
assert_eq!(fast_field_reader.get_val(a as u64), permutation[a as usize]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -842,19 +842,19 @@ mod tests {
|
|||||||
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
|
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
|
||||||
let mut dates = vec![];
|
let mut dates = vec![];
|
||||||
{
|
{
|
||||||
assert_eq!(date_fast_field.get(0u32).into_timestamp_micros(), 1i64);
|
assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
|
||||||
dates_fast_field.get_vals(0u32, &mut dates);
|
dates_fast_field.get_vals(0u32, &mut dates);
|
||||||
assert_eq!(dates.len(), 2);
|
assert_eq!(dates.len(), 2);
|
||||||
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
|
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
|
||||||
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
|
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
assert_eq!(date_fast_field.get(1u32).into_timestamp_micros(), 4i64);
|
assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
|
||||||
dates_fast_field.get_vals(1u32, &mut dates);
|
dates_fast_field.get_vals(1u32, &mut dates);
|
||||||
assert!(dates.is_empty());
|
assert!(dates.is_empty());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
assert_eq!(date_fast_field.get(2u32).into_timestamp_micros(), 0i64);
|
assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
|
||||||
dates_fast_field.get_vals(2u32, &mut dates);
|
dates_fast_field.get_vals(2u32, &mut dates);
|
||||||
assert_eq!(dates.len(), 2);
|
assert_eq!(dates.len(), 2);
|
||||||
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
|
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
|
||||||
@@ -866,10 +866,10 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
pub fn test_fastfield_bool() {
|
pub fn test_fastfield_bool() {
|
||||||
let test_fastfield = DynamicFastFieldReader::<bool>::from(vec![true, false, true, false]);
|
let test_fastfield = DynamicFastFieldReader::<bool>::from(vec![true, false, true, false]);
|
||||||
assert_eq!(test_fastfield.get(0), true);
|
assert_eq!(test_fastfield.get_val(0), true);
|
||||||
assert_eq!(test_fastfield.get(1), false);
|
assert_eq!(test_fastfield.get_val(1), false);
|
||||||
assert_eq!(test_fastfield.get(2), true);
|
assert_eq!(test_fastfield.get_val(2), true);
|
||||||
assert_eq!(test_fastfield.get(3), false);
|
assert_eq!(test_fastfield.get_val(3), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -900,10 +900,10 @@ mod tests {
|
|||||||
let composite_file = CompositeFile::open(&file)?;
|
let composite_file = CompositeFile::open(&file)?;
|
||||||
let file = composite_file.open_read(field).unwrap();
|
let file = composite_file.open_read(field).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||||
assert_eq!(fast_field_reader.get(0), true);
|
assert_eq!(fast_field_reader.get_val(0), true);
|
||||||
assert_eq!(fast_field_reader.get(1), false);
|
assert_eq!(fast_field_reader.get_val(1), false);
|
||||||
assert_eq!(fast_field_reader.get(2), true);
|
assert_eq!(fast_field_reader.get_val(2), true);
|
||||||
assert_eq!(fast_field_reader.get(3), false);
|
assert_eq!(fast_field_reader.get_val(3), false);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -937,8 +937,8 @@ mod tests {
|
|||||||
let file = composite_file.open_read(field).unwrap();
|
let file = composite_file.open_read(field).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||||
for i in 0..25 {
|
for i in 0..25 {
|
||||||
assert_eq!(fast_field_reader.get(i * 2), true);
|
assert_eq!(fast_field_reader.get_val(i * 2), true);
|
||||||
assert_eq!(fast_field_reader.get(i * 2 + 1), false);
|
assert_eq!(fast_field_reader.get_val(i * 2 + 1), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -970,7 +970,7 @@ mod tests {
|
|||||||
let composite_file = CompositeFile::open(&file)?;
|
let composite_file = CompositeFile::open(&file)?;
|
||||||
let file = composite_file.open_read(field).unwrap();
|
let file = composite_file.open_read(field).unwrap();
|
||||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||||
assert_eq!(fast_field_reader.get(0), false);
|
assert_eq!(fast_field_reader.get_val(0), false);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue, MultiValueLength};
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
|
use crate::fastfield::{DynamicFastFieldReader, FastValue, MultiValueLength};
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
|
|
||||||
/// Reader for a multivalued `u64` fast field.
|
/// Reader for a multivalued `u64` fast field.
|
||||||
@@ -31,8 +33,9 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
|
|||||||
/// to the given document are `start..end`.
|
/// to the given document are `start..end`.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn range(&self, doc: DocId) -> Range<u64> {
|
fn range(&self, doc: DocId) -> Range<u64> {
|
||||||
let start = self.idx_reader.get(doc);
|
let idx = doc as u64;
|
||||||
let end = self.idx_reader.get(doc + 1);
|
let start = self.idx_reader.get_val(idx);
|
||||||
|
let end = self.idx_reader.get_val(idx + 1);
|
||||||
start..end
|
start..end
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use common::BinarySerializable;
|
|||||||
use fastfield_codecs::bitpacked::{BitpackedCodec, BitpackedReader};
|
use fastfield_codecs::bitpacked::{BitpackedCodec, BitpackedReader};
|
||||||
use fastfield_codecs::blockwise_linear::{BlockwiseLinearCodec, BlockwiseLinearReader};
|
use fastfield_codecs::blockwise_linear::{BlockwiseLinearCodec, BlockwiseLinearReader};
|
||||||
use fastfield_codecs::linear::{LinearCodec, LinearReader};
|
use fastfield_codecs::linear::{LinearCodec, LinearReader};
|
||||||
use fastfield_codecs::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess};
|
use fastfield_codecs::{Column, FastFieldCodec, FastFieldCodecType};
|
||||||
|
|
||||||
use super::gcd::open_gcd_from_bytes;
|
use super::gcd::open_gcd_from_bytes;
|
||||||
use super::FastValue;
|
use super::FastValue;
|
||||||
@@ -14,48 +14,6 @@ use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirec
|
|||||||
use crate::error::DataCorruption;
|
use crate::error::DataCorruption;
|
||||||
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter, GCDReader};
|
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter, GCDReader};
|
||||||
use crate::schema::{Schema, FAST};
|
use crate::schema::{Schema, FAST};
|
||||||
use crate::DocId;
|
|
||||||
|
|
||||||
/// FastFieldReader is the trait to access fast field data.
|
|
||||||
pub trait FastFieldReader<Item: FastValue>: Clone {
|
|
||||||
/// Return the value associated to the given document.
|
|
||||||
///
|
|
||||||
/// This accessor should return as fast as possible.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// May panic if `doc` is greater than the segment
|
|
||||||
fn get(&self, doc: DocId) -> Item;
|
|
||||||
|
|
||||||
/// Fills an output buffer with the fast field values
|
|
||||||
/// associated with the `DocId` going from
|
|
||||||
/// `start` to `start + output.len()`.
|
|
||||||
///
|
|
||||||
/// Regardless of the type of `Item`, this method works
|
|
||||||
/// - transmuting the output array
|
|
||||||
/// - extracting the `Item`s as if they were `u64`
|
|
||||||
/// - possibly converting the `u64` value to the right type.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// May panic if `start + output.len()` is greater than
|
|
||||||
/// the segment's `maxdoc`.
|
|
||||||
fn get_range(&self, start: u64, output: &mut [Item]);
|
|
||||||
|
|
||||||
/// Returns the minimum value for this fast field.
|
|
||||||
///
|
|
||||||
/// The min value does not take in account of possible
|
|
||||||
/// deleted document, and should be considered as a lower bound
|
|
||||||
/// of the actual minimum value.
|
|
||||||
fn min_value(&self) -> Item;
|
|
||||||
|
|
||||||
/// Returns the maximum value for this fast field.
|
|
||||||
///
|
|
||||||
/// The max value does not take in account of possible
|
|
||||||
/// deleted document, and should be considered as an upper bound
|
|
||||||
/// of the actual maximum value.
|
|
||||||
fn max_value(&self) -> Item;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
/// DynamicFastFieldReader wraps different readers to access
|
/// DynamicFastFieldReader wraps different readers to access
|
||||||
@@ -127,16 +85,16 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
|
impl<Item: FastValue> Column<Item> for DynamicFastFieldReader<Item> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get(&self, doc: DocId) -> Item {
|
fn get_val(&self, idx: u64) -> Item {
|
||||||
match self {
|
match self {
|
||||||
Self::Bitpacked(reader) => reader.get(doc),
|
Self::Bitpacked(reader) => reader.get_val(idx),
|
||||||
Self::Linear(reader) => reader.get(doc),
|
Self::Linear(reader) => reader.get_val(idx),
|
||||||
Self::BlockwiseLinear(reader) => reader.get(doc),
|
Self::BlockwiseLinear(reader) => reader.get_val(idx),
|
||||||
Self::BitpackedGCD(reader) => reader.get(doc),
|
Self::BitpackedGCD(reader) => reader.get_val(idx),
|
||||||
Self::LinearGCD(reader) => reader.get(doc),
|
Self::LinearGCD(reader) => reader.get_val(idx),
|
||||||
Self::BlockwiseLinearGCD(reader) => reader.get(doc),
|
Self::BlockwiseLinearGCD(reader) => reader.get_val(idx),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -170,6 +128,17 @@ impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
|
|||||||
Self::BlockwiseLinearGCD(reader) => reader.max_value(),
|
Self::BlockwiseLinearGCD(reader) => reader.max_value(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn num_vals(&self) -> u64 {
|
||||||
|
match self {
|
||||||
|
Self::Bitpacked(reader) => reader.num_vals(),
|
||||||
|
Self::Linear(reader) => reader.num_vals(),
|
||||||
|
Self::BlockwiseLinear(reader) => reader.num_vals(),
|
||||||
|
Self::BitpackedGCD(reader) => reader.num_vals(),
|
||||||
|
Self::LinearGCD(reader) => reader.num_vals(),
|
||||||
|
Self::BlockwiseLinearGCD(reader) => reader.num_vals(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Wrapper for accessing a fastfield.
|
/// Wrapper for accessing a fastfield.
|
||||||
@@ -192,10 +161,10 @@ impl<Item: FastValue, CodecReader> From<CodecReader>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Item: FastValue, D: FastFieldDataAccess> FastFieldReaderCodecWrapper<Item, D> {
|
impl<Item: FastValue, D: Column> FastFieldReaderCodecWrapper<Item, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn get_u64(&self, doc: u64) -> Item {
|
pub(crate) fn get_u64(&self, idx: u64) -> Item {
|
||||||
let data = self.reader.get_val(doc);
|
let data = self.reader.get_val(idx);
|
||||||
Item::from_u64(data)
|
Item::from_u64(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,9 +187,7 @@ impl<Item: FastValue, D: FastFieldDataAccess> FastFieldReaderCodecWrapper<Item,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Item: FastValue, C: FastFieldDataAccess + Clone> FastFieldReader<Item>
|
impl<Item: FastValue, C: Column + Clone> Column<Item> for FastFieldReaderCodecWrapper<Item, C> {
|
||||||
for FastFieldReaderCodecWrapper<Item, C>
|
|
||||||
{
|
|
||||||
/// Return the value associated to the given document.
|
/// Return the value associated to the given document.
|
||||||
///
|
///
|
||||||
/// This accessor should return as fast as possible.
|
/// This accessor should return as fast as possible.
|
||||||
@@ -229,8 +196,8 @@ impl<Item: FastValue, C: FastFieldDataAccess + Clone> FastFieldReader<Item>
|
|||||||
///
|
///
|
||||||
/// May panic if `doc` is greater than the segment
|
/// May panic if `doc` is greater than the segment
|
||||||
// `maxdoc`.
|
// `maxdoc`.
|
||||||
fn get(&self, doc: DocId) -> Item {
|
fn get_val(&self, idx: u64) -> Item {
|
||||||
self.get_u64(u64::from(doc))
|
self.get_u64(idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fills an output buffer with the fast field values
|
/// Fills an output buffer with the fast field values
|
||||||
@@ -267,6 +234,10 @@ impl<Item: FastValue, C: FastFieldDataAccess + Clone> FastFieldReader<Item>
|
|||||||
fn max_value(&self) -> Item {
|
fn max_value(&self) -> Item {
|
||||||
Item::from_u64(self.reader.max_value())
|
Item::from_u64(self.reader.max_value())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn num_vals(&self) -> u64 {
|
||||||
|
self.reader.num_vals()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
|
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ pub use fastfield_codecs::bitpacked::{BitpackedCodec, BitpackedSerializerLegacy}
|
|||||||
use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec;
|
use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec;
|
||||||
use fastfield_codecs::linear::LinearCodec;
|
use fastfield_codecs::linear::LinearCodec;
|
||||||
use fastfield_codecs::FastFieldCodecType;
|
use fastfield_codecs::FastFieldCodecType;
|
||||||
pub use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess, FastFieldStats};
|
pub use fastfield_codecs::{Column, FastFieldCodec, FastFieldStats};
|
||||||
|
|
||||||
use super::{find_gcd, ALL_CODECS, GCD_DEFAULT};
|
use super::{find_gcd, ALL_CODECS, GCD_DEFAULT};
|
||||||
use crate::directory::{CompositeWrite, WritePtr};
|
use crate::directory::{CompositeWrite, WritePtr};
|
||||||
@@ -65,7 +65,7 @@ impl From<FastFieldCodecType> for FastFieldCodecEnableCheck {
|
|||||||
// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait
|
// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait
|
||||||
// https://github.com/rust-lang/rust/pull/86176
|
// https://github.com/rust-lang/rust/pull/86176
|
||||||
fn codec_estimation<C: FastFieldCodec>(
|
fn codec_estimation<C: FastFieldCodec>(
|
||||||
fastfield_accessor: &impl FastFieldDataAccess,
|
fastfield_accessor: &impl Column,
|
||||||
estimations: &mut Vec<(f32, FastFieldCodecType)>,
|
estimations: &mut Vec<(f32, FastFieldCodecType)>,
|
||||||
) {
|
) {
|
||||||
if let Some(ratio) = C::estimate(fastfield_accessor) {
|
if let Some(ratio) = C::estimate(fastfield_accessor) {
|
||||||
@@ -97,7 +97,7 @@ impl CompositeFastFieldSerializer {
|
|||||||
pub fn create_auto_detect_u64_fast_field(
|
pub fn create_auto_detect_u64_fast_field(
|
||||||
&mut self,
|
&mut self,
|
||||||
field: Field,
|
field: Field,
|
||||||
fastfield_accessor: impl FastFieldDataAccess,
|
fastfield_accessor: impl Column,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
self.create_auto_detect_u64_fast_field_with_idx(field, fastfield_accessor, 0)
|
self.create_auto_detect_u64_fast_field_with_idx(field, fastfield_accessor, 0)
|
||||||
}
|
}
|
||||||
@@ -117,7 +117,7 @@ impl CompositeFastFieldSerializer {
|
|||||||
pub fn create_auto_detect_u64_fast_field_with_idx(
|
pub fn create_auto_detect_u64_fast_field_with_idx(
|
||||||
&mut self,
|
&mut self,
|
||||||
field: Field,
|
field: Field,
|
||||||
fastfield_accessor: impl FastFieldDataAccess,
|
fastfield_accessor: impl Column,
|
||||||
idx: usize,
|
idx: usize,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let min_value = fastfield_accessor.min_value();
|
let min_value = fastfield_accessor.min_value();
|
||||||
@@ -136,7 +136,7 @@ impl CompositeFastFieldSerializer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Self::write_header(field_write, FastFieldCodecType::Gcd)?;
|
Self::write_header(field_write, FastFieldCodecType::Gcd)?;
|
||||||
struct GCDWrappedFFAccess<T: FastFieldDataAccess> {
|
struct GCDWrappedFFAccess<T: Column> {
|
||||||
fastfield_accessor: T,
|
fastfield_accessor: T,
|
||||||
base_value: u64,
|
base_value: u64,
|
||||||
max_value: u64,
|
max_value: u64,
|
||||||
@@ -144,7 +144,7 @@ impl CompositeFastFieldSerializer {
|
|||||||
gcd: DividerU64,
|
gcd: DividerU64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: FastFieldDataAccess> FastFieldDataAccess for GCDWrappedFFAccess<T> {
|
impl<T: Column> Column for GCDWrappedFFAccess<T> {
|
||||||
fn get_val(&self, position: u64) -> u64 {
|
fn get_val(&self, position: u64) -> u64 {
|
||||||
self.gcd
|
self.gcd
|
||||||
.divide(self.fastfield_accessor.get_val(position) - self.base_value)
|
.divide(self.fastfield_accessor.get_val(position) - self.base_value)
|
||||||
@@ -197,7 +197,7 @@ impl CompositeFastFieldSerializer {
|
|||||||
codec_enable_checker: FastFieldCodecEnableCheck,
|
codec_enable_checker: FastFieldCodecEnableCheck,
|
||||||
field: Field,
|
field: Field,
|
||||||
field_write: &mut CountingWriter<W>,
|
field_write: &mut CountingWriter<W>,
|
||||||
fastfield_accessor: impl FastFieldDataAccess,
|
fastfield_accessor: impl Column,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let mut estimations = vec![];
|
let mut estimations = vec![];
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use common;
|
use common;
|
||||||
use fastfield_codecs::FastFieldDataAccess;
|
use fastfield_codecs::Column;
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
use tantivy_bitpacker::BlockedBitpacker;
|
use tantivy_bitpacker::BlockedBitpacker;
|
||||||
|
|
||||||
@@ -384,7 +384,7 @@ struct WriterFastFieldAccessProvider<'map, 'bitp> {
|
|||||||
vals: &'bitp BlockedBitpacker,
|
vals: &'bitp BlockedBitpacker,
|
||||||
stats: FastFieldStats,
|
stats: FastFieldStats,
|
||||||
}
|
}
|
||||||
impl<'map, 'bitp> FastFieldDataAccess for WriterFastFieldAccessProvider<'map, 'bitp> {
|
impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> {
|
||||||
/// Return the value associated to the given doc.
|
/// Return the value associated to the given doc.
|
||||||
///
|
///
|
||||||
/// Whenever possible use the Iterator passed to the fastfield creation instead, for performance
|
/// Whenever possible use the Iterator passed to the fastfield creation instead, for performance
|
||||||
|
|||||||
@@ -143,8 +143,9 @@ pub(crate) fn get_doc_id_mapping_from_field(
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests_indexsorting {
|
mod tests_indexsorting {
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::collector::TopDocs;
|
use crate::collector::TopDocs;
|
||||||
use crate::fastfield::FastFieldReader;
|
|
||||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||||
use crate::query::QueryParser;
|
use crate::query::QueryParser;
|
||||||
use crate::schema::{Schema, *};
|
use crate::schema::{Schema, *};
|
||||||
@@ -464,9 +465,9 @@ mod tests_indexsorting {
|
|||||||
let my_number = index.schema().get_field("my_number").unwrap();
|
let my_number = index.schema().get_field("my_number").unwrap();
|
||||||
|
|
||||||
let fast_field = fast_fields.u64(my_number).unwrap();
|
let fast_field = fast_fields.u64(my_number).unwrap();
|
||||||
assert_eq!(fast_field.get(0u32), 10u64);
|
assert_eq!(fast_field.get_val(0), 10u64);
|
||||||
assert_eq!(fast_field.get(1u32), 20u64);
|
assert_eq!(fast_field.get_val(1), 20u64);
|
||||||
assert_eq!(fast_field.get(2u32), 30u64);
|
assert_eq!(fast_field.get_val(2), 30u64);
|
||||||
|
|
||||||
let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
|
let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
|
||||||
let multifield = fast_fields.u64s(multi_numbers).unwrap();
|
let multifield = fast_fields.u64s(multi_numbers).unwrap();
|
||||||
|
|||||||
@@ -777,6 +777,7 @@ impl Drop for IndexWriter {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
use proptest::prop_oneof;
|
use proptest::prop_oneof;
|
||||||
use proptest::strategy::Strategy;
|
use proptest::strategy::Strategy;
|
||||||
@@ -785,7 +786,6 @@ mod tests {
|
|||||||
use crate::collector::TopDocs;
|
use crate::collector::TopDocs;
|
||||||
use crate::directory::error::LockError;
|
use crate::directory::error::LockError;
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
use crate::fastfield::FastFieldReader;
|
|
||||||
use crate::indexer::NoMergePolicy;
|
use crate::indexer::NoMergePolicy;
|
||||||
use crate::query::{QueryParser, TermQuery};
|
use crate::query::{QueryParser, TermQuery};
|
||||||
use crate::schema::{
|
use crate::schema::{
|
||||||
@@ -1327,7 +1327,7 @@ mod tests {
|
|||||||
let fast_field_reader = segment_reader.fast_fields().u64(id_field)?;
|
let fast_field_reader = segment_reader.fast_fields().u64(id_field)?;
|
||||||
let in_order_alive_ids: Vec<u64> = segment_reader
|
let in_order_alive_ids: Vec<u64> = segment_reader
|
||||||
.doc_ids_alive()
|
.doc_ids_alive()
|
||||||
.map(|doc| fast_field_reader.get(doc))
|
.map(|doc| fast_field_reader.get_val(doc as u64))
|
||||||
.collect();
|
.collect();
|
||||||
assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]);
|
assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]);
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -1493,7 +1493,7 @@ mod tests {
|
|||||||
let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
|
let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
|
||||||
segment_reader
|
segment_reader
|
||||||
.doc_ids_alive()
|
.doc_ids_alive()
|
||||||
.map(move |doc| ff_reader.get(doc))
|
.map(move |doc| ff_reader.get_val(doc as u64))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@@ -1504,7 +1504,7 @@ mod tests {
|
|||||||
let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
|
let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
|
||||||
segment_reader
|
segment_reader
|
||||||
.doc_ids_alive()
|
.doc_ids_alive()
|
||||||
.map(move |doc| ff_reader.get(doc))
|
.map(move |doc| ff_reader.get_val(doc as u64))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@@ -1622,7 +1622,7 @@ mod tests {
|
|||||||
facet_reader
|
facet_reader
|
||||||
.facet_from_ord(facet_ords[0], &mut facet)
|
.facet_from_ord(facet_ords[0], &mut facet)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let id = ff_reader.get(doc_id);
|
let id = ff_reader.get_val(doc_id as u64);
|
||||||
let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string()));
|
let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string()));
|
||||||
|
|
||||||
assert_eq!(facet, facet_expected);
|
assert_eq!(facet, facet_expected);
|
||||||
|
|||||||
@@ -4,14 +4,13 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use measure_time::debug_time;
|
use measure_time::debug_time;
|
||||||
use tantivy_bitpacker::minmax;
|
|
||||||
|
|
||||||
use crate::core::{Segment, SegmentReader};
|
use crate::core::{Segment, SegmentReader};
|
||||||
use crate::docset::{DocSet, TERMINATED};
|
use crate::docset::{DocSet, TERMINATED};
|
||||||
use crate::error::DataCorruption;
|
use crate::error::DataCorruption;
|
||||||
use crate::fastfield::{
|
use crate::fastfield::{
|
||||||
AliveBitSet, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldDataAccess,
|
AliveBitSet, Column, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldStats,
|
||||||
FastFieldReader, FastFieldStats, MultiValueLength, MultiValuedFastFieldReader,
|
MultiValueLength, MultiValuedFastFieldReader,
|
||||||
};
|
};
|
||||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
||||||
use crate::indexer::doc_id_mapping::{expect_field_id_for_sort_field, SegmentDocIdMapping};
|
use crate::indexer::doc_id_mapping::{expect_field_id_for_sort_field, SegmentDocIdMapping};
|
||||||
@@ -88,7 +87,7 @@ pub struct IndexMerger {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn compute_min_max_val(
|
fn compute_min_max_val(
|
||||||
u64_reader: &impl FastFieldReader<u64>,
|
u64_reader: &impl Column<u64>,
|
||||||
segment_reader: &SegmentReader,
|
segment_reader: &SegmentReader,
|
||||||
) -> Option<(u64, u64)> {
|
) -> Option<(u64, u64)> {
|
||||||
if segment_reader.max_doc() == 0 {
|
if segment_reader.max_doc() == 0 {
|
||||||
@@ -102,11 +101,11 @@ fn compute_min_max_val(
|
|||||||
}
|
}
|
||||||
// some deleted documents,
|
// some deleted documents,
|
||||||
// we need to recompute the max / min
|
// we need to recompute the max / min
|
||||||
minmax(
|
segment_reader
|
||||||
segment_reader
|
.doc_ids_alive()
|
||||||
.doc_ids_alive()
|
.map(|doc_id| u64_reader.get_val(doc_id as u64))
|
||||||
.map(|doc_id| u64_reader.get(doc_id)),
|
.minmax()
|
||||||
)
|
.into_option()
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TermOrdinalMapping {
|
struct TermOrdinalMapping {
|
||||||
@@ -376,13 +375,13 @@ impl IndexMerger {
|
|||||||
fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
|
fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
|
||||||
stats: FastFieldStats,
|
stats: FastFieldStats,
|
||||||
}
|
}
|
||||||
impl<'a> FastFieldDataAccess for SortedDocIdFieldAccessProvider<'a> {
|
impl<'a> Column for SortedDocIdFieldAccessProvider<'a> {
|
||||||
fn get_val(&self, doc: u64) -> u64 {
|
fn get_val(&self, doc: u64) -> u64 {
|
||||||
let DocAddress {
|
let DocAddress {
|
||||||
doc_id,
|
doc_id,
|
||||||
segment_ord,
|
segment_ord,
|
||||||
} = self.doc_id_mapping.get_old_doc_addr(doc as u32);
|
} = self.doc_id_mapping.get_old_doc_addr(doc as u32);
|
||||||
self.fast_field_readers[segment_ord as usize].get(doc_id)
|
self.fast_field_readers[segment_ord as usize].get_val(doc_id as u64)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
|
fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
|
||||||
@@ -392,7 +391,7 @@ impl IndexMerger {
|
|||||||
.map(|old_doc_addr| {
|
.map(|old_doc_addr| {
|
||||||
let fast_field_reader =
|
let fast_field_reader =
|
||||||
&self.fast_field_readers[old_doc_addr.segment_ord as usize];
|
&self.fast_field_readers[old_doc_addr.segment_ord as usize];
|
||||||
fast_field_reader.get(old_doc_addr.doc_id)
|
fast_field_reader.get_val(old_doc_addr.doc_id as u64)
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -429,7 +428,7 @@ impl IndexMerger {
|
|||||||
|
|
||||||
let everything_is_in_order = reader_ordinal_and_field_accessors
|
let everything_is_in_order = reader_ordinal_and_field_accessors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|reader| reader.1)
|
.map(|(_, col)| Arc::new(col))
|
||||||
.tuple_windows()
|
.tuple_windows()
|
||||||
.all(|(field_accessor1, field_accessor2)| {
|
.all(|(field_accessor1, field_accessor2)| {
|
||||||
if sort_by_field.order.is_asc() {
|
if sort_by_field.order.is_asc() {
|
||||||
@@ -444,7 +443,7 @@ impl IndexMerger {
|
|||||||
pub(crate) fn get_sort_field_accessor(
|
pub(crate) fn get_sort_field_accessor(
|
||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
sort_by_field: &IndexSortByField,
|
sort_by_field: &IndexSortByField,
|
||||||
) -> crate::Result<impl FastFieldReader<u64>> {
|
) -> crate::Result<impl Column> {
|
||||||
let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required
|
let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required
|
||||||
let value_accessor = reader.fast_fields().u64_lenient(field_id)?;
|
let value_accessor = reader.fast_fields().u64_lenient(field_id)?;
|
||||||
Ok(value_accessor)
|
Ok(value_accessor)
|
||||||
@@ -453,7 +452,7 @@ impl IndexMerger {
|
|||||||
pub(crate) fn get_reader_with_sort_field_accessor(
|
pub(crate) fn get_reader_with_sort_field_accessor(
|
||||||
&self,
|
&self,
|
||||||
sort_by_field: &IndexSortByField,
|
sort_by_field: &IndexSortByField,
|
||||||
) -> crate::Result<Vec<(SegmentOrdinal, impl FastFieldReader<u64> + Clone)>> {
|
) -> crate::Result<Vec<(SegmentOrdinal, impl Column)>> {
|
||||||
let reader_ordinal_and_field_accessors = self
|
let reader_ordinal_and_field_accessors = self
|
||||||
.readers
|
.readers
|
||||||
.iter()
|
.iter()
|
||||||
@@ -506,8 +505,8 @@ impl IndexMerger {
|
|||||||
doc_id_reader_pair
|
doc_id_reader_pair
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.kmerge_by(|a, b| {
|
.kmerge_by(|a, b| {
|
||||||
let val1 = a.2.get(a.0);
|
let val1 = a.2.get_val(a.0 as u64);
|
||||||
let val2 = b.2.get(b.0);
|
let val2 = b.2.get_val(b.0 as u64);
|
||||||
if sort_by_field.order == Order::Asc {
|
if sort_by_field.order == Order::Asc {
|
||||||
val1 < val2
|
val1 < val2
|
||||||
} else {
|
} else {
|
||||||
@@ -578,7 +577,7 @@ impl IndexMerger {
|
|||||||
offsets: &'a [u64],
|
offsets: &'a [u64],
|
||||||
stats: FastFieldStats,
|
stats: FastFieldStats,
|
||||||
}
|
}
|
||||||
impl<'a> FastFieldDataAccess for FieldIndexAccessProvider<'a> {
|
impl<'a> Column for FieldIndexAccessProvider<'a> {
|
||||||
fn get_val(&self, doc: u64) -> u64 {
|
fn get_val(&self, doc: u64) -> u64 {
|
||||||
self.offsets[doc as usize]
|
self.offsets[doc as usize]
|
||||||
}
|
}
|
||||||
@@ -778,7 +777,7 @@ impl IndexMerger {
|
|||||||
offsets: Vec<u64>,
|
offsets: Vec<u64>,
|
||||||
stats: FastFieldStats,
|
stats: FastFieldStats,
|
||||||
}
|
}
|
||||||
impl<'a> FastFieldDataAccess for SortedDocIdMultiValueAccessProvider<'a> {
|
impl<'a> Column for SortedDocIdMultiValueAccessProvider<'a> {
|
||||||
fn get_val(&self, pos: u64) -> u64 {
|
fn get_val(&self, pos: u64) -> u64 {
|
||||||
// use the offsets index to find the doc_id which will contain the position.
|
// use the offsets index to find the doc_id which will contain the position.
|
||||||
// the offsets are strictly increasing so we can do a simple search on it.
|
// the offsets are strictly increasing so we can do a simple search on it.
|
||||||
@@ -1200,6 +1199,7 @@ impl IndexMerger {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use byteorder::{BigEndian, ReadBytesExt};
|
use byteorder::{BigEndian, ReadBytesExt};
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use schema::FAST;
|
use schema::FAST;
|
||||||
|
|
||||||
use crate::collector::tests::{
|
use crate::collector::tests::{
|
||||||
@@ -1207,7 +1207,6 @@ mod tests {
|
|||||||
};
|
};
|
||||||
use crate::collector::{Count, FacetCollector};
|
use crate::collector::{Count, FacetCollector};
|
||||||
use crate::core::Index;
|
use crate::core::Index;
|
||||||
use crate::fastfield::FastFieldReader;
|
|
||||||
use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery};
|
use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery};
|
||||||
use crate::schema::{
|
use crate::schema::{
|
||||||
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
|
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use fastfield_codecs::Column;
|
||||||
|
|
||||||
use crate::collector::TopDocs;
|
use crate::collector::TopDocs;
|
||||||
use crate::core::Index;
|
use crate::core::Index;
|
||||||
use crate::fastfield::{AliveBitSet, FastFieldReader, MultiValuedFastFieldReader};
|
use crate::fastfield::{AliveBitSet, MultiValuedFastFieldReader};
|
||||||
use crate::query::QueryParser;
|
use crate::query::QueryParser;
|
||||||
use crate::schema::{
|
use crate::schema::{
|
||||||
self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, NumericOptions,
|
self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, NumericOptions,
|
||||||
@@ -186,17 +188,17 @@ mod tests {
|
|||||||
|
|
||||||
let fast_fields = segment_reader.fast_fields();
|
let fast_fields = segment_reader.fast_fields();
|
||||||
let fast_field = fast_fields.u64(int_field).unwrap();
|
let fast_field = fast_fields.u64(int_field).unwrap();
|
||||||
assert_eq!(fast_field.get(5u32), 1u64);
|
assert_eq!(fast_field.get_val(5), 1u64);
|
||||||
assert_eq!(fast_field.get(4u32), 2u64);
|
assert_eq!(fast_field.get_val(4), 2u64);
|
||||||
assert_eq!(fast_field.get(3u32), 3u64);
|
assert_eq!(fast_field.get_val(3), 3u64);
|
||||||
if force_disjunct_segment_sort_values {
|
if force_disjunct_segment_sort_values {
|
||||||
assert_eq!(fast_field.get(2u32), 20u64);
|
assert_eq!(fast_field.get_val(2u64), 20u64);
|
||||||
assert_eq!(fast_field.get(1u32), 100u64);
|
assert_eq!(fast_field.get_val(1u64), 100u64);
|
||||||
} else {
|
} else {
|
||||||
assert_eq!(fast_field.get(2u32), 10u64);
|
assert_eq!(fast_field.get_val(2u64), 10u64);
|
||||||
assert_eq!(fast_field.get(1u32), 20u64);
|
assert_eq!(fast_field.get_val(1u64), 20u64);
|
||||||
}
|
}
|
||||||
assert_eq!(fast_field.get(0u32), 1_000u64);
|
assert_eq!(fast_field.get_val(0u64), 1_000u64);
|
||||||
|
|
||||||
// test new field norm mapping
|
// test new field norm mapping
|
||||||
{
|
{
|
||||||
@@ -373,12 +375,12 @@ mod tests {
|
|||||||
|
|
||||||
let fast_fields = segment_reader.fast_fields();
|
let fast_fields = segment_reader.fast_fields();
|
||||||
let fast_field = fast_fields.u64(int_field).unwrap();
|
let fast_field = fast_fields.u64(int_field).unwrap();
|
||||||
assert_eq!(fast_field.get(0u32), 1u64);
|
assert_eq!(fast_field.get_val(0), 1u64);
|
||||||
assert_eq!(fast_field.get(1u32), 2u64);
|
assert_eq!(fast_field.get_val(1), 2u64);
|
||||||
assert_eq!(fast_field.get(2u32), 3u64);
|
assert_eq!(fast_field.get_val(2), 3u64);
|
||||||
assert_eq!(fast_field.get(3u32), 10u64);
|
assert_eq!(fast_field.get_val(3), 10u64);
|
||||||
assert_eq!(fast_field.get(4u32), 20u64);
|
assert_eq!(fast_field.get_val(4), 20u64);
|
||||||
assert_eq!(fast_field.get(5u32), 1_000u64);
|
assert_eq!(fast_field.get_val(5), 1_000u64);
|
||||||
|
|
||||||
let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
|
let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
|
||||||
let mut vals = vec![];
|
let mut vals = vec![];
|
||||||
|
|||||||
@@ -421,6 +421,7 @@ pub struct DocAddress {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
use common::{BinarySerializable, FixedSize};
|
use common::{BinarySerializable, FixedSize};
|
||||||
|
use fastfield_codecs::Column;
|
||||||
use rand::distributions::{Bernoulli, Uniform};
|
use rand::distributions::{Bernoulli, Uniform};
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
@@ -429,7 +430,6 @@ pub mod tests {
|
|||||||
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
||||||
use crate::core::SegmentReader;
|
use crate::core::SegmentReader;
|
||||||
use crate::docset::{DocSet, TERMINATED};
|
use crate::docset::{DocSet, TERMINATED};
|
||||||
use crate::fastfield::FastFieldReader;
|
|
||||||
use crate::merge_policy::NoMergePolicy;
|
use crate::merge_policy::NoMergePolicy;
|
||||||
use crate::query::BooleanQuery;
|
use crate::query::BooleanQuery;
|
||||||
use crate::schema::*;
|
use crate::schema::*;
|
||||||
@@ -1036,21 +1036,21 @@ pub mod tests {
|
|||||||
let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned);
|
let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned);
|
||||||
assert!(fast_field_reader_opt.is_ok());
|
assert!(fast_field_reader_opt.is_ok());
|
||||||
let fast_field_reader = fast_field_reader_opt.unwrap();
|
let fast_field_reader = fast_field_reader_opt.unwrap();
|
||||||
assert_eq!(fast_field_reader.get(0), 4u64)
|
assert_eq!(fast_field_reader.get_val(0), 4u64)
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let fast_field_reader_res = segment_reader.fast_fields().i64(fast_field_signed);
|
let fast_field_reader_res = segment_reader.fast_fields().i64(fast_field_signed);
|
||||||
assert!(fast_field_reader_res.is_ok());
|
assert!(fast_field_reader_res.is_ok());
|
||||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
assert_eq!(fast_field_reader.get_val(0), 4i64)
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let fast_field_reader_res = segment_reader.fast_fields().f64(fast_field_float);
|
let fast_field_reader_res = segment_reader.fast_fields().f64(fast_field_float);
|
||||||
assert!(fast_field_reader_res.is_ok());
|
assert!(fast_field_reader_res.is_ok());
|
||||||
let fast_field_reader = fast_field_reader_res.unwrap();
|
let fast_field_reader = fast_field_reader_res.unwrap();
|
||||||
assert_eq!(fast_field_reader.get(0), 4f64)
|
assert_eq!(fast_field_reader.get_val(0), 4f64)
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user