diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b4232592..2208b8b38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.19 ================================ +- Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz) - Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz) - Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396) The `DateTime` type has been updated to hold timestamps with microseconds precision. diff --git a/bitpacker/src/bitpacker.rs b/bitpacker/src/bitpacker.rs index 716f865f9..86dd1a7ca 100644 --- a/bitpacker/src/bitpacker.rs +++ b/bitpacker/src/bitpacker.rs @@ -87,15 +87,15 @@ impl BitUnpacker { } #[inline] - pub fn get(&self, idx: u64, data: &[u8]) -> u64 { + pub fn get(&self, idx: u32, data: &[u8]) -> u64 { if self.num_bits == 0 { return 0u64; } - let addr_in_bits = idx * self.num_bits; + let addr_in_bits = idx * self.num_bits as u32; let addr = addr_in_bits >> 3; let bit_shift = addr_in_bits & 7; debug_assert!( - addr + 8 <= data.len() as u64, + addr + 8 <= data.len() as u32, "The fast field field should have been padded with 7 bytes." ); let bytes: [u8; 8] = (&data[(addr as usize)..(addr as usize) + 8]) @@ -130,7 +130,7 @@ mod test { fn test_bitpacker_util(len: usize, num_bits: u8) { let (bitunpacker, vals, data) = create_fastfield_bitpacker(len, num_bits); for (i, val) in vals.iter().enumerate() { - assert_eq!(bitunpacker.get(i as u64, &data), *val); + assert_eq!(bitunpacker.get(i as u32, &data), *val); } } diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs index bfe958e76..fa4ee5967 100644 --- a/bitpacker/src/blocked_bitpacker.rs +++ b/bitpacker/src/blocked_bitpacker.rs @@ -130,7 +130,7 @@ impl BlockedBitpacker { let pos_in_block = idx % BLOCK_SIZE as usize; if let Some(metadata) = self.offset_and_bits.get(metadata_pos) { let unpacked = BitUnpacker::new(metadata.num_bits()).get( - pos_in_block as u64, + pos_in_block as u32, &self.compressed_blocks[metadata.offset() as usize..], ); unpacked + metadata.base_value() diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index f6ac5dcfb..a162839c0 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -105,7 +105,7 @@ impl SegmentCollector for StatsSegmentCollector { type Fruit = Option; fn collect(&mut self, doc: u32, _score: Score) { - let value = self.fast_field_reader.get_val(doc as u64) as f64; + let value = self.fast_field_reader.get_val(doc) as f64; self.stats.count += 1; self.stats.sum += value; self.stats.squared_sum += value * value; diff --git a/examples/warmer.rs b/examples/warmer.rs index 6b8c2830f..c9dc699f2 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -51,7 +51,7 @@ impl Warmer for DynamicPriceColumn { let product_id_reader = segment.fast_fields().u64(self.field)?; let product_ids: Vec = segment .doc_ids_alive() - .map(|doc| product_id_reader.get_val(doc as u64)) + .map(|doc| product_id_reader.get_val(doc)) .collect(); let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter(); let mut price_vals: Vec = Vec::new(); diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index b41bae0d9..cf83903de 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -65,7 +65,7 @@ mod tests { b.iter(|| { let mut a = 0u64; for _ in 0..n { - a = column.get_val(a as 
u64); + a = column.get_val(a as u32); } a }); @@ -141,7 +141,7 @@ mod tests { b.iter(|| { let mut a = 0u128; for i in 0u64..column.num_vals() as u64 { - a += column.get_val(i); + a += column.get_val(i as u32); } a }); @@ -155,7 +155,7 @@ mod tests { let n = column.num_vals(); let mut a = 0u128; for i in (0..n / 5).map(|val| val * 5) { - a += column.get_val(i as u64); + a += column.get_val(i); } a }); @@ -180,9 +180,9 @@ mod tests { let n = permutation.len(); let column: Arc> = serialize_and_load(&permutation); b.iter(|| { - let mut a = 0u64; + let mut a = 0; for i in (0..n / 7).map(|val| val * 7) { - a += column.get_val(i as u64); + a += column.get_val(i as u32); } a }); @@ -195,7 +195,7 @@ mod tests { let column: Arc> = serialize_and_load(&permutation); b.iter(|| { let mut a = 0u64; - for i in 0u64..n as u64 { + for i in 0u32..n as u32 { a += column.get_val(i); } a @@ -209,8 +209,8 @@ mod tests { let column: Arc> = serialize_and_load(&permutation); b.iter(|| { let mut a = 0u64; - for i in 0..n as u64 { - a += column.get_val(i); + for i in 0..n { + a += column.get_val(i as u32); } a }); diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 25416d947..044debb96 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -17,7 +17,7 @@ pub struct BitpackedReader { impl Column for BitpackedReader { #[inline] - fn get_val(&self, doc: u64) -> u64 { + fn get_val(&self, doc: u32) -> u64 { self.bit_unpacker.get(doc, &self.data) } #[inline] diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index c589d304e..553463cc7 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -78,7 +78,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { let mut first_chunk: Vec = column.iter().take(CHUNK_SIZE as usize).collect(); let line = Line::train(&VecColumn::from(&first_chunk)); for (i, buffer_val) in first_chunk.iter_mut().enumerate() { - let interpolated_val = line.eval(i as u64); + let interpolated_val = line.eval(i as u32); *buffer_val = buffer_val.wrapping_sub(interpolated_val); } let estimated_bit_width = first_chunk @@ -121,7 +121,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { assert!(!buffer.is_empty()); for (i, buffer_val) in buffer.iter_mut().enumerate() { - let interpolated_val = line.eval(i as u64); + let interpolated_val = line.eval(i as u32); *buffer_val = buffer_val.wrapping_sub(interpolated_val); } let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap(); @@ -161,9 +161,9 @@ pub struct BlockwiseLinearReader { impl Column for BlockwiseLinearReader { #[inline(always)] - fn get_val(&self, idx: u64) -> u64 { - let block_id = (idx / CHUNK_SIZE as u64) as usize; - let idx_within_block = idx % (CHUNK_SIZE as u64); + fn get_val(&self, idx: u32) -> u64 { + let block_id = (idx / CHUNK_SIZE as u32) as usize; + let idx_within_block = idx % (CHUNK_SIZE as u32); let block = &self.blocks[block_id]; let interpoled_val: u64 = block.line.eval(idx_within_block); let block_bytes = &self.data[block.data_start_offset..]; diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index e4d4fcbb5..6dee298d3 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -14,7 +14,7 @@ pub trait Column: Send + Sync { /// # Panics /// /// May panic if `idx` is greater than the column length. 
- fn get_val(&self, idx: u64) -> T; + fn get_val(&self, idx: u32) -> T; /// Fills an output buffer with the fast field values /// associated with the `DocId` going from @@ -27,7 +27,7 @@ pub trait Column: Send + Sync { #[inline] fn get_range(&self, start: u64, output: &mut [T]) { for (out, idx) in output.iter_mut().zip(start..) { - *out = self.get_val(idx); + *out = self.get_val(idx as u32); } } @@ -44,7 +44,7 @@ pub trait Column: Send + Sync { let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); for idx in doc_id_range.start..doc_id_range.end { - let val = self.get_val(idx as u64); + let val = self.get_val(idx); if value_range.contains(&val) { vals.push(idx); } @@ -73,7 +73,7 @@ pub trait Column: Send + Sync { /// Returns a iterator over the data fn iter<'a>(&'a self) -> Box + 'a> { - Box::new((0..self.num_vals() as u64).map(|idx| self.get_val(idx))) + Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) } } @@ -85,7 +85,7 @@ pub struct VecColumn<'a, T = u64> { } impl<'a, C: Column, T: Copy + PartialOrd> Column for &'a C { - fn get_val(&self, idx: u64) -> T { + fn get_val(&self, idx: u32) -> T { (*self).get_val(idx) } @@ -111,7 +111,7 @@ impl<'a, C: Column, T: Copy + PartialOrd> Column for &'a C { } impl<'a, T: Copy + PartialOrd + Send + Sync> Column for VecColumn<'a, T> { - fn get_val(&self, position: u64) -> T { + fn get_val(&self, position: u32) -> T { self.values[position as usize] } @@ -196,7 +196,7 @@ where Output: PartialOrd + Send + Sync + Clone, { #[inline] - fn get_val(&self, idx: u64) -> Output { + fn get_val(&self, idx: u32) -> Output { let from_val = self.from_column.get_val(idx); self.monotonic_mapping.mapping(from_val) } @@ -254,7 +254,7 @@ where T: Iterator + Clone + ExactSizeIterator + Send + Sync, T::Item: PartialOrd, { - fn get_val(&self, idx: u64) -> T::Item { + fn get_val(&self, idx: u32) -> T::Item { self.0.clone().nth(idx as usize).unwrap() } diff --git a/fastfield_codecs/src/compact_space/mod.rs b/fastfield_codecs/src/compact_space/mod.rs index a183d2cd5..b0f4c9240 100644 --- a/fastfield_codecs/src/compact_space/mod.rs +++ b/fastfield_codecs/src/compact_space/mod.rs @@ -284,7 +284,7 @@ impl BinarySerializable for IPCodecParams { impl Column for CompactSpaceDecompressor { #[inline] - fn get_val(&self, doc: u64) -> u128 { + fn get_val(&self, doc: u32) -> u128 { self.get(doc) } @@ -399,17 +399,17 @@ impl CompactSpaceDecompressor { positions.push(idx); } }; - let get_val = |idx| self.params.bit_unpacker.get(idx as u64, &self.data); + let get_val = |idx| self.params.bit_unpacker.get(idx, &self.data); // unrolled loop for idx in (doc_id_range.start..cutoff).step_by(step_size as usize) { let idx1 = idx; let idx2 = idx + 1; let idx3 = idx + 2; let idx4 = idx + 3; - let val1 = get_val(idx1); - let val2 = get_val(idx2); - let val3 = get_val(idx3); - let val4 = get_val(idx4); + let val1 = get_val(idx1 as u32); + let val2 = get_val(idx2 as u32); + let val3 = get_val(idx3 as u32); + let val4 = get_val(idx4 as u32); push_if_in_range(idx1, val1); push_if_in_range(idx2, val2); push_if_in_range(idx3, val3); @@ -418,7 +418,7 @@ impl CompactSpaceDecompressor { // handle rest for idx in cutoff..doc_id_range.end { - push_if_in_range(idx, get_val(idx)); + push_if_in_range(idx, get_val(idx as u32)); } positions @@ -427,7 +427,7 @@ impl CompactSpaceDecompressor { #[inline] fn iter_compact(&self) -> impl Iterator + '_ { (0..self.params.num_vals) - .map(move |idx| self.params.bit_unpacker.get(idx as u64, &self.data) as u64) + .map(move |idx| 
self.params.bit_unpacker.get(idx, &self.data) as u64) } #[inline] @@ -439,7 +439,7 @@ impl CompactSpaceDecompressor { } #[inline] - pub fn get(&self, idx: u64) -> u128 { + pub fn get(&self, idx: u32) -> u128 { let compact = self.params.bit_unpacker.get(idx, &self.data); self.compact_to_u128(compact) } @@ -505,7 +505,7 @@ mod tests { fn test_all(data: OwnedBytes, expected: &[u128]) { let decompressor = CompactSpaceDecompressor::open(data).unwrap(); for (idx, expected_val) in expected.iter().cloned().enumerate() { - let val = decompressor.get(idx as u64); + let val = decompressor.get(idx as u32); assert_eq!(val, expected_val); let test_range = |range: RangeInclusive| { diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 6e5ea06ee..bda3a1a44 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -201,7 +201,7 @@ mod tests { let reader = crate::open::(OwnedBytes::new(out)).unwrap(); assert_eq!(reader.num_vals(), data.len() as u32); for (doc, orig_val) in data.iter().copied().enumerate() { - let val = reader.get_val(doc as u64); + let val = reader.get_val(doc as u32); assert_eq!( val, orig_val, "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data \ @@ -432,7 +432,7 @@ mod bench { b.iter(|| { let mut sum = 0u64; for pos in value_iter() { - let val = col.get_val(pos as u64); + let val = col.get_val(pos as u32); sum = sum.wrapping_add(val); } sum @@ -444,7 +444,7 @@ mod bench { b.iter(|| { let mut sum = 0u64; for pos in value_iter() { - let val = col.get_val(pos as u64); + let val = col.get_val(pos as u32); sum = sum.wrapping_add(val); } sum diff --git a/fastfield_codecs/src/line.rs b/fastfield_codecs/src/line.rs index 4613faf04..3b081b0ed 100644 --- a/fastfield_codecs/src/line.rs +++ b/fastfield_codecs/src/line.rs @@ -62,8 +62,8 @@ fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU32) -> u64 { impl Line { #[inline(always)] - pub fn eval(&self, x: u64) -> u64 { - let linear_part = (x.wrapping_mul(self.slope) >> 32) as i32 as u64; + pub fn eval(&self, x: u32) -> u64 { + let linear_part = ((x as u64).wrapping_mul(self.slope) >> 32) as i32 as u64; self.intercept.wrapping_add(linear_part) } @@ -129,7 +129,7 @@ impl Line { }; let heuristic_shift = y0.wrapping_sub(MID_POINT); line.intercept = positions_and_values - .map(|(pos, y)| y.wrapping_sub(line.eval(pos))) + .map(|(pos, y)| y.wrapping_sub(line.eval(pos as u32))) .min_by_key(|&val| val.wrapping_sub(heuristic_shift)) .unwrap_or(0u64); //< Never happens. 
line @@ -199,7 +199,7 @@ mod tests { let line = Line::train(&VecColumn::from(&ys)); ys.iter() .enumerate() - .map(|(x, y)| y.wrapping_sub(line.eval(x as u64))) + .map(|(x, y)| y.wrapping_sub(line.eval(x as u32))) .max() } diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index ad2a0ca74..d75eeea80 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -19,7 +19,7 @@ pub struct LinearReader { impl Column for LinearReader { #[inline] - fn get_val(&self, doc: u64) -> u64 { + fn get_val(&self, doc: u32) -> u64 { let interpoled_val: u64 = self.linear_params.line.eval(doc); let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data); interpoled_val.wrapping_add(bitpacked_diff) @@ -93,7 +93,7 @@ impl FastFieldCodec for LinearCodec { .iter() .enumerate() .map(|(pos, actual_value)| { - let calculated_value = line.eval(pos as u64); + let calculated_value = line.eval(pos as u32); actual_value.wrapping_sub(calculated_value) }) .max() @@ -108,7 +108,7 @@ impl FastFieldCodec for LinearCodec { let mut bit_packer = BitPacker::new(); for (pos, actual_value) in column.iter().enumerate() { - let calculated_value = line.eval(pos as u64); + let calculated_value = line.eval(pos as u32); let offset = actual_value.wrapping_sub(calculated_value); bit_packer.write(offset, num_bits, write)?; } @@ -140,7 +140,7 @@ impl FastFieldCodec for LinearCodec { let estimated_bit_width = sample_positions_and_values .into_iter() .map(|(pos, actual_value)| { - let interpolated_val = line.eval(pos as u64); + let interpolated_val = line.eval(pos as u32); actual_value.wrapping_sub(interpolated_val) }) .map(|diff| ((diff as f32 * 1.5) * 2.0) as u64) diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index 92053fc21..c2d0c1277 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -331,10 +331,10 @@ impl SegmentHistogramCollector { .expect("unexpected fast field cardinatility"); let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0] as u64)); - let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1] as u64)); - let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2] as u64)); - let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3] as u64)); + let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0])); + let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1])); + let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2])); + let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3])); let bucket_pos0 = get_bucket_num(val0); let bucket_pos1 = get_bucket_num(val1); @@ -371,7 +371,7 @@ impl SegmentHistogramCollector { )?; } for &doc in iter.remainder() { - let val = f64_from_fastfield_u64(accessor.get_val(doc as u64), &self.field_type); + let val = f64_from_fastfield_u64(accessor.get_val(doc), &self.field_type); if !bounds.contains(val) { continue; } diff --git a/src/aggregation/bucket/range.rs b/src/aggregation/bucket/range.rs index 477383164..33645cb8f 100644 --- a/src/aggregation/bucket/range.rs +++ b/src/aggregation/bucket/range.rs @@ -263,10 +263,10 @@ impl SegmentRangeCollector { .as_single() .expect("unexpected fast field cardinality"); for docs in iter.by_ref() { - let val1 = accessor.get_val(docs[0] as u64); - let val2 = accessor.get_val(docs[1] as u64); - let val3 = accessor.get_val(docs[2] as u64); - let val4 = 
accessor.get_val(docs[3] as u64); + let val1 = accessor.get_val(docs[0]); + let val2 = accessor.get_val(docs[1]); + let val3 = accessor.get_val(docs[2]); + let val4 = accessor.get_val(docs[3]); let bucket_pos1 = self.get_bucket_pos(val1); let bucket_pos2 = self.get_bucket_pos(val2); let bucket_pos3 = self.get_bucket_pos(val3); @@ -278,7 +278,7 @@ impl SegmentRangeCollector { self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?; } for &doc in iter.remainder() { - let val = accessor.get_val(doc as u64); + let val = accessor.get_val(doc); let bucket_pos = self.get_bucket_pos(val); self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?; } diff --git a/src/aggregation/metric/average.rs b/src/aggregation/metric/average.rs index 206bb7607..2f22430b4 100644 --- a/src/aggregation/metric/average.rs +++ b/src/aggregation/metric/average.rs @@ -60,10 +60,10 @@ impl SegmentAverageCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get_val(docs[0] as u64); - let val2 = field.get_val(docs[1] as u64); - let val3 = field.get_val(docs[2] as u64); - let val4 = field.get_val(docs[3] as u64); + let val1 = field.get_val(docs[0]); + let val2 = field.get_val(docs[1]); + let val3 = field.get_val(docs[2]); + let val4 = field.get_val(docs[3]); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -74,7 +74,7 @@ impl SegmentAverageCollector { self.data.collect(val4); } for &doc in iter.remainder() { - let val = field.get_val(doc as u64); + let val = field.get_val(doc); let val = f64_from_fastfield_u64(val, &self.field_type); self.data.collect(val); } diff --git a/src/aggregation/metric/stats.rs b/src/aggregation/metric/stats.rs index cb4236b8a..f84944c26 100644 --- a/src/aggregation/metric/stats.rs +++ b/src/aggregation/metric/stats.rs @@ -166,10 +166,10 @@ impl SegmentStatsCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get_val(docs[0] as u64); - let val2 = field.get_val(docs[1] as u64); - let val3 = field.get_val(docs[2] as u64); - let val4 = field.get_val(docs[3] as u64); + let val1 = field.get_val(docs[0]); + let val2 = field.get_val(docs[1]); + let val3 = field.get_val(docs[2]); + let val4 = field.get_val(docs[3]); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -180,7 +180,7 @@ impl SegmentStatsCollector { self.stats.collect(val4); } for &doc in iter.remainder() { - let val = field.get_val(doc as u64); + let val = field.get_val(doc); let val = f64_from_fastfield_u64(val, &self.field_type); self.stats.collect(val); } diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index 323bddc09..15f52e29c 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -177,7 +177,7 @@ where type Fruit = TSegmentCollector::Fruit; fn collect(&mut self, doc: u32, score: Score) { - let value = self.fast_field_reader.get_val(doc as u64); + let value = self.fast_field_reader.get_val(doc); if (self.predicate)(value) { self.segment_collector.collect(doc, score) } diff --git 
a/src/collector/histogram_collector.rs b/src/collector/histogram_collector.rs index 915751f51..dac0e19d9 100644 --- a/src/collector/histogram_collector.rs +++ b/src/collector/histogram_collector.rs @@ -94,7 +94,7 @@ impl SegmentCollector for SegmentHistogramCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let value = self.ff_reader.get_val(doc as u64); + let value = self.ff_reader.get_val(doc); self.histogram_computer.add_value(value); } diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 690110840..2dd194245 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -201,7 +201,7 @@ impl SegmentCollector for FastFieldSegmentCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let val = self.reader.get_val(doc as u64); + let val = self.reader.get_val(doc); self.vals.push(val); } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 43b45cdbf..fdd6fb1fd 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -137,7 +137,7 @@ struct ScorerByFastFieldReader { impl CustomSegmentScorer for ScorerByFastFieldReader { fn score(&mut self, doc: DocId) -> u64 { - self.ff_reader.get_val(doc as u64) + self.ff_reader.get_val(doc) } } @@ -458,7 +458,7 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId, original_score: Score| { - /// let popularity: u64 = popularity_reader.get_val(doc as u64); + /// let popularity: u64 = popularity_reader.get_val(doc); /// // Well.. For the sake of the example we use a simple logarithm /// // function. /// let popularity_boost_score = ((2u64 + popularity) as Score).log2(); @@ -567,8 +567,8 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId| { - /// let popularity: u64 = popularity_reader.get_val(doc as u64); - /// let boosted: u64 = boosted_reader.get_val(doc as u64); + /// let popularity: u64 = popularity_reader.get_val(doc); + /// let boosted: u64 = boosted_reader.get_val(doc); /// // Score do not have to be `f64` in tantivy. /// // Here we return a couple to get lexicographical order /// // for free. diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs index 1340bfb66..c4a4f2ffc 100644 --- a/src/fastfield/bytes/reader.rs +++ b/src/fastfield/bytes/reader.rs @@ -32,10 +32,9 @@ impl BytesFastFieldReader { Ok(BytesFastFieldReader { idx_reader, values }) } - fn range(&self, doc: DocId) -> Range { - let idx = doc as u64; - let start = self.idx_reader.get_val(idx); - let end = self.idx_reader.get_val(idx + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } @@ -48,7 +47,7 @@ impl BytesFastFieldReader { /// Returns the length of the bytes associated with the given `doc` pub fn num_bytes(&self, doc: DocId) -> u64 { let range = self.range(doc); - range.end - range.start + (range.end - range.start) as u64 } /// Returns the overall number of bytes in this bytes fast field. 
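
The bytes fast field pairs an offset index with one flat byte buffer: the offset column stores one start position per document plus a trailing sentinel, and the hunk above narrows the per-document span to `Range<u32>` while the stored offsets stay `u64`. A minimal sketch of that layout, using a hypothetical `BytesColumnSketch` rather than the actual tantivy types:

```rust
use std::ops::Range;

// Hypothetical stand-in for the bytes fast field layout (not the tantivy types):
// `offsets[doc]..offsets[doc + 1]` addresses the concatenated byte payloads in `values`.
struct BytesColumnSketch {
    offsets: Vec<u64>,
    values: Vec<u8>,
}

impl BytesColumnSketch {
    // Documents are addressed with u32; the stored offsets stay u64 and are
    // narrowed when the per-doc range is built, mirroring the new signature.
    fn range(&self, doc: u32) -> Range<u32> {
        let start = self.offsets[doc as usize] as u32;
        let end = self.offsets[doc as usize + 1] as u32;
        start..end
    }

    fn get_bytes(&self, doc: u32) -> &[u8] {
        let range = self.range(doc);
        &self.values[range.start as usize..range.end as usize]
    }

    fn num_bytes(&self, doc: u32) -> u64 {
        let range = self.range(doc);
        (range.end - range.start) as u64
    }
}

fn main() {
    let column = BytesColumnSketch {
        offsets: vec![0, 3, 3, 7], // doc 1 stores no bytes
        values: b"fooquux".to_vec(),
    };
    assert_eq!(column.get_bytes(0), b"foo");
    assert_eq!(column.get_bytes(1), b"");
    assert_eq!(column.get_bytes(2), b"quux");
    assert_eq!(column.num_bytes(2), 4);
}
```
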
@@ -58,7 +57,7 @@ impl BytesFastFieldReader { } impl MultiValueLength for BytesFastFieldReader { - fn get_range(&self, doc_id: DocId) -> std::ops::Range { + fn get_range(&self, doc_id: DocId) -> std::ops::Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index cf65cb169..5bd90f6ce 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -51,7 +51,7 @@ mod writer; /// for a doc_id pub trait MultiValueLength { /// returns the positions for a docid - fn get_range(&self, doc_id: DocId) -> std::ops::Range; + fn get_range(&self, doc_id: DocId) -> std::ops::Range; /// returns the num of values associated with a doc_id fn get_len(&self, doc_id: DocId) -> u64; /// returns the sum of num values for all doc_ids @@ -184,9 +184,9 @@ mod tests { #[test] pub fn test_fastfield() { let test_fastfield = fastfield_codecs::serialize_and_load(&[100u64, 200u64, 300u64][..]); - assert_eq!(test_fastfield.get_val(0u64), 100); - assert_eq!(test_fastfield.get_val(1u64), 200); - assert_eq!(test_fastfield.get_val(2u64), 300); + assert_eq!(test_fastfield.get_val(0), 100); + assert_eq!(test_fastfield.get_val(1), 200); + assert_eq!(test_fastfield.get_val(2), 300); } #[test] @@ -402,7 +402,7 @@ mod tests { assert_eq!(fast_field_reader.min_value(), -100i64); assert_eq!(fast_field_reader.max_value(), 9_999i64); for (doc, i) in (-100i64..10_000i64).enumerate() { - assert_eq!(fast_field_reader.get_val(doc as u64), i); + assert_eq!(fast_field_reader.get_val(doc as u32), i); } let mut buffer = vec![0i64; 100]; fast_field_reader.get_range(53, &mut buffer[..]); @@ -484,7 +484,7 @@ mod tests { let fast_field_reader = open::(data)?; for a in 0..n { - assert_eq!(fast_field_reader.get_val(a as u64), permutation[a as usize]); + assert_eq!(fast_field_reader.get_val(a as u32), permutation[a as usize]); } } Ok(()) @@ -976,7 +976,7 @@ mod tests { let test_fastfield = open::(file.read_bytes()?)?; for (i, time) in times.iter().enumerate() { - assert_eq!(test_fastfield.get_val(i as u64), time.truncate(precision)); + assert_eq!(test_fastfield.get_val(i as u32), time.truncate(precision)); } Ok(len) } diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index c625a2e76..0437ef491 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -515,7 +515,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -573,7 +573,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -606,7 +606,7 @@ mod bench { for val in block { doc.add_u64(field, *val); } - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), Some(&doc_id_mapping)) diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index e1f81b28c..59fda70b6 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -33,19 +33,19 @@ impl MultiValuedFastFieldReader { /// Returns `[start, end)`, such that the values associated with /// the given document are `start..end`. 
#[inline] - fn range(&self, doc: DocId) -> Range { - let idx = doc as u64; - let start = self.idx_reader.get_val(idx); - let end = self.idx_reader.get_val(idx + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } /// Returns the array of values associated with the given `doc`. #[inline] - fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { + fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { let len = (range.end - range.start) as usize; vals.resize(len, Item::make_zero()); - self.vals_reader.get_range(range.start, &mut vals[..]); + self.vals_reader + .get_range(range.start as u64, &mut vals[..]); } /// Returns the array of values associated with the given `doc`. @@ -88,7 +88,7 @@ impl MultiValuedFastFieldReader { } impl MultiValueLength for MultiValuedFastFieldReader { - fn get_range(&self, doc_id: DocId) -> Range { + fn get_range(&self, doc_id: DocId) -> Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { @@ -127,9 +127,9 @@ impl MultiValuedU128FastFieldReader { /// Returns `[start, end)`, such that the values associated /// to the given document are `start..end`. #[inline] - fn range(&self, doc: DocId) -> Range { - let start = self.idx_reader.get_val(doc as u64); - let end = self.idx_reader.get_val(doc as u64 + 1); + fn range(&self, doc: DocId) -> Range { + let start = self.idx_reader.get_val(doc) as u32; + let end = self.idx_reader.get_val(doc + 1) as u32; start..end } @@ -145,10 +145,11 @@ impl MultiValuedU128FastFieldReader { /// Returns the array of values associated to the given `doc`. #[inline] - fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { + fn get_vals_for_range(&self, range: Range, vals: &mut Vec) { let len = (range.end - range.start) as usize; vals.resize(len, T::from_u128(0)); - self.vals_reader.get_range(range.start, &mut vals[..]); + self.vals_reader + .get_range(range.start as u64, &mut vals[..]); } /// Returns the array of values associated to the given `doc`. 
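
The multi-valued readers follow the same start-index pattern: `idx_reader` holds one start offset per document (plus a final sentinel) into `vals_reader`, and after this change the per-document positions are expressed as a `Range<u32>` while `get_range` still takes a `u64` start. A small sketch of that retrieval path, with a hypothetical `MultiValuedSketch` standing in for the real reader:

```rust
use std::ops::Range;

// Hypothetical simplified multi-valued fast field (not the tantivy reader):
// the values of `doc` live at vals[idx[doc]..idx[doc + 1]].
struct MultiValuedSketch {
    idx: Vec<u64>,  // num_docs + 1 entries
    vals: Vec<u64>, // all values, concatenated in doc order
}

impl MultiValuedSketch {
    fn range(&self, doc: u32) -> Range<u32> {
        let start = self.idx[doc as usize] as u32;
        let end = self.idx[doc as usize + 1] as u32;
        start..end
    }

    // Mirrors get_vals_for_range: size the output from the u32 range, then
    // copy the values starting at that position.
    fn get_vals(&self, doc: u32, out: &mut Vec<u64>) {
        let range = self.range(doc);
        let len = (range.end - range.start) as usize;
        out.resize(len, 0);
        out.copy_from_slice(&self.vals[range.start as usize..range.end as usize]);
    }
}

fn main() {
    let ff = MultiValuedSketch {
        idx: vec![0, 2, 2, 5],
        vals: vec![10, 11, 20, 21, 22],
    };
    let mut out = Vec::new();
    ff.get_vals(0, &mut out);
    assert_eq!(out, vec![10, 11]);
    ff.get_vals(2, &mut out);
    assert_eq!(out, vec![20, 21, 22]);
    ff.get_vals(1, &mut out);
    assert!(out.is_empty());
}
```
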
@@ -209,7 +210,7 @@ impl MultiValuedU128FastFieldReader { } impl MultiValueLength for MultiValuedU128FastFieldReader { - fn get_range(&self, doc_id: DocId) -> std::ops::Range { + fn get_range(&self, doc_id: DocId) -> std::ops::Range { self.range(doc_id) } fn get_len(&self, doc_id: DocId) -> u64 { @@ -236,7 +237,7 @@ fn positions_to_docids(positions: &[u32], idx_reader: &C) -> for pos in positions { loop { - let end = idx_reader.get_val(cur_doc as u64 + 1) as u32; + let end = idx_reader.get_val(cur_doc + 1) as u32; if end > *pos { // avoid duplicates if Some(cur_doc) == last_doc { diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 446f21004..fa248b5e1 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -232,7 +232,7 @@ impl<'a, C: Column> MultivalueStartIndex<'a, C> { } } impl<'a, C: Column> Column for MultivalueStartIndex<'a, C> { - fn get_val(&self, _idx: u64) -> u64 { + fn get_val(&self, _idx: u32) -> u64 { unimplemented!() } @@ -262,7 +262,7 @@ fn iter_remapped_multivalue_index<'a, C: Column>( ) -> impl Iterator + 'a { let mut offset = 0; std::iter::once(0).chain(doc_id_map.iter_old_doc_ids().map(move |old_doc| { - let num_vals_for_doc = column.get_val(old_doc as u64 + 1) - column.get_val(old_doc as u64); + let num_vals_for_doc = column.get_val(old_doc + 1) - column.get_val(old_doc); offset += num_vals_for_doc; offset as u64 })) diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index f12027ad1..82dccf09a 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -538,7 +538,7 @@ impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> { /// # Panics /// /// May panic if `doc` is greater than the index. - fn get_val(&self, _doc: u64) -> u64 { + fn get_val(&self, _doc: u32) -> u64 { unimplemented!() } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index ab1c8efe6..272300549 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1472,7 +1472,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc as u64)) + .map(|doc| fast_field_reader.get_val(doc)) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]); Ok(()) @@ -1533,7 +1533,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc as u64)) + .map(|doc| fast_field_reader.get_val(doc)) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 2, 0]); Ok(()) @@ -1760,7 +1760,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc as u64)) + .map(move |doc| ff_reader.get_val(doc)) }) .collect(); @@ -1771,7 +1771,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc as u64)) + .map(move |doc| ff_reader.get_val(doc)) }) .collect(); @@ -1804,7 +1804,7 @@ mod tests { .flat_map(|segment_reader| { let ff_reader = segment_reader.fast_fields().ip_addr(ip_field).unwrap(); segment_reader.doc_ids_alive().flat_map(move |doc| { - let val = ff_reader.get_val(doc as u64); + let val = ff_reader.get_val(doc); if val == Ipv6Addr::from_u128(0) { // TODO Fix null handling None @@ -1861,7 +1861,7 
@@ mod tests { ff_reader.get_vals(doc, &mut vals); assert_eq!(vals.len(), 2); assert_eq!(vals[0], vals[1]); - assert_eq!(id_reader.get_val(doc as u64), vals[0]); + assert_eq!(id_reader.get_val(doc), vals[0]); let mut bool_vals = vec![]; bool_ff_reader.get_vals(doc, &mut bool_vals); @@ -2012,7 +2012,7 @@ mod tests { facet_reader .facet_from_ord(facet_ords[0], &mut facet) .unwrap(); - let id = ff_reader.get_val(doc_id as u64); + let id = ff_reader.get_val(doc_id); let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); assert_eq!(facet, facet_expected); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index bfe535c87..26ec45106 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -397,7 +397,7 @@ impl IndexMerger { let iter_gen = || { doc_id_mapping.iter_old_doc_addrs().map(|doc_addr| { let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize]; - fast_field_reader.get_val(doc_addr.doc_id as u64) + fast_field_reader.get_val(doc_addr.doc_id) }) }; fast_field_serializer.create_u128_fast_field_with_idx( @@ -510,8 +510,8 @@ impl IndexMerger { doc_id_reader_pair .into_iter() .kmerge_by(|a, b| { - let val1 = a.2.get_val(a.0 as u64); - let val2 = b.2.get_val(b.0 as u64); + let val1 = a.2.get_val(a.0); + let val2 = b.2.get_val(b.0); if sort_by_field.order == Order::Asc { val1 < val2 } else { diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 127ad192c..ba41e62f0 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -190,13 +190,13 @@ mod tests { assert_eq!(fast_field.get_val(4), 2u64); assert_eq!(fast_field.get_val(3), 3u64); if force_disjunct_segment_sort_values { - assert_eq!(fast_field.get_val(2u64), 20u64); - assert_eq!(fast_field.get_val(1u64), 100u64); + assert_eq!(fast_field.get_val(2), 20u64); + assert_eq!(fast_field.get_val(1), 100u64); } else { - assert_eq!(fast_field.get_val(2u64), 10u64); - assert_eq!(fast_field.get_val(1u64), 20u64); + assert_eq!(fast_field.get_val(2), 10u64); + assert_eq!(fast_field.get_val(1), 20u64); } - assert_eq!(fast_field.get_val(0u64), 1_000u64); + assert_eq!(fast_field.get_val(0), 1_000u64); // test new field norm mapping { @@ -545,7 +545,7 @@ mod bench_sorted_index_merge { // add values in order of the new doc_ids let mut val = 0; for (doc_id, _reader, field_reader) in sorted_doc_ids { - val = field_reader.get_val(doc_id as u64); + val = field_reader.get_val(doc_id); } val diff --git a/src/indexer/sorted_doc_id_column.rs b/src/indexer/sorted_doc_id_column.rs index 3d5b36059..75665bab0 100644 --- a/src/indexer/sorted_doc_id_column.rs +++ b/src/indexer/sorted_doc_id_column.rs @@ -32,7 +32,7 @@ fn compute_min_max_val( // we need to recompute the max / min segment_reader .doc_ids_alive() - .map(|doc_id| u64_reader.get_val(doc_id as u64)) + .map(|doc_id| u64_reader.get_val(doc_id)) .minmax() .into_option() } @@ -79,7 +79,7 @@ impl<'a> RemappedDocIdColumn<'a> { } impl<'a> Column for RemappedDocIdColumn<'a> { - fn get_val(&self, _doc: u64) -> u64 { + fn get_val(&self, _doc: u32) -> u64 { unimplemented!() } @@ -90,7 +90,7 @@ impl<'a> Column for RemappedDocIdColumn<'a> { .map(|old_doc_addr| { let fast_field_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; - fast_field_reader.get_val(old_doc_addr.doc_id as u64) + fast_field_reader.get_val(old_doc_addr.doc_id) }), ) } diff --git a/src/indexer/sorted_doc_id_multivalue_column.rs b/src/indexer/sorted_doc_id_multivalue_column.rs index 
650043f60..1886a69b1 100644 --- a/src/indexer/sorted_doc_id_multivalue_column.rs +++ b/src/indexer/sorted_doc_id_multivalue_column.rs @@ -67,7 +67,7 @@ impl<'a> RemappedDocIdMultiValueColumn<'a> { } impl<'a> Column for RemappedDocIdMultiValueColumn<'a> { - fn get_val(&self, _pos: u64) -> u64 { + fn get_val(&self, _pos: u32) -> u64 { unimplemented!() } @@ -137,7 +137,7 @@ impl<'a, T: MultiValueLength> RemappedDocIdMultiValueIndexColumn<'a, T> { } impl<'a, T: MultiValueLength + Send + Sync> Column for RemappedDocIdMultiValueIndexColumn<'a, T> { - fn get_val(&self, _pos: u64) -> u64 { + fn get_val(&self, _pos: u32) -> u64 { unimplemented!() } diff --git a/src/query/range_query_ip_fastfield.rs b/src/query/range_query_ip_fastfield.rs index eeb64540c..8a9170b6c 100644 --- a/src/query/range_query_ip_fastfield.rs +++ b/src/query/range_query_ip_fastfield.rs @@ -222,8 +222,8 @@ mod tests { fn operation_strategy() -> impl Strategy { prop_oneof![ - (0u64..100u64).prop_map(|id| doc_from_id_1(id)), - (1u64..100u64).prop_map(|id| doc_from_id_2(id)), + (0u64..100u64).prop_map(doc_from_id_1), + (1u64..100u64).prop_map(doc_from_id_2), ] }
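
Taken together, the call sites above all simplify the same way: tantivy's `DocId` is already a `u32`, so the `doc as u64` casts at `get_val` call sites drop out once column positions are addressed with `u32`. A minimal sketch of the new shape of the API, using a cut-down `Column` trait rather than the full tantivy one:

```rust
// Simplified version of the Column trait after this change: values are still
// u64, but positions/doc ids are addressed with u32.
trait Column: Send + Sync {
    fn get_val(&self, idx: u32) -> u64;
    fn num_vals(&self) -> u32;

    // Default iteration in terms of get_val, mirroring the trait's iter().
    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = u64> + 'a> {
        Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
    }
}

struct VecColumnSketch(Vec<u64>);

impl Column for VecColumnSketch {
    fn get_val(&self, idx: u32) -> u64 {
        self.0[idx as usize]
    }
    fn num_vals(&self) -> u32 {
        self.0.len() as u32
    }
}

fn main() {
    type DocId = u32; // tantivy's DocId is a u32, which is what motivates the change
    let column = VecColumnSketch(vec![100, 200, 300]);
    let doc: DocId = 1;
    // No `doc as u64` cast needed at the call site anymore.
    assert_eq!(column.get_val(doc), 200);
    let sum: u64 = column.iter().sum();
    assert_eq!(sum, 600);
}
```
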