Moved GCD fast field codec. Partial.

This commit is contained in:
Paul Masurel
2022-08-20 19:38:09 +02:00
parent c71169b6e0
commit 6602786db8
7 changed files with 41 additions and 14 deletions

View File

@@ -60,7 +60,6 @@ pretty_assertions = "1.2.1"
serde_cbor = { version = "0.11.2", optional = true }
async-trait = "0.1.53"
arc-swap = "1.5.0"
gcd = "2.1.0"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"

View File

@@ -14,6 +14,7 @@ tantivy-bitpacker = { version="0.2", path = "../bitpacker/" }
ownedbytes = { version = "0.3.0", path = "../ownedbytes" }
prettytable-rs = {version="0.9.0", optional= true}
rand = {version="0.8.3", optional= true}
fastdivide = "0.4"
[dev-dependencies]
more-asserts = "0.3.0"

View File

@@ -2,10 +2,10 @@ use std::io::{self, Write};
use common::BinarySerializable;
use fastdivide::DividerU64;
use fastfield_codecs::FastFieldCodecReader;
use gcd::Gcd;
use ownedbytes::OwnedBytes;
use crate::FastFieldCodecReader;
pub const GCD_DEFAULT: u64 = 1;
pub const GCD_CODEC_ID: u8 = 4;
@@ -56,6 +56,13 @@ pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64)
Ok(())
}
/// Computes the greatest common divisor of `left` and `right` with the
/// Euclidean algorithm.
///
/// If one operand is zero, the other operand is returned; in particular
/// `compute_gcd(0, 0)` is `0`.
fn compute_gcd(mut left: u64, mut right: u64) -> u64 {
    loop {
        // Once the remainder chain hits zero, `left` holds the GCD.
        if right == 0 {
            return left;
        }
        let remainder = left % right;
        left = right;
        right = remainder;
    }
}
// Find GCD for iterator of numbers
pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
let mut numbers = numbers.filter(|n| *n != 0);
@@ -70,7 +77,7 @@ pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
if remainder == 0 {
continue;
}
gcd = gcd.gcd(val);
gcd = compute_gcd(gcd, val);
if gcd == 1 {
return Some(1);
}
@@ -82,15 +89,17 @@ pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
#[cfg(test)]
mod tests {
/*
TODO Move test
use std::collections::HashMap;
use std::path::Path;
use common::HasLen;
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
use crate::fastfield::serializer::FastFieldCodecEnableCheck;
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
use crate::fastfield::{
use super::{
find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecName,
FastFieldReader, FastFieldsWriter, ALL_CODECS,
};
@@ -211,6 +220,24 @@ mod tests {
assert_eq!(test_fastfield.get(1), 200);
assert_eq!(test_fastfield.get(2), 300);
}
*/
use crate::gcd::compute_gcd;
use crate::gcd::find_gcd;
/// Checks `compute_gcd` on zero operands, co-prime operands, equal operands,
/// and both argument orders.
#[test]
fn test_compute_gcd() {
    // Each entry is (left, right, expected gcd).
    let cases: [(u64, u64, u64); 9] = [
        (0, 0, 0),
        (4, 0, 4),
        (0, 4, 4),
        (1, 4, 1),
        (4, 1, 1),
        (4, 2, 2),
        (10, 25, 5),
        (25, 10, 5),
        (25, 25, 25),
    ];
    for (left, right, expected) in cases {
        assert_eq!(compute_gcd(left, right), expected);
    }
}
#[test]
fn find_gcd_test() {
@@ -221,5 +248,6 @@ mod tests {
assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), Some(5));
assert_eq!(find_gcd([15, 16, 10].into_iter()), Some(1));
assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), Some(5));
// `find_gcd` filters out zeros before computing anything, so an all-zero
// input leaves no value to derive a GCD from and must not yield `Some(0)`.
assert_eq!(find_gcd([0, 0].into_iter()), None);
}
}

View File

@@ -8,6 +8,7 @@ use std::io::Write;
use ownedbytes::OwnedBytes;
pub mod bitpacked;
pub mod gcd;
pub mod linearinterpol;
pub mod multilinearinterpol;

View File

@@ -24,7 +24,6 @@ pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveB
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub(crate) use self::gcd::{find_gcd, GCDFastFieldCodec, GCD_CODEC_ID, GCD_DEFAULT};
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
pub use self::readers::FastFieldReaders;
@@ -38,7 +37,6 @@ mod alive_bitset;
mod bytes;
mod error;
mod facet_reader;
mod gcd;
mod multivalued;
mod reader;
mod readers;

View File

@@ -5,6 +5,7 @@ use std::path::Path;
use fastfield_codecs::bitpacked::{
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
};
use fastfield_codecs::gcd::{GCDFastFieldCodec, GCD_CODEC_ID};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
@@ -13,7 +14,7 @@ use fastfield_codecs::multilinearinterpol::{
};
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer};
use super::{FastValue, GCDFastFieldCodec, GCD_CODEC_ID};
use super::FastValue;
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
use crate::schema::{Schema, FAST};

View File

@@ -4,14 +4,13 @@ use common::{BinarySerializable, CountingWriter};
pub use fastfield_codecs::bitpacked::{
BitpackedFastFieldSerializer, BitpackedFastFieldSerializerLegacy,
};
use fastfield_codecs::gcd::{find_gcd, write_gcd_header, GCD_CODEC_ID, GCD_DEFAULT};
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
pub use fastfield_codecs::{FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use super::{find_gcd, FastFieldCodecName, ALL_CODECS, GCD_DEFAULT};
use super::{FastFieldCodecName, ALL_CODECS};
use crate::directory::{CompositeWrite, WritePtr};
use crate::fastfield::gcd::write_gcd_header;
use crate::fastfield::GCD_CODEC_ID;
use crate::schema::Field;
/// `CompositeFastFieldSerializer` is in charge of serializing
@@ -142,7 +141,7 @@ impl CompositeFastFieldSerializer {
let field_write = self.composite_write.for_field_with_idx(field, idx);
let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT);
if gcd == 1 {
if gcd <= 1 {
return Self::create_auto_detect_u64_fast_field_with_idx_gcd(
self.codec_enable_checker.clone(),
field,