serialize ip as u128, add test for positions_to_docid

This commit is contained in:
Pascal Seitz
2022-10-06 15:51:11 +08:00
parent e50e74acf8
commit 5171ff611b
3 changed files with 57 additions and 12 deletions

View File

@@ -107,6 +107,19 @@ impl FixedSize for u64 {
const SIZE_IN_BYTES: usize = 8;
}
/// Binary (de)serialization of `u128` as a single 128-bit integer, using the
/// byte order selected by `Endianness`.
impl BinarySerializable for u128 {
    /// Writes the value to `writer` as one 128-bit integer.
    ///
    /// # Errors
    /// Propagates any I/O error reported by the underlying writer.
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let value: u128 = *self;
        writer.write_u128::<Endianness>(value)
    }

    /// Reads one 128-bit integer from `reader`.
    ///
    /// # Errors
    /// Propagates any I/O error reported by the underlying reader.
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let value = reader.read_u128::<Endianness>()?;
        Ok(value)
    }
}
/// Declares the fixed byte width associated with `u128`.
impl FixedSize for u128 {
    /// Width of a `u128` in bytes (16), derived from the type itself.
    const SIZE_IN_BYTES: usize = std::mem::size_of::<u128>();
}
impl BinarySerializable for f32 {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_f32::<Endianness>(*self)

View File

@@ -220,8 +220,8 @@ impl<T: MonotonicallyMappableToU128> MultiValueLength for MultiValuedU128FastFie
///
/// Correctness: positions needs to be sorted.
///
/// TODO: Instead of a linear scan we can employ a binary search to match a docid to its value
/// position.
/// TODO: Instead of a linear scan we can employ an exponential search followed by a binary search
/// to match a docid to its value position.
fn positions_to_docids<T: MultiValueLength>(positions: &[u64], multival_idx: &T) -> Vec<DocId> {
let mut docs = vec![];
let mut cur_doc = 0u32;
@@ -250,8 +250,42 @@ fn positions_to_docids<T: MultiValueLength>(positions: &[u64], multival_idx: &T)
mod tests {
use crate::core::Index;
use crate::fastfield::multivalued::reader::positions_to_docids;
use crate::fastfield::MultiValueLength;
use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
#[test]
fn test_positions_to_docid() {
    /// Minimal `MultiValueLength` stand-in backed by a plain offsets table:
    /// doc `d` owns the value positions `offsets[d]..offsets[d + 1]`.
    struct OffsetTable {
        offsets: Vec<u64>,
    }

    impl MultiValueLength for OffsetTable {
        fn get_range(&self, doc_id: crate::DocId) -> std::ops::Range<u64> {
            let doc = doc_id as usize;
            self.offsets[doc]..self.offsets[doc + 1]
        }

        // Stubbed: calling this panics.
        fn get_len(&self, _doc_id: crate::DocId) -> u64 {
            todo!()
        }

        // Stubbed: calling this panics.
        fn get_total_len(&self) -> u64 {
            todo!()
        }
    }

    // Doc ranges: 0 -> 0..10, 1 -> 10..12, 2 -> 12..15, 3 -> 15..22, 4 -> 22..23.
    let multival_idx = OffsetTable {
        offsets: vec![0, 10, 12, 15, 22, 23],
    };
    // Sorted value positions, several of which fall into the same document.
    let positions: Vec<u64> = vec![10, 11, 15, 20, 21, 22];

    let docids = positions_to_docids(&positions, &multival_idx);

    // 10 and 11 belong to doc 1; 15, 20 and 21 to doc 3; 22 to doc 4.
    assert_eq!(docids, vec![1, 3, 4]);
}
#[test]
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();

View File

@@ -33,7 +33,7 @@ pub enum Value {
Bytes(Vec<u8>),
/// Json object value.
JsonObject(serde_json::Map<String, serde_json::Value>),
/// Ip
/// IP address value
IpAddr(IpAddr),
}
@@ -53,7 +53,7 @@ impl Serialize for Value {
Value::Facet(ref facet) => facet.serialize(serializer),
Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
Value::JsonObject(ref obj) => obj.serialize(serializer),
Value::IpAddr(ref obj) => obj.serialize(serializer), // TODO check serialization
Value::IpAddr(ref obj) => obj.serialize(serializer),
}
}
}
@@ -307,11 +307,11 @@ impl From<serde_json::Value> for Value {
}
mod binary_serialize {
use std::io::{self, ErrorKind, Read, Write};
use std::io::{self, Read, Write};
use std::net::IpAddr;
use std::str::FromStr;
use common::{f64_to_u64, u64_to_f64, BinarySerializable};
use fastfield_codecs::MonotonicallyMappableToU128;
use super::Value;
use crate::schema::Facet;
@@ -391,7 +391,7 @@ mod binary_serialize {
}
Value::IpAddr(ref ip) => {
IP_CODE.serialize(writer)?;
ip.to_string().serialize(writer) // TODO Check best format
ip.to_u128().serialize(writer)
}
}
}
@@ -445,7 +445,7 @@ mod binary_serialize {
_ => Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"No extened field type is associated with code {:?}",
"No extended field type is associated with code {:?}",
ext_type_code
),
)),
@@ -464,10 +464,8 @@ mod binary_serialize {
Ok(Value::JsonObject(json_map))
}
IP_CODE => {
let text = String::deserialize(reader)?;
Ok(Value::IpAddr(IpAddr::from_str(&text).map_err(|err| {
io::Error::new(ErrorKind::Other, err.to_string())
})?))
let value = u128::deserialize(reader)?;
Ok(Value::IpAddr(IpAddr::from_u128(value)))
}
_ => Err(io::Error::new(