Compare commits

..

2 Commits

Author SHA1 Message Date
Paul Masurel
937485321a Rebase and fixing unit test 2023-01-18 10:15:16 +09:00
Pascal Seitz
e3be14e7d8 start migrate Field to &str
start migrate Field to &str in preparation of columnar
return Result for get_field
2023-01-18 10:02:19 +09:00
5 changed files with 4 additions and 37 deletions

View File

@@ -11,7 +11,6 @@ pub enum ColumnType {
Bytes,
Numerical(NumericalType),
Bool,
IpAddr,
}
impl ColumnType {

View File

@@ -1,5 +1,3 @@
use std::net::Ipv6Addr;
use crate::dictionary::UnorderedId;
use crate::utils::{place_bits, pop_first_byte, select_bits};
use crate::value::NumericalValue;
@@ -144,18 +142,6 @@ impl SymbolValue for bool {
}
}
/// `SymbolValue` implementation for IPv6 addresses.
///
/// Both methods are still unimplemented placeholders: calling either one panics via `todo!()`.
impl SymbolValue for Ipv6Addr {
/// Placeholder: currently panics via `todo!()` instead of writing anything into `buffer`.
fn serialize(self, buffer: &mut [u8]) -> u8 {
// Maybe not useful to use VIntEncoding for the moment since we only use it for IP addr.
// We could roll our own RLE compression but it is overkill. Let's stick to 8 bytes.
// NOTE(review): an `Ipv6Addr` is 16 bytes wide, so "8 bytes" looks inconsistent — confirm.
todo!();
}
/// Placeholder: currently panics via `todo!()` instead of decoding `bytes`.
fn deserialize(bytes: &[u8]) -> Self {
todo!();
}
}
#[derive(Default)]
struct MiniBuffer {
pub bytes: [u8; 10],

View File

@@ -4,7 +4,6 @@ mod serializer;
mod value_index;
use std::io;
use std::net::Ipv6Addr;
use column_operation::ColumnOperation;
use common::CountingWriter;
@@ -49,7 +48,6 @@ struct SpareBuffers {
pub struct ColumnarWriter {
numerical_field_hash_map: ArenaHashMap,
bool_field_hash_map: ArenaHashMap,
ip_addr_field_hash_map: ArenaHashMap,
bytes_field_hash_map: ArenaHashMap,
arena: MemoryArena,
// Dictionaries used to store dictionary-encoded values.
@@ -92,22 +90,6 @@ impl ColumnarWriter {
);
}
/// Records `ip_addr` as the value of column `column_name` for row `doc`.
///
/// The column writer stored under `column_name` in the IP-address hash map is
/// fetched, or default-constructed when there is none yet, updated with the
/// new value, and stored back.
///
/// # Panics
///
/// Panics if `column_name` contains a `0` byte.
pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_addr: Ipv6Addr) {
    let key = column_name.as_bytes();
    assert!(!key.contains(&0u8), "key may not contain the 0 byte");
    // Borrow the arena separately from the hash map so the closure can use it
    // while the map itself is mutably borrowed.
    let arena = &mut self.arena;
    self.ip_addr_field_hash_map
        .mutate_or_create(key, |existing: Option<ColumnWriter>| {
            let mut writer: ColumnWriter = existing.unwrap_or_default();
            writer.record(doc, ip_addr, arena);
            writer
        });
}
pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
assert!(
!column_name.as_bytes().contains(&0u8),

View File

@@ -1014,7 +1014,7 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment = &searcher.segment_readers()[0];
let field = segment.fast_fields().u64(num_field).unwrap();
let field = segment.fast_fields().u64("url_norm_hash").unwrap();
let numbers = vec![100, 200, 300];
let test_range = |range: RangeInclusive<u64>| {
@@ -1063,7 +1063,7 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment = &searcher.segment_readers()[0];
let field = segment.fast_fields().u64(num_field).unwrap();
let field = segment.fast_fields().u64("url_norm_hash").unwrap();
let numbers = vec![1000, 1001, 1003];
let test_range = |range: RangeInclusive<u64>| {

View File

@@ -159,7 +159,7 @@ mod tests {
let searcher = reader.searcher();
let reader = searcher.segment_reader(0);
let date_ff_reader = reader.fast_fields().dates(date_field).unwrap();
let date_ff_reader = reader.fast_fields().dates("multi_date_field").unwrap();
let mut docids = vec![];
date_ff_reader.get_docids_for_value_range(
DateTime::from_utc(first_time_stamp)..=DateTime::from_utc(two_secs_ahead),
@@ -173,7 +173,7 @@ mod tests {
assert_eq!(
count_multiples(RangeQuery::new_date(
date_field,
"multi_date_field".to_string(),
DateTime::from_utc(first_time_stamp)..DateTime::from_utc(two_secs_ahead)
)),
1