diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 0e28de4da..509f5cb6c 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -9,6 +9,7 @@ pub use column_type_header::Cardinality; pub use reader::ColumnarReader; pub use value::{NumericalType, NumericalValue}; pub use writer::ColumnarWriter; +pub use reader::ColumnHandle; pub type DocId = u32; @@ -17,12 +18,9 @@ pub struct InvalidData; #[cfg(test)] mod tests { - use std::ops::Range; - use common::file_slice::FileSlice; - - use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality}; - use crate::reader::ColumnarReader; + use crate::column_type_header::ColumnType; + use crate::reader::{ColumnarReader, ColumnHandle}; use crate::value::NumericalValue; use crate::{Cardinality, ColumnarWriter}; @@ -36,10 +34,10 @@ mod tests { let columnar_fileslice = FileSlice::from(buffer); let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); assert_eq!(columnar.num_columns(), 1); - let cols: Vec<(ColumnTypeAndCardinality, Range)> = + let cols: Vec = columnar.read_columns("my_string").unwrap(); assert_eq!(cols.len(), 1); - assert_eq!(cols[0].1, 0..158); + assert_eq!(cols[0].num_bytes(), 158); } #[test] @@ -51,17 +49,21 @@ mod tests { let columnar_fileslice = FileSlice::from(buffer); let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); assert_eq!(columnar.num_columns(), 1); - let cols: Vec<(ColumnTypeAndCardinality, Range)> = + let cols: Vec = columnar.read_columns("bool.value").unwrap(); assert_eq!(cols.len(), 1); + let col = cols.into_iter().next().unwrap(); assert_eq!( - cols[0].0, - ColumnTypeAndCardinality { - cardinality: Cardinality::Optional, - typ: ColumnType::Bool - } + col.column_type(), + ColumnType::Bool + ); + assert_eq!( + col.cardinality(), + Cardinality::Optional); + assert_eq!( + col.column_name(), + "bool.value" ); - assert_eq!(cols[0].1, 0..21); } #[test] @@ -75,7 +77,7 @@ mod tests { let columnar_fileslice = FileSlice::from(buffer); let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); assert_eq!(columnar.num_columns(), 1); - let cols: Vec<(ColumnTypeAndCardinality, Range)> = + let cols: Vec = columnar.read_columns("srical.value").unwrap(); assert_eq!(cols.len(), 1); // Right now this 31 bytes are spent as follows @@ -84,6 +86,6 @@ mod tests { // - vals 8 //< due to padding? could have been 1byte?. // - null footer 6 bytes // - version footer 3 bytes // Should be file-wide - assert_eq!(cols[0].1, 0..31); + assert_eq!(cols[0].num_bytes(), 31); } } diff --git a/columnar/src/reader/column_handle.rs b/columnar/src/reader/column_handle.rs new file mode 100644 index 000000000..e1bba6565 --- /dev/null +++ b/columnar/src/reader/column_handle.rs @@ -0,0 +1,42 @@ +use common::HasLen; +use common::file_slice::FileSlice; + +use crate::Cardinality; +use crate::column_type_header::ColumnType; + + +pub struct ColumnHandle { + column_name: String, //< Mostly for debug and display. + data: FileSlice, + column_type: ColumnType, + cardinality: Cardinality, +} + +impl ColumnHandle { + pub fn new(column_name: String, data: FileSlice, column_type: ColumnType, cardinality: Cardinality) -> Self { + ColumnHandle { + column_name, + data, + column_type, + cardinality, + } + } + + pub fn column_name(&self) -> &str { + self.column_name.as_str() + } + + pub fn num_bytes(&self) -> usize { + self.data.len() + } + + pub fn column_type(&self) -> ColumnType { + self.column_type + } + + pub fn cardinality(&self) -> Cardinality { + self.cardinality + } +} + + diff --git a/columnar/src/reader/mod.rs b/columnar/src/reader/mod.rs index 2907ac805..c089c6b05 100644 --- a/columnar/src/reader/mod.rs +++ b/columnar/src/reader/mod.rs @@ -1,3 +1,5 @@ +mod column_handle; + use std::ops::Range; use std::{io, mem}; @@ -6,6 +8,7 @@ use common::BinarySerializable; use sstable::{Dictionary, RangeSSTable}; use crate::column_type_header::ColumnTypeAndCardinality; +pub use crate::reader::column_handle::ColumnHandle; fn io_invalid_data(msg: String) -> io::Error { io::Error::new(io::ErrorKind::InvalidData, msg) @@ -72,7 +75,7 @@ impl ColumnarReader { pub fn read_columns( &self, column_name: &str, - ) -> io::Result)>> { + ) -> io::Result> { // Each column is a associated to a given `column_key`, // that starts by `column_name\0column_header`. // @@ -91,15 +94,17 @@ impl ColumnarReader { .ge(start_key.as_bytes()) .lt(end_key.as_bytes()) .into_stream()?; - let mut results = Vec::new(); + let mut results: Vec = Vec::new(); while stream.advance() { let key_bytes: &[u8] = stream.key(); assert!(key_bytes.starts_with(start_key.as_bytes())); let column_code: u8 = key_bytes.last().cloned().unwrap(); let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code) .map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?; - let range = stream.value().clone(); - results.push((column_type_and_cardinality, range)); + let Range { start, end } = stream.value().clone(); + let column_data = self.column_data.slice(start as usize..end as usize); + let column_handle = ColumnHandle::new(column_name.to_string(), column_data, column_type_and_cardinality.typ, column_type_and_cardinality.cardinality); + results.push(column_handle); } Ok(results) } diff --git a/common/src/file_slice.rs b/common/src/file_slice.rs index ae4175d10..408f5ff0c 100644 --- a/common/src/file_slice.rs +++ b/common/src/file_slice.rs @@ -253,8 +253,7 @@ mod tests { use std::sync::Arc; use super::{FileHandle, FileSlice}; - use crate::file_slice::combine_ranges; - use crate::HasLen; + use crate::{file_slice::combine_ranges, HasLen}; #[test] fn test_file_slice() -> io::Result<()> { diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 33fee8af3..5910292b3 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -42,8 +42,7 @@ pub trait Column: Send + Sync { positions: &mut Vec, ) { let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); - - for idx in doc_id_range.start..doc_id_range.end { + for idx in doc_id_range { let val = self.get_val(idx); if value_range.contains(&val) { positions.push(idx);