Added column handle

This commit is contained in:
Paul Masurel
2022-12-27 15:27:50 +09:00
parent c57fa3f746
commit 44daa749a5
5 changed files with 71 additions and 24 deletions

View File

@@ -9,6 +9,7 @@ pub use column_type_header::Cardinality;
pub use reader::ColumnarReader; pub use reader::ColumnarReader;
pub use value::{NumericalType, NumericalValue}; pub use value::{NumericalType, NumericalValue};
pub use writer::ColumnarWriter; pub use writer::ColumnarWriter;
pub use reader::ColumnHandle;
pub type DocId = u32; pub type DocId = u32;
@@ -17,12 +18,9 @@ pub struct InvalidData;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::ops::Range;
use common::file_slice::FileSlice; use common::file_slice::FileSlice;
use crate::column_type_header::ColumnType;
use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality}; use crate::reader::{ColumnarReader, ColumnHandle};
use crate::reader::ColumnarReader;
use crate::value::NumericalValue; use crate::value::NumericalValue;
use crate::{Cardinality, ColumnarWriter}; use crate::{Cardinality, ColumnarWriter};
@@ -36,10 +34,10 @@ mod tests {
let columnar_fileslice = FileSlice::from(buffer); let columnar_fileslice = FileSlice::from(buffer);
let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
assert_eq!(columnar.num_columns(), 1); assert_eq!(columnar.num_columns(), 1);
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> = let cols: Vec<ColumnHandle> =
columnar.read_columns("my_string").unwrap(); columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1); assert_eq!(cols.len(), 1);
assert_eq!(cols[0].1, 0..158); assert_eq!(cols[0].num_bytes(), 158);
} }
#[test] #[test]
@@ -51,17 +49,21 @@ mod tests {
let columnar_fileslice = FileSlice::from(buffer); let columnar_fileslice = FileSlice::from(buffer);
let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
assert_eq!(columnar.num_columns(), 1); assert_eq!(columnar.num_columns(), 1);
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> = let cols: Vec<ColumnHandle> =
columnar.read_columns("bool.value").unwrap(); columnar.read_columns("bool.value").unwrap();
assert_eq!(cols.len(), 1); assert_eq!(cols.len(), 1);
let col = cols.into_iter().next().unwrap();
assert_eq!( assert_eq!(
cols[0].0, col.column_type(),
ColumnTypeAndCardinality { ColumnType::Bool
cardinality: Cardinality::Optional, );
typ: ColumnType::Bool assert_eq!(
} col.cardinality(),
Cardinality::Optional);
assert_eq!(
col.column_name(),
"bool.value"
); );
assert_eq!(cols[0].1, 0..21);
} }
#[test] #[test]
@@ -75,7 +77,7 @@ mod tests {
let columnar_fileslice = FileSlice::from(buffer); let columnar_fileslice = FileSlice::from(buffer);
let columnar = ColumnarReader::open(columnar_fileslice).unwrap(); let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
assert_eq!(columnar.num_columns(), 1); assert_eq!(columnar.num_columns(), 1);
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> = let cols: Vec<ColumnHandle> =
columnar.read_columns("srical.value").unwrap(); columnar.read_columns("srical.value").unwrap();
assert_eq!(cols.len(), 1); assert_eq!(cols.len(), 1);
// Right now this 31 bytes are spent as follows // Right now this 31 bytes are spent as follows
@@ -84,6 +86,6 @@ mod tests {
// - vals 8 //< due to padding? could have been 1byte?. // - vals 8 //< due to padding? could have been 1byte?.
// - null footer 6 bytes // - null footer 6 bytes
// - version footer 3 bytes // Should be file-wide // - version footer 3 bytes // Should be file-wide
assert_eq!(cols[0].1, 0..31); assert_eq!(cols[0].num_bytes(), 31);
} }
} }

View File

@@ -0,0 +1,42 @@
use common::HasLen;
use common::file_slice::FileSlice;
use crate::Cardinality;
use crate::column_type_header::ColumnType;
/// Handle on a single column.
///
/// Bundles the column's name with the `FileSlice` holding its data, together
/// with the column's `ColumnType` and `Cardinality`. All fields are private
/// and exposed through the read-only accessors of the `impl` block.
pub struct ColumnHandle {
/// Name of the column. Mostly for debug and display.
column_name: String,
/// Slice holding this column's data; its length is what `num_bytes` reports.
data: FileSlice,
/// Type of the column.
column_type: ColumnType,
/// Cardinality of the column.
cardinality: Cardinality,
}
impl ColumnHandle {
pub fn new(column_name: String, data: FileSlice, column_type: ColumnType, cardinality: Cardinality) -> Self {
ColumnHandle {
column_name,
data,
column_type,
cardinality,
}
}
pub fn column_name(&self) -> &str {
self.column_name.as_str()
}
pub fn num_bytes(&self) -> usize {
self.data.len()
}
pub fn column_type(&self) -> ColumnType {
self.column_type
}
pub fn cardinality(&self) -> Cardinality {
self.cardinality
}
}

View File

@@ -1,3 +1,5 @@
mod column_handle;
use std::ops::Range; use std::ops::Range;
use std::{io, mem}; use std::{io, mem};
@@ -6,6 +8,7 @@ use common::BinarySerializable;
use sstable::{Dictionary, RangeSSTable}; use sstable::{Dictionary, RangeSSTable};
use crate::column_type_header::ColumnTypeAndCardinality; use crate::column_type_header::ColumnTypeAndCardinality;
pub use crate::reader::column_handle::ColumnHandle;
fn io_invalid_data(msg: String) -> io::Error { fn io_invalid_data(msg: String) -> io::Error {
io::Error::new(io::ErrorKind::InvalidData, msg) io::Error::new(io::ErrorKind::InvalidData, msg)
@@ -72,7 +75,7 @@ impl ColumnarReader {
pub fn read_columns( pub fn read_columns(
&self, &self,
column_name: &str, column_name: &str,
) -> io::Result<Vec<(ColumnTypeAndCardinality, Range<u64>)>> { ) -> io::Result<Vec<ColumnHandle>> {
// Each column is a associated to a given `column_key`, // Each column is a associated to a given `column_key`,
// that starts by `column_name\0column_header`. // that starts by `column_name\0column_header`.
// //
@@ -91,15 +94,17 @@ impl ColumnarReader {
.ge(start_key.as_bytes()) .ge(start_key.as_bytes())
.lt(end_key.as_bytes()) .lt(end_key.as_bytes())
.into_stream()?; .into_stream()?;
let mut results = Vec::new(); let mut results: Vec<ColumnHandle> = Vec::new();
while stream.advance() { while stream.advance() {
let key_bytes: &[u8] = stream.key(); let key_bytes: &[u8] = stream.key();
assert!(key_bytes.starts_with(start_key.as_bytes())); assert!(key_bytes.starts_with(start_key.as_bytes()));
let column_code: u8 = key_bytes.last().cloned().unwrap(); let column_code: u8 = key_bytes.last().cloned().unwrap();
let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code) let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code)
.map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?; .map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
let range = stream.value().clone(); let Range { start, end } = stream.value().clone();
results.push((column_type_and_cardinality, range)); let column_data = self.column_data.slice(start as usize..end as usize);
let column_handle = ColumnHandle::new(column_name.to_string(), column_data, column_type_and_cardinality.typ, column_type_and_cardinality.cardinality);
results.push(column_handle);
} }
Ok(results) Ok(results)
} }

View File

@@ -253,8 +253,7 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use super::{FileHandle, FileSlice}; use super::{FileHandle, FileSlice};
use crate::file_slice::combine_ranges; use crate::{file_slice::combine_ranges, HasLen};
use crate::HasLen;
#[test] #[test]
fn test_file_slice() -> io::Result<()> { fn test_file_slice() -> io::Result<()> {

View File

@@ -42,8 +42,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
positions: &mut Vec<u32>, positions: &mut Vec<u32>,
) { ) {
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
for idx in doc_id_range {
for idx in doc_id_range.start..doc_id_range.end {
let val = self.get_val(idx); let val = self.get_val(idx);
if value_range.contains(&val) { if value_range.contains(&val) {
positions.push(idx); positions.push(idx);