mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Added column handle
This commit is contained in:
@@ -9,6 +9,7 @@ pub use column_type_header::Cardinality;
|
||||
pub use reader::ColumnarReader;
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
pub use writer::ColumnarWriter;
|
||||
pub use reader::ColumnHandle;
|
||||
|
||||
pub type DocId = u32;
|
||||
|
||||
@@ -17,12 +18,9 @@ pub struct InvalidData;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Range;
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
|
||||
use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality};
|
||||
use crate::reader::ColumnarReader;
|
||||
use crate::column_type_header::ColumnType;
|
||||
use crate::reader::{ColumnarReader, ColumnHandle};
|
||||
use crate::value::NumericalValue;
|
||||
use crate::{Cardinality, ColumnarWriter};
|
||||
|
||||
@@ -36,10 +34,10 @@ mod tests {
|
||||
let columnar_fileslice = FileSlice::from(buffer);
|
||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||
assert_eq!(columnar.num_columns(), 1);
|
||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
||||
let cols: Vec<ColumnHandle> =
|
||||
columnar.read_columns("my_string").unwrap();
|
||||
assert_eq!(cols.len(), 1);
|
||||
assert_eq!(cols[0].1, 0..158);
|
||||
assert_eq!(cols[0].num_bytes(), 158);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -51,17 +49,21 @@ mod tests {
|
||||
let columnar_fileslice = FileSlice::from(buffer);
|
||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||
assert_eq!(columnar.num_columns(), 1);
|
||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
||||
let cols: Vec<ColumnHandle> =
|
||||
columnar.read_columns("bool.value").unwrap();
|
||||
assert_eq!(cols.len(), 1);
|
||||
let col = cols.into_iter().next().unwrap();
|
||||
assert_eq!(
|
||||
cols[0].0,
|
||||
ColumnTypeAndCardinality {
|
||||
cardinality: Cardinality::Optional,
|
||||
typ: ColumnType::Bool
|
||||
}
|
||||
col.column_type(),
|
||||
ColumnType::Bool
|
||||
);
|
||||
assert_eq!(
|
||||
col.cardinality(),
|
||||
Cardinality::Optional);
|
||||
assert_eq!(
|
||||
col.column_name(),
|
||||
"bool.value"
|
||||
);
|
||||
assert_eq!(cols[0].1, 0..21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -75,7 +77,7 @@ mod tests {
|
||||
let columnar_fileslice = FileSlice::from(buffer);
|
||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||
assert_eq!(columnar.num_columns(), 1);
|
||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
||||
let cols: Vec<ColumnHandle> =
|
||||
columnar.read_columns("srical.value").unwrap();
|
||||
assert_eq!(cols.len(), 1);
|
||||
// Right now these 31 bytes are spent as follows
|
||||
@@ -84,6 +86,6 @@ mod tests {
|
||||
// - vals 8 //< due to padding? could have been 1byte?.
|
||||
// - null footer 6 bytes
|
||||
// - version footer 3 bytes // Should be file-wide
|
||||
assert_eq!(cols[0].1, 0..31);
|
||||
assert_eq!(cols[0].num_bytes(), 31);
|
||||
}
|
||||
}
|
||||
|
||||
42
columnar/src/reader/column_handle.rs
Normal file
42
columnar/src/reader/column_handle.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use common::HasLen;
|
||||
use common::file_slice::FileSlice;
|
||||
|
||||
use crate::Cardinality;
|
||||
use crate::column_type_header::ColumnType;
|
||||
|
||||
|
||||
pub struct ColumnHandle {
|
||||
column_name: String, //< Mostly for debug and display.
|
||||
data: FileSlice,
|
||||
column_type: ColumnType,
|
||||
cardinality: Cardinality,
|
||||
}
|
||||
|
||||
impl ColumnHandle {
|
||||
pub fn new(column_name: String, data: FileSlice, column_type: ColumnType, cardinality: Cardinality) -> Self {
|
||||
ColumnHandle {
|
||||
column_name,
|
||||
data,
|
||||
column_type,
|
||||
cardinality,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column_name(&self) -> &str {
|
||||
self.column_name.as_str()
|
||||
}
|
||||
|
||||
pub fn num_bytes(&self) -> usize {
|
||||
self.data.len()
|
||||
}
|
||||
|
||||
pub fn column_type(&self) -> ColumnType {
|
||||
self.column_type
|
||||
}
|
||||
|
||||
pub fn cardinality(&self) -> Cardinality {
|
||||
self.cardinality
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
mod column_handle;
|
||||
|
||||
use std::ops::Range;
|
||||
use std::{io, mem};
|
||||
|
||||
@@ -6,6 +8,7 @@ use common::BinarySerializable;
|
||||
use sstable::{Dictionary, RangeSSTable};
|
||||
|
||||
use crate::column_type_header::ColumnTypeAndCardinality;
|
||||
pub use crate::reader::column_handle::ColumnHandle;
|
||||
|
||||
fn io_invalid_data(msg: String) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::InvalidData, msg)
|
||||
@@ -72,7 +75,7 @@ impl ColumnarReader {
|
||||
pub fn read_columns(
|
||||
&self,
|
||||
column_name: &str,
|
||||
) -> io::Result<Vec<(ColumnTypeAndCardinality, Range<u64>)>> {
|
||||
) -> io::Result<Vec<ColumnHandle>> {
|
||||
// Each column is associated with a given `column_key`,
|
||||
// that starts by `column_name\0column_header`.
|
||||
//
|
||||
@@ -91,15 +94,17 @@ impl ColumnarReader {
|
||||
.ge(start_key.as_bytes())
|
||||
.lt(end_key.as_bytes())
|
||||
.into_stream()?;
|
||||
let mut results = Vec::new();
|
||||
let mut results: Vec<ColumnHandle> = Vec::new();
|
||||
while stream.advance() {
|
||||
let key_bytes: &[u8] = stream.key();
|
||||
assert!(key_bytes.starts_with(start_key.as_bytes()));
|
||||
let column_code: u8 = key_bytes.last().cloned().unwrap();
|
||||
let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code)
|
||||
.map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
|
||||
let range = stream.value().clone();
|
||||
results.push((column_type_and_cardinality, range));
|
||||
let Range { start, end } = stream.value().clone();
|
||||
let column_data = self.column_data.slice(start as usize..end as usize);
|
||||
let column_handle = ColumnHandle::new(column_name.to_string(), column_data, column_type_and_cardinality.typ, column_type_and_cardinality.cardinality);
|
||||
results.push(column_handle);
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
@@ -253,8 +253,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{FileHandle, FileSlice};
|
||||
use crate::file_slice::combine_ranges;
|
||||
use crate::HasLen;
|
||||
use crate::{file_slice::combine_ranges, HasLen};
|
||||
|
||||
#[test]
|
||||
fn test_file_slice() -> io::Result<()> {
|
||||
|
||||
@@ -42,8 +42,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
||||
|
||||
for idx in doc_id_range.start..doc_id_range.end {
|
||||
for idx in doc_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if value_range.contains(&val) {
|
||||
positions.push(idx);
|
||||
|
||||
Reference in New Issue
Block a user