mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Added column handle
This commit is contained in:
@@ -9,6 +9,7 @@ pub use column_type_header::Cardinality;
|
|||||||
pub use reader::ColumnarReader;
|
pub use reader::ColumnarReader;
|
||||||
pub use value::{NumericalType, NumericalValue};
|
pub use value::{NumericalType, NumericalValue};
|
||||||
pub use writer::ColumnarWriter;
|
pub use writer::ColumnarWriter;
|
||||||
|
pub use reader::ColumnHandle;
|
||||||
|
|
||||||
pub type DocId = u32;
|
pub type DocId = u32;
|
||||||
|
|
||||||
@@ -17,12 +18,9 @@ pub struct InvalidData;
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::ops::Range;
|
|
||||||
|
|
||||||
use common::file_slice::FileSlice;
|
use common::file_slice::FileSlice;
|
||||||
|
use crate::column_type_header::ColumnType;
|
||||||
use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality};
|
use crate::reader::{ColumnarReader, ColumnHandle};
|
||||||
use crate::reader::ColumnarReader;
|
|
||||||
use crate::value::NumericalValue;
|
use crate::value::NumericalValue;
|
||||||
use crate::{Cardinality, ColumnarWriter};
|
use crate::{Cardinality, ColumnarWriter};
|
||||||
|
|
||||||
@@ -36,10 +34,10 @@ mod tests {
|
|||||||
let columnar_fileslice = FileSlice::from(buffer);
|
let columnar_fileslice = FileSlice::from(buffer);
|
||||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||||
assert_eq!(columnar.num_columns(), 1);
|
assert_eq!(columnar.num_columns(), 1);
|
||||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
let cols: Vec<ColumnHandle> =
|
||||||
columnar.read_columns("my_string").unwrap();
|
columnar.read_columns("my_string").unwrap();
|
||||||
assert_eq!(cols.len(), 1);
|
assert_eq!(cols.len(), 1);
|
||||||
assert_eq!(cols[0].1, 0..158);
|
assert_eq!(cols[0].num_bytes(), 158);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -51,17 +49,21 @@ mod tests {
|
|||||||
let columnar_fileslice = FileSlice::from(buffer);
|
let columnar_fileslice = FileSlice::from(buffer);
|
||||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||||
assert_eq!(columnar.num_columns(), 1);
|
assert_eq!(columnar.num_columns(), 1);
|
||||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
let cols: Vec<ColumnHandle> =
|
||||||
columnar.read_columns("bool.value").unwrap();
|
columnar.read_columns("bool.value").unwrap();
|
||||||
assert_eq!(cols.len(), 1);
|
assert_eq!(cols.len(), 1);
|
||||||
|
let col = cols.into_iter().next().unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
cols[0].0,
|
col.column_type(),
|
||||||
ColumnTypeAndCardinality {
|
ColumnType::Bool
|
||||||
cardinality: Cardinality::Optional,
|
);
|
||||||
typ: ColumnType::Bool
|
assert_eq!(
|
||||||
}
|
col.cardinality(),
|
||||||
|
Cardinality::Optional);
|
||||||
|
assert_eq!(
|
||||||
|
col.column_name(),
|
||||||
|
"bool.value"
|
||||||
);
|
);
|
||||||
assert_eq!(cols[0].1, 0..21);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -75,7 +77,7 @@ mod tests {
|
|||||||
let columnar_fileslice = FileSlice::from(buffer);
|
let columnar_fileslice = FileSlice::from(buffer);
|
||||||
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
|
||||||
assert_eq!(columnar.num_columns(), 1);
|
assert_eq!(columnar.num_columns(), 1);
|
||||||
let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
|
let cols: Vec<ColumnHandle> =
|
||||||
columnar.read_columns("srical.value").unwrap();
|
columnar.read_columns("srical.value").unwrap();
|
||||||
assert_eq!(cols.len(), 1);
|
assert_eq!(cols.len(), 1);
|
||||||
// Right now this 31 bytes are spent as follows
|
// Right now this 31 bytes are spent as follows
|
||||||
@@ -84,6 +86,6 @@ mod tests {
|
|||||||
// - vals 8 //< due to padding? could have been 1byte?.
|
// - vals 8 //< due to padding? could have been 1byte?.
|
||||||
// - null footer 6 bytes
|
// - null footer 6 bytes
|
||||||
// - version footer 3 bytes // Should be file-wide
|
// - version footer 3 bytes // Should be file-wide
|
||||||
assert_eq!(cols[0].1, 0..31);
|
assert_eq!(cols[0].num_bytes(), 31);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
42
columnar/src/reader/column_handle.rs
Normal file
42
columnar/src/reader/column_handle.rs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
use common::HasLen;
|
||||||
|
use common::file_slice::FileSlice;
|
||||||
|
|
||||||
|
use crate::Cardinality;
|
||||||
|
use crate::column_type_header::ColumnType;
|
||||||
|
|
||||||
|
|
||||||
|
pub struct ColumnHandle {
|
||||||
|
column_name: String, //< Mostly for debug and display.
|
||||||
|
data: FileSlice,
|
||||||
|
column_type: ColumnType,
|
||||||
|
cardinality: Cardinality,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ColumnHandle {
|
||||||
|
pub fn new(column_name: String, data: FileSlice, column_type: ColumnType, cardinality: Cardinality) -> Self {
|
||||||
|
ColumnHandle {
|
||||||
|
column_name,
|
||||||
|
data,
|
||||||
|
column_type,
|
||||||
|
cardinality,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn column_name(&self) -> &str {
|
||||||
|
self.column_name.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_bytes(&self) -> usize {
|
||||||
|
self.data.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn column_type(&self) -> ColumnType {
|
||||||
|
self.column_type
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn cardinality(&self) -> Cardinality {
|
||||||
|
self.cardinality
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
mod column_handle;
|
||||||
|
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::{io, mem};
|
use std::{io, mem};
|
||||||
|
|
||||||
@@ -6,6 +8,7 @@ use common::BinarySerializable;
|
|||||||
use sstable::{Dictionary, RangeSSTable};
|
use sstable::{Dictionary, RangeSSTable};
|
||||||
|
|
||||||
use crate::column_type_header::ColumnTypeAndCardinality;
|
use crate::column_type_header::ColumnTypeAndCardinality;
|
||||||
|
pub use crate::reader::column_handle::ColumnHandle;
|
||||||
|
|
||||||
fn io_invalid_data(msg: String) -> io::Error {
|
fn io_invalid_data(msg: String) -> io::Error {
|
||||||
io::Error::new(io::ErrorKind::InvalidData, msg)
|
io::Error::new(io::ErrorKind::InvalidData, msg)
|
||||||
@@ -72,7 +75,7 @@ impl ColumnarReader {
|
|||||||
pub fn read_columns(
|
pub fn read_columns(
|
||||||
&self,
|
&self,
|
||||||
column_name: &str,
|
column_name: &str,
|
||||||
) -> io::Result<Vec<(ColumnTypeAndCardinality, Range<u64>)>> {
|
) -> io::Result<Vec<ColumnHandle>> {
|
||||||
// Each column is a associated to a given `column_key`,
|
// Each column is a associated to a given `column_key`,
|
||||||
// that starts by `column_name\0column_header`.
|
// that starts by `column_name\0column_header`.
|
||||||
//
|
//
|
||||||
@@ -91,15 +94,17 @@ impl ColumnarReader {
|
|||||||
.ge(start_key.as_bytes())
|
.ge(start_key.as_bytes())
|
||||||
.lt(end_key.as_bytes())
|
.lt(end_key.as_bytes())
|
||||||
.into_stream()?;
|
.into_stream()?;
|
||||||
let mut results = Vec::new();
|
let mut results: Vec<ColumnHandle> = Vec::new();
|
||||||
while stream.advance() {
|
while stream.advance() {
|
||||||
let key_bytes: &[u8] = stream.key();
|
let key_bytes: &[u8] = stream.key();
|
||||||
assert!(key_bytes.starts_with(start_key.as_bytes()));
|
assert!(key_bytes.starts_with(start_key.as_bytes()));
|
||||||
let column_code: u8 = key_bytes.last().cloned().unwrap();
|
let column_code: u8 = key_bytes.last().cloned().unwrap();
|
||||||
let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code)
|
let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code)
|
||||||
.map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
|
.map_err(|_| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
|
||||||
let range = stream.value().clone();
|
let Range { start, end } = stream.value().clone();
|
||||||
results.push((column_type_and_cardinality, range));
|
let column_data = self.column_data.slice(start as usize..end as usize);
|
||||||
|
let column_handle = ColumnHandle::new(column_name.to_string(), column_data, column_type_and_cardinality.typ, column_type_and_cardinality.cardinality);
|
||||||
|
results.push(column_handle);
|
||||||
}
|
}
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -253,8 +253,7 @@ mod tests {
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use super::{FileHandle, FileSlice};
|
use super::{FileHandle, FileSlice};
|
||||||
use crate::file_slice::combine_ranges;
|
use crate::{file_slice::combine_ranges, HasLen};
|
||||||
use crate::HasLen;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_slice() -> io::Result<()> {
|
fn test_file_slice() -> io::Result<()> {
|
||||||
|
|||||||
@@ -42,8 +42,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
|
|||||||
positions: &mut Vec<u32>,
|
positions: &mut Vec<u32>,
|
||||||
) {
|
) {
|
||||||
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
||||||
|
for idx in doc_id_range {
|
||||||
for idx in doc_id_range.start..doc_id_range.end {
|
|
||||||
let val = self.get_val(idx);
|
let val = self.get_val(idx);
|
||||||
if value_range.contains(&val) {
|
if value_range.contains(&val) {
|
||||||
positions.push(idx);
|
positions.push(idx);
|
||||||
|
|||||||
Reference in New Issue
Block a user