mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-28 21:12:54 +00:00
Compare commits
1 Commits
low_card_o
...
test_order
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e2dae2f433 |
@@ -247,6 +247,7 @@ impl ColumnarWriter {
|
||||
}
|
||||
pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
|
||||
let mut serializer = ColumnarSerializer::new(wrt);
|
||||
|
||||
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
|
||||
.numerical_field_hash_map
|
||||
.iter()
|
||||
@@ -260,7 +261,7 @@ impl ColumnarWriter {
|
||||
columns.extend(
|
||||
self.bytes_field_hash_map
|
||||
.iter()
|
||||
.map(|(term, addr)| (term, ColumnType::Bytes, addr)),
|
||||
.map(|(column_name, addr)| (column_name, ColumnType::Bytes, addr)),
|
||||
);
|
||||
columns.extend(
|
||||
self.str_field_hash_map
|
||||
@@ -282,6 +283,7 @@ impl ColumnarWriter {
|
||||
.iter()
|
||||
.map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
|
||||
);
|
||||
// TODO: replace JSON_END_OF_PATH with b'0' in columns
|
||||
columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
|
||||
|
||||
let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::{BinarySerializable, CountingWriter};
|
||||
use sstable::value::RangeValueWriter;
|
||||
use sstable::RangeSSTable;
|
||||
@@ -18,13 +19,8 @@ pub struct ColumnarSerializer<W: io::Write> {
|
||||
/// code.
|
||||
fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
|
||||
buffer.clear();
|
||||
// Convert 0 bytes to '0' string, as 0 bytes are reserved for the end of the path.
|
||||
if key.contains(&0u8) {
|
||||
buffer.extend(key.iter().map(|&b| if b == 0 { b'0' } else { b }));
|
||||
} else {
|
||||
buffer.extend_from_slice(key);
|
||||
}
|
||||
buffer.push(0u8);
|
||||
buffer.extend_from_slice(key);
|
||||
buffer.push(JSON_END_OF_PATH);
|
||||
buffer.push(column_type.to_code());
|
||||
}
|
||||
|
||||
|
||||
@@ -2490,4 +2490,29 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bug_2442() -> crate::Result<()> {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let json_field = schema_builder.add_json_field("json", TEXT | FAST);
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::builder().schema(schema).create_in_ram()?;
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
json_field=>json!({"\u{0000}B":"1"})
|
||||
))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
json_field=>json!({" A":"1"})
|
||||
))
|
||||
.unwrap();
|
||||
index_writer.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::replace_in_place;
|
||||
use fnv::FnvHashMap;
|
||||
|
||||
/// `Field` is represented by an unsigned 32-bit integer type.
|
||||
@@ -38,7 +40,14 @@ impl PathToUnorderedId {
|
||||
#[cold]
|
||||
fn insert_new_path(&mut self, path: &str) -> u32 {
|
||||
let next_id = self.map.len() as u32;
|
||||
self.map.insert(path.to_string(), next_id);
|
||||
let mut new_path = path.to_string();
|
||||
|
||||
// The unsafe below is safe as long as b'.' and JSON_PATH_SEGMENT_SEP are
|
||||
// valid single byte ut8 strings.
|
||||
// By utf-8 design, they cannot be part of another codepoint.
|
||||
unsafe { replace_in_place(JSON_END_OF_PATH, b'0', new_path.as_bytes_mut()) };
|
||||
|
||||
self.map.insert(new_path, next_id);
|
||||
next_id
|
||||
}
|
||||
|
||||
|
||||
@@ -249,9 +249,12 @@ impl Term {
|
||||
#[inline]
|
||||
pub fn append_path(&mut self, bytes: &[u8]) -> &mut [u8] {
|
||||
let len_before = self.0.len();
|
||||
if bytes.contains(&0u8) {
|
||||
self.0
|
||||
.extend(bytes.iter().map(|&b| if b == 0 { b'0' } else { b }));
|
||||
if bytes.contains(&JSON_END_OF_PATH) {
|
||||
self.0.extend(
|
||||
bytes
|
||||
.iter()
|
||||
.map(|&b| if b == JSON_END_OF_PATH { b'0' } else { b }),
|
||||
);
|
||||
} else {
|
||||
self.0.extend_from_slice(bytes);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user