mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
Compare commits
1 Commits
flatheadmi
...
test_order
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e2dae2f433 |
@@ -247,6 +247,7 @@ impl ColumnarWriter {
|
|||||||
}
|
}
|
||||||
pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
|
pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
|
||||||
let mut serializer = ColumnarSerializer::new(wrt);
|
let mut serializer = ColumnarSerializer::new(wrt);
|
||||||
|
|
||||||
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
|
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
|
||||||
.numerical_field_hash_map
|
.numerical_field_hash_map
|
||||||
.iter()
|
.iter()
|
||||||
@@ -260,7 +261,7 @@ impl ColumnarWriter {
|
|||||||
columns.extend(
|
columns.extend(
|
||||||
self.bytes_field_hash_map
|
self.bytes_field_hash_map
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(term, addr)| (term, ColumnType::Bytes, addr)),
|
.map(|(column_name, addr)| (column_name, ColumnType::Bytes, addr)),
|
||||||
);
|
);
|
||||||
columns.extend(
|
columns.extend(
|
||||||
self.str_field_hash_map
|
self.str_field_hash_map
|
||||||
@@ -282,6 +283,7 @@ impl ColumnarWriter {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
|
.map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
|
||||||
);
|
);
|
||||||
|
// TODO: replace JSON_END_OF_PATH with b'0' in columns
|
||||||
columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
|
columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
|
||||||
|
|
||||||
let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
|
let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
|
use common::json_path_writer::JSON_END_OF_PATH;
|
||||||
use common::{BinarySerializable, CountingWriter};
|
use common::{BinarySerializable, CountingWriter};
|
||||||
use sstable::value::RangeValueWriter;
|
use sstable::value::RangeValueWriter;
|
||||||
use sstable::RangeSSTable;
|
use sstable::RangeSSTable;
|
||||||
@@ -18,13 +19,8 @@ pub struct ColumnarSerializer<W: io::Write> {
|
|||||||
/// code.
|
/// code.
|
||||||
fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
|
fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
// Convert 0 bytes to '0' string, as 0 bytes are reserved for the end of the path.
|
buffer.extend_from_slice(key);
|
||||||
if key.contains(&0u8) {
|
buffer.push(JSON_END_OF_PATH);
|
||||||
buffer.extend(key.iter().map(|&b| if b == 0 { b'0' } else { b }));
|
|
||||||
} else {
|
|
||||||
buffer.extend_from_slice(key);
|
|
||||||
}
|
|
||||||
buffer.push(0u8);
|
|
||||||
buffer.push(column_type.to_code());
|
buffer.push(column_type.to_code());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2490,4 +2490,29 @@ mod tests {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_bug_2442() -> crate::Result<()> {
|
||||||
|
let mut schema_builder = schema::Schema::builder();
|
||||||
|
let json_field = schema_builder.add_json_field("json", TEXT | FAST);
|
||||||
|
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
let index = Index::builder().schema(schema).create_in_ram()?;
|
||||||
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||||
|
|
||||||
|
index_writer
|
||||||
|
.add_document(doc!(
|
||||||
|
json_field=>json!({"\u{0000}B":"1"})
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
index_writer
|
||||||
|
.add_document(doc!(
|
||||||
|
json_field=>json!({" A":"1"})
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
index_writer.commit()?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use common::json_path_writer::JSON_END_OF_PATH;
|
||||||
|
use common::replace_in_place;
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
|
|
||||||
/// `Field` is represented by an unsigned 32-bit integer type.
|
/// `Field` is represented by an unsigned 32-bit integer type.
|
||||||
@@ -38,7 +40,14 @@ impl PathToUnorderedId {
|
|||||||
#[cold]
|
#[cold]
|
||||||
fn insert_new_path(&mut self, path: &str) -> u32 {
|
fn insert_new_path(&mut self, path: &str) -> u32 {
|
||||||
let next_id = self.map.len() as u32;
|
let next_id = self.map.len() as u32;
|
||||||
self.map.insert(path.to_string(), next_id);
|
let mut new_path = path.to_string();
|
||||||
|
|
||||||
|
// The unsafe below is safe as long as JSON_END_OF_PATH and b'0' are
|
||||||
|
// valid single byte utf8 strings.
|
||||||
|
// By utf-8 design, they cannot be part of another codepoint.
|
||||||
|
unsafe { replace_in_place(JSON_END_OF_PATH, b'0', new_path.as_bytes_mut()) };
|
||||||
|
|
||||||
|
self.map.insert(new_path, next_id);
|
||||||
next_id
|
next_id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -249,9 +249,12 @@ impl Term {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn append_path(&mut self, bytes: &[u8]) -> &mut [u8] {
|
pub fn append_path(&mut self, bytes: &[u8]) -> &mut [u8] {
|
||||||
let len_before = self.0.len();
|
let len_before = self.0.len();
|
||||||
if bytes.contains(&0u8) {
|
if bytes.contains(&JSON_END_OF_PATH) {
|
||||||
self.0
|
self.0.extend(
|
||||||
.extend(bytes.iter().map(|&b| if b == 0 { b'0' } else { b }));
|
bytes
|
||||||
|
.iter()
|
||||||
|
.map(|&b| if b == JSON_END_OF_PATH { b'0' } else { b }),
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
self.0.extend_from_slice(bytes);
|
self.0.extend_from_slice(bytes);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user