From 0e66423de86f97f87bafd1aa1827d3b7429ddadb Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 20 Jan 2023 13:52:13 +0900 Subject: [PATCH] Integration of columnar --- Cargo.toml | 3 +- TODO.txt | 18 + columnar/src/TODO.md | 10 +- columnar/src/column/mod.rs | 16 +- columnar/src/column/serialize.rs | 3 +- columnar/src/column_index/mod.rs | 28 +- .../src/column_index/multivalued_index.rs | 113 +- .../src/column_index/optional_index/mod.rs | 15 +- .../src/column_index/optional_index/set.rs | 7 +- .../optional_index/set_block/dense.rs | 9 +- .../optional_index/set_block/sparse.rs | 6 +- .../optional_index/set_block/tests.rs | 2 +- columnar/src/column_index/serialize.rs | 6 +- columnar/src/column_values/column.rs | 20 +- columnar/src/column_values/mod.rs | 7 +- .../src/column_values/monotonic_mapping.rs | 3 +- .../column_values/monotonic_mapping_u128.rs | 3 +- columnar/src/column_values/serialize.rs | 3 +- columnar/src/columnar/column_type.rs | 5 +- columnar/src/lib.rs | 1 + columnar/src/value.rs | 2 +- .../aggregation.rs | 4 +- .../basic_search.rs | 0 .../custom_collector.rs | 0 .../custom_tokenizer.rs | 0 .../date_time_field.rs | 4 +- .../deleting_updating_documents.rs | 0 .../faceted_search.rs | 0 .../faceted_search_with_tweaked_score.rs | 0 .../integer_range_search.rs | 0 {examples => examples-disabled}/ip_field.rs | 0 .../iterating_docs_and_positions.rs | 0 {examples => examples-disabled}/json_field.rs | 0 .../multiple_producer.rs | 0 .../pre_tokenized_text.rs | 0 {examples => examples-disabled}/snippet.rs | 0 {examples => examples-disabled}/stop_words.rs | 0 {examples => examples-disabled}/warmer.rs | 0 .../working_with_json.rs | 0 fastfield_codecs/Cargo.toml | 1 + fastfield_codecs/src/column.rs | 98 +- src/aggregation/agg_req_with_accessor.rs | 2 +- src/aggregation/metric/mod.rs | 4 +- src/aggregation/mod.rs | 10 +- src/collector/filter_collector_wrapper.rs | 28 +- src/collector/histogram_collector.rs | 14 +- src/collector/mod.rs | 5 +- 
src/collector/tests.rs | 110 +- src/collector/top_score_collector.rs | 53 +- src/core/index.rs | 10 +- src/core/segment_reader.rs | 27 +- src/fastfield/error.rs | 2 +- src/fastfield/mod.rs | 938 +++++----- src/fastfield/multivalued/mod.rs | 18 +- src/fastfield/multivalued/reader.rs | 6 +- src/fastfield/readers.rs | 392 ++-- src/fastfield/writer.rs | 620 ++----- src/indexer/doc_id_mapping.rs | 142 +- src/indexer/index_writer.rs | 1622 ++++++++--------- src/indexer/json_term_writer.rs | 1 - src/indexer/merger.rs | 1011 ++++------ src/indexer/merger_sorted_index_test.rs | 238 ++- src/indexer/mod.rs | 86 +- src/indexer/segment_serializer.rs | 14 +- src/indexer/segment_writer.rs | 37 +- src/lib.rs | 18 +- src/postings/json_postings_writer.rs | 15 +- src/postings/postings_writer.rs | 78 +- src/query/mod.rs | 2 +- src/query/query_parser/query_parser.rs | 15 +- .../range_query/fast_field_range_query.rs | 83 +- src/query/range_query/mod.rs | 27 +- src/query/range_query/range_query.rs | 32 +- .../range_query/range_query_ip_fastfield.rs | 145 +- .../range_query/range_query_u64_fastfield.rs | 215 +-- src/schema/date_time_options.rs | 50 +- src/schema/field_type.rs | 22 +- src/schema/ip_options.rs | 26 +- src/schema/mod.rs | 4 +- src/schema/numeric_options.rs | 57 +- src/schema/schema.rs | 37 +- 81 files changed, 2760 insertions(+), 3843 deletions(-) create mode 100644 TODO.txt rename {examples => examples-disabled}/aggregation.rs (95%) rename {examples => examples-disabled}/basic_search.rs (100%) rename {examples => examples-disabled}/custom_collector.rs (100%) rename {examples => examples-disabled}/custom_tokenizer.rs (100%) rename {examples => examples-disabled}/date_time_field.rs (94%) rename {examples => examples-disabled}/deleting_updating_documents.rs (100%) rename {examples => examples-disabled}/faceted_search.rs (100%) rename {examples => examples-disabled}/faceted_search_with_tweaked_score.rs (100%) rename {examples => examples-disabled}/integer_range_search.rs (100%) 
rename {examples => examples-disabled}/ip_field.rs (100%) rename {examples => examples-disabled}/iterating_docs_and_positions.rs (100%) rename {examples => examples-disabled}/json_field.rs (100%) rename {examples => examples-disabled}/multiple_producer.rs (100%) rename {examples => examples-disabled}/pre_tokenized_text.rs (100%) rename {examples => examples-disabled}/snippet.rs (100%) rename {examples => examples-disabled}/stop_words.rs (100%) rename {examples => examples-disabled}/warmer.rs (100%) rename {examples => examples-disabled}/working_with_json.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index d871aeae6..6966d7e6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" } tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" } tantivy-bitpacker = { version= "0.3", path="./bitpacker" } +columnar = { version= "0.1", path="./columnar", package="tantivy-columnar" } common = { version= "0.5", path = "./common/", package = "tantivy-common" } fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false } tokenizer-api = { version="0.1", path="./tokenizer-api", package="tantivy-tokenizer-api" } @@ -107,7 +108,7 @@ unstable = [] # useful for benches. quickwit = ["sstable"] [workspace] -members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes", "stacker", "sstable", "tokenizer-api"] +members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"] # Following the "fail" crate best practises, we isolate # tests that define specific behavior in fail check points diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 000000000..64547834d --- /dev/null +++ b/TODO.txt @@ -0,0 +1,18 @@ +Make schema_builder API fluent. 
+fix doc serialization and prevent compression problems + +u64, etc. should return Result