diff --git a/build.rs b/build.rs index 639bbe93f..5fea0b408 100644 --- a/build.rs +++ b/build.rs @@ -4,7 +4,8 @@ mod build { pub fn build() { let mut config = gcc::Config::new(); - config.include("./cpp/simdcomp/include") + config + .include("./cpp/simdcomp/include") .file("cpp/simdcomp/src/avxbitpacking.c") .file("cpp/simdcomp/src/simdintegratedbitpacking.c") .file("cpp/simdcomp/src/simdbitpacking.c") @@ -18,18 +19,26 @@ mod build { config.opt_level(3); if cfg!(target_env = "msvc") { - config.define("NDEBUG", None) + config + .define("NDEBUG", None) .flag("/Gm-") .flag("/GS-") .flag("/Gy") .flag("/Oi") .flag("/GL"); - } else { - config.flag("-msse4.1") - .flag("-march=native"); } } + if !cfg!(target_env = "msvc") { + config + .include("./cpp/streamvbyte/include") + .file("cpp/streamvbyte/src/streamvbyte.c") + .file("cpp/streamvbyte/src/streamvbytedelta.c") + .flag("-msse4.1") + .flag("-march=native") + .flag("-std=c99"); + } + config.compile("libsimdcomp.a"); // Workaround for linking static libraries built with /GL diff --git a/cpp/streamvbyte/.gitignore b/cpp/streamvbyte/.gitignore new file mode 100644 index 000000000..bbf313b25 --- /dev/null +++ b/cpp/streamvbyte/.gitignore @@ -0,0 +1,32 @@ +# Object files +*.o +*.ko +*.obj +*.elf + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ diff --git a/cpp/streamvbyte/.travis.yml b/cpp/streamvbyte/.travis.yml new file mode 100644 index 000000000..6ecd76739 --- /dev/null +++ b/cpp/streamvbyte/.travis.yml @@ -0,0 +1,7 @@ +language: c +sudo: false +compiler: + - gcc + - clang + +script: make && ./unit diff --git a/cpp/streamvbyte/LICENSE b/cpp/streamvbyte/LICENSE new file mode 100644 index 000000000..e06d20818 --- /dev/null +++ b/cpp/streamvbyte/LICENSE @@ -0,0 +1,202 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/cpp/streamvbyte/README.md b/cpp/streamvbyte/README.md new file mode 100644 index 000000000..78d16a204 --- /dev/null +++ b/cpp/streamvbyte/README.md @@ -0,0 +1,60 @@ +streamvbyte +=========== +[![Build Status](https://travis-ci.org/lemire/streamvbyte.png)](https://travis-ci.org/lemire/streamvbyte) + +StreamVByte is a new integer compression technique that applies SIMD instructions (vectorization) to +Google's Group Varint approach. The net result is faster than other byte-oriented compression +techniques. + +The approach is patent-free, the code is available under the Apache License. + + +It includes fast differential coding. + +It assumes a recent Intel processor (e.g., haswell or better) . + +The code should build using most standard-compliant C99 compilers. The provided makefile +expects a Linux-like system. + + +Usage: + + make + ./unit + +See example.c for an example. 
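+A note on sizing the output buffer (a conservative bound inferred from the encoder in src/streamvbyte.c, not a documented upstream guarantee): the compressed stream holds (N + 3) / 4 control bytes followed by at most 4 data bytes per integer, so an allocation like the sketch below is always large enough.
+```C
+// hypothetical helper, not part of the library API:
+// worst-case compressed size for N uint32_t values
+size_t streamvbyte_max_compressedbytes(uint32_t N) {
+  return (N + 3) / 4       // one 2-bit code per value, four codes per control byte
+         + (size_t)N * 4;  // each value takes at most 4 data bytes
+}
+```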
+ +Short code sample: +```C +// suppose that datain is an array of uint32_t integers +size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding +// here the result is stored in compressedbuffer using compsize bytes +streamvbyte_decode(compressedbuffer, recovdata, N); // decoding (fast) +``` + +If the values are sorted, then it might be preferable to use differential coding: +```C +// suppose that datain is an array of uint32_t integers +size_t compsize = streamvbyte_delta_encode(datain, N, compressedbuffer,0); // encoding +// here the result is stored in compressedbuffer using compsize bytes +streamvbyte_delta_decode(compressedbuffer, recovdata, N,0); // decoding (fast) +``` +You have to know how many integers were coded when you decompress. You can store this +information along with the compressed stream. + +See also +-------- +* SIMDCompressionAndIntersection: A C++ library to compress and intersect sorted lists of integers using SIMD instructions https://github.com/lemire/SIMDCompressionAndIntersect +* The FastPFOR C++ library : Fast integer compression https://github.com/lemire/FastPFor +* High-performance dictionary coding https://github.com/lemire/dictionary +* LittleIntPacker: C library to pack and unpack short arrays of integers as fast as possible https://github.com/lemire/LittleIntPacker +* The SIMDComp library: A simple C library for compressing lists of integers using binary packing https://github.com/lemire/simdcomp +* MaskedVByte: Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte +* CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR +* JavaFastPFOR: A java integer compression library https://github.com/lemire/JavaFastPFOR +* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding +* FrameOfReference is a C++ library dedicated to frame-of-reference (FOR) compression: https://github.com/lemire/FrameOfReference +* libvbyte: A fast implementation for varbyte 32bit/64bit integer compression https://github.com/cruppstahl/libvbyte +* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! 
(GPL license) https://github.com/powturbo/TurboPFor +* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch + diff --git a/cpp/streamvbyte/example.c b/cpp/streamvbyte/example.c new file mode 100644 index 000000000..4d72bd1e2 --- /dev/null +++ b/cpp/streamvbyte/example.c @@ -0,0 +1,24 @@ +#include +#include +#include + +#include "streamvbyte.h" + +int main() { + int N = 5000; + uint32_t * datain = malloc(N * sizeof(uint32_t)); + uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t)); + uint32_t * recovdata = malloc(N * sizeof(uint32_t)); + for (int k = 0; k < N; ++k) + datain[k] = 120; + size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding + // here the result is stored in compressedbuffer using compsize bytes + size_t compsize2 = streamvbyte_decode(compressedbuffer, recovdata, + N); // decoding (fast) + assert(compsize == compsize2); + free(datain); + free(compressedbuffer); + free(recovdata); + printf("Compressed %d integers down to %d bytes.\n",N,(int) compsize); + return 0; +} diff --git a/cpp/streamvbyte/include/streamvbyte.h b/cpp/streamvbyte/include/streamvbyte.h new file mode 100644 index 000000000..bd4623f6a --- /dev/null +++ b/cpp/streamvbyte/include/streamvbyte.h @@ -0,0 +1,19 @@ + +#ifndef VARINTDECODE_H_ +#define VARINTDECODE_H_ +#define __STDC_FORMAT_MACROS +#include +#include // please use a C99-compatible compiler +#include + + +// Encode an array of a given length read from in to bout in varint format. +// Returns the number of bytes written. +size_t streamvbyte_encode(const uint32_t *in, uint32_t length, uint8_t *out); + +// Read "length" 32-bit integers in varint format from in, storing the result in out. +// Returns the number of bytes read. +size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t length); + + +#endif /* VARINTDECODE_H_ */ diff --git a/cpp/streamvbyte/include/streamvbytedelta.h b/cpp/streamvbyte/include/streamvbytedelta.h new file mode 100644 index 000000000..84e47bade --- /dev/null +++ b/cpp/streamvbyte/include/streamvbytedelta.h @@ -0,0 +1,24 @@ +/* + * streamvbytedelta.h + * + * Created on: Apr 14, 2016 + * Author: lemire + */ + +#ifndef INCLUDE_STREAMVBYTEDELTA_H_ +#define INCLUDE_STREAMVBYTEDELTA_H_ + + +// Encode an array of a given length read from in to bout in StreamVByte format. +// Returns the number of bytes written. +// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero) +size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t length, uint8_t *out, uint32_t prev); + +// Read "length" 32-bit integers in StreamVByte format from in, storing the result in out. +// Returns the number of bytes read. 
+// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero) +size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out, uint32_t length, uint32_t prev); + + + +#endif /* INCLUDE_STREAMVBYTEDELTA_H_ */ diff --git a/cpp/streamvbyte/makefile b/cpp/streamvbyte/makefile new file mode 100644 index 000000000..ec44b7ef6 --- /dev/null +++ b/cpp/streamvbyte/makefile @@ -0,0 +1,58 @@ +# minimalist makefile +.SUFFIXES: +# +.SUFFIXES: .cpp .o .c .h + +CFLAGS = -fPIC -march=native -std=c99 -O3 -Wall -Wextra -pedantic -Wshadow +LDFLAGS = -shared +LIBNAME=libstreamvbyte.so.0.0.1 +all: unit $(LIBNAME) +test: + ./unit +install: $(OBJECTS) + cp $(LIBNAME) /usr/local/lib + ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libstreamvbyte.so + ldconfig + cp $(HEADERS) /usr/local/include + + + +HEADERS=./include/streamvbyte.h ./include/streamvbytedelta.h + +uninstall: + for h in $(HEADERS) ; do rm /usr/local/$$h; done + rm /usr/local/lib/$(LIBNAME) + rm /usr/local/lib/libstreamvbyte.so + ldconfig + + +OBJECTS= streamvbyte.o streamvbytedelta.o + + + +streamvbytedelta.o: ./src/streamvbytedelta.c $(HEADERS) + $(CC) $(CFLAGS) -c ./src/streamvbytedelta.c -Iinclude + + +streamvbyte.o: ./src/streamvbyte.c $(HEADERS) + $(CC) $(CFLAGS) -c ./src/streamvbyte.c -Iinclude + + + +$(LIBNAME): $(OBJECTS) + $(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS) $(LDFLAGS) + + + + +example: ./example.c $(HEADERS) $(OBJECTS) + $(CC) $(CFLAGS) -o example ./example.c -Iinclude $(OBJECTS) + +unit: ./tests/unit.c $(HEADERS) $(OBJECTS) + $(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude $(OBJECTS) + +dynunit: ./tests/unit.c $(HEADERS) $(LIBNAME) + $(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude -lstreamvbyte + +clean: + rm -f unit *.o $(LIBNAME) example diff --git a/cpp/streamvbyte/src/streamvbyte.c b/cpp/streamvbyte/src/streamvbyte.c new file mode 100644 index 000000000..a45dbf76d --- /dev/null +++ b/cpp/streamvbyte/src/streamvbyte.c @@ -0,0 +1,495 @@ +#include "streamvbyte.h" +#if defined(_MSC_VER) + /* Microsoft C/C++-compatible compiler */ + #include +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + /* GCC-compatible compiler, targeting x86/x86-64 */ + #include +#elif defined(__GNUC__) && defined(__ARM_NEON__) + /* GCC-compatible compiler, targeting ARM with NEON */ + #include +#elif defined(__GNUC__) && defined(__IWMMXT__) + /* GCC-compatible compiler, targeting ARM with WMMX */ + #include +#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__)) + /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */ + #include +#elif defined(__GNUC__) && defined(__SPE__) + /* GCC-compatible compiler, targeting PowerPC with SPE */ + #include +#endif + +static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, + 10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8, + 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, + 11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, + 11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, + 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, + 12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, + 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, + 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, + 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8, + 9, 10, 11, 9, 10, 11, 12, 10, 11, 
12, 13, 8, 9, 10, 11, 9, 10, 11, 12, + 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, + 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, + 13, 14, 15, 16 }; + +static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, + -1, -1, 3, -1, -1, -1 }, // 1111 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 }, // 2111 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 3111 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 4111 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 }, // 1211 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 2211 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 3211 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 }, // 4211 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 1311 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 2311 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 }, // 3311 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 }, // 4311 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 }, // 1411 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 }, // 2411 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 }, // 3411 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 }, // 4411 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 }, // 1121 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 }, // 2121 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 3121 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 4121 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 }, // 1221 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 2221 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 3221 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 }, // 4221 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 1321 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 2321 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 }, // 3321 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 }, // 4321 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 }, // 1421 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 }, // 2421 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 }, // 3421 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 }, // 4421 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 }, // 1131 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 }, // 2131 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 3131 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 4131 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 }, // 1231 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 2231 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 3231 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 }, // 4231 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 1331 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 2331 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 }, // 3331 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 }, // 4331 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 }, // 1431 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 }, // 2431 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 }, // 3431 + { 0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 }, // 4431 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 }, // 1141 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 }, // 2141 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 3141 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 4141 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 }, // 1241 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 2241 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 3241 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 }, // 4241 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 1341 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 2341 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 }, // 3341 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 }, // 4341 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 }, // 1441 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 }, // 2441 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 }, // 3441 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 }, // 4441 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 }, // 1112 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 }, // 2112 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 3112 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 4112 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 }, // 1212 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 2212 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 3212 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 }, // 4212 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 1312 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 2312 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 }, // 3312 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 }, // 4312 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 }, // 1412 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 }, // 2412 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 }, // 3412 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 }, // 4412 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 }, // 1122 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 }, // 2122 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 3122 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 4122 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 }, // 1222 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 2222 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 3222 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 }, // 4222 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 1322 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 2322 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 }, // 3322 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 }, // 4322 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 }, // 1422 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 }, // 2422 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 }, // 3422 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 }, // 4422 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 }, // 1132 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 }, // 2132 + { 0, 1, 2, -1, 3, -1, -1, 
-1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 3132 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 4132 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 }, // 1232 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 2232 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 3232 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 }, // 4232 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 1332 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 2332 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 }, // 3332 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 }, // 4332 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 }, // 1432 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 }, // 2432 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 }, // 3432 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 }, // 4432 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 }, // 1142 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 }, // 2142 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 3142 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 }, // 4142 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 }, // 1242 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 2242 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 }, // 3242 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 }, // 4242 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 1342 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 }, // 2342 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 }, // 3342 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 }, // 4342 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 }, // 1442 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 }, // 2442 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 }, // 3442 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 }, // 4442 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 }, // 1113 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 }, // 2113 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 3113 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 4113 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 }, // 1213 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 2213 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 3213 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 }, // 4213 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 1313 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 2313 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 }, // 3313 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 }, // 4313 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 }, // 1413 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 }, // 2413 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 }, // 3413 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 }, // 4413 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 }, // 1123 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 }, // 2123 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 3123 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 4123 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 }, // 1223 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 2223 + { 0, 
1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 3223 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 }, // 4223 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 1323 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 2323 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 }, // 3323 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 }, // 4323 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 }, // 1423 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 }, // 2423 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 }, // 3423 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 }, // 4423 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 }, // 1133 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 }, // 2133 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 3133 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 4133 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 }, // 1233 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 2233 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 3233 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 }, // 4233 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 1333 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 2333 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 }, // 3333 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 }, // 4333 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 }, // 1433 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 }, // 2433 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 }, // 3433 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 }, // 4433 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 }, // 1143 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 }, // 2143 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 3143 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 4143 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 }, // 1243 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 2243 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 3243 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 }, // 4243 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 1343 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 2343 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 }, // 3343 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 }, // 4343 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 }, // 1443 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 }, // 2443 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 }, // 3443 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 }, // 4443 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 }, // 1114 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 }, // 2114 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 3114 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 4114 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 }, // 1214 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 2214 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 3214 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 }, // 4214 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 1314 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 2314 + { 0, 1, 
2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 }, // 3314 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 }, // 4314 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 }, // 1414 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 }, // 2414 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 }, // 3414 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 }, // 4414 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 }, // 1124 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 }, // 2124 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 3124 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 4124 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 }, // 1224 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 2224 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 3224 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 }, // 4224 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 1324 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 2324 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 }, // 3324 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 }, // 4324 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 }, // 1424 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 }, // 2424 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 }, // 3424 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 }, // 4424 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 }, // 1134 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 }, // 2134 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 3134 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 4134 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 }, // 1234 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 2234 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 3234 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 }, // 4234 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 1334 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 2334 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 }, // 3334 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 }, // 4334 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 }, // 1434 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 }, // 2434 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 }, // 3434 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 }, // 4434 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 }, // 1144 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 }, // 2144 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 3144 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 4144 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 }, // 1244 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 2244 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 3244 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 }, // 4244 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 1344 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 2344 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 }, // 3344 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 }, // 4344 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }, // 1444 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }, // 2444 + { 0, 1, 2, -1, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }, // 3444 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } // 4444 +}; + +static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) { + uint8_t *dataPtr = *dataPtrPtr; + uint8_t code; + + if (val < (1 << 8)) { // 1 byte + *dataPtr = (uint8_t)(val); + *dataPtrPtr += 1; + code = 0; + } else if (val < (1 << 16)) { // 2 bytes + *(uint16_t *) dataPtr = (uint16_t)(val); + *dataPtrPtr += 2; + code = 1; + } else if (val < (1 << 24)) { // 3 bytes + *(uint16_t *) dataPtr = (uint16_t)(val); + *(dataPtr + 2) = (uint8_t)(val >> 16); + *dataPtrPtr += 3; + code = 2; + } else { // 4 bytes + *(uint32_t *) dataPtr = val; + *dataPtrPtr += 4; + code = 3; + } + + return code; +} + +static uint8_t *svb_encode_scalar(const uint32_t *in, + uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr, + uint32_t count) { + if (count == 0) + return dataPtr; // exit immediately if no data + + uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ... + uint8_t key = 0; + for (uint32_t c = 0; c < count; c++) { + if (shift == 8) { + shift = 0; + *keyPtr++ = key; + key = 0; + } + uint32_t val = in[c]; + uint8_t code = _encode_data(val, &dataPtr); + key |= code << shift; + shift += 2; + } + + *keyPtr = key; // write last key (no increment needed) + return dataPtr; // pointer to first unused data byte +} + +// Encode an array of a given length read from in to bout in streamvbyte format. +// Returns the number of bytes written. +size_t streamvbyte_encode(const uint32_t *in, uint32_t count, uint8_t *out) { + uint8_t *keyPtr = out; + uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte + uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys + return svb_encode_scalar(in, keyPtr, dataPtr, count) - out; +} + +static inline __m128i _decode_avx(uint32_t key, + const uint8_t *__restrict__ *dataPtrPtr) { + uint8_t len = lengthTable[key]; + __m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr); + __m128i Shuf = *(__m128i *) &shuffleTable[key]; + + Data = _mm_shuffle_epi8(Data, Shuf); + *dataPtrPtr += len; + return Data; +} + +static inline void _write_avx(uint32_t *out, __m128i Vec) { + _mm_storeu_si128((__m128i *) out, Vec); +} + +static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) { + const uint8_t *dataPtr = *dataPtrPtr; + uint32_t val; + + if (code == 0) { // 1 byte + val = (uint32_t) * dataPtr; + dataPtr += 1; + } else if (code == 1) { // 2 bytes + val = (uint32_t) * (uint16_t *) dataPtr; + dataPtr += 2; + } else if (code == 2) { // 3 bytes + val = (uint32_t) * (uint16_t *) dataPtr; + val |= *(dataPtr + 2) << 16; + dataPtr += 3; + } else { // code == 3 + val = *(uint32_t *) dataPtr; // 4 bytes + dataPtr += 4; + } + + *dataPtrPtr = dataPtr; + return val; +} +static const uint8_t *svb_decode_scalar(uint32_t *outPtr, const uint8_t *keyPtr, + const uint8_t *dataPtr, uint32_t count) { + if (count == 0) + return dataPtr; // no reads or writes if no data + + uint8_t shift = 0; + uint32_t key = *keyPtr++; + for (uint32_t c = 0; c < count; c++) { + if (shift == 8) { + shift = 0; + key = *keyPtr++; + } + uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3); + *outPtr++ = val; + shift += 2; + } + + return dataPtr; // pointer to first unused byte after end +} + +const uint8_t *svb_decode_avx_simple(uint32_t *out, + const uint8_t *__restrict__ keyPtr, const uint8_t *__restrict__ dataPtr, + uint64_t count) { + + uint64_t keybytes = count / 4; // number of key bytes + __m128i Data; + if (keybytes >= 8) { + + 
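+    /* Consume the control bytes eight at a time. Offset starts negative and
+       counts up toward zero, so this loop decodes every 64-bit key word except
+       the last one, which is handled by the unrolled block that follows; each
+       key word holds 32 two-bit codes, i.e. 32 output integers. */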
int64_t Offset = -(int64_t) keybytes / 8 + 1; + + const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset; + uint64_t nextkeys = keyPtr64[Offset]; + for (; Offset != 0; ++Offset) { + uint64_t keys = nextkeys; + nextkeys = keyPtr64[Offset + 1]; + + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 4, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 8, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 12, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 16, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 20, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 24, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 28, Data); + + out += 32; + } + { + uint64_t keys = nextkeys; + + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 4, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 8, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 12, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 16, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 20, Data); + + keys >>= 16; + Data = _decode_avx((keys & 0xFF), &dataPtr); + _write_avx(out + 24, Data); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + _write_avx(out + 28, Data); + + out += 32; + } + } + uint64_t consumedkeys = keybytes - (keybytes & 7); + return svb_decode_scalar(out, keyPtr + consumedkeys, dataPtr, count & 31); +} + +// Read count 32-bit integers in maskedvbyte format from in, storing the result in out. Returns the number of bytes read. 
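+// (Layout matches streamvbyte_encode: (count + 3) / 4 control bytes, then the
+// variable-length data. Full groups of 32 values go through the SIMD shuffle
+// path above (svb_decode_avx_simple); the trailing count % 32 values are
+// handled by svb_decode_scalar.)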
+size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t count) { + if (count == 0) + return 0; + const uint8_t *keyPtr = in; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + const uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return svb_decode_avx_simple(out, keyPtr, dataPtr, count) - in; + +} diff --git a/cpp/streamvbyte/src/streamvbytedelta.c b/cpp/streamvbyte/src/streamvbytedelta.c new file mode 100644 index 000000000..25e0b7fa0 --- /dev/null +++ b/cpp/streamvbyte/src/streamvbytedelta.c @@ -0,0 +1,575 @@ +#include "streamvbyte.h" +#if defined(_MSC_VER) + /* Microsoft C/C++-compatible compiler */ + #include +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + /* GCC-compatible compiler, targeting x86/x86-64 */ + #include +#elif defined(__GNUC__) && defined(__ARM_NEON__) + /* GCC-compatible compiler, targeting ARM with NEON */ + #include +#elif defined(__GNUC__) && defined(__IWMMXT__) + /* GCC-compatible compiler, targeting ARM with WMMX */ + #include +#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__)) + /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */ + #include +#elif defined(__GNUC__) && defined(__SPE__) + /* GCC-compatible compiler, targeting PowerPC with SPE */ + #include +#endif + +static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, + 10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8, + 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, + 11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, + 11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, + 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, + 12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, + 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, + 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, + 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8, + 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12, + 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, + 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, + 13, 14, 15, 16 }; + +static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, + -1, -1, 3, -1, -1, -1 }, // 1111 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 }, // 2111 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 3111 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 4111 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 }, // 1211 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 2211 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 3211 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 }, // 4211 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 }, // 1311 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 }, // 2311 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 }, // 3311 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 }, // 4311 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 }, // 1411 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 }, // 2411 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 }, // 3411 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 }, // 4411 + { 0, -1, -1, 
-1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 }, // 1121 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 }, // 2121 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 3121 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 4121 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 }, // 1221 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 2221 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 3221 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 }, // 4221 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 }, // 1321 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 }, // 2321 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 }, // 3321 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 }, // 4321 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 }, // 1421 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 }, // 2421 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 }, // 3421 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 }, // 4421 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 }, // 1131 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 }, // 2131 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 3131 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 4131 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 }, // 1231 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 2231 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 3231 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 }, // 4231 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 }, // 1331 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 }, // 2331 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 }, // 3331 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 }, // 4331 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 }, // 1431 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 }, // 2431 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 }, // 3431 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 }, // 4431 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 }, // 1141 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 }, // 2141 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 3141 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 4141 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 }, // 1241 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 2241 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 3241 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 }, // 4241 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 }, // 1341 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 }, // 2341 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 }, // 3341 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 }, // 4341 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 }, // 1441 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 }, // 2441 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 }, // 3441 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 }, // 4441 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 }, // 1112 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 }, // 2112 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 3112 + { 0, 1, 2, 3, 4, -1, -1, 
-1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 4112 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 }, // 1212 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 2212 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 3212 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 }, // 4212 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 }, // 1312 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 }, // 2312 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 }, // 3312 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 }, // 4312 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 }, // 1412 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 }, // 2412 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 }, // 3412 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 }, // 4412 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 }, // 1122 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 }, // 2122 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 3122 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 4122 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 }, // 1222 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 2222 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 3222 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 }, // 4222 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 }, // 1322 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 }, // 2322 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 }, // 3322 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 }, // 4322 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 }, // 1422 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 }, // 2422 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 }, // 3422 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 }, // 4422 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 }, // 1132 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 }, // 2132 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 3132 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 4132 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 }, // 1232 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 2232 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 3232 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 }, // 4232 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 }, // 1332 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 }, // 2332 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 }, // 3332 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 }, // 4332 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 }, // 1432 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 }, // 2432 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 }, // 3432 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 }, // 4432 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 }, // 1142 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 }, // 2142 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 3142 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 }, // 4142 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 }, // 1242 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 2242 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 
}, // 3242 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 }, // 4242 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 }, // 1342 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 }, // 2342 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 }, // 3342 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 }, // 4342 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 }, // 1442 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 }, // 2442 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 }, // 3442 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 }, // 4442 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 }, // 1113 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 }, // 2113 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 3113 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 4113 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 }, // 1213 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 2213 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 3213 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 }, // 4213 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 }, // 1313 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 }, // 2313 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 }, // 3313 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 }, // 4313 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 }, // 1413 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 }, // 2413 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 }, // 3413 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 }, // 4413 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 }, // 1123 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 }, // 2123 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 3123 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 4123 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 }, // 1223 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 2223 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 3223 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 }, // 4223 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 }, // 1323 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 }, // 2323 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 }, // 3323 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 }, // 4323 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 }, // 1423 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 }, // 2423 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 }, // 3423 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 }, // 4423 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 }, // 1133 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 }, // 2133 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 3133 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 4133 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 }, // 1233 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 2233 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 3233 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 }, // 4233 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 }, // 1333 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 }, // 2333 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 
-1, 9, 10, 11, -1 }, // 3333 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 }, // 4333 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 }, // 1433 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 }, // 2433 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 }, // 3433 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 }, // 4433 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 }, // 1143 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 }, // 2143 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 3143 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 4143 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 }, // 1243 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 2243 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 3243 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 }, // 4243 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 }, // 1343 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 }, // 2343 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 }, // 3343 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 }, // 4343 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 }, // 1443 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 }, // 2443 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 }, // 3443 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 }, // 4443 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 }, // 1114 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 }, // 2114 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 3114 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 4114 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 }, // 1214 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 2214 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 3214 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 }, // 4214 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 }, // 1314 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 }, // 2314 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 }, // 3314 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 }, // 4314 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 }, // 1414 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 }, // 2414 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 }, // 3414 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 }, // 4414 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 }, // 1124 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 }, // 2124 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 3124 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 4124 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 }, // 1224 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 2224 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 3224 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 }, // 4224 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 }, // 1324 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 }, // 2324 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 }, // 3324 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 }, // 4324 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 }, // 1424 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 }, // 2424 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 
9, 10, 11, 12 }, // 3424 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 }, // 4424 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 }, // 1134 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 }, // 2134 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 3134 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 4134 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 }, // 1234 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 2234 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 3234 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 }, // 4234 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 }, // 1334 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 }, // 2334 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 }, // 3334 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 }, // 4334 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 }, // 1434 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 }, // 2434 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 }, // 3434 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 }, // 4434 + { 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 }, // 1144 + { 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 }, // 2144 + { 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 3144 + { 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 4144 + { 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 }, // 1244 + { 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 2244 + { 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 3244 + { 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 }, // 4244 + { 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 }, // 1344 + { 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 }, // 2344 + { 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 }, // 3344 + { 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 }, // 4344 + { 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }, // 1444 + { 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }, // 2444 + { 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }, // 3444 + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } // 4444 +}; + +static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) { + uint8_t *dataPtr = *dataPtrPtr; + uint8_t code; + + if (val < (1 << 8)) { // 1 byte + *dataPtr = (uint8_t)(val); + *dataPtrPtr += 1; + code = 0; + } else if (val < (1 << 16)) { // 2 bytes + *(uint16_t *) dataPtr = (uint16_t)(val); + *dataPtrPtr += 2; + code = 1; + } else if (val < (1 << 24)) { // 3 bytes + *(uint16_t *) dataPtr = (uint16_t)(val); + *(dataPtr + 2) = (uint8_t)(val >> 16); + *dataPtrPtr += 3; + code = 2; + } else { // 4 bytes + *(uint32_t *) dataPtr = val; + *dataPtrPtr += 4; + code = 3; + } + + return code; +} + +static uint8_t *svb_encode_scalar_d1_init(const uint32_t *in, + uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr, + uint32_t count, uint32_t prev) { + if (count == 0) + return dataPtr; // exit immediately if no data + + uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ... 
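
The `_encode_data` helper above picks the smallest of 1, 2, 3, or 4 little-endian bytes for each delta and reports the choice as a 2-bit code; the scalar encoder packs four such codes per key byte, low bits first. A rough Rust sketch of the same selection and packing (illustrative names only, not part of this patch; the C code writes keys and data into one buffer, keys first):

    // Encodes sorted `input` as Stream VByte deltas against `prev`; returns the
    // control-key bytes and the variable-length data bytes separately.
    fn svb_delta_encode_sketch(input: &[u32], mut prev: u32) -> (Vec<u8>, Vec<u8>) {
        let (mut keys, mut data) = (Vec::new(), Vec::new());
        let (mut key, mut shift) = (0u8, 0u8);
        for (i, &v) in input.iter().enumerate() {
            if i > 0 && i % 4 == 0 {
                keys.push(key); // four codes emitted: flush the key byte
                key = 0;
                shift = 0;
            }
            let delta = v.wrapping_sub(prev);
            prev = v;
            // 2-bit code: 0 => 1 byte, 1 => 2 bytes, 2 => 3 bytes, 3 => 4 bytes.
            let code: u8 = if delta < (1 << 8) { 0 }
                           else if delta < (1 << 16) { 1 }
                           else if delta < (1 << 24) { 2 }
                           else { 3 };
            data.extend_from_slice(&delta.to_le_bytes()[..code as usize + 1]);
            key |= code << shift;
            shift += 2;
        }
        if !input.is_empty() {
            keys.push(key); // last, possibly partial, key byte
        }
        (keys, data)
    }
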
+ uint8_t key = 0; + for (uint32_t c = 0; c < count; c++) { + if (shift == 8) { + shift = 0; + *keyPtr++ = key; + key = 0; + } + uint32_t val = in[c] - prev; + prev = in[c]; + uint8_t code = _encode_data(val, &dataPtr); + key |= code << shift; + shift += 2; + } + + *keyPtr = key; // write last key (no increment needed) + return dataPtr; // pointer to first unused data byte +} + +size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t count, uint8_t *out, + uint32_t prev) { + uint8_t *keyPtr = out; // keys come immediately after 32-bit count + uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte + uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys + + return svb_encode_scalar_d1_init(in, keyPtr, dataPtr, count, prev) - out; + +} + +static inline __m128i _decode_avx(uint32_t key, const uint8_t *__restrict__ *dataPtrPtr) { + uint8_t len = lengthTable[key]; + __m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr); + __m128i Shuf = *(__m128i *) &shuffleTable[key]; + + Data = _mm_shuffle_epi8(Data, Shuf); + *dataPtrPtr += len; + + return Data; +} +#define BroadcastLastXMM 0xFF // bits 0-7 all set to choose highest element + + + +static inline void _write_avx(uint32_t *out, __m128i Vec) { + _mm_storeu_si128((__m128i *) out, Vec); +} + +static __m128i _write_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) { + __m128i Add = _mm_slli_si128(Vec, 4); // Cycle 1: [- A B C] (already done) + Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // Cycle 2: [P P P P] + Vec = _mm_add_epi32(Vec, Add); // Cycle 2: [A AB BC CD] + Add = _mm_slli_si128(Vec, 8); // Cycle 3: [- - A AB] + Vec = _mm_add_epi32(Vec, Prev); // Cycle 3: [PA PAB PBC PCD] + Vec = _mm_add_epi32(Vec, Add); // Cycle 4: [PA PAB PABC PABCD] + + _write_avx(out, Vec); + return Vec; +} + +#ifndef _MSC_VER +static __m128i High16To32 = {0xFFFF0B0AFFFF0908, 0xFFFF0F0EFFFF0D0C}; +#else +static __m128i High16To32 = {8, 9, -1, -1, 10, 11, -1, -1, + 12, 13, -1, -1, 14, 15, -1, -1}; +#endif + +static inline __m128i _write_16bit_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) { + // vec == [A B C D E F G H] (16 bit values) + __m128i Add = _mm_slli_si128(Vec, 2); // [- A B C D E F G] + Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // [P P P P] (32-bit) + Vec = _mm_add_epi32(Vec, Add); // [A AB BC CD DE FG GH] + Add = _mm_slli_si128(Vec, 4); // [- - A AB BC CD DE EF] + Vec = _mm_add_epi32(Vec, Add); // [A AB ABC ABCD BCDE CDEF DEFG EFGH] + __m128i V1 = _mm_cvtepu16_epi32(Vec); // [A AB ABC ABCD] (32-bit) + V1 = _mm_add_epi32(V1, Prev); // [PA PAB PABC PABCD] (32-bit) + __m128i V2 = + _mm_shuffle_epi8(Vec, High16To32); // [BCDE CDEF DEFG EFGH] (32-bit) + V2 = _mm_add_epi32(V1, V2); // [PABCDE PABCDEF PABCDEFG PABCDEFGH] (32-bit) + _write_avx(out, V1); + _write_avx(out + 4, V2); + return V2; +} + +static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) { + const uint8_t *dataPtr = *dataPtrPtr; + uint32_t val; + + if (code == 0) { // 1 byte + val = (uint32_t) * dataPtr; + dataPtr += 1; + } else if (code == 1) { // 2 bytes + val = (uint32_t) * (uint16_t *) dataPtr; + dataPtr += 2; + } else if (code == 2) { // 3 bytes + val = (uint32_t) * (uint16_t *) dataPtr; + val |= *(dataPtr + 2) << 16; + dataPtr += 3; + } else { // code == 3 + val = *(uint32_t *) dataPtr; // 4 bytes + dataPtr += 4; + } + + *dataPtrPtr = dataPtr; + return val; +} + +const uint8_t *svb_decode_scalar_d1_init(uint32_t *outPtr, const uint8_t *keyPtr, + const uint8_t *dataPtr, uint32_t count, + uint32_t prev) { + if (count == 0) + 
return dataPtr; // no reads or writes if no data + + uint8_t shift = 0; + uint32_t key = *keyPtr++; + + for (uint32_t c = 0; c < count; c++) { + if (shift == 8) { + shift = 0; + key = *keyPtr++; + } + uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3); + val += prev; + *outPtr++ = val; + prev = val; + shift += 2; + } + + return dataPtr; // pointer to first unused byte after end +} + +const uint8_t *svb_decode_avx_d1_init(uint32_t *out, const uint8_t *__restrict__ keyPtr, + const uint8_t *__restrict__ dataPtr, uint64_t count, uint32_t prev) { + uint64_t keybytes = count / 4; // number of key bytes + if (keybytes >= 8) { + __m128i Prev = _mm_set1_epi32(prev); + __m128i Data; + + int64_t Offset = -(int64_t) keybytes / 8 + 1; + + const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset; + uint64_t nextkeys = keyPtr64[Offset]; + for (; Offset != 0; ++Offset) { + uint64_t keys = nextkeys; + nextkeys = keyPtr64[Offset + 1]; + // faster 16-bit delta since we only have 8-bit values + if (!keys) { // 32 1-byte ints in a row + + Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr))); + Prev = _write_16bit_avx_d1(out, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_lddqu_si128((__m128i *) (dataPtr + 8))); + Prev = _write_16bit_avx_d1(out + 8, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_lddqu_si128((__m128i *) (dataPtr + 16))); + Prev = _write_16bit_avx_d1(out + 16, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_lddqu_si128((__m128i *) (dataPtr + 24))); + Prev = _write_16bit_avx_d1(out + 24, Data, Prev); + out += 32; + dataPtr += 32; + continue; + } + + Data = _decode_avx(keys & 0x00FF, &dataPtr); + Prev = _write_avx_d1(out, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 4, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 8, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 12, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 16, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 20, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 24, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 28, Data, Prev); + + out += 32; + } + { + uint64_t keys = nextkeys; + // faster 16-bit delta since we only have 8-bit values + if (!keys) { // 32 1-byte ints in a row + Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr))); + Prev = _write_16bit_avx_d1(out, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_lddqu_si128((__m128i *) (dataPtr + 8))); + Prev = _write_16bit_avx_d1(out + 8, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_lddqu_si128((__m128i *) (dataPtr + 16))); + Prev = _write_16bit_avx_d1(out + 16, Data, Prev); + Data = _mm_cvtepu8_epi16( + _mm_loadl_epi64((__m128i *) (dataPtr + 24))); + Prev = _write_16bit_avx_d1(out + 24, Data, Prev); + out += 32; + dataPtr += 32; + + } else { + + Data = _decode_avx(keys & 0x00FF, &dataPtr); + Prev = _write_avx_d1(out, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 4, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 8, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 12, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 
0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 16, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 20, Data, Prev); + + keys >>= 16; + Data = _decode_avx((keys & 0x00FF), &dataPtr); + Prev = _write_avx_d1(out + 24, Data, Prev); + Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr); + Prev = _write_avx_d1(out + 28, Data, Prev); + + out += 32; + } + } + prev = out[-1]; + } + uint64_t consumedkeys = keybytes - (keybytes & 7); + return svb_decode_scalar_d1_init(out, keyPtr + consumedkeys, dataPtr, + count & 31, prev); +} + +size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out, + uint32_t count, uint32_t prev) { + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + const uint8_t *keyPtr = in; + const uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return svb_decode_avx_d1_init(out, keyPtr, dataPtr, count, prev) - in; +} diff --git a/cpp/streamvbyte/tests/unit.c b/cpp/streamvbyte/tests/unit.c new file mode 100644 index 000000000..f2870464f --- /dev/null +++ b/cpp/streamvbyte/tests/unit.c @@ -0,0 +1,73 @@ +#include +#include + +#include "streamvbyte.h" +#include "streamvbytedelta.h" + +int main() { + int N = 4096; + uint32_t * datain = malloc(N * sizeof(uint32_t)); + uint8_t * compressedbuffer = malloc(2 * N * sizeof(uint32_t)); + uint32_t * recovdata = malloc(N * sizeof(uint32_t)); + + for (int length = 0; length <= N;) { + printf("length = %d \n", length); + for (uint32_t gap = 1; gap <= 387420489; gap *= 3) { + for (int k = 0; k < length; ++k) + datain[k] = gap; + size_t compsize = streamvbyte_encode(datain, length, + compressedbuffer); + size_t usedbytes = streamvbyte_decode(compressedbuffer, recovdata, + length); + if (compsize != usedbytes) { + printf( + "[streamvbyte_decode] code is buggy gap = %d, size mismatch %d %d \n", + (int) gap, (int) compsize, (int) usedbytes); + return -1; + } + for (int k = 0; k < length; ++k) { + if (recovdata[k] != datain[k]) { + printf("[streamvbyte_decode] code is buggy gap = %d\n", + (int) gap); + return -1; + } + } + } + + printf("Delta \n"); + for (size_t gap = 1; gap <= 531441; gap *= 3) { + for (int k = 0; k < length; ++k) + datain[k] = gap * k; + size_t compsize = streamvbyte_delta_encode(datain, length, + compressedbuffer, 0); + size_t usedbytes = streamvbyte_delta_decode(compressedbuffer, + recovdata, length, 0); + if (compsize != usedbytes) { + printf( + "[streamvbyte_delta_decode] code is buggy gap = %d, size mismatch %d %d \n", + (int) gap, (int) compsize, (int) usedbytes); + return -1; + } + for (int k = 0; k < length; ++k) { + if (recovdata[k] != datain[k]) { + printf( + "[streamvbyte_delta_decode] code is buggy gap = %d\n", + (int) gap); + return -1; + } + } + + } + + if (length < 128) + ++length; + else { + length *= 2; + } + } + free(datain); + free(compressedbuffer); + free(recovdata); + printf("Code looks good.\n"); + return 0; +} diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 75c22aded..84658ae79 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -74,7 +74,7 @@ pub mod tests { use Score; use core::SegmentReader; use SegmentLocalId; - use fastfield::U32FastFieldReader; + use fastfield::U64FastFieldReader; use schema::Field; /// Stores all of the doc ids. @@ -125,9 +125,9 @@ pub mod tests { /// /// This collector is mainly useful for tests. 
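
`svb_decode_scalar_d1_init` above is the scalar tail that runs after the SIMD loop: it reads each 2-bit code from the key stream, pulls 1–4 little-endian bytes from the data stream, and rebuilds absolute values by accumulating the running previous value. A hedged Rust equivalent of that inner loop (names are illustrative, not part of the patch):

    // Decodes `count` delta-coded values starting from `prev`; mirrors the scalar
    // loop of svb_decode_scalar_d1_init (sketch only).
    fn svb_delta_decode_sketch(keys: &[u8], mut data: &[u8], count: usize, mut prev: u32) -> Vec<u32> {
        let mut out = Vec::with_capacity(count);
        for i in 0..count {
            let code = (keys[i / 4] >> ((i % 4) * 2)) & 0b11; // 2-bit length code
            let len = code as usize + 1;                      // 1..=4 data bytes
            let mut buf = [0u8; 4];
            buf[..len].copy_from_slice(&data[..len]);
            data = &data[len..];
            prev = prev.wrapping_add(u32::from_le_bytes(buf)); // delta -> absolute value
            out.push(prev);
        }
        out
    }

The AVX path performs the same reconstruction 32 values at a time: the shuffle table expands the packed bytes for four values per 128-bit load, and `_write_avx_d1` computes the running prefix sum in registers with two shift-and-add steps plus a broadcast of the previous vector's last lane.
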
pub struct FastFieldTestCollector { - vals: Vec, + vals: Vec, field: Field, - ff_reader: Option, + ff_reader: Option, } impl FastFieldTestCollector { @@ -139,7 +139,7 @@ pub mod tests { } } - pub fn vals(self,) -> Vec { + pub fn vals(self,) -> Vec { self.vals } } diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index a99bc2148..456db4858 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -4,8 +4,14 @@ use common::serialize::BinarySerializable; use std::mem; -pub fn compute_num_bits(amplitude: u32) -> u8 { - (32u32 - amplitude.leading_zeros()) as u8 +pub fn compute_num_bits(amplitude: u64) -> u8 { + let amplitude = (64u32 - amplitude.leading_zeros()) as u8; + if amplitude <= 64 - 8 { + amplitude + } + else { + 64 + } } pub struct BitPacker { @@ -15,7 +21,7 @@ pub struct BitPacker { written_size: usize, } -impl BitPacker { +impl BitPacker { pub fn new(num_bits: usize) -> BitPacker { BitPacker { @@ -26,7 +32,7 @@ impl BitPacker { } } - pub fn write(&mut self, val: u32, output: &mut TWrite) -> io::Result<()> { + pub fn write(&mut self, val: u64, output: &mut TWrite) -> io::Result<()> { let val_u64 = val as u64; if self.mini_buffer_written + self.num_bits > 64 { self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32); @@ -67,22 +73,29 @@ impl BitPacker { pub struct BitUnpacker { num_bits: usize, - mask: u32, + mask: u64, data_ptr: *const u8, data_len: usize, } impl BitUnpacker { pub fn new(data: &[u8], num_bits: usize) -> BitUnpacker { + let mask: u64 = + if num_bits == 64 { + !0u64 + } + else { + (1u64 << num_bits) - 1u64 + }; BitUnpacker { num_bits: num_bits, - mask: (1u32 << num_bits) - 1u32, + mask: mask, data_ptr: data.as_ptr(), data_len: data.len() } } - pub fn get(&self, idx: usize) -> u32 { + pub fn get(&self, idx: usize) -> u64 { if self.num_bits == 0 { return 0; } @@ -101,7 +114,7 @@ impl BitUnpacker { } val_unshifted_unmasked = unsafe { mem::transmute::<[u8; 8], u64>(arr) }; } - let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32; + let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; (val_shifted & self.mask) } @@ -123,13 +136,14 @@ mod test { assert_eq!(compute_num_bits(4), 3u8); assert_eq!(compute_num_bits(255), 8u8); assert_eq!(compute_num_bits(256), 9u8); + assert_eq!(compute_num_bits(5_000_000_000), 33u8); } fn test_bitpacker_util(len: usize, num_bits: usize) { let mut data = Vec::new(); let mut bitpacker = BitPacker::new(num_bits); - let max_val: u32 = (1 << num_bits) - 1; - let vals: Vec = (0u32..len as u32).map(|i| { + let max_val: u64 = (1 << num_bits) - 1; + let vals: Vec = (0u64..len as u64).map(|i| { if max_val == 0 { 0 } diff --git a/src/compression/composite.rs b/src/compression/composite.rs index 915ab9f42..a92023405 100644 --- a/src/compression/composite.rs +++ b/src/compression/composite.rs @@ -110,7 +110,7 @@ pub mod tests { let data = generate_array(10_000, 0.1); let mut encoder = CompositeEncoder::new(); let compressed = encoder.compress_unsorted(&data); - assert_eq!(compressed.len(), 19_790); + assert!(compressed.len() <= 19_794); let mut decoder = CompositeDecoder::new(); let result = decoder.uncompress_unsorted(&compressed, data.len()); for i in 0..data.len() { @@ -123,7 +123,7 @@ pub mod tests { let data = generate_array(10_000, 0.1); let mut encoder = CompositeEncoder::new(); let compressed = encoder.compress_sorted(&data); - assert_eq!(compressed.len(), 7_822); + assert!(compressed.len() <= 7_826); let mut decoder = CompositeDecoder::new(); let result = 
decoder.uncompress_sorted(&compressed, data.len()); for i in 0..data.len() { diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 3e1a627df..722521c2c 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -4,16 +4,32 @@ mod composite; pub use self::composite::{CompositeEncoder, CompositeDecoder}; -#[cfg(feature="simdcompression")] -mod compression_simd; -#[cfg(feature="simdcompression")] -pub use self::compression_simd::{BlockEncoder, BlockDecoder}; - #[cfg(not(feature="simdcompression"))] -mod compression_nosimd; -#[cfg(not(feature="simdcompression"))] -pub use self::compression_nosimd::{BlockEncoder, BlockDecoder}; +mod pack { + mod compression_pack_nosimd; + pub use self::compression_pack_nosimd::*; +} + +#[cfg(feature="simdcompression")] +mod pack { + mod compression_pack_simd; + pub use self::compression_pack_simd::*; +} + +pub use self::pack::{BlockEncoder, BlockDecoder}; + +#[cfg( any(not(feature="simdcompression"), target_env="msvc") )] +mod vint { + mod compression_vint_nosimd; + pub use self::compression_vint_nosimd::*; +} + +#[cfg( all(feature="simdcompression", not(target_env="msvc")) )] +mod vint { + mod compression_vint_simd; + pub use self::compression_vint_simd::*; +} pub trait VIntEncoder { @@ -28,49 +44,14 @@ pub trait VIntDecoder { impl VIntEncoder for BlockEncoder { - fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] { - let mut byte_written = 0; - for &v in input { - let mut to_encode: u32 = v - offset; - offset = v; - loop { - let next_byte: u8 = (to_encode % 128u32) as u8; - to_encode /= 128u32; - if to_encode == 0u32 { - self.output[byte_written] = next_byte | 128u8; - byte_written += 1; - break; - } - else { - self.output[byte_written] = next_byte; - byte_written += 1; - } - } - } - &self.output[..byte_written] + fn compress_vint_sorted(&mut self, input: &[u32], offset: u32) -> &[u8] { + vint::compress_sorted(input, &mut self.output, offset) } fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] { - let mut byte_written = 0; - for &v in input { - let mut to_encode: u32 = v; - loop { - let next_byte: u8 = (to_encode % 128u32) as u8; - to_encode /= 128u32; - if to_encode == 0u32 { - self.output[byte_written] = next_byte | 128u8; - byte_written += 1; - break; - } - else { - self.output[byte_written] = next_byte; - byte_written += 1; - } - } - } - &self.output[..byte_written] + vint::compress_unsorted(input, &mut self.output) } -} +} impl VIntDecoder for BlockDecoder { @@ -79,52 +60,19 @@ impl VIntDecoder for BlockDecoder { compressed_data: &'a [u8], offset: u32, num_els: usize) -> &'a [u8] { - let mut read_byte = 0; - let mut result = offset; - for i in 0..num_els { - let mut shift = 0u32; - loop { - let cur_byte = compressed_data[read_byte]; - read_byte += 1; - result += ((cur_byte % 128u8) as u32) << shift; - if cur_byte & 128u8 != 0u8 { - break; - } - shift += 7; - } - self.output[i] = result; - } self.output_len = num_els; - &compressed_data[read_byte..] + vint::uncompress_sorted(compressed_data, &mut self.output[..num_els], offset) } fn uncompress_vint_unsorted<'a>( &mut self, compressed_data: &'a [u8], num_els: usize) -> &'a [u8] { - let mut read_byte = 0; - for i in 0..num_els { - let mut result = 0u32; - let mut shift = 0u32; - loop { - let cur_byte = compressed_data[read_byte]; - read_byte += 1; - result += ((cur_byte % 128u8) as u32) << shift; - if cur_byte & 128u8 != 0u8 { - break; - } - shift += 7; - } - self.output[i] = result; - } self.output_len = num_els; - &compressed_data[read_byte..] 
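
The widened `compute_num_bits` in `common/bitpacker.rs` above takes a `u64` amplitude and jumps straight to 64 bits once more than 56 bits would be needed, presumably because `BitUnpacker::get` reads an 8-byte window starting at an arbitrary byte offset, and a 57–63 bit value could otherwise straddle a ninth byte. A small behavioural check consistent with the updated unit test (sketch, not part of the patch):

    // Same logic as the patched compute_num_bits, with a few worked values.
    fn compute_num_bits(amplitude: u64) -> u8 {
        let bits = (64 - amplitude.leading_zeros()) as u8;
        if bits <= 64 - 8 { bits } else { 64 } // widths of 57..=63 are rounded up to 64
    }

    fn main() {
        assert_eq!(compute_num_bits(255), 8);
        assert_eq!(compute_num_bits(256), 9);
        assert_eq!(compute_num_bits(5_000_000_000), 33); // the new test's assertion
        assert_eq!(compute_num_bits(1u64 << 56), 64);    // 57 bits requested -> 64
        assert_eq!(compute_num_bits(u64::max_value()), 64);
    }
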
- } - + vint::uncompress_unsorted(compressed_data, &mut self.output[..num_els]) + } } - - pub const NUM_DOCS_PER_BLOCK: usize = 128; //< should be a power of 2 to let the compiler optimize. @@ -224,7 +172,7 @@ pub mod tests { #[test] fn test_encode_vint() { { - let expected_length = 123; + let expected_length = 154; let mut encoder = BlockEncoder::new(); let input: Vec = (0u32..123u32) .map(|i| 4 + i * 7 / 2) @@ -232,23 +180,13 @@ pub mod tests { .collect(); for offset in &[0u32, 1u32, 2u32] { let encoded_data = encoder.compress_vint_sorted(&input, *offset); - assert_eq!(encoded_data.len(), expected_length); + assert!(encoded_data.len() <= expected_length); let mut decoder = BlockDecoder::new(); let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len()); assert_eq!(0, remaining_data.len()); assert_eq!(input, decoder.output_array()); } } - { - let mut encoder = BlockEncoder::new(); - let input = vec!(3u32, 17u32, 187u32); - let encoded_data = encoder.compress_vint_sorted(&input, 0); - assert_eq!(encoded_data.len(), 4); - assert_eq!(encoded_data[0], 3u8 + 128u8); - assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8); - assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8)); - assert_eq!(encoded_data[3], (1u8 + 128u8)); - } } @@ -272,4 +210,27 @@ pub mod tests { }); } + + const NUM_INTS_BENCH_VINT: usize = 10; + + #[bench] + fn bench_compress_vint(b: &mut Bencher) { + let mut encoder = BlockEncoder::new(); + let data = generate_array(NUM_INTS_BENCH_VINT, 0.001); + b.iter(|| { + encoder.compress_vint_sorted(&data, 0u32); + }); + } + + #[bench] + fn bench_uncompress_vint(b: &mut Bencher) { + let mut encoder = BlockEncoder::new(); + let data = generate_array(NUM_INTS_BENCH_VINT, 0.001); + let compressed = encoder.compress_vint_sorted(&data, 0u32); + let mut decoder = BlockDecoder::new(); + b.iter(|| { + decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT); + }); + } + } diff --git a/src/compression/compression_nosimd.rs b/src/compression/pack/compression_pack_nosimd.rs similarity index 99% rename from src/compression/compression_nosimd.rs rename to src/compression/pack/compression_pack_nosimd.rs index 501a4e1fe..979cd4eb8 100644 --- a/src/compression/compression_nosimd.rs +++ b/src/compression/pack/compression_pack_nosimd.rs @@ -2,7 +2,7 @@ use common::bitpacker::compute_num_bits; use common::bitpacker::{BitPacker, BitUnpacker}; use std::cmp; use std::io::Write; -use super::NUM_DOCS_PER_BLOCK; +use super::super::NUM_DOCS_PER_BLOCK; const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1; diff --git a/src/compression/compression_simd.rs b/src/compression/pack/compression_pack_simd.rs similarity index 98% rename from src/compression/compression_simd.rs rename to src/compression/pack/compression_pack_simd.rs index 308e13445..dcdcf7065 100644 --- a/src/compression/compression_simd.rs +++ b/src/compression/pack/compression_pack_simd.rs @@ -1,4 +1,4 @@ -use super::NUM_DOCS_PER_BLOCK; +use super::super::NUM_DOCS_PER_BLOCK; const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1; diff --git a/src/compression/vint/compression_vint_nosimd.rs b/src/compression/vint/compression_vint_nosimd.rs new file mode 100644 index 000000000..b53dee1c7 --- /dev/null +++ b/src/compression/vint/compression_vint_nosimd.rs @@ -0,0 +1,92 @@ + +#[inline(always)] +pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] { + let mut byte_written = 0; + for &v in input { + let mut to_encode: u32 = v - offset; + offset = v; + 
loop { + let next_byte: u8 = (to_encode % 128u32) as u8; + to_encode /= 128u32; + if to_encode == 0u32 { + output[byte_written] = next_byte | 128u8; + byte_written += 1; + break; + } + else { + output[byte_written] = next_byte; + byte_written += 1; + } + } + } + &output[..byte_written] +} + +#[inline(always)] +pub fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] { + let mut byte_written = 0; + for &v in input { + let mut to_encode: u32 = v; + loop { + let next_byte: u8 = (to_encode % 128u32) as u8; + to_encode /= 128u32; + if to_encode == 0u32 { + output[byte_written] = next_byte | 128u8; + byte_written += 1; + break; + } + else { + output[byte_written] = next_byte; + byte_written += 1; + } + } + } + &output[..byte_written] +} + +#[inline(always)] +pub fn uncompress_sorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32], + offset: u32) -> &'a [u8] { + let mut read_byte = 0; + let mut result = offset; + let num_els = output.len(); + for i in 0..num_els { + let mut shift = 0u32; + loop { + let cur_byte = compressed_data[read_byte]; + read_byte += 1; + result += ((cur_byte % 128u8) as u32) << shift; + if cur_byte & 128u8 != 0u8 { + break; + } + shift += 7; + } + output[i] = result; + } + &compressed_data[read_byte..] +} + +#[inline(always)] +pub fn uncompress_unsorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32]) -> &'a [u8] { + let mut read_byte = 0; + let num_els = output.len(); + for i in 0..num_els { + let mut result = 0u32; + let mut shift = 0u32; + loop { + let cur_byte = compressed_data[read_byte]; + read_byte += 1; + result += ((cur_byte % 128u8) as u32) << shift; + if cur_byte & 128u8 != 0u8 { + break; + } + shift += 7; + } + output[i] = result; + } + &compressed_data[read_byte..] +} diff --git a/src/compression/vint/compression_vint_simd.rs b/src/compression/vint/compression_vint_simd.rs new file mode 100644 index 000000000..9d7264994 --- /dev/null +++ b/src/compression/vint/compression_vint_simd.rs @@ -0,0 +1,82 @@ + +mod streamvbyte { + + use libc::size_t; + + extern { + pub fn streamvbyte_delta_encode( + data: *const u32, + num_els: u32, + output: *mut u8, + offset: u32) -> size_t; + + pub fn streamvbyte_delta_decode( + compressed_data: *const u8, + output: *mut u32, + num_els: u32, + offset: u32) -> size_t; + + pub fn streamvbyte_encode( + data: *const u32, + num_els: u32, + output: *mut u8) -> size_t; + + pub fn streamvbyte_decode( + compressed_data: *const u8, + output: *mut u32, + num_els: usize) -> size_t; + } +} + + +#[inline(always)] +pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], offset: u32) -> &'a [u8] { + let compress_length = unsafe { + streamvbyte::streamvbyte_delta_encode( + input.as_ptr(), + input.len() as u32, + output.as_mut_ptr(), + offset) + }; + &output[..compress_length] +} + +#[inline(always)] +pub fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] { + let compress_length = unsafe { + streamvbyte::streamvbyte_encode( + input.as_ptr(), + input.len() as u32, + output.as_mut_ptr()) + }; + &output[..compress_length] +} + +#[inline(always)] +pub fn uncompress_sorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32], + offset: u32) -> &'a [u8] { + let consumed_bytes = unsafe { + streamvbyte::streamvbyte_delta_decode( + compressed_data.as_ptr(), + output.as_mut_ptr(), + output.len() as u32, + offset) + }; + &compressed_data[consumed_bytes..] 
+} + +#[inline(always)] +pub fn uncompress_unsorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32]) -> &'a [u8] { + let consumed_bytes = unsafe { + streamvbyte::streamvbyte_decode( + compressed_data.as_ptr(), + output.as_mut_ptr(), + output.len()) + }; + &compressed_data[consumed_bytes..] +} + diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 7a3325730..e57cc61bf 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use std::fmt; use schema::Field; use postings::{SegmentPostings, BlockSegmentPostings, SegmentPostingsOption}; -use fastfield::{U32FastFieldsReader, U32FastFieldReader}; +use fastfield::{U64FastFieldsReader, U64FastFieldReader}; use schema::Schema; use schema::FieldType; use postings::FreqHandler; @@ -41,8 +41,8 @@ pub struct SegmentReader { term_infos: Arc>, postings_data: ReadOnlySource, store_reader: StoreReader, - fast_fields_reader: Arc, - fieldnorms_reader: Arc, + fast_fields_reader: Arc, + fieldnorms_reader: Arc, delete_bitset: DeleteBitSet, positions_data: ReadOnlySource, schema: Schema, @@ -78,11 +78,11 @@ impl SegmentReader { } /// Accessor to a segment's fast field reader given a field. - pub fn get_fast_field_reader(&self, field: Field) -> Option { - /// Returns the u32 fast value reader if the field - /// is a u32 field indexed as "fast". + pub fn get_fast_field_reader(&self, field: Field) -> Option { + /// Returns the u64 fast value reader if the field + /// is a u64 field indexed as "fast". /// - /// Return None if the field is not a u32 field + /// Return None if the field is not a u64 field /// indexed with the fast option. /// /// # Panics @@ -93,8 +93,8 @@ impl SegmentReader { warn!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name()); None }, - &FieldType::U32(ref u32_options) => { - if u32_options.is_fast() { + &FieldType::U64(ref u64_options) => { + if u64_options.is_fast() { self.fast_fields_reader.get_field(field) } else { @@ -112,7 +112,7 @@ impl SegmentReader { /// /// They are simply stored as a fast field, serialized in /// the `.fieldnorm` file of the segment. 
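
After the reorganisation earlier in this patch, `compress_vint_sorted` and `uncompress_vint_sorted` keep their signatures but delegate to either the scalar `vint` module or the streamvbyte FFI bindings above, depending on the `simdcompression` feature and the target. Both backends should satisfy a round trip in the style of the updated `test_encode_vint` (sketch; exact byte counts differ between backends, hence the relaxed upper-bound assertion):

    // Inside the compression tests (sketch); BlockEncoder/BlockDecoder and the
    // VInt traits come from src/compression/mod.rs.
    #[test]
    fn vint_round_trip_sketch() {
        use super::{BlockDecoder, BlockEncoder, VIntDecoder, VIntEncoder};

        let vals: Vec<u32> = (0u32..123u32).map(|i| 4 + i * 7 / 2).collect();
        let mut encoder = BlockEncoder::new();
        let mut decoder = BlockDecoder::new();
        for &offset in &[0u32, 1u32, 2u32] {
            let compressed = encoder.compress_vint_sorted(&vals, offset);
            assert!(compressed.len() <= 154); // scalar and SIMD sizes both fit the bound
            let remaining = decoder.uncompress_vint_sorted(compressed, offset, vals.len());
            assert_eq!(remaining.len(), 0);
            assert_eq!(vals, decoder.output_array());
        }
    }
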
- pub fn get_fieldnorms_reader(&self, field: Field) -> Option { + pub fn get_fieldnorms_reader(&self, field: Field) -> Option { self.fieldnorms_reader.get_field(field) } @@ -138,10 +138,10 @@ impl SegmentReader { let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS)); let fast_field_data = try!(segment.open_read(SegmentComponent::FASTFIELDS)); - let fast_fields_reader = try!(U32FastFieldsReader::open(fast_field_data)); + let fast_fields_reader = try!(U64FastFieldsReader::open(fast_field_data)); let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS)); - let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data)); + let fieldnorms_reader = try!(U64FastFieldsReader::open(fieldnorms_data)); let positions_data = segment .open_read(SegmentComponent::POSITIONS) @@ -274,7 +274,7 @@ impl SegmentReader { _ => SegmentPostingsOption::NoFreq, } } - FieldType::U32(_) => SegmentPostingsOption::NoFreq + FieldType::U64(_) => SegmentPostingsOption::NoFreq }; self.read_postings(term, segment_posting_option) } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index cb03faa87..b09db6fd1 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -8,15 +8,15 @@ /// They are useful when a field is required for all or most of /// the `DocSet` : for instance for scoring, grouping, filtering, or facetting. /// -/// Currently only u32 fastfield are supported. +/// Currently only u64 fastfield are supported. mod reader; mod writer; mod serializer; pub mod delete; -pub use self::writer::{U32FastFieldsWriter, U32FastFieldWriter}; -pub use self::reader::{U32FastFieldsReader, U32FastFieldReader}; +pub use self::writer::{U64FastFieldsWriter, U64FastFieldWriter}; +pub use self::reader::{U64FastFieldsReader, U64FastFieldReader}; pub use self::serializer::FastFieldSerializer; #[cfg(test)] @@ -37,7 +37,7 @@ mod tests { lazy_static! 
{ static ref SCHEMA: Schema = { let mut schema_builder = SchemaBuilder::default(); - schema_builder.add_u32_field("field", FAST); + schema_builder.add_u64_field("field", FAST); schema_builder.build() }; static ref FIELD: Field = { @@ -45,15 +45,15 @@ mod tests { }; } - fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: Field, value: u32) { + fn add_single_field_doc(fast_field_writers: &mut U64FastFieldsWriter, field: Field, value: u64) { let mut doc = Document::default(); - doc.add_u32(field, value); + doc.add_u64(field, value); fast_field_writers.add_document(&doc); } #[test] pub fn test_fastfield() { - let test_fastfield = U32FastFieldReader::from(vec!(100,200,300)); + let test_fastfield = U64FastFieldReader::from(vec!(100,200,300)); assert_eq!(test_fastfield.get(0), 100); assert_eq!(test_fastfield.get(1), 200); assert_eq!(test_fastfield.get(2), 300); @@ -66,23 +66,23 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); - add_single_field_doc(&mut fast_field_writers, *FIELD, 13u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 14u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 2u32); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); + add_single_field_doc(&mut fast_field_writers, *FIELD, 13u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 14u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 2u64); fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 23 as usize); + assert_eq!(source.len(), 31 as usize); } { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); - assert_eq!(fast_field_reader.get(0), 13u32); - assert_eq!(fast_field_reader.get(1), 14u32); - assert_eq!(fast_field_reader.get(2), 2u32); + assert_eq!(fast_field_reader.get(0), 13u64); + assert_eq!(fast_field_reader.get(1), 14u64); + assert_eq!(fast_field_reader.get(2), 2u64); } } @@ -93,35 +93,35 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); - add_single_field_doc(&mut fast_field_writers, *FIELD, 4u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 777u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u32); - add_single_field_doc(&mut fast_field_writers, *FIELD, 215u32); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); + add_single_field_doc(&mut fast_field_writers, *FIELD, 4u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 
15_001u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 777u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u64); + add_single_field_doc(&mut fast_field_writers, *FIELD, 215u64); fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 48 as usize); + assert_eq!(source.len(), 56 as usize); } { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); - assert_eq!(fast_field_reader.get(0), 4u32); - assert_eq!(fast_field_reader.get(1), 14_082_001u32); - assert_eq!(fast_field_reader.get(2), 3_052u32); - assert_eq!(fast_field_reader.get(3), 9002u32); - assert_eq!(fast_field_reader.get(4), 15_001u32); - assert_eq!(fast_field_reader.get(5), 777u32); - assert_eq!(fast_field_reader.get(6), 1_002u32); - assert_eq!(fast_field_reader.get(7), 1_501u32); - assert_eq!(fast_field_reader.get(8), 215u32); + assert_eq!(fast_field_reader.get(0), 4u64); + assert_eq!(fast_field_reader.get(1), 14_082_001u64); + assert_eq!(fast_field_reader.get(2), 3_052u64); + assert_eq!(fast_field_reader.get(3), 9002u64); + assert_eq!(fast_field_reader.get(4), 15_001u64); + assert_eq!(fast_field_reader.get(5), 777u64); + assert_eq!(fast_field_reader.get(6), 1_002u64); + assert_eq!(fast_field_reader.get(7), 1_501u64); + assert_eq!(fast_field_reader.get(8), 215u64); } } @@ -134,30 +134,62 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); for _ in 0..10_000 { - add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u32); + add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u64); } fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } let source = directory.open_read(&path).unwrap(); { - assert_eq!(source.len(), 21 as usize); + assert_eq!(source.len(), 29 as usize); } { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); for doc in 0..10_000 { - assert_eq!(fast_field_reader.get(doc), 100_000u32); + assert_eq!(fast_field_reader.get(doc), 100_000u64); } } } - fn generate_permutation() -> Vec { + + #[test] + fn test_intfastfield_large_numbers() { + let path = Path::new("test"); + let mut directory: RAMDirectory = RAMDirectory::create(); + + { + let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); + let mut serializer = FastFieldSerializer::new(write).unwrap(); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); + // forcing the amplitude to be high + add_single_field_doc(&mut fast_field_writers, *FIELD, 0u64); + for i in 0u64..10_000u64 { + add_single_field_doc(&mut fast_field_writers, *FIELD, 5_000_000_000_000_000_000u64 + i); + } + fast_field_writers.serialize(&mut serializer).unwrap(); + serializer.close().unwrap(); + } + let source = directory.open_read(&path).unwrap(); + { + assert_eq!(source.len(), 80037 as usize); + } + { + let fast_field_readers = 
U64FastFieldsReader::open(source).unwrap(); + let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); + assert_eq!(fast_field_reader.get(0), 0u64); + for doc in 1..10_001 { + assert_eq!(fast_field_reader.get(doc), 5_000_000_000_000_000_000u64 + doc as u64 - 1u64); + } + } + } + + fn generate_permutation() -> Vec { let seed: &[u32; 4] = &[1, 2, 3, 4]; let mut rng = XorShiftRng::from_seed(*seed); - let mut permutation: Vec = (0u32..1_000_000u32).collect(); + let mut permutation: Vec = (0u64..1_000_000u64).collect(); rng.shuffle(&mut permutation); permutation } @@ -171,7 +203,7 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); for x in &permutation { add_single_field_doc(&mut fast_field_writers, *FIELD, *x); } @@ -180,10 +212,11 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); - let mut a = 0u32; + let mut a = 0u64; for _ in 0..n { + println!("i {}=> {} {}", a, fast_field_reader.get(a as u32), permutation[a as usize]); assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]); a = fast_field_reader.get(a as u32); } @@ -195,7 +228,7 @@ mod tests { let permutation = generate_permutation(); b.iter(|| { let n = test::black_box(7000u32); - let mut a = 0u32; + let mut a = 0u64; for i in (0u32..n).step_by(7) { a ^= permutation[i as usize]; } @@ -208,7 +241,7 @@ mod tests { let permutation = generate_permutation(); b.iter(|| { let n = test::black_box(1000u32); - let mut a = 0u32; + let mut a = 0u64; for _ in 0u32..n { a = permutation[a as usize]; } @@ -224,7 +257,7 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); for x in &permutation { add_single_field_doc(&mut fast_field_writers, *FIELD, *x); } @@ -233,11 +266,11 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); b.iter(|| { let n = test::black_box(7000u32); - let mut a = 0u32; + let mut a = 0u64; for i in (0u32..n).step_by(7) { a ^= fast_field_reader.get(i); } @@ -254,7 +287,7 @@ mod tests { { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&SCHEMA); for x in &permutation { add_single_field_doc(&mut fast_field_writers, *FIELD, *x); } @@ -263,13 +296,13 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap(); b.iter(|| { let n = 
test::black_box(1000u32); let mut a = 0u32; for _ in 0u32..n { - a = fast_field_reader.get(a); + a = fast_field_reader.get(a) as u32; } a }); diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 2fd1a342a..0b91cf234 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -1,7 +1,5 @@ use std::io; use std::collections::HashMap; -use std::ops::Deref; - use directory::ReadOnlySource; use common::BinarySerializable; use DocId; @@ -10,36 +8,36 @@ use std::path::Path; use schema::FAST; use directory::{WritePtr, RAMDirectory, Directory}; use fastfield::FastFieldSerializer; -use fastfield::U32FastFieldsWriter; +use fastfield::U64FastFieldsWriter; use common::bitpacker::compute_num_bits; use common::bitpacker::BitUnpacker; lazy_static! { - static ref U32_FAST_FIELD_EMPTY: ReadOnlySource = { - let u32_fast_field = U32FastFieldReader::from(Vec::new()); - u32_fast_field._data.clone() + static ref U64_FAST_FIELD_EMPTY: ReadOnlySource = { + let u64_fast_field = U64FastFieldReader::from(Vec::new()); + u64_fast_field._data.clone() }; } -pub struct U32FastFieldReader { +pub struct U64FastFieldReader { _data: ReadOnlySource, bit_unpacker: BitUnpacker, - min_val: u32, - max_val: u32, + min_val: u64, + max_val: u64, } -impl U32FastFieldReader { +impl U64FastFieldReader { - pub fn empty() -> U32FastFieldReader { - U32FastFieldReader::open(U32_FAST_FIELD_EMPTY.clone()) + pub fn empty() -> U64FastFieldReader { + U64FastFieldReader::open(U64_FAST_FIELD_EMPTY.clone()) } - pub fn min_val(&self,) -> u32 { + pub fn min_val(&self,) -> u64 { self.min_val } - pub fn max_val(&self,) -> u32 { + pub fn max_val(&self,) -> u64 { self.max_val } @@ -47,22 +45,22 @@ impl U32FastFieldReader { /// /// # Panics /// Panics if the data is corrupted. - pub fn open(data: ReadOnlySource) -> U32FastFieldReader { - let min_val; - let amplitude; - let max_val; + pub fn open(data: ReadOnlySource) -> U64FastFieldReader { + + let min_val: u64; + let max_val: u64; + let bit_unpacker: BitUnpacker; + { - let mut cursor = data.as_slice(); - min_val = u32::deserialize(&mut cursor).unwrap(); - amplitude = u32::deserialize(&mut cursor).unwrap(); + let mut cursor: &[u8] = data.as_slice(); + min_val = u64::deserialize(&mut cursor).expect("Failed to read the min_val of fast field."); + let amplitude = u64::deserialize(&mut cursor).expect("Failed to read the amplitude of fast field."); max_val = min_val + amplitude; + let num_bits = compute_num_bits(amplitude); + bit_unpacker = BitUnpacker::new(cursor, num_bits as usize) } - let num_bits = compute_num_bits(amplitude); - let bit_unpacker = { - let data_arr = &(data.deref()[8..]); - BitUnpacker::new(data_arr, num_bits as usize) - }; - U32FastFieldReader { + + U64FastFieldReader { _data: data, bit_unpacker: bit_unpacker, min_val: min_val, @@ -70,23 +68,23 @@ impl U32FastFieldReader { } } - pub fn get(&self, doc: DocId) -> u32 { + pub fn get(&self, doc: DocId) -> u64 { self.min_val + self.bit_unpacker.get(doc as usize) } } -impl From> for U32FastFieldReader { - fn from(vals: Vec) -> U32FastFieldReader { +impl From> for U64FastFieldReader { + fn from(vals: Vec) -> U64FastFieldReader { let mut schema_builder = SchemaBuilder::default(); - let field = schema_builder.add_u32_field("field", FAST); + let field = schema_builder.add_u64_field("field", FAST); let schema = schema_builder.build(); let path = Path::new("test"); let mut directory: RAMDirectory = RAMDirectory::create(); { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = 
FastFieldSerializer::new(write).unwrap(); - let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema); + let mut fast_field_writers = U64FastFieldsWriter::from_schema(&schema); for val in vals { let mut fast_field_writer = fast_field_writers.get_field_writer(field).unwrap(); fast_field_writer.add_val(val); @@ -95,24 +93,22 @@ impl From> for U32FastFieldReader { serializer.close().unwrap(); } let source = directory.open_read(&path).unwrap(); - let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); + let fast_field_readers = U64FastFieldsReader::open(source).unwrap(); fast_field_readers.get_field(field).unwrap() } } -pub struct U32FastFieldsReader { +pub struct U64FastFieldsReader { source: ReadOnlySource, field_offsets: HashMap, } -unsafe impl Send for U32FastFieldReader {} -unsafe impl Sync for U32FastFieldReader {} -unsafe impl Send for U32FastFieldsReader {} -unsafe impl Sync for U32FastFieldsReader {} +unsafe impl Send for U64FastFieldsReader {} +unsafe impl Sync for U64FastFieldsReader {} -impl U32FastFieldsReader { - pub fn open(source: ReadOnlySource) -> io::Result { +impl U64FastFieldsReader { + pub fn open(source: ReadOnlySource) -> io::Result { let header_offset; let field_offsets: Vec<(Field, u32)>; { @@ -136,26 +132,26 @@ impl U32FastFieldsReader { let (field, start_offset) = *field_start_offsets; field_offsets_map.insert(field, (start_offset, *stop_offset)); } - Ok(U32FastFieldsReader { + Ok(U64FastFieldsReader { field_offsets: field_offsets_map, source: source, }) } - /// Returns the u32 fast value reader if the field - /// is a u32 field indexed as "fast". + /// Returns the u64 fast value reader if the field + /// is a u64 field indexed as "fast". /// - /// Return None if the field is not a u32 field + /// Return None if the field is not a u64 field /// indexed with the fast option. /// /// # Panics /// May panic if the index is corrupted. - pub fn get_field(&self, field: Field) -> Option { + pub fn get_field(&self, field: Field) -> Option { self.field_offsets .get(&field) .map(|&(start, stop)| { let field_source = self.source.slice(start as usize, stop as usize); - U32FastFieldReader::open(field_source) + U64FastFieldReader::open(field_source) }) } } diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index fef738f3a..a68e03a38 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -14,13 +14,13 @@ use std::io::{self, Write, Seek, SeekFrom}; /// the serializer. /// The serializer expects to receive the following calls. /// -/// * `new_u32_fast_field(...)` +/// * `new_u64_fast_field(...)` /// * `add_val(...)` /// * `add_val(...)` /// * `add_val(...)` /// * ... /// * `close_field()` -/// * `new_u32_fast_field(...)` +/// * `new_u64_fast_field(...)` /// * `add_val(...)` /// * ... 
/// * `close_field()` @@ -29,7 +29,7 @@ pub struct FastFieldSerializer { write: WritePtr, written_size: usize, fields: Vec<(Field, u32)>, - min_value: u32, + min_value: u64, field_open: bool, bit_packer: BitPacker, } @@ -50,8 +50,8 @@ impl FastFieldSerializer { }) } - /// Start serializing a new u32 fast field - pub fn new_u32_fast_field(&mut self, field: Field, min_value: u32, max_value: u32) -> io::Result<()> { + /// Start serializing a new u64 fast field + pub fn new_u64_fast_field(&mut self, field: Field, min_value: u64, max_value: u64) -> io::Result<()> { if self.field_open { return Err(io::Error::new(io::ErrorKind::Other, "Previous field not closed")); } @@ -68,14 +68,14 @@ impl FastFieldSerializer { } - /// Pushes a new value to the currently open u32 fast field. - pub fn add_val(&mut self, val: u32) -> io::Result<()> { - let val_to_write: u32 = val - self.min_value; + /// Pushes a new value to the currently open u64 fast field. + pub fn add_val(&mut self, val: u64) -> io::Result<()> { + let val_to_write: u64 = val - self.min_value; self.bit_packer.write(val_to_write, &mut self.write)?; Ok(()) } - /// Close the u32 fast field. + /// Close the u64 fast field. pub fn close_field(&mut self,) -> io::Result<()> { if !self.field_open { return Err(io::Error::new(io::ErrorKind::Other, "Current field is already closed")); diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index c3d5c7b88..51c857b4b 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -4,32 +4,32 @@ use std::io; use schema::Value; use DocId; -pub struct U32FastFieldsWriter { - field_writers: Vec, +pub struct U64FastFieldsWriter { + field_writers: Vec, } -impl U32FastFieldsWriter { +impl U64FastFieldsWriter { - pub fn from_schema(schema: &Schema) -> U32FastFieldsWriter { - let u32_fields: Vec = schema.fields() + pub fn from_schema(schema: &Schema) -> U64FastFieldsWriter { + let u64_fields: Vec = schema.fields() .iter() .enumerate() - .filter(|&(_, field_entry)| field_entry.is_u32_fast()) + .filter(|&(_, field_entry)| field_entry.is_u64_fast()) .map(|(field_id, _)| Field(field_id as u32)) .collect(); - U32FastFieldsWriter::new(u32_fields) + U64FastFieldsWriter::new(u64_fields) } - pub fn new(fields: Vec) -> U32FastFieldsWriter { - U32FastFieldsWriter { + pub fn new(fields: Vec) -> U64FastFieldsWriter { + U64FastFieldsWriter { field_writers: fields .into_iter() - .map(U32FastFieldWriter::new) + .map(U64FastFieldWriter::new) .collect(), } } - pub fn get_field_writer(&mut self, field: Field) -> Option<&mut U32FastFieldWriter> { + pub fn get_field_writer(&mut self, field: Field) -> Option<&mut U64FastFieldWriter> { self.field_writers .iter_mut() .find(|field_writer| field_writer.field == field) @@ -60,14 +60,14 @@ impl U32FastFieldsWriter { } } -pub struct U32FastFieldWriter { +pub struct U64FastFieldWriter { field: Field, - vals: Vec, + vals: Vec, } -impl U32FastFieldWriter { - pub fn new(field: Field) -> U32FastFieldWriter { - U32FastFieldWriter { +impl U64FastFieldWriter { + pub fn new(field: Field) -> U64FastFieldWriter { + U64FastFieldWriter { field: field, vals: Vec::new(), } @@ -81,24 +81,24 @@ impl U32FastFieldWriter { let target = doc as usize + 1; debug_assert!(self.vals.len() <= target); while self.vals.len() < target { - self.add_val(0u32) + self.add_val(0u64) } } - pub fn add_val(&mut self, val: u32) { + pub fn add_val(&mut self, val: u64) { self.vals.push(val); } - fn extract_val(&self, doc: &Document) -> u32 { + fn extract_val(&self, doc: &Document) -> u64 { match 
doc.get_first(self.field) { Some(v) => { match *v { - Value::U32(ref val) => { *val } - _ => { panic!("Expected a u32field, got {:?} ", v) } + Value::U64(ref val) => { *val } + _ => { panic!("Expected a u64field, got {:?} ", v) } } }, None => { - 0u32 + 0u64 } } } @@ -112,7 +112,7 @@ impl U32FastFieldWriter { let zero = 0; let min = *self.vals.iter().min().unwrap_or(&zero); let max = *self.vals.iter().max().unwrap_or(&min); - try!(serializer.new_u32_fast_field(self.field, min, max)); + try!(serializer.new_u64_fast_field(self.field, min, max)); for &val in &self.vals { try!(serializer.add_val(val)); } diff --git a/src/functional_test.rs b/src/functional_test.rs index e8f8797dd..4c82fcd8e 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -6,7 +6,7 @@ use Index; use Searcher; use rand::distributions::{IndependentSample, Range}; -fn check_index_content(searcher: &Searcher, vals: &HashSet) { +fn check_index_content(searcher: &Searcher, vals: &HashSet) { assert!(searcher.segment_readers().len() < 20); assert_eq!(searcher.num_docs() as usize, vals.len()); } @@ -17,19 +17,19 @@ fn test_indexing() { let mut schema_builder = SchemaBuilder::default(); - let id_field = schema_builder.add_u32_field("id", U32_INDEXED); - let multiples_field = schema_builder.add_u32_field("multiples", U32_INDEXED); + let id_field = schema_builder.add_u64_field("id", U64_INDEXED); + let multiples_field = schema_builder.add_u64_field("multiples", U64_INDEXED); let schema = schema_builder.build(); let index = Index::create_from_tempdir(schema).unwrap(); - let universe = Range::new(0u32, 20u32); + let universe = Range::new(0u64, 20u64); let mut rng = thread_rng(); let mut index_writer = index.writer_with_num_threads(3, 120_000_000).unwrap(); - let mut committed_docs: HashSet = HashSet::new(); - let mut uncommitted_docs: HashSet = HashSet::new(); + let mut committed_docs: HashSet = HashSet::new(); + let mut uncommitted_docs: HashSet = HashSet::new(); for _ in 0..200 { let random_val = universe.ind_sample(&mut rng); @@ -45,15 +45,15 @@ fn test_indexing() { else { if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) { - let doc_id_term = Term::from_field_u32(id_field, random_val); + let doc_id_term = Term::from_field_u64(id_field, random_val); index_writer.delete_term(doc_id_term); } else { uncommitted_docs.insert(random_val); let mut doc = Document::new(); - doc.add_u32(id_field, random_val); - for i in 1u32..10u32 { - doc.add_u32(multiples_field, random_val * i); + doc.add_u64(id_field, random_val); + for i in 1u64..10u64 { + doc.add_u64(multiples_field, random_val * i); } index_writer.add_document(doc); } diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index dbc6a7a7c..3a6b6a3e9 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -283,7 +283,7 @@ mod tests { let field = Field(1u32); DeleteOperation { opstamp: i as u64, - term: Term::from_field_u32(field, i as u32) + term: Term::from_field_u64(field, i as u64) } }; diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 8feecc145..1dda92055 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -6,7 +6,7 @@ use core::SerializableSegment; use schema::FieldValue; use indexer::SegmentSerializer; use postings::PostingsSerializer; -use fastfield::U32FastFieldReader; +use fastfield::U64FastFieldReader; use itertools::Itertools; use postings::Postings; use postings::DocSet; @@ -50,31 +50,31 @@ impl DeltaPositionComputer { } -fn compute_min_max_val(u32_reader: 
&U32FastFieldReader, max_doc: DocId, delete_bitset: &DeleteBitSet) -> Option<(u32, u32)> { +fn compute_min_max_val(u64_reader: &U64FastFieldReader, max_doc: DocId, delete_bitset: &DeleteBitSet) -> Option<(u64, u64)> { if max_doc == 0 { None } else if !delete_bitset.has_deletes() { // no deleted documents, // we can use the previous min_val, max_val. - Some((u32_reader.min_val(), u32_reader.max_val())) + Some((u64_reader.min_val(), u64_reader.max_val())) } else { // some deleted documents, // we need to recompute the max / min (0..max_doc) .filter(|doc_id| !delete_bitset.is_deleted(*doc_id)) - .map(|doc_id| u32_reader.get(doc_id)) + .map(|doc_id| u64_reader.get(doc_id)) .minmax() .into_option() } } -fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> Option { +fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> Option { segment_reader.get_fieldnorms_reader(field) } -fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> Option { +fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> Option { segment_reader.get_fast_field_reader(field) } @@ -113,37 +113,37 @@ impl IndexMerger { .fields() .iter() .enumerate() - .filter(|&(_, field_entry)| field_entry.is_u32_fast()) + .filter(|&(_, field_entry)| field_entry.is_u64_fast()) .map(|(field_id, _)| Field(field_id as u32)) .collect(); self.generic_write_fast_field(fast_fields, &extract_fast_field_reader, fast_field_serializer) } - // used both to merge field norms and regular u32 fast fields. + // used both to merge field norms and regular u64 fast fields. fn generic_write_fast_field(&self, fields: Vec, - field_reader_extractor: &Fn(&SegmentReader, Field) -> Option, + field_reader_extractor: &Fn(&SegmentReader, Field) -> Option, fast_field_serializer: &mut FastFieldSerializer) -> Result<()> { for field in fields { - let mut u32_readers = vec!(); - let mut min_val = u32::max_value(); - let mut max_val = u32::min_value(); + let mut u64_readers = vec!(); + let mut min_val = u64::max_value(); + let mut max_val = u64::min_value(); for reader in &self.readers { match field_reader_extractor(reader, field) { - Some(u32_reader) => { - if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(&u32_reader, reader.max_doc(), reader.delete_bitset()) { + Some(u64_reader) => { + if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset()) { // the segment has some non-deleted documents min_val = min(min_val, seg_min_val); max_val = max(max_val, seg_max_val); - u32_readers.push((reader.max_doc(), u32_reader, reader.delete_bitset())); + u64_readers.push((reader.max_doc(), u64_reader, reader.delete_bitset())); } } None => { - let error_msg = format!("Failed to find a u32_reader for field {:?}", field); + let error_msg = format!("Failed to find a u64_reader for field {:?}", field); error!("{}", error_msg); return Err(Error::SchemaError(error_msg)) } @@ -151,7 +151,7 @@ impl IndexMerger { } - if u32_readers.is_empty() { + if u64_readers.is_empty() { // we have actually zero documents. 
min_val = 0; max_val = 0; @@ -159,11 +159,11 @@ impl IndexMerger { assert!(min_val <= max_val); - try!(fast_field_serializer.new_u32_fast_field(field, min_val, max_val)); - for (max_doc, u32_reader, delete_bitset) in u32_readers { + try!(fast_field_serializer.new_u64_fast_field(field, min_val, max_val)); + for (max_doc, u64_reader, delete_bitset) in u64_readers { for doc_id in 0..max_doc { if !delete_bitset.is_deleted(doc_id) { - let val = u32_reader.get(doc_id); + let val = u64_reader.get(doc_id); try!(fast_field_serializer.add_val(val)); } } @@ -311,8 +311,8 @@ mod tests { .set_indexing_options(TextIndexingOptions::TokenizedWithFreq) .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); - let score_fieldtype = schema::U32Options::default().set_fast(); - let score_field = schema_builder.add_u32_field("score", score_fieldtype); + let score_fieldtype = schema::IntOptions::default().set_fast(); + let score_field = schema_builder.add_u64_field("score", score_fieldtype); let index = Index::create_in_ram(schema_builder.build()); { @@ -322,19 +322,19 @@ mod tests { { let mut doc = Document::default(); doc.add_text(text_field, "af b"); - doc.add_u32(score_field, 3); + doc.add_u64(score_field, 3); index_writer.add_document(doc); } { let mut doc = Document::default(); doc.add_text(text_field, "a b c"); - doc.add_u32(score_field, 5); + doc.add_u64(score_field, 5); index_writer.add_document(doc); } { let mut doc = Document::default(); doc.add_text(text_field, "a b c d"); - doc.add_u32(score_field, 7); + doc.add_u64(score_field, 7); index_writer.add_document(doc); } index_writer.commit().expect("committed"); @@ -345,13 +345,13 @@ mod tests { { let mut doc = Document::default(); doc.add_text(text_field, "af b"); - doc.add_u32(score_field, 11); + doc.add_u64(score_field, 11); index_writer.add_document(doc); } { let mut doc = Document::default(); doc.add_text(text_field, "a b c g"); - doc.add_u32(score_field, 13); + doc.add_u64(score_field, 13); index_writer.add_document(doc); } index_writer.commit().expect("Commit failed"); @@ -417,7 +417,7 @@ mod tests { } } - fn search_term(searcher: &Searcher, term: Term) -> Vec { + fn search_term(searcher: &Searcher, term: Term) -> Vec { let mut collector = FastFieldTestCollector::for_field(Field(1)); let term_query = TermQuery::new(term, SegmentPostingsOption::NoFreq); searcher.search(&term_query, &mut collector).unwrap(); @@ -432,8 +432,8 @@ mod tests { .set_indexing_options(TextIndexingOptions::TokenizedWithFreq) .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); - let score_fieldtype = schema::U32Options::default().set_fast(); - let score_field = schema_builder.add_u32_field("score", score_fieldtype); + let score_fieldtype = schema::IntOptions::default().set_fast(); + let score_field = schema_builder.add_u64_field("score", score_fieldtype); let index = Index::create_in_ram(schema_builder.build()); let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 04faaa62c..196c0cf61 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -6,7 +6,7 @@ use schema::Term; use core::Segment; use core::SerializableSegment; use postings::PostingsWriter; -use fastfield::U32FastFieldsWriter; +use fastfield::U64FastFieldsWriter; use schema::Field; use schema::FieldEntry; use schema::FieldValue; @@ -30,20 +30,20 @@ pub struct SegmentWriter<'a> { max_doc: DocId, per_field_postings_writers: 
Vec>, segment_serializer: SegmentSerializer, - fast_field_writers: U32FastFieldsWriter, - fieldnorms_writer: U32FastFieldsWriter, + fast_field_writers: U64FastFieldsWriter, + fieldnorms_writer: U64FastFieldsWriter, doc_opstamps: Vec, } -fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter { - let u32_fields: Vec = schema.fields() +fn create_fieldnorms_writer(schema: &Schema) -> U64FastFieldsWriter { + let u64_fields: Vec = schema.fields() .iter() .enumerate() .filter(|&(_, field_entry)| field_entry.is_indexed()) .map(|(field_id, _)| Field(field_id as u32)) .collect(); - U32FastFieldsWriter::new(u32_fields) + U64FastFieldsWriter::new(u64_fields) } @@ -62,7 +62,7 @@ fn posting_from_field_entry<'a>(field_entry: &FieldEntry, heap: &'a Heap) -> Box } } } - FieldType::U32(_) => { + FieldType::U64(_) => { SpecializedPostingsWriter::::new_boxed(heap) } } @@ -95,7 +95,7 @@ impl<'a> SegmentWriter<'a> { per_field_postings_writers: per_field_postings_writers, fieldnorms_writer: create_fieldnorms_writer(schema), segment_serializer: segment_serializer, - fast_field_writers: U32FastFieldsWriter::from_schema(schema), + fast_field_writers: U64FastFieldsWriter::from_schema(schema), doc_opstamps: Vec::with_capacity(1_000), }) } @@ -154,13 +154,13 @@ impl<'a> SegmentWriter<'a> { self.fieldnorms_writer .get_field_writer(field) .map(|field_norms_writer| { - field_norms_writer.add_val(num_tokens as u32) + field_norms_writer.add_val(num_tokens as u64) }); } - FieldType::U32(ref u32_options) => { - if u32_options.is_indexed() { + FieldType::U64(ref u64_options) => { + if u64_options.is_indexed() { for field_value in field_values { - let term = Term::from_field_u32(field_value.field(), field_value.value().u32_value()); + let term = Term::from_field_u64(field_value.field(), field_value.value().u64_value()); field_posting_writer.suscribe(doc_id, 0, &term, self.heap); } } @@ -205,8 +205,8 @@ impl<'a> SegmentWriter<'a> { // This method is used as a trick to workaround the borrow checker fn write<'a>(per_field_postings_writers: &[Box], - fast_field_writers: &U32FastFieldsWriter, - fieldnorms_writer: &U32FastFieldsWriter, + fast_field_writers: &U64FastFieldsWriter, + fieldnorms_writer: &U64FastFieldsWriter, mut serializer: SegmentSerializer, heap: &'a Heap,) -> Result<()> { for per_field_postings_writer in per_field_postings_writers { diff --git a/src/lib.rs b/src/lib.rs index 0b63aec19..54e54863e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -438,9 +438,9 @@ mod tests { #[test] - fn test_indexed_u32() { + fn test_indexed_u64() { let mut schema_builder = SchemaBuilder::default(); - let field = schema_builder.add_u32_field("text", U32_INDEXED); + let field = schema_builder.add_u64_field("text", U64_INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -451,7 +451,7 @@ mod tests { index_writer.commit().unwrap(); index.load_searchers().unwrap(); let searcher = index.searcher(); - let term = Term::from_field_u32(field, 1u32); + let term = Term::from_field_u64(field, 1u64); let mut postings = searcher.segment_reader(0).read_postings(&term, SegmentPostingsOption::NoFreq).unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 0); diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 3f196c43d..2d5f2d7d8 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -68,7 +68,7 @@ mod tests { posting_serializer.close_term().unwrap(); posting_serializer.close().unwrap(); let read = segment.open_read(SegmentComponent::POSITIONS).unwrap(); - assert_eq!(read.len(), 
13); + assert!(read.len() <= 16); } #[test] @@ -120,7 +120,7 @@ mod tests { assert_eq!(fieldnorm_reader.get(0), 8 + 5); assert_eq!(fieldnorm_reader.get(1), 2); for i in 2 .. 1000 { - assert_eq!(fieldnorm_reader.get(i), i + 1); + assert_eq!(fieldnorm_reader.get(i), (i + 1) as u64); } } { diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 238559640..eb8fa5fcb 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -109,8 +109,8 @@ impl PostingsSerializer { let field_entry: &FieldEntry = self.schema.get_field_entry(field); self.text_indexing_options = match *field_entry.field_type() { FieldType::Str(ref text_options) => text_options.get_indexing_options(), - FieldType::U32(ref u32_options) => { - if u32_options.is_indexed() { + FieldType::U64(ref u64_options) => { + if u64_options.is_indexed() { TextIndexingOptions::Unindexed } else { TextIndexingOptions::Untokenized diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 1b41a8996..fc1a33821 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -23,7 +23,7 @@ mod tests { use collector::tests::TestCollector; use Index; use schema::*; - use fastfield::{U32FastFieldReader}; + use fastfield::{U64FastFieldReader}; use postings::SegmentPostingsOption; fn abs_diff(left: f32, right: f32) -> f32 { @@ -111,7 +111,7 @@ mod tests { let occurs = vec!(Occur::Should, Occur::Should); let occur_filter = OccurFilter::new(&occurs); - let left_fieldnorms = U32FastFieldReader::from(vec!(100,200,300)); + let left_fieldnorms = U64FastFieldReader::from(vec!(100,200,300)); let left = VecPostings::from(vec!(1, 2, 3)); let left_scorer = TermScorer { @@ -120,7 +120,7 @@ mod tests { postings: left, }; - let right_fieldnorms = U32FastFieldReader::from(vec!(15,25,35)); + let right_fieldnorms = U64FastFieldReader::from(vec!(15,25,35)); let right = VecPostings::from(vec!(1, 3, 8)); let right_scorer = TermScorer { diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 427e86844..9338266fe 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -22,10 +22,9 @@ pub enum QueryParserError { /// `FieldDoesNotExist(field_name: String)` /// The query references a field that is not in the schema FieldDoesNotExist(String), - /// `ExpectedU32(field_name: String, field_value: String)` - /// The query contains a term for a `u32`-field, but the value - /// is not a u32. - ExpectedU32(String, String), + /// The query contains a term for a `u64`-field, but the value + /// is not a u64. + ExpectedU64(String, String), /// It is forbidden queries that are only "excluding". (e.g. 
-title:pop) AllButQueryForbidden, /// If no default field is declared, running a query without any @@ -138,7 +137,7 @@ impl QueryParser { loop { if let Some(token) = token_iter.next() { let text = token.to_string(); - // TODO Handle u32 + // TODO Handle u64 let term = Term::from_field_text(field, &text); tokens.push(term); } else { diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 8aa56484a..ad25b2648 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -14,7 +14,7 @@ mod tests { use query::Scorer; use query::term_query::TermScorer; use query::Query; - use fastfield::U32FastFieldReader; + use fastfield::U64FastFieldReader; use query::TermQuery; use Index; use schema::*; @@ -55,7 +55,7 @@ mod tests { #[test] pub fn test_term_scorer() { - let left_fieldnorms = U32FastFieldReader::from(vec!(10, 4)); + let left_fieldnorms = U64FastFieldReader::from(vec!(10, 4)); assert_eq!(left_fieldnorms.get(0), 10); assert_eq!(left_fieldnorms.get(1), 4); let left = VecPostings::from(vec!(1)); diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 81b683c99..db03539e6 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -1,13 +1,13 @@ use Score; use DocId; -use fastfield::U32FastFieldReader; +use fastfield::U64FastFieldReader; use postings::DocSet; use query::Scorer; use postings::Postings; pub struct TermScorer where TPostings: Postings { pub idf: Score, - pub fieldnorm_reader_opt: Option, + pub fieldnorm_reader_opt: Option, pub postings: TPostings, } diff --git a/src/schema/document.rs b/src/schema/document.rs index 87d0b46f7..7c49874c0 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -52,9 +52,9 @@ impl Document { self.add(FieldValue::new(field, value)); } - /// Add a u32 field - pub fn add_u32(&mut self, field: Field, value: u32) { - self.add(FieldValue::new(field, Value::U32(value))); + /// Add a u64 field + pub fn add_u64(&mut self, field: Field, value: u64) { + self.add(FieldValue::new(field, Value::U64(value))); } /// Add a field value diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 99f3cb42c..2d3965749 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -1,5 +1,5 @@ use schema::TextOptions; -use schema::U32Options; +use schema::IntOptions; use rustc_serialize::Decodable; use rustc_serialize::Decoder; @@ -22,7 +22,7 @@ pub struct FieldEntry { impl FieldEntry { - /// Creates a new u32 field entry in the schema, given + /// Creates a new u64 field entry in the schema, given /// a name, and some options. pub fn new_text(field_name: String, field_type: TextOptions) -> FieldEntry { FieldEntry { @@ -31,12 +31,12 @@ impl FieldEntry { } } - /// Creates a new u32 field entry in the schema, given + /// Creates a new u64 field entry in the schema, given /// a name, and some options. 
- pub fn new_u32(field_name: String, field_type: U32Options) -> FieldEntry { + pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry { FieldEntry { name: field_name, - field_type: FieldType::U32(field_type), + field_type: FieldType::U64(field_type), } } @@ -54,14 +54,14 @@ impl FieldEntry { pub fn is_indexed(&self,) -> bool { match self.field_type { FieldType::Str(ref options) => options.get_indexing_options().is_indexed(), - FieldType::U32(ref options) => options.is_indexed(), + FieldType::U64(ref options) => options.is_indexed(), } } - /// Returns true iff the field is a u32 fast field - pub fn is_u32_fast(&self,) -> bool { + /// Returns true iff the field is a u64 fast field + pub fn is_u64_fast(&self,) -> bool { match self.field_type { - FieldType::U32(ref options) => options.is_fast(), + FieldType::U64(ref options) => options.is_fast(), _ => false, } } @@ -69,7 +69,7 @@ impl FieldEntry { /// Returns true iff the field is stored pub fn is_stored(&self,) -> bool { match self.field_type { - FieldType::U32(ref options) => { + FieldType::U64(ref options) => { options.is_stored() } FieldType::Str(ref options) => { @@ -96,9 +96,9 @@ impl Encodable for FieldEntry { options.encode(s) })); } - FieldType::U32(ref options) => { + FieldType::U64(ref options) => { try!(s.emit_struct_field("type", 1, |s| { - s.emit_str("u32") + s.emit_str("u64") })); try!(s.emit_struct_field("options", 2, |s| { options.encode(s) @@ -122,9 +122,9 @@ impl Decodable for FieldEntry { })); d.read_struct_field("options", 2, |d| { match field_type.as_ref() { - "u32" => { - let u32_options = try!(U32Options::decode(d)); - Ok(FieldEntry::new_u32(name, u32_options)) + "u64" => { + let u64_options = try!(IntOptions::decode(d)); + Ok(FieldEntry::new_u64(name, u64_options)) } "text" => { let text_options = try!(TextOptions::decode(d)); diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 5debe63b0..361c18f15 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -1,5 +1,5 @@ use schema::TextOptions; -use schema::U32Options; +use schema::IntOptions; use rustc_serialize::json::Json; use schema::Value; @@ -11,20 +11,22 @@ use schema::Value; pub enum ValueParsingError { /// Encounterred a numerical value that overflows or underflow its integer type. OverflowError(String), - /// The json node is not of the correct type. (e.g. 3 for a `Str` type or `"abc"` for a u32 type) + /// The json node is not of the correct type. (e.g. 3 for a `Str` type or `"abc"` for a u64 type) /// Tantivy will try to autocast values. TypeError(String), } -/// A `FieldType` describes the type (text, u32) of a field as well as +/// A `FieldType` describes the type (text, u64) of a field as well as /// how it should be handled by tantivy. 
#[derive(Clone, Debug, RustcDecodable, RustcEncodable)] pub enum FieldType { /// String field type configuration Str(TextOptions), - /// U32 field type configuration - U32(U32Options), + /// Unsigned 64-bits integers field type configuration + U64(IntOptions), + // /// Signed 64-bits integers 64 field type configuration + // I64(IntOptions), } impl FieldType { @@ -41,20 +43,15 @@ impl FieldType { FieldType::Str(_) => { Ok(Value::Str(field_text.clone())) } - FieldType::U32(_) => { - Err(ValueParsingError::TypeError(format!("Expected a u32 int, got {:?}", json))) + FieldType::U64(_) => { + Err(ValueParsingError::TypeError(format!("Expected a u64 int, got {:?}", json))) } } } Json::U64(ref field_val_u64) => { match *self { - FieldType::U32(_) => { - if *field_val_u64 > (u32::max_value() as u64) { - Err(ValueParsingError::OverflowError(format!("Expected u32, but value {:?} overflows.", field_val_u64))) - } - else { - Ok(Value::U32(*field_val_u64 as u32)) - } + FieldType::U64(_) => { + Ok(Value::U64(*field_val_u64 as u64)) } _ => { Err(ValueParsingError::TypeError(format!("Expected a string, got {:?}", json))) @@ -62,7 +59,7 @@ impl FieldType { } }, _ => { - Err(ValueParsingError::TypeError(format!("Expected a string or a u32, got {:?}", json))) + Err(ValueParsingError::TypeError(format!("Expected a string or a u64, got {:?}", json))) } } } diff --git a/src/schema/u32_options.rs b/src/schema/int_options.rs similarity index 66% rename from src/schema/u32_options.rs rename to src/schema/int_options.rs index 5f29f63b5..a543c1288 100644 --- a/src/schema/u32_options.rs +++ b/src/schema/int_options.rs @@ -1,14 +1,14 @@ use std::ops::BitOr; -/// Define how a U32 field should be handled by tantivy. +/// Define how a u64 field should be handled by tantivy. #[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)] -pub struct U32Options { +pub struct IntOptions { indexed: bool, fast: bool, stored: bool, } -impl U32Options { +impl IntOptions { /// Returns true iff the value is stored. pub fn is_stored(&self,) -> bool { @@ -26,39 +26,39 @@ impl U32Options { self.fast } - /// Set the u32 options as stored. + /// Set the u64 options as stored. /// /// Only the fields that are set as *stored* are /// persisted into the Tantivy's store. - pub fn set_stored(mut self,) -> U32Options { + pub fn set_stored(mut self,) -> IntOptions { self.stored = true; self } - /// Set the u32 options as indexed. + /// Set the u64 options as indexed. /// /// Setting an integer as indexed will generate /// a posting list for each value taken by the integer. - pub fn set_indexed(mut self,) -> U32Options { + pub fn set_indexed(mut self,) -> IntOptions { self.indexed = true; self } - /// Set the u32 options as a fast field. + /// Set the u64 options as a fast field. /// /// Fast fields are designed for random access. /// Access time are similar to a random lookup in an array. /// If more than one value is associated to a fast field, only the last one is /// kept. - pub fn set_fast(mut self,) -> U32Options { + pub fn set_fast(mut self,) -> IntOptions { self.fast = true; self } } -impl Default for U32Options { - fn default() -> U32Options { - U32Options { +impl Default for IntOptions { + fn default() -> IntOptions { + IntOptions { fast: false, indexed: false, stored: false, @@ -67,40 +67,40 @@ impl Default for U32Options { } -/// Shortcut for a u32 fast field. +/// Shortcut for a u64 fast field. 
/// -/// Such a shortcut can be composed as follows `STORED | FAST | U32_INDEXED` -pub const FAST: U32Options = U32Options { +/// Such a shortcut can be composed as follows `STORED | FAST | U64_INDEXED` +pub const FAST: IntOptions = IntOptions { indexed: false, stored: false, fast: true, }; -/// Shortcut for a u32 indexed field. +/// Shortcut for a u64 indexed field. /// -/// Such a shortcut can be composed as follows `STORED | FAST | U32_INDEXED` -pub const U32_INDEXED: U32Options = U32Options { +/// Such a shortcut can be composed as follows `STORED | FAST | U64_INDEXED` +pub const U64_INDEXED: IntOptions = IntOptions { indexed: true, stored: false, fast: false, }; -/// Shortcut for a u32 stored field. +/// Shortcut for a u64 stored field. /// -/// Such a shortcut can be composed as follows `STORED | FAST | U32_INDEXED` -pub const U32_STORED: U32Options = U32Options { +/// Such a shortcut can be composed as follows `STORED | FAST | U64_INDEXED` +pub const U64_STORED: IntOptions = IntOptions { indexed: false, stored: true, fast: false, }; -impl BitOr for U32Options { +impl BitOr for IntOptions { - type Output = U32Options; + type Output = IntOptions; - fn bitor(self, other: U32Options) -> U32Options { - let mut res = U32Options::default(); + fn bitor(self, other: IntOptions) -> IntOptions { + let mut res = IntOptions::default(); res.indexed = self.indexed | other.indexed; res.stored = self.stored | other.stored; res.fast = self.fast | other.fast; diff --git a/src/schema/mod.rs b/src/schema/mod.rs index b1802d1e5..2aeb073c5 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -7,7 +7,7 @@ Tantivy has a very strict schema. The schema defines information about the fields your index contains, that is, for each field : * the field name (may only contain letters `[a-zA-Z]`, number `[0-9]`, and `_`) -* the type of the field (currently only `text` and `u32` are supported) +* the type of the field (currently only `text` and `u64` are supported) * how the field should be indexed / stored. This very last point is critical as it will enable / disable some of the functionality @@ -64,17 +64,17 @@ let schema = schema_builder.build(); -## Setting a u32 field +## Setting a u64 field ### Example ``` use tantivy::schema::*; let mut schema_builder = SchemaBuilder::default(); -let num_stars_options = U32Options::default() +let num_stars_options = IntOptions::default() .set_stored() .set_indexed(); -schema_builder.add_u32_field("num_stars", num_stars_options); +schema_builder.add_u64_field("num_stars", num_stars_options); let schema = schema_builder.build(); ``` @@ -82,15 +82,15 @@ Just like for Text fields (see above), setting the field as stored defines whether the field will be returned when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called, and setting the field as indexed means that we will be able perform queries such as `num_stars:10`. -Note that unlike text fields, u32 can only be indexed in one way for the moment. +Note that unlike text fields, u64 can only be indexed in one way for the moment. This may change when we will start supporting range queries. -The `fast` option on the other hand is specific to u32 fields, and is only relevant +The `fast` option on the other hand is specific to u64 fields, and is only relevant if you are implementing your own queries. This functionality is somewhat similar to Lucene's `DocValues`. 
-u32 that are indexed as fast will be stored in a special data structure that will -make it possible to access the u32 value given the doc id rapidly. This is useful if the value of +u64 that are indexed as fast will be stored in a special data structure that will +make it possible to access the u64 value given the doc id rapidly. This is useful if the value of the field is required during scoring or collection for instance. */ @@ -104,7 +104,7 @@ mod field_entry; mod field_value; mod text_options; -mod u32_options; +mod int_options; mod field; mod value; mod named_field_document; @@ -129,10 +129,10 @@ pub use self::text_options::TEXT; pub use self::text_options::STRING; pub use self::text_options::STORED; -pub use self::u32_options::U32Options; -pub use self::u32_options::FAST; -pub use self::u32_options::U32_INDEXED; -pub use self::u32_options::U32_STORED; +pub use self::int_options::IntOptions; +pub use self::int_options::FAST; +pub use self::int_options::U64_INDEXED; +pub use self::int_options::U64_STORED; use regex::Regex; diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 89d55ea73..398583a05 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -26,8 +26,8 @@ impl Encodable for NamedFieldDocument { Value::Str(ref text) => { s.emit_str(text) }, - Value::U32(ref val) => { - s.emit_u32(*val) + Value::U64(ref val) => { + s.emit_u64(*val) } } }) diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 70ca86cb9..6517319e7 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -12,8 +12,6 @@ use std::sync::Arc; use super::*; use std::fmt; -const MAX_NUM_FIELDS: usize = 255; - /// Tantivy has a very strict schema. /// You need to specify in advance whether a field is indexed or not, /// stored or not, and RAM-based or not. @@ -48,7 +46,7 @@ impl SchemaBuilder { SchemaBuilder::default() } - /// Adds a new u32 field. + /// Adds a new u64 field. /// Returns the associated field handle /// /// # Caution @@ -58,12 +56,12 @@ impl SchemaBuilder { /// by the second one. 
/// The first field will get a field id /// but only the second one will be indexed - pub fn add_u32_field( + pub fn add_u64_field( &mut self, field_name_str: &str, - field_options: U32Options) -> Field { + field_options: IntOptions) -> Field { let field_name = String::from(field_name_str); - let field_entry = FieldEntry::new_u32(field_name, field_options); + let field_entry = FieldEntry::new_u64(field_name, field_options); self.add_field(field_entry) } @@ -325,14 +323,15 @@ mod tests { use schema::*; use rustc_serialize::json; use schema::field_type::ValueParsingError; + use schema::schema::DocParsingError::NotJSON; #[test] pub fn test_schema_serialization() { let mut schema_builder = SchemaBuilder::default(); - let count_options = U32Options::default().set_stored().set_fast(); + let count_options = IntOptions::default().set_stored().set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); - schema_builder.add_u32_field("count", count_options); + schema_builder.add_u64_field("count", count_options); let schema = schema_builder.build(); let schema_json: String = format!("{}", json::as_pretty_json(&schema)); let expected = r#"[ @@ -354,7 +353,7 @@ mod tests { }, { "name": "count", - "type": "u32", + "type": "u64", "options": { "indexed": false, "fast": true, @@ -371,10 +370,10 @@ mod tests { #[test] pub fn test_document_to_json() { let mut schema_builder = SchemaBuilder::default(); - let count_options = U32Options::default().set_stored().set_fast(); + let count_options = IntOptions::default().set_stored().set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); - schema_builder.add_u32_field("count", count_options); + schema_builder.add_u64_field("count", count_options); let schema = schema_builder.build(); let doc_json = r#"{ "title": "my title", @@ -389,10 +388,10 @@ mod tests { #[test] pub fn test_parse_document() { let mut schema_builder = SchemaBuilder::default(); - let count_options = U32Options::default().set_stored().set_fast(); + let count_options = IntOptions::default().set_stored().set_fast(); let title_field = schema_builder.add_text_field("title", TEXT); let author_field = schema_builder.add_text_field("author", STRING); - let count_field = schema_builder.add_u32_field("count", count_options); + let count_field = schema_builder.add_u64_field("count", count_options); let schema = schema_builder.build(); { let doc = schema.parse_document("{}").unwrap(); @@ -406,7 +405,7 @@ mod tests { }"#).unwrap(); assert_eq!(doc.get_first(title_field).unwrap().text(), "my title"); assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton"); - assert_eq!(doc.get_first(count_field).unwrap().u32_value(), 4); + assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4); } { let json_err = schema.parse_document(r#"{ @@ -478,10 +477,25 @@ mod tests { }"#); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { + assert!(false); + } + _ => { + assert!(true); + } + } + } + { + let json_err = schema.parse_document(r#"{ + "title": "my title", + "author": "fulmicoton", + "count": 50000000000000000000 + }"#); + match json_err { + Err(NotJSON(_)) => { assert!(true); } _ => { - assert!(false); + assert!(false) } } } diff --git a/src/schema/term.rs b/src/schema/term.rs index 294c2d346..3e0304569 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -39,20 +39,20 @@ impl Term { Field(self.field_id()) } - /// Builds a term given a field, and a u32-value 
+ /// Builds a term given a field, and a u64-value /// - /// Assuming the term has a field id of 1, and a u32 value of 3234, + /// Assuming the term has a field id of 1, and a u64 value of 3234, - /// the Term will have 8 bytes. + /// the Term will have 12 bytes. /// - /// The first four byte are dedicated to storing the field id as a u32. - /// The 4 following bytes are encoding the u32 value. - pub fn from_field_u32(field: Field, val: u32) -> Term { - const U32_TERM_LEN: usize = 4 + 4; - let mut buffer = allocate_vec(U32_TERM_LEN); + /// The first four bytes are dedicated to storing the field id as a u32. + /// The 8 following bytes are encoding the u64 value. + pub fn from_field_u64(field: Field, val: u64) -> Term { + const U64_TERM_LEN: usize = 4 + 8; + let mut buffer = allocate_vec(U64_TERM_LEN); // we want BigEndian here to have lexicographic order // match the natural order of `(field, val)` BigEndian::write_u32(&mut buffer[0..4], field.0); - BigEndian::write_u32(&mut buffer[4..], val); + BigEndian::write_u64(&mut buffer[4..], val); Term(buffer) } @@ -69,11 +69,11 @@ impl Term { Term(buffer) } - /// Assume the term is a u32 field. + /// Assume the term is a u64 field. /// - /// Panics if the term is not a u32 field. - pub fn get_u32(&self) -> u32 { - BigEndian::read_u32(&self.0[4..]) + /// Panics if the term is not a u64 field. + pub fn get_u64(&self) -> u64 { + BigEndian::read_u64(&self.0[4..]) } /// Builds a term from its byte representation. @@ -88,7 +88,7 @@ impl Term { /// (this does not include the field.) /// /// If the term is a string, its value is utf-8 encoded. - /// If the term is a u32, its value is encoded according + /// If the term is a u64, its value is encoded according /// to `byteorder::LittleEndian`. pub fn value(&self) -> &[u8] { &self.0[4..] @@ -147,14 +147,18 @@ mod tests { assert_eq!(&term.as_slice()[4..], "test".as_bytes()); } { - let term = Term::from_field_u32(count_field, 983u32); + let term = Term::from_field_u64(count_field, 983u64); assert_eq!(term.field(), count_field); assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 2u8]); - assert_eq!(term.as_slice().len(), 8); + assert_eq!(term.as_slice().len(), 4 + 8); assert_eq!(term.as_slice()[4], 0u8); assert_eq!(term.as_slice()[5], 0u8); - assert_eq!(term.as_slice()[6], (933u32 / 256u32) as u8); - assert_eq!(term.as_slice()[7], (983u32 % 256u32) as u8); + assert_eq!(term.as_slice()[6], 0u8); + assert_eq!(term.as_slice()[7], 0u8); + assert_eq!(term.as_slice()[8], 0u8); + assert_eq!(term.as_slice()[9], 0u8); + assert_eq!(term.as_slice()[10], (983u64 / 256u64) as u8); + assert_eq!(term.as_slice()[11], (983u64 % 256u64) as u8); } } diff --git a/src/schema/value.rs b/src/schema/value.rs index 084b8d373..16b6e45fd 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -10,8 +10,8 @@ use std::io::Read; pub enum Value { /// The str type is used for any text information. Str(String), - /// Unsigned 32-bits Integer `u32` - U32(u32), + /// Unsigned 64-bits Integer `u64` + U64(u64), } impl Value { @@ -30,13 +30,13 @@ } } - /// Returns the u32-value, provided the value is of the `U32` type. + /// Returns the u64-value, provided the value is of the `U64` type. 
/// /// # Panics - /// If the value is not of type `U32` - pub fn u32_value(&self) -> u32 { + /// If the value is not of type `U64` + pub fn u64_value(&self) -> u64 { match *self { - Value::U32(ref value) => { + Value::U64(ref value) => { *value } _ => { @@ -53,9 +53,9 @@ impl From<String> for Value { } -impl From<u32> for Value { - fn from(v: u32) -> Value { - Value::U32(v) +impl From<u64> for Value { + fn from(v: u64) -> Value { + Value::U64(v) } } @@ -66,7 +66,7 @@ impl<'a> From<&'a str> for Value { } } const TEXT_CODE: u8 = 0; -const U32_CODE: u8 = 1; +const U64_CODE: u8 = 1; impl BinarySerializable for Value { @@ -77,8 +77,8 @@ written_size += try!(TEXT_CODE.serialize(writer)); written_size += try!(text.serialize(writer)); }, - Value::U32(ref val) => { - written_size += try!(U32_CODE.serialize(writer)); + Value::U64(ref val) => { + written_size += try!(U64_CODE.serialize(writer)); written_size += try!(val.serialize(writer)); }, } @@ -91,9 +91,9 @@ let text = try!(String::deserialize(reader)); Ok(Value::Str(text)) } - U32_CODE => { - let value = try!(u32::deserialize(reader)); - Ok(Value::U32(value)) + U64_CODE => { + let value = try!(u64::deserialize(reader)); + Ok(Value::U64(value)) } _ => { Err(io::Error::new(io::ErrorKind::InvalidData, format!("No field type is associated with code {:?}", type_code)))
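Taken together, the schema hunks in this patch rename the integer API from `u32` to `u64`: `U32Options` becomes `IntOptions`, `add_u32_field` becomes `add_u64_field`, `Value::U32`/`add_u32` become `Value::U64`/`add_u64`, and the `U32_INDEXED`/`U32_STORED` shortcuts become `U64_INDEXED`/`U64_STORED`. A minimal end-to-end sketch of the renamed API, assuming the crate builds as patched; the field names and the writer parameters below are illustrative only (the thread count and heap size are copied from the merger tests in this diff):

```
use tantivy::Index;
use tantivy::schema::*;

fn main() {
    let mut schema_builder = SchemaBuilder::default();
    // `IntOptions` replaces `U32Options`; the shortcut constants still compose with `|`.
    let score_options = IntOptions::default().set_stored().set_fast();
    let score_field = schema_builder.add_u64_field("score", score_options);
    let id_field = schema_builder.add_u64_field("id", U64_INDEXED);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();

    // Values are now written and read back as u64.
    let mut doc = Document::default();
    doc.add_u64(id_field, 1u64);
    doc.add_u64(score_field, 983u64);
    index_writer.add_document(doc);
    index_writer.commit().unwrap();
}
```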
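The `schema/term.rs` hunk above also changes the serialized term layout: a u64 term is 4 big-endian bytes of field id followed by 8 big-endian bytes of value, 12 bytes in total. A small sketch of what the updated test asserts, assuming a field whose id is 2 (as for `count_field` in the existing test) and that `Field` can be constructed directly from its numeric id:

```
use tantivy::schema::{Field, Term};

#[test]
fn u64_term_layout() {
    // 983 == 3 * 256 + 215, so the big-endian value bytes end in [3, 215].
    let term = Term::from_field_u64(Field(2), 983u64);
    let bytes = term.as_slice();
    assert_eq!(bytes.len(), 4 + 8);                           // 4 bytes of field id + 8 bytes of value
    assert_eq!(&bytes[0..4], &[0u8, 0u8, 0u8, 2u8]);          // field id, big-endian
    assert_eq!(&bytes[4..12], &[0u8, 0, 0, 0, 0, 0, 3, 215]); // value, big-endian
    assert_eq!(term.get_u64(), 983u64);                       // round-trips through `get_u64`
}
```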