mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-09 10:32:55 +00:00
Merge commit 'f07ccd6e4fbc5bbfeb94d40e0f14bc527a7d5439' as 'cpp/simdcomp'
This commit is contained in:
40
cpp/simdcomp/include/avxbitpacking.h
Normal file
40
cpp/simdcomp/include/avxbitpacking.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_AVXBITPACKING_H_
|
||||
#define INCLUDE_AVXBITPACKING_H_
|
||||
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include "portability.h"
|
||||
|
||||
|
||||
/* AVX2 is required */
|
||||
#include <immintrin.h>
|
||||
/* for memset */
|
||||
#include <string.h>
|
||||
|
||||
#include "simdcomputil.h"
|
||||
|
||||
/* number of integers handled per AVX block by the routines declared in this header */
enum {
    AVXBlockSize = 256
};
|
||||
|
||||
/* max integer logarithm over a range of AVXBlockSize integers (256 integers) */
|
||||
uint32_t avxmaxbits(const uint32_t * begin);
|
||||
|
||||
/* reads 256 values from "in", writes "bit" 256-bit vectors to "out" */
|
||||
void avxpack(const uint32_t * in,__m256i * out, const uint32_t bit);
|
||||
|
||||
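/* reads 256 values from "in", writes "bit" 256-bit vectors to "out";
 * NOTE(review): presumably, like simdpackwithoutmask, the inputs are not masked
 * and are assumed to be less than 1<<bit -- confirm against the implementation */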
|
||||
void avxpackwithoutmask(const uint32_t * in,__m256i * out, const uint32_t bit);
|
||||
|
||||
/* reads "bit" 256-bit vectors from "in", writes 256 values to "out" */
|
||||
void avxunpack(const __m256i * in,uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* __AVX2__ */
|
||||
|
||||
#endif /* INCLUDE_AVXBITPACKING_H_ */
|
||||
81
cpp/simdcomp/include/portability.h
Normal file
81
cpp/simdcomp/include/portability.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
#ifndef SIMDBITCOMPAT_H_
|
||||
#define SIMDBITCOMPAT_H_
|
||||
|
||||
#include <iso646.h> /* mostly for Microsoft compilers */
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Function-attribute helper macros:
 *   SIMDCOMP_ALWAYS_INLINE - "inline", forced where the compiler supports it
 *   SIMDCOMP_NEVER_INLINE  - noinline attribute, or nothing where unsupported
 *   SIMDCOMP_PURE          - pure attribute, or nothing where unsupported
 * All three are defined on every path below. When SIMDCOMP_DEBUG is non-zero
 * the attributes are dropped and only a plain "inline" remains.
 */
#if SIMDCOMP_DEBUG
#  define SIMDCOMP_ALWAYS_INLINE inline
#  define SIMDCOMP_NEVER_INLINE
#  define SIMDCOMP_PURE
#else
#  if defined(__GNUC__)
#    if __GNUC__ >= 3
#      define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
#      define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
#      define SIMDCOMP_PURE __attribute__((pure))
#    else
#      define SIMDCOMP_ALWAYS_INLINE inline
#      define SIMDCOMP_NEVER_INLINE
#      define SIMDCOMP_PURE
#    endif
#  elif defined(_MSC_VER)
#    define SIMDCOMP_ALWAYS_INLINE __forceinline
#    define SIMDCOMP_NEVER_INLINE
#    define SIMDCOMP_PURE
#  else
     /* Unknown compiler: probe each attribute with __has_attribute, but only
      * when the compiler provides it. Evaluating an undefined function-like
      * macro inside #if is a preprocessing error, and "defined(X) && X(...)"
      * in a single expression does not avoid it, hence the helper macro. */
#    if defined(__has_attribute)
#      define SIMDCOMP_HAS_ATTRIBUTE(x) __has_attribute(x)
#    else
#      define SIMDCOMP_HAS_ATTRIBUTE(x) 0
#    endif
#    if SIMDCOMP_HAS_ATTRIBUTE(always_inline)
#      define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
#    else
#      define SIMDCOMP_ALWAYS_INLINE inline
#    endif
#    if SIMDCOMP_HAS_ATTRIBUTE(noinline)
#      define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
#    else
#      define SIMDCOMP_NEVER_INLINE
#    endif
#    if SIMDCOMP_HAS_ATTRIBUTE(pure)
#      define SIMDCOMP_PURE __attribute__((pure))
#    else
#      define SIMDCOMP_PURE
#    endif
     /* the helper is private to this block; do not leak it to includers */
#    undef SIMDCOMP_HAS_ATTRIBUTE
#  endif
#endif
|
||||
|
||||
/* Fixed-width integer types: MSVC older than version 1600 gets the local
 * typedefs below; every other compiler includes <stdint.h> instead. */
#if defined(_MSC_VER) && _MSC_VER < 1600
|
||||
typedef unsigned int uint32_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed char int8_t;
|
||||
#else
|
||||
#include <stdint.h> /* part of Visual Studio 2010 and better, others likely anyway */
|
||||
#endif
|
||||
|
||||
/* SIMDCOMP_ALIGNED(x): compiler-specific specifier requesting x-byte alignment
 * (__declspec(align) on MSVC, __attribute__((aligned)) on GNU-compatible compilers).
 * NOTE(review): the macro is left undefined when the compiler is neither MSVC
 * nor GNU-compatible, so any use there fails to compile -- confirm this is intended. */
#if defined(_MSC_VER)
|
||||
#define SIMDCOMP_ALIGNED(x) __declspec(align(x))
|
||||
#else
|
||||
#if defined(__GNUC__)
|
||||
#define SIMDCOMP_ALIGNED(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
 * SIMDCOMP_CTZ(result, mask): stores in "result" the number of trailing zero
 * bits of the 32-bit value "mask", or 32 when "mask" is zero.
 *
 * Both variants are statement macros (do { } while (0)) that evaluate "mask"
 * exactly once and agree on the zero case. __builtin_ctz is undefined for a
 * zero argument, so the non-MSVC variant tests for zero explicitly. The
 * temporaries carry a long, prefixed name so they cannot shadow a caller
 * variable that appears in "mask" or "result" (e.g. one called "index").
 */
#if defined(_MSC_VER)
#  include <intrin.h>
/* 64-bit needs extending */
#  define SIMDCOMP_CTZ(result, mask) do { \
     unsigned long simdcomp_ctz_index_; \
     if (!_BitScanForward(&simdcomp_ctz_index_, (mask))) { \
       (result) = 32U; \
     } else { \
       (result) = (uint32_t)simdcomp_ctz_index_; \
     } \
   } while (0)
#else
#  define SIMDCOMP_CTZ(result, mask) do { \
     const unsigned int simdcomp_ctz_mask_ = (unsigned int)(mask); \
     if (simdcomp_ctz_mask_ == 0U) { \
       (result) = 32U; \
     } else { \
       (result) = (uint32_t)__builtin_ctz(simdcomp_ctz_mask_); \
     } \
   } while (0)
#endif
|
||||
|
||||
#endif /* SIMDBITCOMPAT_H_ */
|
||||
|
||||
72
cpp/simdcomp/include/simdbitpacking.h
Normal file
72
cpp/simdcomp/include/simdbitpacking.h
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
#ifndef SIMDBITPACKING_H_
|
||||
#define SIMDBITPACKING_H_
|
||||
|
||||
#include "portability.h"
|
||||
|
||||
/* SSE2 is required */
|
||||
#include <emmintrin.h>
|
||||
/* for memset */
|
||||
#include <string.h>
|
||||
|
||||
#include "simdcomputil.h"
|
||||
|
||||
/***
|
||||
* Please see example.c for various examples on how to make good use
|
||||
* of these functions.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* reads 128 values from "in", writes "bit" 128-bit vectors to "out".
|
||||
* The input values are masked so that only the least significant "bit" bits are used. */
|
||||
void simdpack(const uint32_t * in,__m128i * out, const uint32_t bit);
|
||||
|
||||
/* reads 128 values from "in", writes "bit" 128-bit vectors to "out".
|
||||
* The input values are assumed to be less than 1<<bit. */
|
||||
void simdpackwithoutmask(const uint32_t * in,__m128i * out, const uint32_t bit);
|
||||
|
||||
/* reads "bit" 128-bit vectors from "in", writes 128 values to "out" */
|
||||
void simdunpack(const __m128i * in,uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
|
||||
/* how many compressed bytes are needed to compress "length" integers using a bit width of "bit" with
|
||||
the simdpack_length function. */
|
||||
int simdpack_compressedbytes(int length, const uint32_t bit);
|
||||
|
||||
/* like simdpack, but supports an undetermined number of inputs.
|
||||
* This is useful if you need to pack an array of integers whose length is not a multiple of 128.
|
||||
* Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location between
|
||||
the provided (out) pointer and the returned pointer. */
|
||||
__m128i * simdpack_length(const uint32_t * in, size_t length, __m128i * out, const uint32_t bit);
|
||||
|
||||
/* like simdunpack, but supports an undetermined number of inputs.
|
||||
* This is useful if you need to unpack an array of integers whose length is not a multiple of 128.
|
||||
* Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided
|
||||
(in) pointer and the returned pointer. */
|
||||
const __m128i * simdunpack_length(const __m128i * in, size_t length, uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
|
||||
|
||||
/* like simdpack, but supports an undetermined small number of inputs. This is useful if you need to pack less
|
||||
than 128 integers.
|
||||
* Note that this function is much slower.
|
||||
* Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location
|
||||
between the provided (out) pointer and the returned pointer. */
|
||||
__m128i * simdpack_shortlength(const uint32_t * in, int length, __m128i * out, const uint32_t bit);
|
||||
|
||||
/* like simdunpack, but supports an undetermined small number of inputs. This is useful if you need to unpack less
|
||||
than 128 integers.
|
||||
* Note that this function is much slower.
|
||||
* Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided (in)
|
||||
pointer and the returned pointer. */
|
||||
const __m128i * simdunpack_shortlength(const __m128i * in, int length, uint32_t * out, const uint32_t bit);
|
||||
|
||||
/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
|
||||
void simdfastset(__m128i * in128, uint32_t b, uint32_t value, size_t index);
|
||||
|
||||
#endif /* SIMDBITPACKING_H_ */
|
||||
22
cpp/simdcomp/include/simdcomp.h
Normal file
22
cpp/simdcomp/include/simdcomp.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
|
||||
#ifndef SIMDCOMP_H_
|
||||
#define SIMDCOMP_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "simdbitpacking.h"
|
||||
#include "simdcomputil.h"
|
||||
#include "simdfor.h"
|
||||
#include "simdintegratedbitpacking.h"
|
||||
#include "avxbitpacking.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
54
cpp/simdcomp/include/simdcomputil.h
Normal file
54
cpp/simdcomp/include/simdcomputil.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
|
||||
#ifndef SIMDCOMPUTIL_H_
|
||||
#define SIMDCOMPUTIL_H_
|
||||
|
||||
#include "portability.h"
|
||||
|
||||
/* SSE2 is required */
|
||||
#include <emmintrin.h>
|
||||
|
||||
|
||||
|
||||
|
||||
/* returns the integer logarithm of v (bit width) */
|
||||
uint32_t bits(const uint32_t v);
|
||||
|
||||
/* max integer logarithm over a range of SIMDBlockSize integers (128 integers) */
|
||||
uint32_t maxbits(const uint32_t * begin);
|
||||
|
||||
/* same as maxbits, but we specify the number of integers */
|
||||
uint32_t maxbits_length(const uint32_t * in,uint32_t length);
|
||||
|
||||
/* number of integers in one SIMD block, the unit the 128-value routines operate on */
enum {
    SIMDBlockSize = 128
};
|
||||
|
||||
|
||||
/* computes (quickly) the minimal value of 128 values */
|
||||
uint32_t simdmin(const uint32_t * in);
|
||||
|
||||
/* computes (quickly) the minimal value of the specified number of values */
|
||||
uint32_t simdmin_length(const uint32_t * in, uint32_t length);
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
/* computes (quickly) the minimal and maximal value of the specified number of values */
|
||||
void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax);
|
||||
|
||||
/* computes (quickly) the minimal and maximal value of the 128 values */
|
||||
void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax);
|
||||
|
||||
#endif
|
||||
|
||||
/* like maxbits over 128 integers (SIMDBlockSize) with provided initial value
|
||||
and using differential coding */
|
||||
uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in);
|
||||
|
||||
/* like simdmaxbitsd1, but calculates maxbits over |length| integers
|
||||
with provided initial value. |length| can be any arbitrary value. */
|
||||
uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
|
||||
uint32_t length);
|
||||
|
||||
|
||||
|
||||
#endif /* SIMDCOMPUTIL_H_ */
|
||||
72
cpp/simdcomp/include/simdfor.h
Normal file
72
cpp/simdcomp/include/simdfor.h
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
#ifndef INCLUDE_SIMDFOR_H_
|
||||
#define INCLUDE_SIMDFOR_H_
|
||||
|
||||
#include "portability.h"
|
||||
|
||||
/* SSE2 is required */
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "simdcomputil.h"
|
||||
#include "simdbitpacking.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* reads 128 values from "in", writes "bit" 128-bit vectors to "out" */
|
||||
void simdpackFOR(uint32_t initvalue, const uint32_t * in,__m128i * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* reads "bit" 128-bit vectors from "in", writes 128 values to "out" */
|
||||
void simdunpackFOR(uint32_t initvalue, const __m128i * in,uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* how many compressed bytes are needed to compress "length" integers using a bit width of "bit" with
|
||||
the simdpackFOR_length function. */
|
||||
int simdpackFOR_compressedbytes(int length, const uint32_t bit);
|
||||
|
||||
/* like simdpackFOR, but supports an undetermined number of inputs.
|
||||
This is useful if you need to pack less than 128 integers. Note that this function is much slower.
|
||||
Compressed data is stored in the memory location between
|
||||
the provided (out) pointer and the returned pointer. */
|
||||
__m128i * simdpackFOR_length(uint32_t initvalue, const uint32_t * in, int length, __m128i * out, const uint32_t bit);
|
||||
|
||||
/* like simdunpackFOR, but supports an undetermined number of inputs.
|
||||
This is useful if you need to unpack less than 128 integers. Note that this function is much slower.
|
||||
The read compressed data is between the provided
|
||||
(in) pointer and the returned pointer. */
|
||||
const __m128i * simdunpackFOR_length(uint32_t initvalue, const __m128i * in, int length, uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* returns the value stored at the specified "slot".
|
||||
* */
|
||||
uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
|
||||
int slot);
|
||||
|
||||
/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
|
||||
void simdfastsetFOR(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
|
||||
|
||||
|
||||
/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
|
||||
* which is >= |key|, and returns its position. It is assumed that the values
|
||||
* stored are in sorted order.
|
||||
* The encoded key is stored in "*presult".
|
||||
* Only the first "length" decoded integers are searched; the others are ignored. If no value is larger or equal to the key,
|
||||
* length is returned. Length should be no larger than 128.
|
||||
*
|
||||
* If no value is larger or equal to the key,
|
||||
* length is returned */
|
||||
int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
|
||||
int length, uint32_t key, uint32_t *presult);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDE_SIMDFOR_H_ */
|
||||
98
cpp/simdcomp/include/simdintegratedbitpacking.h
Normal file
98
cpp/simdcomp/include/simdintegratedbitpacking.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* This code is released under a BSD License.
|
||||
*/
|
||||
|
||||
#ifndef SIMD_INTEGRATED_BITPACKING_H
|
||||
#define SIMD_INTEGRATED_BITPACKING_H
|
||||
|
||||
#include "portability.h"
|
||||
|
||||
/* SSE2 is required */
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "simdcomputil.h"
|
||||
#include "simdbitpacking.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* reads 128 values from "in", writes "bit" 128-bit vectors to "out"
|
||||
integer values should be in sorted order (for best results).
|
||||
The differences are masked so that only the least significant "bit" bits are used. */
|
||||
void simdpackd1(uint32_t initvalue, const uint32_t * in,__m128i * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* reads 128 values from "in", writes "bit" 128-bit vectors to "out"
|
||||
integer values should be in sorted order (for best results).
|
||||
The difference values are assumed to be less than 1<<bit. */
|
||||
void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t * in,__m128i * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* reads "bit" 128-bit vectors from "in", writes 128 values to "out" */
|
||||
void simdunpackd1(uint32_t initvalue, const __m128i * in,uint32_t * out, const uint32_t bit);
|
||||
|
||||
|
||||
/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the first encoded uint32 value
|
||||
* which is >= |key|, and returns its position. It is assumed that the values
|
||||
* stored are in sorted order.
|
||||
* The encoded key is stored in "*presult". If no value is larger or equal to the key,
|
||||
* 128 is returned. The pointer initOffset is a pointer to the last four values decoded
|
||||
* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init)),
|
||||
* and the vector gets updated.
|
||||
**/
|
||||
int
|
||||
simdsearchd1(__m128i * initOffset, const __m128i *in, uint32_t bit,
|
||||
uint32_t key, uint32_t *presult);
|
||||
|
||||
|
||||
/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
|
||||
* which is >= |key|, and returns its position. It is assumed that the values
|
||||
* stored are in sorted order.
|
||||
* The encoded key is stored in "*presult".
|
||||
* Only the first "length" decoded integers are searched; the others are ignored. If no value is larger or equal to the key,
|
||||
* length is returned. Length should be no larger than 128.
|
||||
*
|
||||
* If no value is larger or equal to the key,
|
||||
* length is returned */
|
||||
int simdsearchwithlengthd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
|
||||
int length, uint32_t key, uint32_t *presult);
|
||||
|
||||
|
||||
|
||||
/* returns the value stored at the specified "slot".
|
||||
* */
|
||||
uint32_t simdselectd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
|
||||
int slot);
|
||||
|
||||
/* given a block of 128 packed values, this function sets the value at index "index" to "value",
|
||||
* you must somehow know the previous value.
|
||||
* Because of differential coding, all following values are incremented by the offset between this new
|
||||
* value and the old value...
|
||||
* This function is useful if you want to modify the last value.
|
||||
*/
|
||||
void simdfastsetd1fromprevious( __m128i * in, uint32_t bit, uint32_t previousvalue, uint32_t value, size_t index);
|
||||
|
||||
/* given a block of 128 packed values, this function sets the value at index "index" to "value",
|
||||
* This function computes the previous value if needed.
|
||||
* Because of differential coding, all following values are incremented by the offset between this new
|
||||
* value and the old value...
|
||||
* This function is useful if you want to modify the last value.
|
||||
*/
|
||||
void simdfastsetd1(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
|
||||
|
||||
|
||||
/*Simply scan the data
|
||||
* The pointer initOffset is a pointer to the last four values decoded
|
||||
* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init);),
|
||||
* and the vector gets updated.
|
||||
* */
|
||||
|
||||
void
|
||||
simdscand1(__m128i * initOffset, const __m128i *in, uint32_t bit);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user