mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 07:42:54 +00:00
153 lines
4.4 KiB
Python
Executable File
153 lines
4.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
|
|
from math import ceil
|
|
|
|
print("""
|
|
/**
|
|
* Blablabla
|
|
*
|
|
*/
|
|
|
|
""");
|
|
|
|
def mask(bit):
|
|
return str((1 << bit) - 1)
|
|
|
|
for length in [32]:
|
|
print("""
|
|
static __m128i iunpackFOR0(__m128i initOffset, const __m128i * _in , uint32_t * _out) {
|
|
__m128i *out = (__m128i*)(_out);
|
|
int i;
|
|
(void) _in;
|
|
for (i = 0; i < 8; ++i) {
|
|
_mm_store_si128(out++, initOffset);
|
|
_mm_store_si128(out++, initOffset);
|
|
_mm_store_si128(out++, initOffset);
|
|
_mm_store_si128(out++, initOffset);
|
|
}
|
|
|
|
return initOffset;
|
|
}
|
|
|
|
""")
|
|
print("""
|
|
|
|
static void ipackFOR0(__m128i initOffset , const uint32_t * _in , __m128i * out ) {
|
|
(void) initOffset;
|
|
(void) _in;
|
|
(void) out;
|
|
}
|
|
""")
|
|
for bit in range(1,33):
|
|
offsetVar = " initOffset";
|
|
print("""
|
|
static void ipackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const uint32_t * _in, __m128i * out) {
|
|
const __m128i *in = (const __m128i*)(_in);
|
|
__m128i OutReg;
|
|
|
|
""");
|
|
|
|
if (bit != 32):
|
|
print(" __m128i CurrIn = _mm_load_si128(in);");
|
|
print(" __m128i InReg = _mm_sub_epi32(CurrIn, initOffset);");
|
|
else:
|
|
print(" __m128i InReg = _mm_load_si128(in);");
|
|
print(" (void) initOffset;");
|
|
|
|
|
|
inwordpointer = 0
|
|
valuecounter = 0
|
|
for k in range(ceil((length * bit) / 32)):
|
|
if(valuecounter == length): break
|
|
for x in range(inwordpointer,32,bit):
|
|
if(x!=0) :
|
|
print(" OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, " + str(x) + "));");
|
|
else:
|
|
print(" OutReg = InReg; ");
|
|
if((x+bit>=32) ):
|
|
while(inwordpointer<32):
|
|
inwordpointer += bit
|
|
print(" _mm_store_si128(out, OutReg);");
|
|
print("");
|
|
|
|
if(valuecounter + 1 < length):
|
|
print(" ++out;")
|
|
inwordpointer -= 32;
|
|
if(inwordpointer>0):
|
|
print(" OutReg = _mm_srli_epi32(InReg, " + str(bit) + " - " + str(inwordpointer) + ");");
|
|
if(valuecounter + 1 < length):
|
|
print(" ++in;")
|
|
|
|
if (bit != 32):
|
|
print(" CurrIn = _mm_load_si128(in);");
|
|
print(" InReg = _mm_sub_epi32(CurrIn, initOffset);");
|
|
else:
|
|
print(" InReg = _mm_load_si128(in);");
|
|
print("");
|
|
valuecounter = valuecounter + 1
|
|
if(valuecounter == length): break
|
|
assert(valuecounter == length)
|
|
print("\n}\n\n""")
|
|
|
|
for bit in range(1,32):
|
|
offsetVar = " initOffset";
|
|
print("""\n
|
|
static __m128i iunpackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const __m128i* in, uint32_t * _out) {
|
|
""");
|
|
print(""" __m128i* out = (__m128i*)(_out);
|
|
__m128i InReg = _mm_load_si128(in);
|
|
__m128i OutReg;
|
|
__m128i tmp;
|
|
const __m128i mask = _mm_set1_epi32((1U<<"""+str(bit)+""")-1);
|
|
|
|
""");
|
|
|
|
MainText = "";
|
|
|
|
MainText += "\n";
|
|
inwordpointer = 0
|
|
valuecounter = 0
|
|
for k in range(ceil((length * bit) / 32)):
|
|
for x in range(inwordpointer,32,bit):
|
|
if(valuecounter == length): break
|
|
if (x > 0):
|
|
MainText += " tmp = _mm_srli_epi32(InReg," + str(x) +");\n";
|
|
else:
|
|
MainText += " tmp = InReg;\n";
|
|
if(x+bit<32):
|
|
MainText += " OutReg = _mm_and_si128(tmp, mask);\n";
|
|
else:
|
|
MainText += " OutReg = tmp;\n";
|
|
if((x+bit>=32) ):
|
|
while(inwordpointer<32):
|
|
inwordpointer += bit
|
|
if(valuecounter + 1 < length):
|
|
MainText += " ++in;"
|
|
MainText += " InReg = _mm_load_si128(in);\n";
|
|
inwordpointer -= 32;
|
|
if(inwordpointer>0):
|
|
MainText += " OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, " + str(bit) + "-" + str(inwordpointer) + "), mask));\n\n";
|
|
if (bit != 32):
|
|
MainText += " OutReg = _mm_add_epi32(OutReg, initOffset);\n";
|
|
MainText += " _mm_store_si128(out++, OutReg);\n\n";
|
|
MainText += "";
|
|
valuecounter = valuecounter + 1
|
|
if(valuecounter == length): break
|
|
assert(valuecounter == length)
|
|
print(MainText)
|
|
print(" return initOffset;");
|
|
print("\n}\n\n")
|
|
print("""
|
|
static __m128i iunpackFOR32(__m128i initvalue , const __m128i* in, uint32_t * _out) {
|
|
__m128i * mout = (__m128i *)_out;
|
|
__m128i invec;
|
|
size_t k;
|
|
for(k = 0; k < 128/4; ++k) {
|
|
invec = _mm_load_si128(in++);
|
|
_mm_store_si128(mout++, invec);
|
|
}
|
|
return invec;
|
|
}
|
|
""")
|