Scratchpad size increased to 1 MiB

New AES-based scratchpad hashing function
This commit is contained in:
tevador 2019-01-18 23:51:18 +01:00
parent 93c324709b
commit 16db607025
12 changed files with 923 additions and 925 deletions

View File

@ -11,7 +11,7 @@ SRCDIR=src
OBJDIR=obj OBJDIR=obj
LDFLAGS=-lpthread LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o) ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o hashAes1Rx4.o)
ifeq ($(PLATFORM),x86_64) ifeq ($(PLATFORM),x86_64)
ROBJS += $(OBJDIR)/JitCompilerX86-static.o ROBJS += $(OBJDIR)/JitCompilerX86-static.o
endif endif
@ -68,6 +68,9 @@ $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) |
$(OBJDIR)/divideByConstantCodegen.o: $(addprefix $(SRCDIR)/,divideByConstantCodegen.c divideByConstantCodegen.h) | $(OBJDIR) $(OBJDIR)/divideByConstantCodegen.o: $(addprefix $(SRCDIR)/,divideByConstantCodegen.c divideByConstantCodegen.h) | $(OBJDIR)
$(CC) $(CCFLAGS) -c $(SRCDIR)/divideByConstantCodegen.c -o $@ $(CC) $(CCFLAGS) -c $(SRCDIR)/divideByConstantCodegen.c -o $@
$(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/hashAes1Rx4.cpp -o $@
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR) $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@

View File

@ -73,6 +73,7 @@ namespace RandomX {
asmCode << "rx_body_" << i << ":" << std::endl; asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0) if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rax" << std::endl; asmCode << "\txor " << regMx << ", rax" << std::endl;
if (instr.loca & 15) {
if (instr.loca & 3) { if (instr.loca & 3) {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
} }
@ -80,6 +81,10 @@ namespace RandomX {
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
} }
} }
else {
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
}
}
void AssemblyGeneratorX86::genar(Instruction& instr, int i) { void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
gena(instr, i); gena(instr, i);
@ -123,40 +128,32 @@ namespace RandomX {
} }
void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) { void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) {
switch (instr.locc & 7) if (instr.locc & 16) { //write to register
{
case 0:
if(rax)
asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
}
return;
case 1:
case 2:
case 3:
if (rax)
asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
}
return;
default:
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl; asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl;
if (trace) { if (trace) {
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl; asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl;
} }
return; }
else { //write to scratchpad
if (rax)
asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
if (instr.locc & 15) {
if (instr.locc & 3) {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
}
else {
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
}
}
else {
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
}
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
}
} }
} }
@ -164,23 +161,21 @@ namespace RandomX {
if(move) if(move)
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl; asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
const char* store = (instr.locc & 128) ? "movhpd" : "movlpd"; const char* store = (instr.locc & 128) ? "movhpd" : "movlpd";
switch (instr.locc & 7) if (instr.locc & 16) { //write to scratchpad
{
case 4:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break;
case 5:
case 6:
case 7:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
if (instr.locc & 15) {
if (instr.locc & 3) {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
}
else {
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
}
}
else {
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
}
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break;
} }
if (trace) { if (trace) {
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl; asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;

View File

@ -182,11 +182,16 @@ namespace RandomX {
emitByte(0xe8); //xor rbp, rax emitByte(0xe8); //xor rbp, rax
} }
emitByte(0x25); //and eax, emitByte(0x25); //and eax,
if (instr.loca & 15) {
if (instr.loca & 3) { if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
} }
else { else {
emit(ScratchpadL2 - 1); //whole scratchpad emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
}
}
else {
emit(ScratchpadL3 - 1); //whole scratchpad
} }
} }
@ -266,19 +271,7 @@ namespace RandomX {
} }
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) { void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
switch (instr.locc & 7) if (instr.locc & 16) { //write to register
{
case 0:
scratchpadStoreR(instr, ScratchpadL2, rax);
break;
case 1:
case 2:
case 3:
scratchpadStoreR(instr, ScratchpadL1, rax);
break;
default:
emit(uint16_t(0x8b4c)); //mov emit(uint16_t(0x8b4c)); //mov
if (rax) { if (rax) {
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
@ -286,7 +279,19 @@ namespace RandomX {
else { else {
emitByte(0xc1 + 8 * (instr.regc % RegistersCount)); //regc, rcx emitByte(0xc1 + 8 * (instr.regc % RegistersCount)); //regc, rcx
} }
break; }
else {
if (instr.locc & 15) {
if (instr.locc & 3) {
scratchpadStoreR(instr, ScratchpadL1, rax);
}
else {
scratchpadStoreR(instr, ScratchpadL2, rax);
}
}
else {
scratchpadStoreR(instr, ScratchpadL3, rax);
}
} }
} }
@ -314,13 +319,17 @@ namespace RandomX {
} }
emit(uint16_t(0x280f)); //movaps emit(uint16_t(0x280f)); //movaps
emitByte(0xc0 + 8 * regc); // regc, xmm0 emitByte(0xc0 + 8 * regc); // regc, xmm0
if (instr.locc & 4) //C.LOC.R if (instr.locc & 16) { //write to scratchpad
{ if (instr.locc & 15) {
if (instr.locc & 3) { //C.LOC.W if (instr.locc & 3) { //C.LOC.W
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
} }
else { else {
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //whole scratchpad scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
}
}
else {
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
} }
} }
} }

View File

@ -19,7 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "VirtualMachine.hpp" #include "VirtualMachine.hpp"
#include "common.hpp" #include "common.hpp"
#include "t1ha/t1ha.h" #include "hashAes1Rx4.hpp"
#include "blake2/blake2.h" #include "blake2/blake2.h"
#include <cstring> #include <cstring>
#include <iomanip> #include <iomanip>
@ -40,10 +40,10 @@ namespace RandomX {
} }
void VirtualMachine::getResult(void* out) { void VirtualMachine::getResult(void* out) {
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2; constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
uint64_t smallState[smallStateLength]; alignas(16) uint64_t smallState[smallStateLength];
memcpy(smallState, &reg, sizeof(RegisterFile)); memcpy(smallState, &reg, sizeof(RegisterFile));
smallState[smallStateLength - 1] = t1ha2_atonce128(&smallState[smallStateLength - 2], scratchpad, ScratchpadSize, reg.r[0].u64); hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24);
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0); blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
} }
} }

View File

@ -74,10 +74,11 @@ namespace RandomX {
constexpr int ProgramLength = 512; constexpr int ProgramLength = 512;
constexpr uint32_t InstructionCount = 1024 * 1024; constexpr uint32_t InstructionCount = 1024 * 1024;
constexpr uint32_t ScratchpadSize = 256 * 1024; constexpr uint32_t ScratchpadSize = 1024 * 1024;
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t); constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 16 / sizeof(convertible_t); constexpr uint32_t ScratchpadL1 = ScratchpadSize / 64 / sizeof(convertible_t);
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t); constexpr uint32_t ScratchpadL2 = ScratchpadSize / 4 / sizeof(convertible_t);
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
constexpr uint32_t TransformationCount = 90; constexpr uint32_t TransformationCount = 90;
constexpr int RegistersCount = 8; constexpr int RegistersCount = 8;

View File

@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "Pcg32.hpp" #include "Pcg32.hpp"
#include "Cache.hpp" #include "Cache.hpp"
#include "virtualMemory.hpp" #include "virtualMemory.hpp"
#include "softAes.h"
#if defined(__SSE2__) #if defined(__SSE2__)
#include <wmmintrin.h> #include <wmmintrin.h>
@ -46,21 +47,6 @@ namespace RandomX {
} }
} }
template<bool soft>
static inline __m128i aesenc(__m128i in, __m128i key) {
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
}
template<bool soft>
static inline __m128i aesdec(__m128i in, __m128i key) {
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
}
#define AES_ROUND(i) x0 = aesdec<soft>(x0, keys[i]); \
x1 = aesenc<soft>(x1, keys[i]); \
x2 = aesdec<soft>(x2, keys[i]); \
x3 = aesenc<soft>(x3, keys[i])
template<bool soft> template<bool soft>
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) { void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
__m128i x0, x1, x2, x3; __m128i x0, x1, x2, x3;
@ -73,13 +59,13 @@ namespace RandomX {
for (auto i = 0; i < DatasetIterations; ++i) { for (auto i = 0; i < DatasetIterations; ++i) {
x0 = aesenc<soft>(x0, keys[0]); x0 = aesenc<soft>(x0, keys[0]);
x0 = aesenc<soft>(x0, keys[1]); //x0 = aesenc<soft>(x0, keys[1]);
x1 = aesenc<soft>(x0, keys[2]); x1 = aesenc<soft>(x0, keys[2]);
x1 = aesenc<soft>(x1, keys[3]); //x1 = aesenc<soft>(x1, keys[3]);
x2 = aesenc<soft>(x1, keys[4]); x2 = aesenc<soft>(x1, keys[4]);
x2 = aesenc<soft>(x2, keys[5]); //x2 = aesenc<soft>(x2, keys[5]);
x3 = aesenc<soft>(x2, keys[6]); x3 = aesenc<soft>(x2, keys[6]);
x3 = aesenc<soft>(x3, keys[7]); //x3 = aesenc<soft>(x3, keys[7]);
int index = _mm_cvtsi128_si32(x3); int index = _mm_cvtsi128_si32(x3);
index &= mask; index &= mask;

View File

@ -23,7 +23,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <array> #include <array>
#include "intrinPortable.h" #include "intrinPortable.h"
#include "common.hpp" #include "common.hpp"
#include "softAes.h"
namespace RandomX { namespace RandomX {

73
src/hashAes1Rx4.cpp Normal file
View File

@ -0,0 +1,73 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include <cassert>
#include "softAes.h"
template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
__m128i state0, state1, state2, state3;
__m128i in0, in1, in2, in3;
//intial state
state0 = _mm_set_epi32(0x9d04b0ae, 0x59943385, 0x30ac8d93, 0x3fe49f5d);
state1 = _mm_set_epi32(0x8a39ebf1, 0xddc10935, 0xa724ecd3, 0x7b0c6064);
state2 = _mm_set_epi32(0x7ec70420, 0xdf01edda, 0x7c12ecf7, 0xfb5382e3);
state3 = _mm_set_epi32(0x94a9d201, 0x5082d1c8, 0xb2e74109, 0x7728b705);
//process 64 bytes at a time in 4 lanes
while (inptr < inputEnd) {
in0 = _mm_load_si128((__m128i*)inptr + 0);
in1 = _mm_load_si128((__m128i*)inptr + 1);
in2 = _mm_load_si128((__m128i*)inptr + 2);
in3 = _mm_load_si128((__m128i*)inptr + 3);
state0 = aesenc<softAes>(state0, in0);
state1 = aesdec<softAes>(state1, in1);
state2 = aesenc<softAes>(state2, in2);
state3 = aesdec<softAes>(state3, in3);
inptr += 64;
}
//two extra rounds to achieve full diffusion
__m128i xkey0 = _mm_set_epi32(0x4ff637c5, 0x053bd705, 0x8231a744, 0xc3767b17);
__m128i xkey1 = _mm_set_epi32(0x6594a1a6, 0xa8879d58, 0xb01da200, 0x8a8fae2e);
state0 = aesenc<softAes>(state0, xkey0);
state1 = aesdec<softAes>(state1, xkey0);
state2 = aesenc<softAes>(state2, xkey0);
state3 = aesdec<softAes>(state3, xkey0);
state0 = aesenc<softAes>(state0, xkey1);
state1 = aesdec<softAes>(state1, xkey1);
state2 = aesenc<softAes>(state2, xkey1);
state3 = aesdec<softAes>(state3, xkey1);
//output hash
_mm_store_si128((__m128i*)hash + 0, state0);
_mm_store_si128((__m128i*)hash + 1, state1);
_mm_store_si128((__m128i*)hash + 2, state2);
_mm_store_si128((__m128i*)hash + 3, state3);
}
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);

23
src/hashAes1Rx4.hpp Normal file
View File

@ -0,0 +1,23 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "softAes.h"
/*
	Hashes 'inputSize' bytes starting at 'input' and writes a 64-byte result to 'hash'.

	softAes   - true to use the software AES round functions, false to use the
	            hardware AES intrinsics
	input     - data to be hashed; the definition reads it with aligned 16-byte loads
	inputSize - size of the input in bytes; the definition consumes the input in
	            64-byte steps, so this is expected to be a multiple of 64
	hash      - output buffer for the 64-byte hash; the definition writes it with
	            aligned 16-byte stores

	Only declared here; the definition and the explicit instantiations for both
	values of softAes are in hashAes1Rx4.cpp.
*/
template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);

View File

@ -145,7 +145,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl; //std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
*noncePtr = nonce; *noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8); int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
vm->initializeScratchpad(spIndex); vm->initializeScratchpad(spIndex);
vm->initializeProgram(hash); vm->initializeProgram(hash);
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt"); //dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");

File diff suppressed because it is too large Load Diff

View File

@ -26,3 +26,13 @@ __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
__m128i soft_aesenc(__m128i in, __m128i key); __m128i soft_aesenc(__m128i in, __m128i key);
__m128i soft_aesdec(__m128i in, __m128i key); __m128i soft_aesdec(__m128i in, __m128i key);
//Performs one AES encryption round on 'in' with the round key 'key'.
//soft = true dispatches to the software implementation, soft = false to the hardware intrinsic.
template<bool soft>
inline __m128i aesenc(__m128i in, __m128i key) {
	if (soft) {
		return soft_aesenc(in, key);
	}
	return _mm_aesenc_si128(in, key);
}
//Performs one AES decryption round on 'in' with the round key 'key'.
//soft = true dispatches to the software implementation, soft = false to the hardware intrinsic.
template<bool soft>
inline __m128i aesdec(__m128i in, __m128i key) {
	if (soft) {
		return soft_aesdec(in, key);
	}
	return _mm_aesdec_si128(in, key);
}