mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-31 20:28:53 +00:00
Scratchpad size increased to 1 MiB
New AES-based scratchpad hashing function
This commit is contained in:
parent
93c324709b
commit
16db607025
5
makefile
5
makefile
@ -11,7 +11,7 @@ SRCDIR=src
|
|||||||
OBJDIR=obj
|
OBJDIR=obj
|
||||||
LDFLAGS=-lpthread
|
LDFLAGS=-lpthread
|
||||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o)
|
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o hashAes1Rx4.o)
|
||||||
ifeq ($(PLATFORM),x86_64)
|
ifeq ($(PLATFORM),x86_64)
|
||||||
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
|
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
|
||||||
endif
|
endif
|
||||||
@ -68,6 +68,9 @@ $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) |
|
|||||||
$(OBJDIR)/divideByConstantCodegen.o: $(addprefix $(SRCDIR)/,divideByConstantCodegen.c divideByConstantCodegen.h) | $(OBJDIR)
|
$(OBJDIR)/divideByConstantCodegen.o: $(addprefix $(SRCDIR)/,divideByConstantCodegen.c divideByConstantCodegen.h) | $(OBJDIR)
|
||||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/divideByConstantCodegen.c -o $@
|
$(CC) $(CCFLAGS) -c $(SRCDIR)/divideByConstantCodegen.c -o $@
|
||||||
|
|
||||||
|
$(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h) | $(OBJDIR)
|
||||||
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/hashAes1Rx4.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
||||||
|
|
||||||
|
@ -73,6 +73,7 @@ namespace RandomX {
|
|||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
if ((instr.loca & 192) == 0)
|
if ((instr.loca & 192) == 0)
|
||||||
asmCode << "\txor " << regMx << ", rax" << std::endl;
|
asmCode << "\txor " << regMx << ", rax" << std::endl;
|
||||||
|
if (instr.loca & 15) {
|
||||||
if (instr.loca & 3) {
|
if (instr.loca & 3) {
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
}
|
}
|
||||||
@ -80,6 +81,10 @@ namespace RandomX {
|
|||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||||
gena(instr, i);
|
gena(instr, i);
|
||||||
@ -123,40 +128,32 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) {
|
void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) {
|
||||||
switch (instr.locc & 7)
|
if (instr.locc & 16) { //write to register
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
if(rax)
|
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
|
||||||
if (trace) {
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
if (rax)
|
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
|
||||||
if (trace) {
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
|
|
||||||
default:
|
|
||||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl;
|
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl;
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl;
|
||||||
}
|
}
|
||||||
return;
|
}
|
||||||
|
else { //write to scratchpad
|
||||||
|
if (rax)
|
||||||
|
asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
|
if (instr.locc & 15) {
|
||||||
|
if (instr.locc & 3) {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,23 +161,21 @@ namespace RandomX {
|
|||||||
if(move)
|
if(move)
|
||||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||||
const char* store = (instr.locc & 128) ? "movhpd" : "movlpd";
|
const char* store = (instr.locc & 128) ? "movhpd" : "movlpd";
|
||||||
switch (instr.locc & 7)
|
if (instr.locc & 16) { //write to scratchpad
|
||||||
{
|
|
||||||
case 4:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 5:
|
|
||||||
case 6:
|
|
||||||
case 7:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
|
if (instr.locc & 15) {
|
||||||
|
if (instr.locc & 3) {
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
||||||
|
}
|
||||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||||
|
@ -182,11 +182,16 @@ namespace RandomX {
|
|||||||
emitByte(0xe8); //xor rbp, rax
|
emitByte(0xe8); //xor rbp, rax
|
||||||
}
|
}
|
||||||
emitByte(0x25); //and eax,
|
emitByte(0x25); //and eax,
|
||||||
|
if (instr.loca & 15) {
|
||||||
if (instr.loca & 3) {
|
if (instr.loca & 3) {
|
||||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
emit(ScratchpadL3 - 1); //whole scratchpad
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -266,19 +271,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
||||||
switch (instr.locc & 7)
|
if (instr.locc & 16) { //write to register
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
scratchpadStoreR(instr, ScratchpadL2, rax);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
scratchpadStoreR(instr, ScratchpadL1, rax);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
emit(uint16_t(0x8b4c)); //mov
|
emit(uint16_t(0x8b4c)); //mov
|
||||||
if (rax) {
|
if (rax) {
|
||||||
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
||||||
@ -286,7 +279,19 @@ namespace RandomX {
|
|||||||
else {
|
else {
|
||||||
emitByte(0xc1 + 8 * (instr.regc % RegistersCount)); //regc, rcx
|
emitByte(0xc1 + 8 * (instr.regc % RegistersCount)); //regc, rcx
|
||||||
}
|
}
|
||||||
break;
|
}
|
||||||
|
else {
|
||||||
|
if (instr.locc & 15) {
|
||||||
|
if (instr.locc & 3) {
|
||||||
|
scratchpadStoreR(instr, ScratchpadL1, rax);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
scratchpadStoreR(instr, ScratchpadL2, rax);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
scratchpadStoreR(instr, ScratchpadL3, rax);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -314,13 +319,17 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
emit(uint16_t(0x280f)); //movaps
|
emit(uint16_t(0x280f)); //movaps
|
||||||
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
||||||
if (instr.locc & 4) //C.LOC.R
|
if (instr.locc & 16) { //write to scratchpad
|
||||||
{
|
if (instr.locc & 15) {
|
||||||
if (instr.locc & 3) { //C.LOC.W
|
if (instr.locc & 3) { //C.LOC.W
|
||||||
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //whole scratchpad
|
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#include "VirtualMachine.hpp"
|
#include "VirtualMachine.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "t1ha/t1ha.h"
|
#include "hashAes1Rx4.hpp"
|
||||||
#include "blake2/blake2.h"
|
#include "blake2/blake2.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@ -40,10 +40,10 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void VirtualMachine::getResult(void* out) {
|
void VirtualMachine::getResult(void* out) {
|
||||||
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2;
|
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
|
||||||
uint64_t smallState[smallStateLength];
|
alignas(16) uint64_t smallState[smallStateLength];
|
||||||
memcpy(smallState, ®, sizeof(RegisterFile));
|
memcpy(smallState, ®, sizeof(RegisterFile));
|
||||||
smallState[smallStateLength - 1] = t1ha2_atonce128(&smallState[smallStateLength - 2], scratchpad, ScratchpadSize, reg.r[0].u64);
|
hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24);
|
||||||
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
|
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -74,10 +74,11 @@ namespace RandomX {
|
|||||||
|
|
||||||
constexpr int ProgramLength = 512;
|
constexpr int ProgramLength = 512;
|
||||||
constexpr uint32_t InstructionCount = 1024 * 1024;
|
constexpr uint32_t InstructionCount = 1024 * 1024;
|
||||||
constexpr uint32_t ScratchpadSize = 256 * 1024;
|
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
||||||
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 16 / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 64 / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL2 = ScratchpadSize / 4 / sizeof(convertible_t);
|
||||||
|
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr uint32_t TransformationCount = 90;
|
constexpr uint32_t TransformationCount = 90;
|
||||||
constexpr int RegistersCount = 8;
|
constexpr int RegistersCount = 8;
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
#include "virtualMemory.hpp"
|
#include "virtualMemory.hpp"
|
||||||
|
#include "softAes.h"
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
@ -46,21 +47,6 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
static inline __m128i aesenc(__m128i in, __m128i key) {
|
|
||||||
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
static inline __m128i aesdec(__m128i in, __m128i key) {
|
|
||||||
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define AES_ROUND(i) x0 = aesdec<soft>(x0, keys[i]); \
|
|
||||||
x1 = aesenc<soft>(x1, keys[i]); \
|
|
||||||
x2 = aesdec<soft>(x2, keys[i]); \
|
|
||||||
x3 = aesenc<soft>(x3, keys[i])
|
|
||||||
|
|
||||||
template<bool soft>
|
template<bool soft>
|
||||||
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||||
__m128i x0, x1, x2, x3;
|
__m128i x0, x1, x2, x3;
|
||||||
@ -73,13 +59,13 @@ namespace RandomX {
|
|||||||
|
|
||||||
for (auto i = 0; i < DatasetIterations; ++i) {
|
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||||
x0 = aesenc<soft>(x0, keys[0]);
|
x0 = aesenc<soft>(x0, keys[0]);
|
||||||
x0 = aesenc<soft>(x0, keys[1]);
|
//x0 = aesenc<soft>(x0, keys[1]);
|
||||||
x1 = aesenc<soft>(x0, keys[2]);
|
x1 = aesenc<soft>(x0, keys[2]);
|
||||||
x1 = aesenc<soft>(x1, keys[3]);
|
//x1 = aesenc<soft>(x1, keys[3]);
|
||||||
x2 = aesenc<soft>(x1, keys[4]);
|
x2 = aesenc<soft>(x1, keys[4]);
|
||||||
x2 = aesenc<soft>(x2, keys[5]);
|
//x2 = aesenc<soft>(x2, keys[5]);
|
||||||
x3 = aesenc<soft>(x2, keys[6]);
|
x3 = aesenc<soft>(x2, keys[6]);
|
||||||
x3 = aesenc<soft>(x3, keys[7]);
|
//x3 = aesenc<soft>(x3, keys[7]);
|
||||||
|
|
||||||
int index = _mm_cvtsi128_si32(x3);
|
int index = _mm_cvtsi128_si32(x3);
|
||||||
index &= mask;
|
index &= mask;
|
||||||
|
@ -23,7 +23,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include <array>
|
#include <array>
|
||||||
#include "intrinPortable.h"
|
#include "intrinPortable.h"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "softAes.h"
|
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
|
73
src/hashAes1Rx4.cpp
Normal file
73
src/hashAes1Rx4.cpp
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2019 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "softAes.h"
|
||||||
|
|
||||||
|
// Hashes `input` into a 64-byte digest written to `hash`, using four
// independent 128-bit AES lanes with one AES round per 16 bytes of input.
//
// softAes   - forwarded to aesenc<>/aesdec<> to select their implementation.
// input     - data to hash; read with _mm_load_si128 (the aligned-load
//             intrinsic), 64 bytes per loop iteration.
// inputSize - size of `input` in bytes.
// hash      - receives 4 x 16 bytes via _mm_store_si128 (the aligned-store
//             intrinsic).
//
// NOTE(review): the loop advances in 64-byte steps while `inptr < inputEnd`,
// so an inputSize that is not a multiple of 64 makes the last iteration read
// past the end of the buffer. Presumably callers only pass multiples of 64 -
// confirm.
template<bool softAes>
|
||||||
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||||
|
const uint8_t* inptr = (uint8_t*)input;
|
||||||
|
const uint8_t* inputEnd = inptr + inputSize;
|
||||||
|
|
||||||
|
__m128i state0, state1, state2, state3;
|
||||||
|
__m128i in0, in1, in2, in3;
|
||||||
|
|
||||||
|
//initial state: one fixed 128-bit constant per lane
|
||||||
|
state0 = _mm_set_epi32(0x9d04b0ae, 0x59943385, 0x30ac8d93, 0x3fe49f5d);
|
||||||
|
state1 = _mm_set_epi32(0x8a39ebf1, 0xddc10935, 0xa724ecd3, 0x7b0c6064);
|
||||||
|
state2 = _mm_set_epi32(0x7ec70420, 0xdf01edda, 0x7c12ecf7, 0xfb5382e3);
|
||||||
|
state3 = _mm_set_epi32(0x94a9d201, 0x5082d1c8, 0xb2e74109, 0x7728b705);
|
||||||
|
|
||||||
|
//process 64 bytes at a time in 4 lanes
|
||||||
|
while (inptr < inputEnd) {
|
||||||
|
in0 = _mm_load_si128((__m128i*)inptr + 0);
|
||||||
|
in1 = _mm_load_si128((__m128i*)inptr + 1);
|
||||||
|
in2 = _mm_load_si128((__m128i*)inptr + 2);
|
||||||
|
in3 = _mm_load_si128((__m128i*)inptr + 3);
|
||||||
|
|
||||||
|
//each 16-byte input block is passed as the round key of its lane;
//lanes 0 and 2 use aesenc, lanes 1 and 3 use aesdec
state0 = aesenc<softAes>(state0, in0);
|
||||||
|
state1 = aesdec<softAes>(state1, in1);
|
||||||
|
state2 = aesenc<softAes>(state2, in2);
|
||||||
|
state3 = aesdec<softAes>(state3, in3);
|
||||||
|
|
||||||
|
inptr += 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
//two extra rounds to achieve full diffusion
|
||||||
|
__m128i xkey0 = _mm_set_epi32(0x4ff637c5, 0x053bd705, 0x8231a744, 0xc3767b17);
|
||||||
|
__m128i xkey1 = _mm_set_epi32(0x6594a1a6, 0xa8879d58, 0xb01da200, 0x8a8fae2e);
|
||||||
|
|
||||||
|
//first extra round: all four lanes use the same fixed key xkey0
state0 = aesenc<softAes>(state0, xkey0);
|
||||||
|
state1 = aesdec<softAes>(state1, xkey0);
|
||||||
|
state2 = aesenc<softAes>(state2, xkey0);
|
||||||
|
state3 = aesdec<softAes>(state3, xkey0);
|
||||||
|
|
||||||
|
//second extra round: all four lanes use the same fixed key xkey1
state0 = aesenc<softAes>(state0, xkey1);
|
||||||
|
state1 = aesdec<softAes>(state1, xkey1);
|
||||||
|
state2 = aesenc<softAes>(state2, xkey1);
|
||||||
|
state3 = aesdec<softAes>(state3, xkey1);
|
||||||
|
|
||||||
|
//output hash: the four lane states, 16 bytes each, in lane order
|
||||||
|
_mm_store_si128((__m128i*)hash + 0, state0);
|
||||||
|
_mm_store_si128((__m128i*)hash + 1, state1);
|
||||||
|
_mm_store_si128((__m128i*)hash + 2, state2);
|
||||||
|
_mm_store_si128((__m128i*)hash + 3, state3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit instantiations for both values of the softAes parameter
// (false and true), emitted from this source file.
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
|
||||||
|
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
|
23
src/hashAes1Rx4.hpp
Normal file
23
src/hashAes1Rx4.hpp
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2019 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "softAes.h"
|
||||||
|
|
||||||
|
// Declaration only: hashes `inputSize` bytes at `input` and writes the result
// to `hash`. softAes is a compile-time flag; the template body is not defined
// in this header.
template<bool softAes>
|
||||||
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
@ -145,7 +145,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||||||
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
||||||
*noncePtr = nonce;
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8);
|
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
||||||
vm->initializeScratchpad(spIndex);
|
vm->initializeScratchpad(spIndex);
|
||||||
vm->initializeProgram(hash);
|
vm->initializeProgram(hash);
|
||||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||||
|
1539
src/program.inc
1539
src/program.inc
File diff suppressed because it is too large
Load Diff
@ -26,3 +26,13 @@ __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
|||||||
__m128i soft_aesenc(__m128i in, __m128i key);
|
__m128i soft_aesenc(__m128i in, __m128i key);
|
||||||
|
|
||||||
__m128i soft_aesdec(__m128i in, __m128i key);
|
__m128i soft_aesdec(__m128i in, __m128i key);
|
||||||
|
|
||||||
|
// Applies one AES encryption round to `in` with round key `key`.
// soft == true  -> calls soft_aesenc (declared above in this header);
// soft == false -> calls the _mm_aesenc_si128 intrinsic.
template<bool soft>
|
||||||
|
inline __m128i aesenc(__m128i in, __m128i key) {
|
||||||
|
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Applies one AES decryption round to `in` with round key `key`.
// soft == true  -> calls soft_aesdec (declared above in this header);
// soft == false -> calls the _mm_aesdec_si128 intrinsic.
template<bool soft>
|
||||||
|
inline __m128i aesdec(__m128i in, __m128i key) {
|
||||||
|
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user