From b4c02051fa45b1542afaea2a66814d234cf7d338 Mon Sep 17 00:00:00 2001 From: tevador Date: Sun, 7 Apr 2019 15:38:51 +0200 Subject: [PATCH] Reworked SuperscalarHash instruction set ASM and C code generator for SuperscalarHash Support for Superscalar hash in the light mode --- src/AssemblyGeneratorX86.cpp | 174 ++++++++++++++++++ src/AssemblyGeneratorX86.hpp | 19 +- src/CompiledLightVirtualMachine.cpp | 19 +- src/CompiledLightVirtualMachine.hpp | 5 +- src/CompiledVirtualMachine.cpp | 2 +- src/CompiledVirtualMachine.hpp | 2 +- src/InterpretedVirtualMachine.cpp | 2 +- src/InterpretedVirtualMachine.hpp | 2 +- src/JitCompilerX86-static.asm | 34 ++++ src/JitCompilerX86-static.hpp | 2 + src/JitCompilerX86.cpp | 121 +++++++++++- src/JitCompilerX86.hpp | 5 +- src/LightProgramGenerator.cpp | 276 ++++++++++++---------------- src/LightProgramGenerator.hpp | 21 +++ src/VirtualMachine.hpp | 4 +- src/main.cpp | 29 +-- 16 files changed, 505 insertions(+), 212 deletions(-) diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index dc4cea2..a25a377 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -23,6 +23,7 @@ along with RandomX. If not, see. 
#include "common.hpp" #include "reciprocal.h" #include "Program.hpp" +#include "./LightProgramGenerator.hpp" namespace RandomX { @@ -46,6 +47,179 @@ namespace RandomX { static const char* regDatasetAddr = "rdi"; static const char* regScratchpadAddr = "rsi"; + void AssemblyGeneratorX86::generateProgram(Program& prog) { + for (unsigned i = 0; i < 8; ++i) { + registerUsage[i] = -1; + } + asmCode.str(std::string()); //clear + for (unsigned i = 0; i < prog.getSize(); ++i) { + asmCode << "randomx_isn_" << i << ":" << std::endl; + Instruction& instr = prog(i); + instr.src %= RegistersCount; + instr.dst %= RegistersCount; + generateCode(instr, i); + //asmCode << std::endl; + } + } + + void AssemblyGeneratorX86::generateAsm(LightProgram& prog) { + asmCode.str(std::string()); //clear + asmCode << "ALIGN 16" << std::endl; + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + switch (instr.opcode) + { + case RandomX::LightInstructionType::ISUB_R: + asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case RandomX::LightInstructionType::IXOR_R: + asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case RandomX::LightInstructionType::IADD_RS: + asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; + break; + case RandomX::LightInstructionType::IMUL_R: + asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case RandomX::LightInstructionType::IROR_C: + asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl; + break; + case RandomX::LightInstructionType::IADD_C7: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + break; + case RandomX::LightInstructionType::IXOR_C7: + asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + break; + case 
RandomX::LightInstructionType::IADD_C8: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + asmCode << "nop" << std::endl; + break; + case RandomX::LightInstructionType::IXOR_C8: + asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + asmCode << "nop" << std::endl; + break; + case RandomX::LightInstructionType::IADD_C9: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + asmCode << "xchg ax, ax ;nop" << std::endl; + break; + case RandomX::LightInstructionType::IXOR_C9: + asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + asmCode << "xchg ax, ax ;nop" << std::endl; + break; + case RandomX::LightInstructionType::IMULH_R: + asmCode << "mov rax, " << regR[instr.dst] << std::endl; + asmCode << "mul " << regR[instr.src] << std::endl; + asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; + break; + case RandomX::LightInstructionType::ISMULH_R: + asmCode << "mov rax, " << regR[instr.dst] << std::endl; + asmCode << "imul " << regR[instr.src] << std::endl; + asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; + break; + case RandomX::LightInstructionType::IMUL_RCP: + asmCode << "mov rax, " << (int64_t)reciprocal(instr.getImm32()) << std::endl; + asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl; + break; + default: + UNREACHABLE; + } + } + } + + void AssemblyGeneratorX86::generateC(LightProgram& prog) { + asmCode.str(std::string()); //clear + asmCode << "#include " << std::endl; + asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; + asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; + asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl; + asmCode << " }" << std::endl; + asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; + asmCode << " return ((__int128)a * b) >> 64;" << std::endl; + asmCode << " }" << 
std::endl; + asmCode << " #define HAVE_MULH" << std::endl; + asmCode << " #define HAVE_SMULH" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "#if defined(_MSC_VER)" << std::endl; + asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl; + asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl; + asmCode << " #include " << std::endl; + asmCode << " #include " << std::endl; + asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl; + asmCode << " return _rotr64(x, c);" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_ROTR" << std::endl; + asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl; + asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; + asmCode << " return __umulh(a, b);" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_MULH" << std::endl; + asmCode << " #endif" << std::endl; + asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl; + asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; + asmCode << " int64_t hi;" << std::endl; + asmCode << " _mul128(a, b, &hi);" << std::endl; + asmCode << " return hi;" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_SMULH" << std::endl; + asmCode << " #endif" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "#ifndef HAVE_ROTR" << std::endl; + asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl; + asmCode << " return (a >> b) | (a << (64 - b));" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_ROTR" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl; + asmCode << " #error \"Required functions are not defined\"" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "void superScalar(uint64_t r[8]) {" << std::endl; + asmCode << "uint64_t r8 = r[0], r9 = r[1], 
r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + switch (instr.opcode) + { + case RandomX::LightInstructionType::ISUB_R: + asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; + break; + case RandomX::LightInstructionType::IXOR_R: + asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; + break; + case RandomX::LightInstructionType::IADD_RS: + asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << ";" << std::endl; + break; + case RandomX::LightInstructionType::IMUL_R: + asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; + break; + case RandomX::LightInstructionType::IROR_C: + asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl; + break; + case RandomX::LightInstructionType::IADD_C7: + case RandomX::LightInstructionType::IADD_C8: + case RandomX::LightInstructionType::IADD_C9: + asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl; + break; + case RandomX::LightInstructionType::IXOR_C7: + case RandomX::LightInstructionType::IXOR_C8: + case RandomX::LightInstructionType::IXOR_C9: + asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl; + break; + case RandomX::LightInstructionType::IMULH_R: + asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; + break; + case RandomX::LightInstructionType::ISMULH_R: + asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; + break; + case RandomX::LightInstructionType::IMUL_RCP: + asmCode << regR[instr.dst] << " *= " << (int64_t)reciprocal(instr.getImm32()) << ";" << std::endl; + break; + default: + UNREACHABLE; + } + } + asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; 
r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl; + asmCode << "}" << std::endl; + } + int AssemblyGeneratorX86::getConditionRegister() { int min = INT_MAX; int minIndex; diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index 601d278..8688cd4 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -27,27 +27,16 @@ along with RandomX. If not, see. namespace RandomX { class Program; + class LightProgram; class AssemblyGeneratorX86; typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); class AssemblyGeneratorX86 { public: - template - void generateProgram(P& prog) { - for (unsigned i = 0; i < 8; ++i) { - registerUsage[i] = -1; - } - asmCode.str(std::string()); //clear - for (unsigned i = 0; i < prog.getSize(); ++i) { - asmCode << "randomx_isn_" << i << ":" << std::endl; - Instruction& instr = prog(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; - generateCode(instr, i); - //asmCode << std::endl; - } - } + void generateProgram(Program& prog); + void generateAsm(LightProgram& prog); + void generateC(LightProgram& prog); void printCode(std::ostream& os) { os << asmCode.rdbuf(); } diff --git a/src/CompiledLightVirtualMachine.cpp b/src/CompiledLightVirtualMachine.cpp index 49e593c..760842a 100644 --- a/src/CompiledLightVirtualMachine.cpp +++ b/src/CompiledLightVirtualMachine.cpp @@ -23,18 +23,25 @@ along with RandomX. If not, see. 
namespace RandomX { - CompiledLightVirtualMachine::CompiledLightVirtualMachine() { - } - - void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size) { + template + void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; + if(superscalar) + compiler.generateSuperScalarHash(programs); //datasetBasePtr = ds.dataset.memory; } - void CompiledLightVirtualMachine::initialize() { + template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + + template + void CompiledLightVirtualMachine::initialize() { VirtualMachine::initialize(); - compiler.generateProgramLight(program); + compiler.generateProgramLight(program); //mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); } + + template void CompiledLightVirtualMachine::initialize(); + template void CompiledLightVirtualMachine::initialize(); } \ No newline at end of file diff --git a/src/CompiledLightVirtualMachine.hpp b/src/CompiledLightVirtualMachine.hpp index 9ac52be..9493c58 100644 --- a/src/CompiledLightVirtualMachine.hpp +++ b/src/CompiledLightVirtualMachine.hpp @@ -26,6 +26,7 @@ along with RandomX. If not, see. 
namespace RandomX { + template class CompiledLightVirtualMachine : public CompiledVirtualMachine { public: void* operator new(size_t size) { @@ -37,8 +38,8 @@ namespace RandomX { void operator delete(void* ptr) { _mm_free(ptr); } - CompiledLightVirtualMachine(); - void setDataset(dataset_t ds, uint64_t size) override; + CompiledLightVirtualMachine() {} + void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; }; } \ No newline at end of file diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index c313209..4984938 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -29,7 +29,7 @@ namespace RandomX { CompiledVirtualMachine::CompiledVirtualMachine() { } - void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size) { + void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; datasetBasePtr = ds.dataset.memory; diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index 9deb621..65b1885 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -42,7 +42,7 @@ namespace RandomX { _mm_free(ptr); } CompiledVirtualMachine(); - void setDataset(dataset_t ds, uint64_t size) override; + void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; virtual void execute() override; void* getProgram() { diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index ebb3571..636b95b 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -49,7 +49,7 @@ namespace RandomX { } - void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) { + void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t 
size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; readDataset = &datasetReadLight; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index d6da7e3..49178bc 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -75,7 +75,7 @@ namespace RandomX { } InterpretedVirtualMachine(bool soft) : softAes(soft) {} ~InterpretedVirtualMachine(); - void setDataset(dataset_t ds, uint64_t size) override; + void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; void execute() override; private: diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm index d16cab7..f149655 100644 --- a/src/JitCompilerX86-static.asm +++ b/src/JitCompilerX86-static.asm @@ -25,6 +25,8 @@ PUBLIC randomx_program_loop_load PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset PUBLIC randomx_program_read_dataset_light +PUBLIC randomx_program_read_dataset_sshash_init +PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_program_read_dataset_light_sub PUBLIC randomx_dataset_init PUBLIC randomx_program_loop_store @@ -65,6 +67,38 @@ randomx_program_read_dataset_light PROC include asm/program_read_dataset_light.inc randomx_program_read_dataset_light ENDP +randomx_program_read_dataset_sshash_init PROC + sub rsp, 72 + mov qword ptr [rsp+64], rbx + mov qword ptr [rsp+56], r8 + mov qword ptr [rsp+48], r9 + mov qword ptr [rsp+40], r10 + mov qword ptr [rsp+32], r11 + mov qword ptr [rsp+24], r12 + mov qword ptr [rsp+16], r13 + mov qword ptr [rsp+8], r14 + mov qword ptr [rsp+0], r15 + xor rbp, rax ;# modify "mx" + ror rbp, 32 ;# swap "ma" and "mx" + mov ebx, ebp ;# ecx = ma + and ebx, 2147483584 ;# align "ma" to the start of a cache line + shr ebx, 6 ;# ebx = Dataset block number + ;# call 32768 +randomx_program_read_dataset_sshash_init ENDP + 
+randomx_program_read_dataset_sshash_fin PROC + mov rbx, qword ptr [rsp+64] + xor r8, qword ptr [rsp+56] + xor r9, qword ptr [rsp+48] + xor r10, qword ptr [rsp+40] + xor r11, qword ptr [rsp+32] + xor r12, qword ptr [rsp+24] + xor r13, qword ptr [rsp+16] + xor r14, qword ptr [rsp+8] + xor r15, qword ptr [rsp+0] + add rsp, 72 +randomx_program_read_dataset_sshash_fin ENDP + randomx_program_loop_store PROC include asm/program_loop_store.inc randomx_program_loop_store ENDP diff --git a/src/JitCompilerX86-static.hpp b/src/JitCompilerX86-static.hpp index cf250c2..3bb56ac 100644 --- a/src/JitCompilerX86-static.hpp +++ b/src/JitCompilerX86-static.hpp @@ -24,6 +24,8 @@ extern "C" { void randomx_program_start(); void randomx_program_read_dataset(); void randomx_program_read_dataset_light(); + void randomx_program_read_dataset_sshash_init(); + void randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); void randomx_program_loop_end(); void randomx_program_read_dataset_light_sub(); diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index d6e27f1..c4b8ea8 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -87,6 +87,7 @@ namespace RandomX { */ #include "JitCompilerX86-static.hpp" +#include "LightProgramGenerator.hpp" #define NOP_TEST true @@ -96,6 +97,8 @@ namespace RandomX { const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light; + const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; + const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store; const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end; @@ -110,7 +113,9 @@ namespace RandomX { 
const int32_t prologueSize = codeLoopBegin - codePrologue; const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset; - const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight; + const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight; + const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; + const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; const int32_t readDatasetLightSubSize = codeDatasetInit - codeReadDatasetLightSub; const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; @@ -199,7 +204,7 @@ namespace RandomX { static const uint8_t NOP1[] = { 0x90 }; static const uint8_t NOP2[] = { 0x66, 0x90 }; - static const uint8_t NOP3[] = { 0x0F, 0x1F, 0x00 }; + static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 }; static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 }; static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; @@ -230,19 +235,31 @@ namespace RandomX { generateProgramEpilogue(prog); } + template void JitCompilerX86::generateProgramLight(Program& prog) { if (RANDOMX_CACHE_ACCESSES != 8) throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); if (RANDOMX_ARGON_GROWTH != 0) throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); generateProgramPrologue(prog); - memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); - codePos += readDatasetLightSize; - emitByte(CALL); - emit32(readDatasetLightSubOffset - (codePos + 4)); + if (superscalar) { + emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); + emitByte(CALL); + emit32(superScalarHashOffset - (codePos + 4)); + emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); + } + else { + 
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); + codePos += readDatasetLightSize; + emitByte(CALL); + emit32(readDatasetLightSubOffset - (codePos + 4)); + } generateProgramEpilogue(prog); } + template void JitCompilerX86::generateProgramLight(Program& prog); + template void JitCompilerX86::generateProgramLight(Program& prog); + template void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) { memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); @@ -253,7 +270,7 @@ namespace RandomX { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr, i); + generateCode(instr, i); } emit(codeShhLoad, codeSshLoadSize); if (j < N - 1) { @@ -318,6 +335,7 @@ namespace RandomX { emit32(epilogueOffset - codePos - 4); } + template void JitCompilerX86::generateCode(Instruction& instr, int i) { #ifdef RANDOMX_JUMP instructionOffsets.push_back(codePos); @@ -326,6 +344,95 @@ namespace RandomX { (this->*generator)(instr, i); } + template<> + void JitCompilerX86::generateCode(Instruction& instr, int i) { + switch (instr.opcode) + { + case RandomX::LightInstructionType::ISUB_R: + emit(REX_SUB_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + break; + case RandomX::LightInstructionType::IXOR_R: + emit(REX_XOR_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + break; + case RandomX::LightInstructionType::IADD_RS: + emit(REX_LEA); + emitByte(0x04 + 8 * instr.dst); + genSIB(instr.mod % 4, instr.src, instr.dst); + break; + case RandomX::LightInstructionType::IMUL_R: + emit(REX_IMUL_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + break; + case RandomX::LightInstructionType::IROR_C: + emit(REX_ROT_I8); + emitByte(0xc8 + instr.dst); + emitByte(instr.getImm32() & 63); + break; + case RandomX::LightInstructionType::IADD_C7: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); + break; + case RandomX::LightInstructionType::IXOR_C7: + emit(REX_XOR_RI); + 
emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); + break; + case RandomX::LightInstructionType::IADD_C8: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); + emit(NOP1); + break; + case RandomX::LightInstructionType::IXOR_C8: + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); + emit(NOP1); + break; + case RandomX::LightInstructionType::IADD_C9: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); + emit(NOP2); + break; + case RandomX::LightInstructionType::IXOR_C9: + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); + emit(NOP2); + break; + case RandomX::LightInstructionType::IMULH_R: + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe0 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + break; + case RandomX::LightInstructionType::ISMULH_R: + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe8 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + break; + case RandomX::LightInstructionType::IMUL_RCP: + emit(MOV_RAX_I); + emit64(reciprocal(instr.getImm32())); + emit(REX_IMUL_RM); + emitByte(0xc0 + 8 * instr.dst); + break; + default: + UNREACHABLE; + } + } + + template void JitCompilerX86::generateCode(Instruction& instr, int i); + void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { emit(REX_MOV_RR); emitByte((rax ? 
0xc0 : 0xc8) + instr.src); diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index 16fe26d..9240cfe 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -39,6 +39,7 @@ namespace RandomX { JitCompilerX86(); ~JitCompilerX86(); void generateProgram(Program&); + template void generateProgramLight(Program&); template void generateSuperScalarHash(LightProgram (&programs)[N]); @@ -66,7 +67,7 @@ namespace RandomX { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr, i); + generateCode

(instr, i); } } @@ -81,6 +82,8 @@ namespace RandomX { void genSIB(int scale, int index, int base); void handleCondition(Instruction&, int); + + template void generateCode(Instruction&, int); void emitByte(uint8_t val) { diff --git a/src/LightProgramGenerator.cpp b/src/LightProgramGenerator.cpp index d5ebadf..eeb09de 100644 --- a/src/LightProgramGenerator.cpp +++ b/src/LightProgramGenerator.cpp @@ -29,23 +29,6 @@ along with RandomX. If not, see. #include "LightProgramGenerator.hpp" namespace RandomX { - // Intel Ivy Bridge reference - namespace LightInstructionType { //uOPs (decode) execution ports latency code size - constexpr int IADD_RS = 0; //1 p01 1 4 - constexpr int ISUB_R = 1; //1 p015 1 3 - constexpr int ISUB_C = 2; //1 p015 3 7 - constexpr int IMUL_R = 3; //1 p1 3 4 - constexpr int IMUL_C = 4; //1 p1 3 7 - constexpr int IMULH_R = 5; //1+2+1 0+(p1,p5)+0 3 3+3+3 - constexpr int ISMULH_R = 6; //1+2+1 0+(p1,p5)+0 3 3+3+3 - constexpr int IMUL_RCP = 7; //1+1 p015+p1 4 10+4 - constexpr int IXOR_R = 8; //1 p015 1 3 - constexpr int IXOR_C = 9; //1 p015 1 7 - constexpr int IROR_R = 10; //1+2 0+(p0,p5) 1 3+3 - constexpr int IROR_C = 11; //1 p05 1 4 - constexpr int COND_R = 12; //1+1+1+1+1+1 p015+p5+0+p015+p05+p015 3 7+13+3+7+3+3 - constexpr int COUNT = 13; - } namespace LightInstructionOpcode { constexpr int IADD_RS = 0; @@ -62,7 +45,7 @@ namespace RandomX { } static bool isMul(int type) { - return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMUL_C || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP; + return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP; } const int lightInstructionOpcode[] = { @@ -289,19 +272,20 @@ namespace RandomX { int getSrcOp() const { return srcOp_; } - static const LightInstructionInfo IADD_RS; static const LightInstructionInfo 
ISUB_R; - static const LightInstructionInfo ISUB_C; + static const LightInstructionInfo IXOR_R; + static const LightInstructionInfo IADD_RS; static const LightInstructionInfo IMUL_R; - static const LightInstructionInfo IMUL_C; + static const LightInstructionInfo IROR_C; + static const LightInstructionInfo IADD_C7; + static const LightInstructionInfo IXOR_C7; + static const LightInstructionInfo IADD_C8; + static const LightInstructionInfo IXOR_C8; + static const LightInstructionInfo IADD_C9; + static const LightInstructionInfo IXOR_C9; static const LightInstructionInfo IMULH_R; static const LightInstructionInfo ISMULH_R; static const LightInstructionInfo IMUL_RCP; - static const LightInstructionInfo IXOR_R; - static const LightInstructionInfo IXOR_C; - static const LightInstructionInfo IROR_R; - static const LightInstructionInfo IROR_C; - static const LightInstructionInfo COND_R; static const LightInstructionInfo NOP; private: const char* name_; @@ -316,28 +300,31 @@ namespace RandomX { : name_(name), type_(-1), latency_(0) {} }; - const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0); const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", LightInstructionType::ISUB_R, MacroOp::Sub_rr, 0); - const LightInstructionInfo LightInstructionInfo::ISUB_C = LightInstructionInfo("ISUB_C", LightInstructionType::ISUB_C, MacroOp::Sub_ri, -1); + const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0); + const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0); const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", LightInstructionType::IMUL_R, MacroOp::Imul_rr, 0); - const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", 
LightInstructionType::IMUL_C, MacroOp::Imul_rri, -1); + const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1); + + const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", LightInstructionType::IADD_C7, MacroOp::Add_ri, -1); + const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", LightInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); + const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", LightInstructionType::IADD_C8, MacroOp::Add_ri, -1); + const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", LightInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); + const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", LightInstructionType::IADD_C9, MacroOp::Add_ri, -1); + const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", LightInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); + const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", LightInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", LightInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", LightInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); - const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0); - const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", LightInstructionType::IXOR_C, MacroOp::Xor_ri, -1); - const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", LightInstructionType::IROR_R, IROR_R_ops_array, 1, 1, 0); - const 
LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1); - const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", LightInstructionType::COND_R, COND_R_ops_array, 5, 5, 3); + const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP"); - const int buffer0[] = { 3, 3, 10 }; + const int buffer0[] = { 4, 8, 4 }; const int buffer1[] = { 7, 3, 3, 3 }; - const int buffer2[] = { 3, 3, 3, 7 }; + const int buffer2[] = { 3, 7, 3, 3 }; + const int buffer3[] = { 4, 9, 3 }; const int buffer4[] = { 4, 4, 4, 4 }; - const int buffer5[] = { 3, 7, 3, 3 }; - const int buffer6[] = { 3, 3, 7, 3 }; - const int buffer7[] = { 13, 3 }; + const int buffer5[] = { 3, 3, 10 }; class DecoderBuffer { public: @@ -360,16 +347,10 @@ namespace RandomX { const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const { if (instrType == LightInstructionType::IMULH_R || instrType == LightInstructionType::ISMULH_R) return &decodeBuffer3310; //2-1-1 decode - if (mulCount < cycle) - return &decodeBuffer4444_mul; - if (index_ == 0) { - return &decodeBuffer4444; //IMUL_RCP end - } - /*if (index_ == 2) { - return &decodeBuffer133; //COND_R middle - }*/ - if (index_ == 7) { - return &decodeBuffer7333; //COND_R end + if (mulCount < cycle + 1) + return &decodeBuffer4444; + if (index_ == 5) { //IMUL_RCP end + return (gen.getByte() & 1) ? 
&decodeBuffer484 : &decodeBuffer493; } return fetchNextDefault(gen); } @@ -379,49 +360,40 @@ namespace RandomX { const int* counts_; int opsCount_; DecoderBuffer() : index_(-1) {} - static const DecoderBuffer decodeBuffer3310; + static const DecoderBuffer decodeBuffer484; static const DecoderBuffer decodeBuffer7333; - static const DecoderBuffer decodeBuffer3337; - static const DecoderBuffer decodeBuffer4444; - static const DecoderBuffer decodeBuffer4444_mul; static const DecoderBuffer decodeBuffer3733; - static const DecoderBuffer decodeBuffer3373; - static const DecoderBuffer decodeBuffer133; - static const DecoderBuffer* decodeBuffers[7]; + static const DecoderBuffer decodeBuffer493; + static const DecoderBuffer decodeBuffer4444; + static const DecoderBuffer decodeBuffer3310; + static const DecoderBuffer* decodeBuffers[4]; const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const { - int select; - //do { - select = gen.getByte() & 3; - //} while (select == 7); - return decodeBuffers[select]; + return decodeBuffers[gen.getByte() & 3]; } }; - const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 0, buffer0); + const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); - const DecoderBuffer DecoderBuffer::decodeBuffer3337 = DecoderBuffer("3,3,3,7", 2, buffer2); - const DecoderBuffer DecoderBuffer::decodeBuffer4444_mul = DecoderBuffer("4,4,4,4-MUL", 3, buffer4); + const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); + const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3); const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4); - - const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 5, buffer5); - const DecoderBuffer DecoderBuffer::decodeBuffer3373 = DecoderBuffer("3,3,7,3", 6, 
buffer6); - const DecoderBuffer DecoderBuffer::decodeBuffer133 = DecoderBuffer("13,3", 7, buffer7); + const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5); - const DecoderBuffer* DecoderBuffer::decodeBuffers[7] = { - &DecoderBuffer::decodeBuffer3310, - &DecoderBuffer::decodeBuffer3337, + const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = { + &DecoderBuffer::decodeBuffer484, + &DecoderBuffer::decodeBuffer7333, &DecoderBuffer::decodeBuffer3733, - &DecoderBuffer::decodeBuffer3373, + &DecoderBuffer::decodeBuffer493, }; const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R }; const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R }; - const LightInstructionInfo* slot_3C[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R, &LightInstructionInfo::IXOR_R }; const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS }; - const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C, &LightInstructionInfo::ISUB_C }; - const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R; + const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 }; + const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 }; + const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 }; const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP; static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) { @@ -443,7 +415,7 @@ namespace RandomX { class LightInstruction { public: void toInstr(Instruction& instr) { - instr.opcode = 
lightInstructionOpcode[getType()]; + instr.opcode = getType(); instr.dst = dst_; instr.src = src_ >= 0 ? src_ : dst_; instr.mod = mod_; @@ -457,28 +429,22 @@ namespace RandomX { if (isLast) { return create(slot_3L[gen.getByte() & 3], gen); } - else if (false && isFirst && fetchType == 0) { - return create(slot_3C[gen.getByte() & 3], gen); - } else { return create(slot_3[gen.getByte() & 1], gen); } case 4: - if (fetchType == 3 && !isLast) { + if (fetchType == 4 && !isLast) { return create(&LightInstructionInfo::IMUL_R, gen); } else { return create(slot_4[gen.getByte() & 1], gen); } case 7: - if (false && isLast) { - return create(slot_7L, gen); - } - if (false && isFirst) { - return create(&LightInstructionInfo::IMUL_C, gen); - } else { - return create(slot_7[gen.getByte() & 1], gen); - } + return create(slot_7[gen.getByte() & 1], gen); + case 8: + return create(slot_8[gen.getByte() & 1], gen); + case 9: + return create(slot_9[gen.getByte() & 1], gen); case 10: return create(slot_10, gen); default: @@ -490,13 +456,6 @@ namespace RandomX { LightInstruction li(info); switch (info->getType()) { - case LightInstructionType::IADD_RS: { - li.mod_ = gen.getByte(); - li.imm32_ = 0; - li.opGroup_ = LightInstructionType::IADD_RS; - li.groupParIsSource_ = true; - } break; - case LightInstructionType::ISUB_R: { li.mod_ = 0; li.imm32_ = 0; @@ -504,24 +463,51 @@ namespace RandomX { li.groupParIsSource_ = true; } break; - case LightInstructionType::ISUB_C: { + case LightInstructionType::IXOR_R: { li.mod_ = 0; - li.imm32_ = gen.getInt32(); - li.opGroup_ = LightInstructionType::ISUB_C; - li.opGroupPar_ = -1; + li.imm32_ = 0; + li.opGroup_ = LightInstructionType::IXOR_R; + li.groupParIsSource_ = true; + } break; + + case LightInstructionType::IADD_RS: { + li.mod_ = gen.getByte(); + li.imm32_ = 0; + li.opGroup_ = LightInstructionType::IADD_RS; + li.groupParIsSource_ = true; } break; case LightInstructionType::IMUL_R: { li.mod_ = 0; li.imm32_ = 0; li.opGroup_ = 
LightInstructionType::IMUL_R; - li.opGroupPar_ = gen.getInt32(); + li.opGroupPar_ = -1; //TODO } break; - case LightInstructionType::IMUL_C: { + case LightInstructionType::IROR_C: { + li.mod_ = 0; + do { + li.imm32_ = gen.getByte() & 63; + } while (li.imm32_ == 0); + li.opGroup_ = LightInstructionType::IROR_C; + li.opGroupPar_ = -1; + } break; + + case LightInstructionType::IADD_C7: + case LightInstructionType::IADD_C8: + case LightInstructionType::IADD_C9: { li.mod_ = 0; li.imm32_ = gen.getInt32(); - li.opGroup_ = LightInstructionType::IMUL_C; + li.opGroup_ = LightInstructionType::IADD_C7; + li.opGroupPar_ = -1; + } break; + + case LightInstructionType::IXOR_C7: + case LightInstructionType::IXOR_C8: + case LightInstructionType::IXOR_C9: { + li.mod_ = 0; + li.imm32_ = gen.getInt32(); + li.opGroup_ = LightInstructionType::IXOR_C7; li.opGroupPar_ = -1; } break; @@ -542,50 +528,14 @@ namespace RandomX { } break; case LightInstructionType::IMUL_RCP: { - li.mod_ = 0; - li.imm32_ = gen.getInt32(); - li.opGroup_ = LightInstructionType::IMUL_C; - li.opGroupPar_ = -1; - } break; - - case LightInstructionType::IXOR_R: { - li.mod_ = 0; - li.imm32_ = 0; - li.opGroup_ = LightInstructionType::IXOR_R; - li.groupParIsSource_ = true; - } break; - - case LightInstructionType::IXOR_C: { - li.mod_ = 0; - li.imm32_ = gen.getInt32(); - li.opGroup_ = LightInstructionType::IXOR_R; - li.opGroupPar_ = -1; - } break; - - case LightInstructionType::IROR_R: { - li.mod_ = 0; - li.imm32_ = 0; - li.opGroup_ = LightInstructionType::IROR_R; - li.opGroupPar_ = -1; - } break; - - case LightInstructionType::IROR_C: { li.mod_ = 0; do { - li.imm32_ = gen.getByte(); - } while ((li.imm32_ & 63) == 0); - li.opGroup_ = LightInstructionType::IROR_R; + li.imm32_ = gen.getInt32(); + } while ((li.imm32_ & (li.imm32_ - 1)) == 0); + li.opGroup_ = LightInstructionType::IMUL_RCP; li.opGroupPar_ = -1; } break; - case LightInstructionType::COND_R: { - li.canReuse_ = true; - li.mod_ = gen.getByte(); - li.imm32_ = 
gen.getInt32(); - li.opGroup_ = LightInstructionType::COND_R; - li.opGroupPar_ = li.imm32_; - } break; - default: break; } @@ -675,8 +625,10 @@ namespace RandomX { constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3; #ifndef _DEBUG constexpr bool TRACE = false; + constexpr bool INFO = false; #else constexpr bool TRACE = true; + constexpr bool INFO = true; #endif static int blakeCounter = 0; @@ -806,6 +758,7 @@ namespace RandomX { int codeSize = 0; int macroOpCount = 0; int cycle = 0; + int fetchCycle = 0; int depCycle = 0; int retireCycle = 0; int mopIndex = 0; @@ -816,7 +769,7 @@ namespace RandomX { constexpr int MAX_ATTEMPTS = 4; while(!portsSaturated) { - fetchLine = fetchLine->fetchNext(currentInstruction.getType(), cycle, mulCount, gen); + fetchLine = fetchLine->fetchNext(currentInstruction.getType(), fetchCycle++, mulCount, gen); if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl; mopIndex = 0; @@ -833,7 +786,6 @@ namespace RandomX { MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex); if (fetchLine->getCounts()[mopIndex] != mop.getSize()) { if (TRACE) std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine->getCounts()[mopIndex] << std::endl; - return DBL_MIN; } if (TRACE) std::cout << mop.getName() << " "; @@ -899,8 +851,8 @@ namespace RandomX { ++cycle; } - std::cout << "; ALU port utilization:" << std::endl; - std::cout << "; (* = in use, _ = idle)" << std::endl; + if(INFO) std::cout << "; ALU port utilization:" << std::endl; + if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; int portCycles = 0; /*for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { @@ -914,13 +866,13 @@ namespace RandomX { double ipc = (macroOpCount / (double)retireCycle); - std::cout << "; code size " << codeSize << " bytes" << std::endl; - std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; - std::cout << "; RandomX instructions: " << outIndex << 
std::endl; - std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; - std::cout << "; IPC = " << ipc << std::endl; - std::cout << "; Port-cycles: " << portCycles << std::endl; - std::cout << "; Multiplications: " << mulCount << std::endl; + if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl; + if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; + if (INFO) std::cout << "; RandomX instructions: " << outIndex << std::endl; + if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; + if (INFO) std::cout << "; IPC = " << ipc << std::endl; + if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl; + if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl; int asicLatency[8]; memset(asicLatency, 0, sizeof(asicLatency)); @@ -942,19 +894,21 @@ namespace RandomX { } } - std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl; + if (INFO) std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl; - std::cout << "; ASIC latency:" << std::endl; - for (int i = 0; i < 8; ++i) { - std::cout << "; r" << i << " = " << asicLatency[i] << std::endl; - } - std::cout << "; CPU latency:" << std::endl; - for (int i = 0; i < 8; ++i) { - std::cout << "; r" << i << " = " << registers[i].latency << std::endl; + if (INFO) { + std::cout << "; ASIC latency:" << std::endl; + for (int i = 0; i < 8; ++i) { + std::cout << "; r" << i << " = " << asicLatency[i] << std::endl; + } + if (INFO) std::cout << "; CPU latency:" << std::endl; + for (int i = 0; i < 8; ++i) { + std::cout << "; r" << i << " = " << registers[i].latency << std::endl; + } } prog.setSize(outIndex); prog.setAddressRegister(addressReg); - return addressReg; + return outIndex; } } \ No newline at end of file diff --git a/src/LightProgramGenerator.hpp b/src/LightProgramGenerator.hpp index e7b1bda..d920dd0 100644 --- a/src/LightProgramGenerator.hpp +++ b/src/LightProgramGenerator.hpp @@ -21,6 +21,27 @@ 
along with RandomX. If not, see<http://www.gnu.org/licenses/>. namespace RandomX { + // Intel Ivy Bridge reference + namespace LightInstructionType { //uOPs (decode) execution ports latency code size + constexpr int ISUB_R = 0; //1 p015 1 3 + constexpr int IXOR_R = 1; //1 p015 1 3 + constexpr int IADD_RS = 2; //1 p01 1 4 + constexpr int IMUL_R = 3; //1 p1 3 4 + constexpr int IROR_C = 4; //1 p05 1 4 + constexpr int IADD_C7 = 5; //1 p015 1 7 + constexpr int IXOR_C7 = 6; //1 p015 1 7 + constexpr int IADD_C8 = 7; //1+0 p015 1 8 + constexpr int IXOR_C8 = 8; //1+0 p015 1 8 + constexpr int IADD_C9 = 9; //1+0 p015 1 9 + constexpr int IXOR_C9 = 10; //1+0 p015 1 9 + constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 + constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 + constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 + + constexpr int COUNT = 14; + constexpr int INVALID = -1; + } + class Blake2Generator { public: Blake2Generator(const void* seed, int nonce); diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp index 00a14de..1edacdb 100644 --- a/src/VirtualMachine.hpp +++ b/src/VirtualMachine.hpp @@ -24,13 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>. 
namespace RandomX { - - class VirtualMachine { public: VirtualMachine(); virtual ~VirtualMachine() {} - virtual void setDataset(dataset_t ds, uint64_t size) = 0; + virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; void setScratchpad(void* ptr) { scratchpad = (uint8_t*)ptr; } diff --git a/src/main.cpp b/src/main.cpp index d5e4657..9410881 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -205,7 +205,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi } int main(int argc, char** argv) { - bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genLight, useSuperscalar; + bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genSuperscalar, useSuperscalar; int programCount, threadCount, initThreadCount, epoch; readOption("--softAes", argc, argv, softAes); @@ -220,15 +220,15 @@ int main(int argc, char** argv) { readOption("--jit", argc, argv, jit); readOption("--genNative", argc, argv, genNative); readOption("--help", argc, argv, help); - readOption("--genLight", argc, argv, genLight); + readOption("--genSuperscalar", argc, argv, genSuperscalar); readOption("--useSuperscalar", argc, argv, useSuperscalar); - if (genLight) { + if (genSuperscalar) { RandomX::LightProgram p; RandomX::Blake2Generator gen(seed, programCount); RandomX::generateLightProg2(p, gen); RandomX::AssemblyGeneratorX86 asmX86; - asmX86.generateProgram(p); + asmX86.generateAsm(p); //std::ofstream file("lightProg2.asm"); asmX86.printCode(std::cout); return 0; @@ -266,6 +266,7 @@ int main(int argc, char** argv) { const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch); dataset.cache.size = cacheSize; + RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; std::cout << "RandomX - " << (miningMode ? 
"mining" : "verification") << " mode" << std::endl; @@ -282,6 +283,12 @@ int main(int argc, char** argv) { outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i)); std::cout << std::endl; } + if (useSuperscalar) { + RandomX::Blake2Generator gen(seed, programCount); + for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { + RandomX::generateLightProg2(programs[i], gen); + } + } if (!miningMode) { std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; } @@ -291,11 +298,6 @@ int main(int argc, char** argv) { RandomX::datasetAlloc(dataset, largePages); const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize; if (useSuperscalar) { - RandomX::Blake2Generator gen(seed, programCount); - RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; - for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - RandomX::generateLightProg2(programs[i], gen); - } RandomX::JitCompilerX86 jit86; jit86.generateSuperScalarHash(programs); jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount); @@ -320,7 +322,6 @@ int main(int argc, char** argv) { threads.clear(); std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; } - return 0; std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl; for (int i = 0; i < threadCount; ++i) { RandomX::VirtualMachine* vm; @@ -328,12 +329,14 @@ int main(int argc, char** argv) { vm = new RandomX::CompiledVirtualMachine(); } else { - if (jit) - vm = new RandomX::CompiledLightVirtualMachine(); + if (jit && useSuperscalar) + vm = new RandomX::CompiledLightVirtualMachine<true>(); + else if(jit) + vm = new RandomX::CompiledLightVirtualMachine<false>(); else vm = new RandomX::InterpretedVirtualMachine(softAes); } - vm->setDataset(dataset, datasetSize); + vm->setDataset(dataset, datasetSize, programs); vms.push_back(vm); } uint8_t* scratchpadMem;