mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-21 23:38:54 +00:00
Reworked SuperscalarHash instruction set
ASM and C code generator for SuperscalarHash. Support for SuperscalarHash in the light mode.
This commit is contained in:
parent
6e3136b37f
commit
b4c02051fa
@ -23,6 +23,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "common.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "Program.hpp"
|
||||
#include "./LightProgramGenerator.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
@ -46,6 +47,179 @@ namespace RandomX {
|
||||
static const char* regDatasetAddr = "rdi";
|
||||
static const char* regScratchpadAddr = "rsi";
|
||||
|
||||
// Rebuilds the textual listing for `prog` inside asmCode. Each instruction is
// preceded by a "randomx_isn_<index>:" label and then handed to generateCode().
// Note that the src/dst fields of every instruction in `prog` are rewritten
// in place (reduced modulo RegistersCount).
void AssemblyGeneratorX86::generateProgram(Program& prog) {
	// Put all eight usage markers back to -1 before generating anything.
	unsigned reg = 0;
	while (reg < 8) {
		registerUsage[reg] = -1;
		++reg;
	}

	// Throw away whatever listing was produced by a previous call.
	asmCode.str(std::string());

	for (unsigned pc = 0; pc < prog.getSize(); ++pc) {
		asmCode << "randomx_isn_" << pc << ":" << std::endl;
		Instruction& current = prog(pc);
		// Fold both register operands into the valid index range.
		current.dst %= RegistersCount;
		current.src %= RegistersCount;
		generateCode(current, pc);
	}
}
|
||||
|
||||
// Writes an assembly listing of the SuperscalarHash program `prog` into
// asmCode, replacing any previous contents. The listing starts with a single
// "ALIGN 16" directive followed by the code for each instruction in order.
//
// Register operands are reduced modulo RegistersCount before they index regR
// (the same reduction generateProgram and the JIT apply), but unlike those
// paths the instruction objects in `prog` are not modified here.
void AssemblyGeneratorX86::generateAsm(LightProgram& prog) {
	asmCode.str(std::string()); //clear
	asmCode << "ALIGN 16" << std::endl;
	for (unsigned i = 0; i < prog.getSize(); ++i) {
		Instruction& instr = prog(i);
		const auto& dst = regR[instr.dst % RegistersCount];
		const auto& src = regR[instr.src % RegistersCount];
		switch (instr.opcode)
		{
		case RandomX::LightInstructionType::ISUB_R:
			asmCode << "sub " << dst << ", " << src << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_R:
			asmCode << "xor " << dst << ", " << src << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_RS:
			// dst += src << (mod % 4), expressed as a scaled-index lea
			asmCode << "lea " << dst << ", [" << dst << "+" << src << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
			break;
		case RandomX::LightInstructionType::IMUL_R:
			asmCode << "imul " << dst << ", " << src << std::endl;
			break;
		case RandomX::LightInstructionType::IROR_C:
			// Only the low 6 bits of the count matter for a 64-bit rotate. Mask them
			// here so the listing shows the same count byte the JIT encodes and the
			// immediate always fits the instruction's 8-bit field.
			asmCode << "ror " << dst << ", " << (instr.getImm32() & 63) << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_C7:
			asmCode << "add " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_C7:
			asmCode << "xor " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_C8:
			// the *_C8 variants are the *_C7 forms followed by a one-byte nop
			asmCode << "add " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			asmCode << "nop" << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_C8:
			asmCode << "xor " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			asmCode << "nop" << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_C9:
			// the *_C9 variants are the *_C7 forms followed by a two-byte nop
			asmCode << "add " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			asmCode << "xchg ax, ax ;nop" << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_C9:
			asmCode << "xor " << dst << ", " << (int32_t)instr.getImm32() << std::endl;
			asmCode << "xchg ax, ax ;nop" << std::endl;
			break;
		case RandomX::LightInstructionType::IMULH_R:
			// unsigned high half: rdx:rax = rax * src, keep rdx
			asmCode << "mov rax, " << dst << std::endl;
			asmCode << "mul " << src << std::endl;
			asmCode << "mov " << dst << ", rdx" << std::endl;
			break;
		case RandomX::LightInstructionType::ISMULH_R:
			// signed high half: rdx:rax = rax * src, keep rdx
			asmCode << "mov rax, " << dst << std::endl;
			asmCode << "imul " << src << std::endl;
			asmCode << "mov " << dst << ", rdx" << std::endl;
			break;
		case RandomX::LightInstructionType::IMUL_RCP:
			// multiply by the precomputed reciprocal of the immediate, loaded through rax
			asmCode << "mov rax, " << (int64_t)reciprocal(instr.getImm32()) << std::endl;
			asmCode << "imul " << dst << ", rax" << std::endl;
			break;
		default:
			UNREACHABLE;
		}
	}
}
|
||||
|
||||
// Writes a self-contained C translation of the SuperscalarHash program `prog`
// into asmCode, replacing any previous contents.
//
// The emitted text consists of:
//   1. helper definitions (mulh / smulh / rotr) selected by preprocessor checks
//      for __int128 support and for MSVC intrinsics, with an #error if any
//      helper is still missing;
//   2. a function `void superScalar(uint64_t r[8])` that loads r[0..7] into
//      locals named r8..r15, runs one C statement per instruction, and stores
//      the locals back.
//
// Register operands are reduced modulo RegistersCount before they index regR;
// the instruction objects in `prog` are not modified.
void AssemblyGeneratorX86::generateC(LightProgram& prog) {
	asmCode.str(std::string()); //clear
	asmCode << "#include <stdint.h>" << std::endl;
	asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
	asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
	asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
	asmCode << " return ((__int128)a * b) >> 64;" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " #define HAVE_MULH" << std::endl;
	asmCode << " #define HAVE_SMULH" << std::endl;
	asmCode << "#endif" << std::endl;
	asmCode << "#if defined(_MSC_VER)" << std::endl;
	asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl;
	asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
	asmCode << " #include <intrin.h>" << std::endl;
	asmCode << " #include <stdlib.h>" << std::endl;
	asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
	asmCode << " return _rotr64(x, c);" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " #define HAVE_ROTR" << std::endl;
	asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
	asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
	asmCode << " return __umulh(a, b);" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " #define HAVE_MULH" << std::endl;
	asmCode << " #endif" << std::endl;
	asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
	asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
	asmCode << " int64_t hi;" << std::endl;
	asmCode << " _mul128(a, b, &hi);" << std::endl;
	asmCode << " return hi;" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " #define HAVE_SMULH" << std::endl;
	asmCode << " #endif" << std::endl;
	asmCode << "#endif" << std::endl;
	asmCode << "#ifndef HAVE_ROTR" << std::endl;
	asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
	// The left-shift amount is masked so that b == 0 shifts by 0 rather than by 64,
	// which would be undefined behaviour for a 64-bit operand in C.
	asmCode << " return (a >> b) | (a << ((64 - b) & 63));" << std::endl;
	asmCode << " }" << std::endl;
	asmCode << " #define HAVE_ROTR" << std::endl;
	asmCode << "#endif" << std::endl;
	asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
	asmCode << " #error \"Required functions are not defined\"" << std::endl;
	asmCode << "#endif" << std::endl;
	asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
	asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
	for (unsigned i = 0; i < prog.getSize(); ++i) {
		Instruction& instr = prog(i);
		// regR supplies the names of the C locals declared above.
		const auto& dst = regR[instr.dst % RegistersCount];
		const auto& src = regR[instr.src % RegistersCount];
		switch (instr.opcode)
		{
		case RandomX::LightInstructionType::ISUB_R:
			asmCode << dst << " -= " << src << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_R:
			asmCode << dst << " ^= " << src << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_RS:
			asmCode << dst << " += " << src << "*" << (1 << (instr.mod % 4)) << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IMUL_R:
			asmCode << dst << " *= " << src << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IROR_C:
			// Emit the count reduced to 0..63: this is the value the JIT encodes, and
			// it keeps the portable rotr() helper within defined shift ranges.
			asmCode << dst << " = rotr(" << dst << ", " << (instr.getImm32() & 63) << ");" << std::endl;
			break;
		case RandomX::LightInstructionType::IADD_C7:
		case RandomX::LightInstructionType::IADD_C8:
		case RandomX::LightInstructionType::IADD_C9:
			// the three variants emit the same C statement
			asmCode << dst << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IXOR_C7:
		case RandomX::LightInstructionType::IXOR_C8:
		case RandomX::LightInstructionType::IXOR_C9:
			asmCode << dst << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
			break;
		case RandomX::LightInstructionType::IMULH_R:
			asmCode << dst << " = mulh(" << dst << ", " << src << ");" << std::endl;
			break;
		case RandomX::LightInstructionType::ISMULH_R:
			asmCode << dst << " = smulh(" << dst << ", " << src << ");" << std::endl;
			break;
		case RandomX::LightInstructionType::IMUL_RCP:
			asmCode << dst << " *= " << (int64_t)reciprocal(instr.getImm32()) << ";" << std::endl;
			break;
		default:
			UNREACHABLE;
		}
	}
	asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
	asmCode << "}" << std::endl;
}
|
||||
|
||||
int AssemblyGeneratorX86::getConditionRegister() {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
|
@ -27,27 +27,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
namespace RandomX {
|
||||
|
||||
class Program;
|
||||
class LightProgram;
|
||||
class AssemblyGeneratorX86;
|
||||
|
||||
typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
|
||||
|
||||
class AssemblyGeneratorX86 {
|
||||
public:
|
||||
template<class P>
|
||||
void generateProgram(P& prog) {
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
asmCode.str(std::string()); //clear
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
asmCode << "randomx_isn_" << i << ":" << std::endl;
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
//asmCode << std::endl;
|
||||
}
|
||||
}
|
||||
void generateProgram(Program& prog);
|
||||
void generateAsm(LightProgram& prog);
|
||||
void generateC(LightProgram& prog);
|
||||
// Streams the contents of asmCode's underlying buffer into `os`.
void printCode(std::ostream& os) {
|
||||
os << asmCode.rdbuf();
|
||||
}
|
||||
|
@ -23,18 +23,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
// Out-of-line default constructor; its body is empty.
CompiledLightVirtualMachine::CompiledLightVirtualMachine() {
|
||||
}
|
||||
|
||||
void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
|
||||
template<bool superscalar>
|
||||
void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
if(superscalar)
|
||||
compiler.generateSuperScalarHash(programs);
|
||||
//datasetBasePtr = ds.dataset.memory;
|
||||
}
|
||||
|
||||
void CompiledLightVirtualMachine::initialize() {
|
||||
template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
|
||||
template<bool superscalar>
|
||||
void CompiledLightVirtualMachine<superscalar>::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
compiler.generateProgramLight(program);
|
||||
compiler.generateProgramLight<superscalar>(program);
|
||||
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
template void CompiledLightVirtualMachine<true>::initialize();
|
||||
template void CompiledLightVirtualMachine<false>::initialize();
|
||||
}
|
@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
template<bool superscalar>
|
||||
class CompiledLightVirtualMachine : public CompiledVirtualMachine {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
@ -37,8 +38,8 @@ namespace RandomX {
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
CompiledLightVirtualMachine();
|
||||
void setDataset(dataset_t ds, uint64_t size) override;
|
||||
CompiledLightVirtualMachine() {}
|
||||
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void initialize() override;
|
||||
};
|
||||
}
|
@ -29,7 +29,7 @@ namespace RandomX {
|
||||
// Default constructor; its body is empty.
CompiledVirtualMachine::CompiledVirtualMachine() {
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
datasetBasePtr = ds.dataset.memory;
|
||||
|
@ -42,7 +42,7 @@ namespace RandomX {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
CompiledVirtualMachine();
|
||||
void setDataset(dataset_t ds, uint64_t size) override;
|
||||
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void initialize() override;
|
||||
virtual void execute() override;
|
||||
void* getProgram() {
|
||||
|
@ -49,7 +49,7 @@ namespace RandomX {
|
||||
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
|
||||
void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetReadLight;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
|
@ -75,7 +75,7 @@ namespace RandomX {
|
||||
}
|
||||
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
|
||||
~InterpretedVirtualMachine();
|
||||
void setDataset(dataset_t ds, uint64_t size) override;
|
||||
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void initialize() override;
|
||||
void execute() override;
|
||||
private:
|
||||
|
@ -25,6 +25,8 @@ PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_light
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_program_read_dataset_light_sub
|
||||
PUBLIC randomx_dataset_init
|
||||
PUBLIC randomx_program_loop_store
|
||||
@ -65,6 +67,38 @@ randomx_program_read_dataset_light PROC
|
||||
include asm/program_read_dataset_light.inc
|
||||
randomx_program_read_dataset_light ENDP
|
||||
|
||||
;# Code fragment that prepares a SuperscalarHash dataset read.
;# It reserves 72 bytes of stack (9 slots of 8 bytes), saves rbx and r8-r15 there,
;# updates rbp and leaves the dataset block number in ebx.
;# The fragment has no RET; the trailing "call" comment marks where a call is
;# expected to follow. NOTE(review): the JIT compiler appears to copy this fragment
;# and append the CALL instruction itself - confirm against generateProgramLight.
randomx_program_read_dataset_sshash_init PROC
|
||||
sub rsp, 72
|
||||
;# save rbx and r8-r15 (the matching "fin" fragment reads these slots back)
mov qword ptr [rsp+64], rbx
|
||||
mov qword ptr [rsp+56], r8
|
||||
mov qword ptr [rsp+48], r9
|
||||
mov qword ptr [rsp+40], r10
|
||||
mov qword ptr [rsp+32], r11
|
||||
mov qword ptr [rsp+24], r12
|
||||
mov qword ptr [rsp+16], r13
|
||||
mov qword ptr [rsp+8], r14
|
||||
mov qword ptr [rsp+0], r15
|
||||
xor rbp, rax ;# modify "mx"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov ebx, ebp ;# ebx = ma
|
||||
;# 2147483584 = 7FFFFFC0h: clears the low 6 bits and the top bit
and ebx, 2147483584 ;# align "ma" to the start of a cache line
|
||||
shr ebx, 6 ;# ebx = Dataset block number
|
||||
;# call 32768
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
|
||||
;# Code fragment that finishes a SuperscalarHash dataset read.
;# It reloads rbx from the stack, XORs the eight values stored at [rsp+0..56]
;# into the current r15..r8, and releases the 72-byte stack area.
;# The slot layout matches the one written by randomx_program_read_dataset_sshash_init.
;# The fragment has no RET.
randomx_program_read_dataset_sshash_fin PROC
|
||||
mov rbx, qword ptr [rsp+64]
|
||||
;# combine the values saved before the call with the current register contents
xor r8, qword ptr [rsp+56]
|
||||
xor r9, qword ptr [rsp+48]
|
||||
xor r10, qword ptr [rsp+40]
|
||||
xor r11, qword ptr [rsp+32]
|
||||
xor r12, qword ptr [rsp+24]
|
||||
xor r13, qword ptr [rsp+16]
|
||||
xor r14, qword ptr [rsp+8]
|
||||
xor r15, qword ptr [rsp+0]
|
||||
add rsp, 72
|
||||
randomx_program_read_dataset_sshash_fin ENDP
|
||||
|
||||
randomx_program_loop_store PROC
|
||||
include asm/program_loop_store.inc
|
||||
randomx_program_loop_store ENDP
|
||||
|
@ -24,6 +24,8 @@ extern "C" {
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_light();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_program_read_dataset_light_sub();
|
||||
|
@ -87,6 +87,7 @@ namespace RandomX {
|
||||
*/
|
||||
|
||||
#include "JitCompilerX86-static.hpp"
|
||||
#include "LightProgramGenerator.hpp"
|
||||
|
||||
#define NOP_TEST true
|
||||
|
||||
@ -96,6 +97,8 @@ namespace RandomX {
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
|
||||
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||
@ -110,7 +113,9 @@ namespace RandomX {
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
|
||||
const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight;
|
||||
const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight;
|
||||
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
||||
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
const int32_t readDatasetLightSubSize = codeDatasetInit - codeReadDatasetLightSub;
|
||||
const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
|
||||
@ -199,7 +204,7 @@ namespace RandomX {
|
||||
|
||||
static const uint8_t NOP1[] = { 0x90 };
|
||||
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
||||
static const uint8_t NOP3[] = { 0x0F, 0x1F, 0x00 };
|
||||
static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 };
|
||||
static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
|
||||
static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
@ -230,19 +235,31 @@ namespace RandomX {
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void JitCompilerX86::generateProgramLight(Program& prog) {
|
||||
if (RANDOMX_CACHE_ACCESSES != 8)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
|
||||
if (RANDOMX_ARGON_GROWTH != 0)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
|
||||
generateProgramPrologue(prog);
|
||||
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
|
||||
codePos += readDatasetLightSize;
|
||||
emitByte(CALL);
|
||||
emit32(readDatasetLightSubOffset - (codePos + 4));
|
||||
if (superscalar) {
|
||||
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
|
||||
emitByte(CALL);
|
||||
emit32(superScalarHashOffset - (codePos + 4));
|
||||
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
||||
}
|
||||
else {
|
||||
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
|
||||
codePos += readDatasetLightSize;
|
||||
emitByte(CALL);
|
||||
emit32(readDatasetLightSubOffset - (codePos + 4));
|
||||
}
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateProgramLight<true>(Program& prog);
|
||||
template void JitCompilerX86::generateProgramLight<false>(Program& prog);
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) {
|
||||
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
@ -253,7 +270,7 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
generateCode<LightProgram>(instr, i);
|
||||
}
|
||||
emit(codeShhLoad, codeSshLoadSize);
|
||||
if (j < N - 1) {
|
||||
@ -318,6 +335,7 @@ namespace RandomX {
|
||||
emit32(epilogueOffset - codePos - 4);
|
||||
}
|
||||
|
||||
template<class P>
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.push_back(codePos);
|
||||
@ -326,6 +344,95 @@ namespace RandomX {
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
template<>
|
||||
void JitCompilerX86::generateCode<LightProgram>(Instruction& instr, int i) {
|
||||
switch (instr.opcode)
|
||||
{
|
||||
case RandomX::LightInstructionType::ISUB_R:
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IXOR_R:
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IADD_RS:
|
||||
emit(REX_LEA);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.mod % 4, instr.src, instr.dst);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IMUL_R:
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IROR_C:
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IADD_C7:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case RandomX::LightInstructionType::IXOR_C7:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case RandomX::LightInstructionType::IADD_C8:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP1);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IXOR_C8:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP1);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IADD_C9:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP2);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IXOR_C9:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP2);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe0 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case RandomX::LightInstructionType::ISMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe8 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case RandomX::LightInstructionType::IMUL_RCP:
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocal(instr.getImm32()));
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i);
|
||||
|
||||
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte((rax ? 0xc0 : 0xc8) + instr.src);
|
||||
|
@ -39,6 +39,7 @@ namespace RandomX {
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&);
|
||||
template<bool superscalar>
|
||||
void generateProgramLight(Program&);
|
||||
template<size_t N>
|
||||
void generateSuperScalarHash(LightProgram (&programs)[N]);
|
||||
@ -66,7 +67,7 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
generateCode<P>(instr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -81,6 +82,8 @@ namespace RandomX {
|
||||
void genSIB(int scale, int index, int base);
|
||||
|
||||
void handleCondition(Instruction&, int);
|
||||
|
||||
template<class P>
|
||||
void generateCode(Instruction&, int);
|
||||
|
||||
void emitByte(uint8_t val) {
|
||||
|
@ -29,23 +29,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "LightProgramGenerator.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
// Intel Ivy Bridge reference
|
||||
// Integer identifiers for the light-program instruction types, numbered
// consecutively from 0; COUNT equals the number of identifiers before it.
// The trailing comment on each line lists, in the column order given on the
// namespace line: uOPs (decode), execution ports, latency, code size.
namespace LightInstructionType { //uOPs (decode) execution ports latency code size
|
||||
constexpr int IADD_RS = 0; //1 p01 1 4
|
||||
constexpr int ISUB_R = 1; //1 p015 1 3
|
||||
constexpr int ISUB_C = 2; //1 p015 3 7
|
||||
constexpr int IMUL_R = 3; //1 p1 3 4
|
||||
constexpr int IMUL_C = 4; //1 p1 3 7
|
||||
constexpr int IMULH_R = 5; //1+2+1 0+(p1,p5)+0 3 3+3+3
|
||||
constexpr int ISMULH_R = 6; //1+2+1 0+(p1,p5)+0 3 3+3+3
|
||||
constexpr int IMUL_RCP = 7; //1+1 p015+p1 4 10+4
|
||||
constexpr int IXOR_R = 8; //1 p015 1 3
|
||||
constexpr int IXOR_C = 9; //1 p015 1 7
|
||||
constexpr int IROR_R = 10; //1+2 0+(p0,p5) 1 3+3
|
||||
constexpr int IROR_C = 11; //1 p05 1 4
|
||||
constexpr int COND_R = 12; //1+1+1+1+1+1 p015+p5+0+p015+p05+p015 3 7+13+3+7+3+3
|
||||
constexpr int COUNT = 13;
|
||||
}
|
||||
|
||||
namespace LightInstructionOpcode {
|
||||
constexpr int IADD_RS = 0;
|
||||
@ -62,7 +45,7 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
static bool isMul(int type) {
|
||||
return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMUL_C || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP;
|
||||
return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP;
|
||||
}
|
||||
|
||||
const int lightInstructionOpcode[] = {
|
||||
@ -289,19 +272,20 @@ namespace RandomX {
|
||||
// Returns the stored srcOp_ value.
int getSrcOp() const {
|
||||
return srcOp_;
|
||||
}
|
||||
static const LightInstructionInfo IADD_RS;
|
||||
static const LightInstructionInfo ISUB_R;
|
||||
static const LightInstructionInfo ISUB_C;
|
||||
static const LightInstructionInfo IXOR_R;
|
||||
static const LightInstructionInfo IADD_RS;
|
||||
static const LightInstructionInfo IMUL_R;
|
||||
static const LightInstructionInfo IMUL_C;
|
||||
static const LightInstructionInfo IROR_C;
|
||||
static const LightInstructionInfo IADD_C7;
|
||||
static const LightInstructionInfo IXOR_C7;
|
||||
static const LightInstructionInfo IADD_C8;
|
||||
static const LightInstructionInfo IXOR_C8;
|
||||
static const LightInstructionInfo IADD_C9;
|
||||
static const LightInstructionInfo IXOR_C9;
|
||||
static const LightInstructionInfo IMULH_R;
|
||||
static const LightInstructionInfo ISMULH_R;
|
||||
static const LightInstructionInfo IMUL_RCP;
|
||||
static const LightInstructionInfo IXOR_R;
|
||||
static const LightInstructionInfo IXOR_C;
|
||||
static const LightInstructionInfo IROR_R;
|
||||
static const LightInstructionInfo IROR_C;
|
||||
static const LightInstructionInfo COND_R;
|
||||
static const LightInstructionInfo NOP;
|
||||
private:
|
||||
const char* name_;
|
||||
@ -316,28 +300,31 @@ namespace RandomX {
|
||||
: name_(name), type_(-1), latency_(0) {}
|
||||
};
|
||||
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", LightInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::ISUB_C = LightInstructionInfo("ISUB_C", LightInstructionType::ISUB_C, MacroOp::Sub_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", LightInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", LightInstructionType::IMUL_C, MacroOp::Imul_rri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1);
|
||||
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", LightInstructionType::IADD_C7, MacroOp::Add_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", LightInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", LightInstructionType::IADD_C8, MacroOp::Add_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", LightInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", LightInstructionType::IADD_C9, MacroOp::Add_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", LightInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
|
||||
|
||||
const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", LightInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
|
||||
const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", LightInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", LightInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", LightInstructionType::IXOR_C, MacroOp::Xor_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", LightInstructionType::IROR_R, IROR_R_ops_array, 1, 1, 0);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1);
|
||||
const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", LightInstructionType::COND_R, COND_R_ops_array, 5, 5, 3);
|
||||
|
||||
const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
|
||||
|
||||
const int buffer0[] = { 3, 3, 10 };
|
||||
const int buffer0[] = { 4, 8, 4 };
|
||||
const int buffer1[] = { 7, 3, 3, 3 };
|
||||
const int buffer2[] = { 3, 3, 3, 7 };
|
||||
const int buffer2[] = { 3, 7, 3, 3 };
|
||||
const int buffer3[] = { 4, 9, 3 };
|
||||
const int buffer4[] = { 4, 4, 4, 4 };
|
||||
const int buffer5[] = { 3, 7, 3, 3 };
|
||||
const int buffer6[] = { 3, 3, 7, 3 };
|
||||
const int buffer7[] = { 13, 3 };
|
||||
const int buffer5[] = { 3, 3, 10 };
|
||||
|
||||
class DecoderBuffer {
|
||||
public:
|
||||
@@ -360,16 +347,10 @@ namespace RandomX {
|
||||
const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const {
|
||||
if (instrType == LightInstructionType::IMULH_R || instrType == LightInstructionType::ISMULH_R)
|
||||
return &decodeBuffer3310; //2-1-1 decode
|
||||
if (mulCount < cycle)
|
||||
return &decodeBuffer4444_mul;
|
||||
if (index_ == 0) {
|
||||
return &decodeBuffer4444; //IMUL_RCP end
|
||||
}
|
||||
/*if (index_ == 2) {
|
||||
return &decodeBuffer133; //COND_R middle
|
||||
}*/
|
||||
if (index_ == 7) {
|
||||
return &decodeBuffer7333; //COND_R end
|
||||
if (mulCount < cycle + 1)
|
||||
return &decodeBuffer4444;
|
||||
if (index_ == 5) { //IMUL_RCP end
|
||||
return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493;
|
||||
}
|
||||
return fetchNextDefault(gen);
|
||||
}
|
||||
@@ -379,49 +360,40 @@ namespace RandomX {
|
||||
const int* counts_;
|
||||
int opsCount_;
|
||||
DecoderBuffer() : index_(-1) {}
|
||||
static const DecoderBuffer decodeBuffer3310;
|
||||
static const DecoderBuffer decodeBuffer484;
|
||||
static const DecoderBuffer decodeBuffer7333;
|
||||
static const DecoderBuffer decodeBuffer3337;
|
||||
static const DecoderBuffer decodeBuffer4444;
|
||||
static const DecoderBuffer decodeBuffer4444_mul;
|
||||
static const DecoderBuffer decodeBuffer3733;
|
||||
static const DecoderBuffer decodeBuffer3373;
|
||||
static const DecoderBuffer decodeBuffer133;
|
||||
static const DecoderBuffer* decodeBuffers[7];
|
||||
static const DecoderBuffer decodeBuffer493;
|
||||
static const DecoderBuffer decodeBuffer4444;
|
||||
static const DecoderBuffer decodeBuffer3310;
|
||||
static const DecoderBuffer* decodeBuffers[4];
|
||||
const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
|
||||
int select;
|
||||
//do {
|
||||
select = gen.getByte() & 3;
|
||||
//} while (select == 7);
|
||||
return decodeBuffers[select];
|
||||
return decodeBuffers[gen.getByte() & 3];
|
||||
}
|
||||
};
|
||||
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 0, buffer0);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3337 = DecoderBuffer("3,3,3,7", 2, buffer2);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer4444_mul = DecoderBuffer("4,4,4,4-MUL", 3, buffer4);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4);
|
||||
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 5, buffer5);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3373 = DecoderBuffer("3,3,7,3", 6, buffer6);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer133 = DecoderBuffer("13,3", 7, buffer7);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5);
|
||||
|
||||
const DecoderBuffer* DecoderBuffer::decodeBuffers[7] = {
|
||||
&DecoderBuffer::decodeBuffer3310,
|
||||
&DecoderBuffer::decodeBuffer3337,
|
||||
const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = {
|
||||
&DecoderBuffer::decodeBuffer484,
|
||||
&DecoderBuffer::decodeBuffer7333,
|
||||
&DecoderBuffer::decodeBuffer3733,
|
||||
&DecoderBuffer::decodeBuffer3373,
|
||||
&DecoderBuffer::decodeBuffer493,
|
||||
};
|
||||
|
||||
const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
|
||||
|
||||
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
|
||||
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
|
||||
const LightInstructionInfo* slot_3C[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R, &LightInstructionInfo::IXOR_R };
|
||||
const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS };
|
||||
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C, &LightInstructionInfo::ISUB_C };
|
||||
const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R;
|
||||
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 };
|
||||
const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 };
|
||||
const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 };
|
||||
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
|
||||
|
||||
static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
|
||||
@@ -443,7 +415,7 @@ namespace RandomX {
|
||||
class LightInstruction {
|
||||
public:
|
||||
void toInstr(Instruction& instr) {
|
||||
instr.opcode = lightInstructionOpcode[getType()];
|
||||
instr.opcode = getType();
|
||||
instr.dst = dst_;
|
||||
instr.src = src_ >= 0 ? src_ : dst_;
|
||||
instr.mod = mod_;
|
||||
@@ -457,28 +429,22 @@ namespace RandomX {
|
||||
if (isLast) {
|
||||
return create(slot_3L[gen.getByte() & 3], gen);
|
||||
}
|
||||
else if (false && isFirst && fetchType == 0) {
|
||||
return create(slot_3C[gen.getByte() & 3], gen);
|
||||
}
|
||||
else {
|
||||
return create(slot_3[gen.getByte() & 1], gen);
|
||||
}
|
||||
case 4:
|
||||
if (fetchType == 3 && !isLast) {
|
||||
if (fetchType == 4 && !isLast) {
|
||||
return create(&LightInstructionInfo::IMUL_R, gen);
|
||||
}
|
||||
else {
|
||||
return create(slot_4[gen.getByte() & 1], gen);
|
||||
}
|
||||
case 7:
|
||||
if (false && isLast) {
|
||||
return create(slot_7L, gen);
|
||||
}
|
||||
if (false && isFirst) {
|
||||
return create(&LightInstructionInfo::IMUL_C, gen);
|
||||
} else {
|
||||
return create(slot_7[gen.getByte() & 1], gen);
|
||||
}
|
||||
return create(slot_7[gen.getByte() & 1], gen);
|
||||
case 8:
|
||||
return create(slot_8[gen.getByte() & 1], gen);
|
||||
case 9:
|
||||
return create(slot_9[gen.getByte() & 1], gen);
|
||||
case 10:
|
||||
return create(slot_10, gen);
|
||||
default:
|
||||
@@ -490,13 +456,6 @@ namespace RandomX {
|
||||
LightInstruction li(info);
|
||||
switch (info->getType())
|
||||
{
|
||||
case LightInstructionType::IADD_RS: {
|
||||
li.mod_ = gen.getByte();
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IADD_RS;
|
||||
li.groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::ISUB_R: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
@@ -504,24 +463,51 @@ namespace RandomX {
|
||||
li.groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::ISUB_C: {
|
||||
case LightInstructionType::IXOR_R: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::ISUB_C;
|
||||
li.opGroupPar_ = -1;
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IXOR_R;
|
||||
li.groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IADD_RS: {
|
||||
li.mod_ = gen.getByte();
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IADD_RS;
|
||||
li.groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IMUL_R: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IMUL_R;
|
||||
li.opGroupPar_ = gen.getInt32();
|
||||
li.opGroupPar_ = -1; //TODO
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IMUL_C: {
|
||||
case LightInstructionType::IROR_C: {
|
||||
li.mod_ = 0;
|
||||
do {
|
||||
li.imm32_ = gen.getByte() & 63;
|
||||
} while (li.imm32_ == 0);
|
||||
li.opGroup_ = LightInstructionType::IROR_C;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IADD_C7:
|
||||
case LightInstructionType::IADD_C8:
|
||||
case LightInstructionType::IADD_C9: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::IMUL_C;
|
||||
li.opGroup_ = LightInstructionType::IADD_C7;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IXOR_C7:
|
||||
case LightInstructionType::IXOR_C8:
|
||||
case LightInstructionType::IXOR_C9: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::IXOR_C7;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
@@ -542,50 +528,14 @@ namespace RandomX {
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IMUL_RCP: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::IMUL_C;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IXOR_R: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IXOR_R;
|
||||
li.groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IXOR_C: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::IXOR_R;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IROR_R: {
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.opGroup_ = LightInstructionType::IROR_R;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::IROR_C: {
|
||||
li.mod_ = 0;
|
||||
do {
|
||||
li.imm32_ = gen.getByte();
|
||||
} while ((li.imm32_ & 63) == 0);
|
||||
li.opGroup_ = LightInstructionType::IROR_R;
|
||||
li.imm32_ = gen.getInt32();
|
||||
} while ((li.imm32_ & (li.imm32_ - 1)) == 0);
|
||||
li.opGroup_ = LightInstructionType::IMUL_RCP;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case LightInstructionType::COND_R: {
|
||||
li.canReuse_ = true;
|
||||
li.mod_ = gen.getByte();
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.opGroup_ = LightInstructionType::COND_R;
|
||||
li.opGroupPar_ = li.imm32_;
|
||||
} break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -675,8 +625,10 @@ namespace RandomX {
|
||||
constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3;
|
||||
#ifndef _DEBUG
|
||||
constexpr bool TRACE = false;
|
||||
constexpr bool INFO = false;
|
||||
#else
|
||||
constexpr bool TRACE = true;
|
||||
constexpr bool INFO = true;
|
||||
#endif
|
||||
|
||||
static int blakeCounter = 0;
|
||||
@@ -806,6 +758,7 @@ namespace RandomX {
|
||||
int codeSize = 0;
|
||||
int macroOpCount = 0;
|
||||
int cycle = 0;
|
||||
int fetchCycle = 0;
|
||||
int depCycle = 0;
|
||||
int retireCycle = 0;
|
||||
int mopIndex = 0;
|
||||
@@ -816,7 +769,7 @@ namespace RandomX {
|
||||
constexpr int MAX_ATTEMPTS = 4;
|
||||
|
||||
while(!portsSaturated) {
|
||||
fetchLine = fetchLine->fetchNext(currentInstruction.getType(), cycle, mulCount, gen);
|
||||
fetchLine = fetchLine->fetchNext(currentInstruction.getType(), fetchCycle++, mulCount, gen);
|
||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl;
|
||||
|
||||
mopIndex = 0;
|
||||
@@ -833,7 +786,6 @@ namespace RandomX {
|
||||
MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
|
||||
if (fetchLine->getCounts()[mopIndex] != mop.getSize()) {
|
||||
if (TRACE) std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine->getCounts()[mopIndex] << std::endl;
|
||||
return DBL_MIN;
|
||||
}
|
||||
|
||||
if (TRACE) std::cout << mop.getName() << " ";
|
||||
@@ -899,8 +851,8 @@ namespace RandomX {
|
||||
++cycle;
|
||||
}
|
||||
|
||||
std::cout << "; ALU port utilization:" << std::endl;
|
||||
std::cout << "; (* = in use, _ = idle)" << std::endl;
|
||||
if(INFO) std::cout << "; ALU port utilization:" << std::endl;
|
||||
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
|
||||
|
||||
int portCycles = 0;
|
||||
/*for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
|
||||
@@ -914,13 +866,13 @@ namespace RandomX {
|
||||
|
||||
double ipc = (macroOpCount / (double)retireCycle);
|
||||
|
||||
std::cout << "; code size " << codeSize << " bytes" << std::endl;
|
||||
std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
|
||||
std::cout << "; RandomX instructions: " << outIndex << std::endl;
|
||||
std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
|
||||
std::cout << "; IPC = " << ipc << std::endl;
|
||||
std::cout << "; Port-cycles: " << portCycles << std::endl;
|
||||
std::cout << "; Multiplications: " << mulCount << std::endl;
|
||||
if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl;
|
||||
if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
|
||||
if (INFO) std::cout << "; RandomX instructions: " << outIndex << std::endl;
|
||||
if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
|
||||
if (INFO) std::cout << "; IPC = " << ipc << std::endl;
|
||||
if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl;
|
||||
if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl;
|
||||
|
||||
int asicLatency[8];
|
||||
memset(asicLatency, 0, sizeof(asicLatency));
|
||||
@@ -942,19 +894,21 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl;
|
||||
if (INFO) std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl;
|
||||
|
||||
std::cout << "; ASIC latency:" << std::endl;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
std::cout << "; r" << i << " = " << asicLatency[i] << std::endl;
|
||||
}
|
||||
std::cout << "; CPU latency:" << std::endl;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
std::cout << "; r" << i << " = " << registers[i].latency << std::endl;
|
||||
if (INFO) {
|
||||
std::cout << "; ASIC latency:" << std::endl;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
std::cout << "; r" << i << " = " << asicLatency[i] << std::endl;
|
||||
}
|
||||
if (INFO) std::cout << "; CPU latency:" << std::endl;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
std::cout << "; r" << i << " = " << registers[i].latency << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
prog.setSize(outIndex);
|
||||
prog.setAddressRegister(addressReg);
|
||||
return addressReg;
|
||||
return outIndex;
|
||||
}
|
||||
}
|
@@ -21,6 +21,27 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
// Intel Ivy Bridge reference
|
||||
// Integer identifiers for the instruction types listed below. The values are
// consecutive, running from 0 to COUNT - 1; INVALID (-1) lies outside that range.
// The trailing comment on each constant follows the column header on the
// namespace line: uOPs (decode), execution ports, latency, code size.
// NOTE(review): latency is presumably in cycles and code size in bytes - confirm.
// NOTE(review): entries written as "a+b" appear to give one value per macro-op
// of a multi-op instruction - confirm against the instruction info tables.
namespace LightInstructionType { //uOPs (decode) execution ports latency code size
|
||||
constexpr int ISUB_R = 0; //1 p015 1 3
|
||||
constexpr int IXOR_R = 1; //1 p015 1 3
|
||||
constexpr int IADD_RS = 2; //1 p01 1 4
|
||||
constexpr int IMUL_R = 3; //1 p1 3 4
|
||||
constexpr int IROR_C = 4; //1 p05 1 4
|
||||
constexpr int IADD_C7 = 5; //1 p015 1 7
|
||||
constexpr int IXOR_C7 = 6; //1 p015 1 7
|
||||
constexpr int IADD_C8 = 7; //1+0 p015 1 8
|
||||
constexpr int IXOR_C8 = 8; //1+0 p015 1 8
|
||||
constexpr int IADD_C9 = 9; //1+0 p015 1 9
|
||||
constexpr int IXOR_C9 = 10; //1+0 p015 1 9
|
||||
constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3
|
||||
constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3
|
||||
constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4
|
||||
|
||||
// Number of instruction types defined above (identifiers 0..13).
constexpr int COUNT = 14;
|
||||
// Value that matches none of the identifiers above.
// NOTE(review): presumably used as a "no instruction" marker - confirm at the use sites.
constexpr int INVALID = -1;
|
||||
}
|
||||
|
||||
class Blake2Generator {
|
||||
public:
|
||||
Blake2Generator(const void* seed, int nonce);
|
||||
|
@@ -24,13 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
|
||||
|
||||
class VirtualMachine {
|
||||
public:
|
||||
VirtualMachine();
|
||||
virtual ~VirtualMachine() {}
|
||||
virtual void setDataset(dataset_t ds, uint64_t size) = 0;
|
||||
virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
|
||||
void setScratchpad(void* ptr) {
|
||||
scratchpad = (uint8_t*)ptr;
|
||||
}
|
||||
|
29
src/main.cpp
29
src/main.cpp
@@ -205,7 +205,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genLight, useSuperscalar;
|
||||
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genSuperscalar, useSuperscalar;
|
||||
int programCount, threadCount, initThreadCount, epoch;
|
||||
|
||||
readOption("--softAes", argc, argv, softAes);
|
||||
@@ -220,15 +220,15 @@ int main(int argc, char** argv) {
|
||||
readOption("--jit", argc, argv, jit);
|
||||
readOption("--genNative", argc, argv, genNative);
|
||||
readOption("--help", argc, argv, help);
|
||||
readOption("--genLight", argc, argv, genLight);
|
||||
readOption("--genSuperscalar", argc, argv, genSuperscalar);
|
||||
readOption("--useSuperscalar", argc, argv, useSuperscalar);
|
||||
|
||||
if (genLight) {
|
||||
if (genSuperscalar) {
|
||||
RandomX::LightProgram p;
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
RandomX::generateLightProg2(p, gen);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
asmX86.generateProgram(p);
|
||||
asmX86.generateAsm(p);
|
||||
//std::ofstream file("lightProg2.asm");
|
||||
asmX86.printCode(std::cout);
|
||||
return 0;
|
||||
@@ -266,6 +266,7 @@ int main(int argc, char** argv) {
|
||||
const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize;
|
||||
const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch);
|
||||
dataset.cache.size = cacheSize;
|
||||
RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
|
||||
std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl;
|
||||
|
||||
@@ -282,6 +283,12 @@ int main(int argc, char** argv) {
|
||||
outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i));
|
||||
std::cout << std::endl;
|
||||
}
|
||||
if (useSuperscalar) {
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
RandomX::generateLightProg2(programs[i], gen);
|
||||
}
|
||||
}
|
||||
if (!miningMode) {
|
||||
std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
@@ -291,11 +298,6 @@ int main(int argc, char** argv) {
|
||||
RandomX::datasetAlloc(dataset, largePages);
|
||||
const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize;
|
||||
if (useSuperscalar) {
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
RandomX::generateLightProg2(programs[i], gen);
|
||||
}
|
||||
RandomX::JitCompilerX86 jit86;
|
||||
jit86.generateSuperScalarHash(programs);
|
||||
jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
|
||||
@@ -320,7 +322,6 @@ int main(int argc, char** argv) {
|
||||
threads.clear();
|
||||
std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
return 0;
|
||||
std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl;
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
RandomX::VirtualMachine* vm;
|
||||
@@ -328,12 +329,14 @@ int main(int argc, char** argv) {
|
||||
vm = new RandomX::CompiledVirtualMachine();
|
||||
}
|
||||
else {
|
||||
if (jit)
|
||||
vm = new RandomX::CompiledLightVirtualMachine();
|
||||
if (jit && useSuperscalar)
|
||||
vm = new RandomX::CompiledLightVirtualMachine<true>();
|
||||
else if(jit)
|
||||
vm = new RandomX::CompiledLightVirtualMachine<false>();
|
||||
else
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
||||
}
|
||||
vm->setDataset(dataset, datasetSize);
|
||||
vm->setDataset(dataset, datasetSize, programs);
|
||||
vms.push_back(vm);
|
||||
}
|
||||
uint8_t* scratchpadMem;
|
||||
|
Loading…
Reference in New Issue
Block a user