mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-21 23:38:54 +00:00
Merge branch 'feature/branches' into dev
Conflicts: src/JitCompilerX86.cpp src/JitCompilerX86.hpp src/main.cpp
This commit is contained in:
commit
4c1ae951de
@ -18,6 +18,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
//#define TRACE
|
||||
|
||||
#include <climits>
|
||||
#include "AssemblyGeneratorX86.hpp"
|
||||
#include "common.hpp"
|
||||
#include "reciprocal.h"
|
||||
@ -45,9 +46,25 @@ namespace RandomX {
|
||||
static const char* regDatasetAddr = "rdi";
|
||||
static const char* regScratchpadAddr = "rsi";
|
||||
|
||||
int AssemblyGeneratorX86::getConditionRegister() {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registerUsage[i] < min) {
|
||||
min = registerUsage[i];
|
||||
minIndex = i;
|
||||
}
|
||||
}
|
||||
return minIndex;
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::generateProgram(Program& prog) {
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
asmCode.str(std::string()); //clear
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
asmCode << "randomx_isn_" << i << ":" << std::endl;
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
@ -96,6 +113,7 @@ namespace RandomX {
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
}
|
||||
@ -107,6 +125,7 @@ namespace RandomX {
|
||||
|
||||
//2.75 uOP
|
||||
void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||
@ -119,12 +138,14 @@ namespace RandomX {
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||
traceint(instr);
|
||||
}
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
}
|
||||
@ -136,6 +157,7 @@ namespace RandomX {
|
||||
|
||||
//2.75 uOP
|
||||
void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||
@ -148,12 +170,14 @@ namespace RandomX {
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||
traceint(instr);
|
||||
}
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
}
|
||||
@ -165,6 +189,7 @@ namespace RandomX {
|
||||
|
||||
//2.75 uOP
|
||||
void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||
@ -177,6 +202,7 @@ namespace RandomX {
|
||||
|
||||
//4 uOPs
|
||||
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "\tmul " << regR[instr.src] << std::endl;
|
||||
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
@ -185,6 +211,7 @@ namespace RandomX {
|
||||
|
||||
//5.75 uOPs
|
||||
void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, "ecx");
|
||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||
@ -200,6 +227,7 @@ namespace RandomX {
|
||||
|
||||
//4 uOPs
|
||||
void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "\timul " << regR[instr.src] << std::endl;
|
||||
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
@ -208,6 +236,7 @@ namespace RandomX {
|
||||
|
||||
//5.75 uOPs
|
||||
void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, "ecx");
|
||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||
@ -223,12 +252,14 @@ namespace RandomX {
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
asmCode << "\tneg " << regR[instr.dst] << std::endl;
|
||||
traceint(instr);
|
||||
}
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
}
|
||||
@ -240,6 +271,7 @@ namespace RandomX {
|
||||
|
||||
//2.75 uOP
|
||||
void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||
@ -252,6 +284,7 @@ namespace RandomX {
|
||||
|
||||
//1.75 uOPs
|
||||
void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
||||
asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
|
||||
@ -264,6 +297,7 @@ namespace RandomX {
|
||||
|
||||
//1.75 uOPs
|
||||
void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
||||
asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
|
||||
@ -277,6 +311,7 @@ namespace RandomX {
|
||||
//2 uOPs
|
||||
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||
if (instr.getImm32() != 0) {
|
||||
registerUsage[instr.dst] = i;
|
||||
uint32_t divisor = instr.getImm32();
|
||||
asmCode << "\tmov rax, " << reciprocal(instr.getImm32()) << std::endl;
|
||||
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
|
||||
@ -295,6 +330,9 @@ namespace RandomX {
|
||||
//2 uOPs
|
||||
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
|
||||
if (instr.src != instr.dst) {
|
||||
//std::swap(registerUsage[instr.dst], registerUsage[instr.src]);
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
traceint(instr);
|
||||
}
|
||||
@ -435,8 +473,23 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
|
||||
const int shift = (instr.mod >> 5);
|
||||
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
|
||||
int reg = getConditionRegister();
|
||||
int target = registerUsage[reg] + 1;
|
||||
registerUsage[reg] = i;
|
||||
asmCode << "\tadd " << regR[reg] << ", " << (1 << shift) << std::endl;
|
||||
asmCode << "\ttest " << regR[reg] << ", " << conditionMask << std::endl;
|
||||
asmCode << "\tjz randomx_isn_" << target << std::endl;
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
//4 uOPs
|
||||
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
|
||||
handleCondition(instr, i);
|
||||
asmCode << "\txor ecx, ecx" << std::endl;
|
||||
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
|
||||
@ -446,6 +499,7 @@ namespace RandomX {
|
||||
|
||||
//6 uOPs
|
||||
void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) {
|
||||
handleCondition(instr, i);
|
||||
asmCode << "\txor ecx, ecx" << std::endl;
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl;
|
||||
|
@ -38,10 +38,13 @@ namespace RandomX {
|
||||
private:
|
||||
static InstructionGenerator engine[256];
|
||||
std::stringstream asmCode;
|
||||
int registerUsage[8];
|
||||
|
||||
void genAddressReg(Instruction&, const char*);
|
||||
void genAddressRegDst(Instruction&, int);
|
||||
int32_t genAddressImm(Instruction&);
|
||||
int getConditionRegister();
|
||||
void handleCondition(Instruction&, int);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
|
||||
|
@ -302,13 +302,13 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void Instruction::h_COND_R(std::ostream& os) const {
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << ")" << std::endl;
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_COND_M(std::ostream& os) const {
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(";
|
||||
genAddressReg(os);
|
||||
os << ", " << (int32_t)getImm32() << ")" << std::endl;
|
||||
os << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISTORE(std::ostream& os) const {
|
||||
|
@ -18,6 +18,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
//#define TRACE
|
||||
//#define FPUCHECK
|
||||
#define RANDOMX_JUMP
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
@ -45,25 +46,12 @@ constexpr bool fpuCheck = false;
|
||||
namespace RandomX {
|
||||
|
||||
InterpretedVirtualMachine::~InterpretedVirtualMachine() {
|
||||
if (asyncWorker) {
|
||||
delete mem.ds.asyncWorker;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
|
||||
if (asyncWorker) {
|
||||
if (softAes) {
|
||||
mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache);
|
||||
}
|
||||
else {
|
||||
mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache);
|
||||
}
|
||||
readDataset = &datasetReadLightAsync;
|
||||
}
|
||||
else {
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetReadLight;
|
||||
}
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetReadLight;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
}
|
||||
|
||||
@ -75,14 +63,10 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
executeBytecode(N, r, f, e, a);
|
||||
executeBytecode<N + 1>(r, f, e, a);
|
||||
}
|
||||
|
||||
template<>
|
||||
void InterpretedVirtualMachine::executeBytecode<RANDOMX_PROGRAM_SIZE>(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
|
||||
executeBytecode(ic, r, f, e, a);
|
||||
}
|
||||
}
|
||||
|
||||
static void print(int_reg_t r) {
|
||||
@ -114,8 +98,9 @@ namespace RandomX {
|
||||
return std::fpclassify(x) == FP_SUBNORMAL;
|
||||
}
|
||||
|
||||
FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[i];
|
||||
FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[ic];
|
||||
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||
//if(trace) printState(r, f, e, a);
|
||||
switch (ibc.type)
|
||||
{
|
||||
@ -234,10 +219,38 @@ namespace RandomX {
|
||||
} break;
|
||||
|
||||
case InstructionType::COND_R: {
|
||||
#ifdef RANDOMX_JUMP
|
||||
*ibc.creg += (1 << ibc.shift);
|
||||
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
|
||||
if ((*ibc.creg & conditionMask) == 0) {
|
||||
#ifdef STATS
|
||||
count_JUMP_taken++;
|
||||
#endif
|
||||
ic = ibc.target;
|
||||
break;
|
||||
}
|
||||
#ifdef STATS
|
||||
count_JUMP_not_taken++;
|
||||
#endif
|
||||
#endif
|
||||
*ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0;
|
||||
} break;
|
||||
|
||||
case InstructionType::COND_M: {
|
||||
#ifdef RANDOMX_JUMP
|
||||
*ibc.creg += (1uLL << ibc.shift);
|
||||
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
|
||||
if ((*ibc.creg & conditionMask) == 0) {
|
||||
#ifdef STATS
|
||||
count_JUMP_taken++;
|
||||
#endif
|
||||
ic = ibc.target;
|
||||
break;
|
||||
}
|
||||
#ifdef STATS
|
||||
count_JUMP_not_taken++;
|
||||
#endif
|
||||
#endif
|
||||
*ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0;
|
||||
} break;
|
||||
|
||||
@ -257,7 +270,6 @@ namespace RandomX {
|
||||
UNREACHABLE;
|
||||
}
|
||||
if (trace) {
|
||||
std::cout << program(i);
|
||||
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
|
||||
print(*ibc.idst);
|
||||
else //if(ibc.type >= 20 && ibc.type <= 30)
|
||||
@ -334,28 +346,15 @@ namespace RandomX {
|
||||
std::cout << "-----------------------------------" << std::endl;
|
||||
}
|
||||
|
||||
executeBytecode<0>(r, f, e, a);
|
||||
executeBytecode(r, f, e, a);
|
||||
|
||||
if (asyncWorker) {
|
||||
ILightClientAsyncWorker* aw = mem.ds.asyncWorker;
|
||||
const uint64_t* datasetLine = aw->getBlock(datasetBase + mem.ma);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
mem.mx ^= r[readReg2] ^ r[readReg3];
|
||||
mem.mx &= CacheLineAlignMask; //align to cache line
|
||||
std::swap(mem.mx, mem.ma);
|
||||
aw->prepareBlock(datasetBase + mem.ma);
|
||||
}
|
||||
else {
|
||||
mem.mx ^= r[readReg2] ^ r[readReg3];
|
||||
//mem.mx &= CacheLineAlignMask;
|
||||
Cache& cache = mem.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
std::swap(mem.mx, mem.ma);
|
||||
}
|
||||
mem.mx ^= r[readReg2] ^ r[readReg3];
|
||||
Cache& cache = mem.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
||||
if (trace) {
|
||||
std::cout << "iteration " << std::dec << ic << std::endl;
|
||||
@ -419,9 +418,25 @@ namespace RandomX {
|
||||
_mm_store_pd(®.e[3].lo, e[3]);
|
||||
}
|
||||
|
||||
static int getConditionRegister(int(®isterUsage)[8]) {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registerUsage[i] < min) {
|
||||
min = registerUsage[i];
|
||||
minIndex = i;
|
||||
}
|
||||
}
|
||||
return minIndex;
|
||||
}
|
||||
|
||||
#include "instructionWeights.hpp"
|
||||
|
||||
void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
int registerUsage[8];
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
auto& instr = program(i);
|
||||
auto& ibc = byteCode[i];
|
||||
@ -438,6 +453,7 @@ namespace RandomX {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IADD_M) {
|
||||
@ -454,6 +470,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IADD_RC) {
|
||||
@ -463,6 +480,7 @@ namespace RandomX {
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISUB_R) {
|
||||
@ -477,6 +495,7 @@ namespace RandomX {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISUB_M) {
|
||||
@ -493,6 +512,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_9C) {
|
||||
@ -500,6 +520,7 @@ namespace RandomX {
|
||||
ibc.type = InstructionType::IMUL_9C;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_R) {
|
||||
@ -514,6 +535,7 @@ namespace RandomX {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_M) {
|
||||
@ -530,6 +552,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMULH_R) {
|
||||
@ -538,6 +561,7 @@ namespace RandomX {
|
||||
ibc.type = InstructionType::IMULH_R;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMULH_M) {
|
||||
@ -554,6 +578,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISMULH_R) {
|
||||
@ -562,6 +587,7 @@ namespace RandomX {
|
||||
ibc.type = InstructionType::ISMULH_R;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISMULH_M) {
|
||||
@ -578,6 +604,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_RCP) {
|
||||
@ -588,6 +615,7 @@ namespace RandomX {
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = reciprocal(divisor);
|
||||
ibc.isrc = &ibc.imm;
|
||||
registerUsage[instr.dst] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
@ -598,6 +626,7 @@ namespace RandomX {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::INEG_R;
|
||||
ibc.idst = &r[dst];
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IXOR_R) {
|
||||
@ -612,6 +641,7 @@ namespace RandomX {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IXOR_M) {
|
||||
@ -628,6 +658,7 @@ namespace RandomX {
|
||||
ibc.isrc = &ibc.imm;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IROR_R) {
|
||||
@ -642,6 +673,7 @@ namespace RandomX {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IROL_R) {
|
||||
@ -656,6 +688,7 @@ namespace RandomX {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISWAP_R) {
|
||||
@ -665,6 +698,8 @@ namespace RandomX {
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
ibc.type = InstructionType::ISWAP_R;
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
@ -751,6 +786,14 @@ namespace RandomX {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.condition = (instr.mod >> 2) & 7;
|
||||
ibc.imm = instr.getImm32();
|
||||
//jump condition
|
||||
int reg = getConditionRegister(registerUsage);
|
||||
ibc.target = registerUsage[reg];
|
||||
ibc.shift = (instr.mod >> 5);
|
||||
ibc.creg = &r[reg];
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
} break;
|
||||
|
||||
CASE_REP(COND_M) {
|
||||
@ -762,6 +805,14 @@ namespace RandomX {
|
||||
ibc.condition = (instr.mod >> 2) & 7;
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
//jump condition
|
||||
int reg = getConditionRegister(registerUsage);
|
||||
ibc.target = registerUsage[reg];
|
||||
ibc.shift = (instr.mod >> 5);
|
||||
ibc.creg = &r[reg];
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
} break;
|
||||
|
||||
CASE_REP(CFROUND) {
|
||||
|
@ -52,9 +52,12 @@ namespace RandomX {
|
||||
uint64_t imm;
|
||||
int64_t simm;
|
||||
};
|
||||
uint32_t condition;
|
||||
int_reg_t* creg;
|
||||
uint16_t condition;
|
||||
int16_t target;
|
||||
uint32_t memMask;
|
||||
uint32_t type;
|
||||
uint16_t type;
|
||||
uint16_t shift;
|
||||
};
|
||||
|
||||
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
|
||||
@ -70,7 +73,7 @@ namespace RandomX {
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
||||
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
|
||||
~InterpretedVirtualMachine();
|
||||
void setDataset(dataset_t ds, uint64_t size) override;
|
||||
void initialize() override;
|
||||
@ -78,7 +81,7 @@ namespace RandomX {
|
||||
private:
|
||||
static InstructionHandler engine[256];
|
||||
DatasetReadFunc readDataset;
|
||||
bool softAes, asyncWorker;
|
||||
bool softAes;
|
||||
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
|
||||
|
||||
#ifdef STATS
|
||||
@ -112,17 +115,13 @@ namespace RandomX {
|
||||
int count_FPROUND = 0;
|
||||
int count_JUMP_taken = 0;
|
||||
int count_JUMP_not_taken = 0;
|
||||
int count_CALL_taken = 0;
|
||||
int count_CALL_not_taken = 0;
|
||||
int count_RET_stack_empty = 0;
|
||||
int count_RET_taken = 0;
|
||||
int count_jump_taken[8] = { 0 };
|
||||
int count_jump_not_taken[8] = { 0 };
|
||||
int count_max_stack = 0;
|
||||
int count_retdepth = 0;
|
||||
int count_retdepth_max = 0;
|
||||
int count_endstack = 0;
|
||||
int count_instructions[ProgramLength] = { 0 };
|
||||
int count_instructions[RANDOMX_PROGRAM_SIZE] = { 0 };
|
||||
int count_FADD_nop = 0;
|
||||
int count_FADD_nop2 = 0;
|
||||
int count_FSUB_nop = 0;
|
||||
@ -132,8 +131,7 @@ namespace RandomX {
|
||||
int datasetAccess[256] = { 0 };
|
||||
#endif
|
||||
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
template<int N>
|
||||
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
};
|
||||
}
|
@ -18,12 +18,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include <stdexcept>
|
||||
#include "JitCompilerX86.hpp"
|
||||
#include "Program.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "virtualMemory.hpp"
|
||||
|
||||
#define RANDOMX_JUMP
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
#if !defined(_M_X64) && !defined(__x86_64__)
|
||||
@ -174,6 +177,9 @@ namespace RandomX {
|
||||
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
||||
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
||||
static const uint8_t CALL = 0xe8;
|
||||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos - prologueSize;
|
||||
@ -203,6 +209,12 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
#endif
|
||||
auto addressRegisters = prog.getEntropy(12);
|
||||
uint32_t readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
@ -222,7 +234,7 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr);
|
||||
generateCode(instr, i);
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + readReg2);
|
||||
@ -241,9 +253,12 @@ namespace RandomX {
|
||||
emitByte(0x90);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateCode(Instruction& instr) {
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.push_back(codePos);
|
||||
#endif
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr);
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||
@ -269,7 +284,8 @@ namespace RandomX {
|
||||
emit32(instr.getImm32() & ScratchpadL3Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IADD_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_ADD_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
@ -281,7 +297,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_ADD_RM);
|
||||
@ -299,14 +316,16 @@ namespace RandomX {
|
||||
emitByte((scale << 6) | (index << 3) | base);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_RC(Instruction& instr) {
|
||||
void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
emitByte(0x84 + 8 * instr.dst);
|
||||
genSIB(0, instr.src, instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
@ -318,7 +337,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_SUB_RM);
|
||||
@ -332,14 +352,16 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
emitByte(0x84 + 8 * instr.dst);
|
||||
genSIB(3, instr.dst, instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
@ -351,7 +373,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_IMUL_RM);
|
||||
@ -365,7 +388,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
@ -374,7 +398,8 @@ namespace RandomX {
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
@ -392,7 +417,8 @@ namespace RandomX {
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
@ -401,7 +427,8 @@ namespace RandomX {
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
@ -419,8 +446,9 @@ namespace RandomX {
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr) {
|
||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||
if (instr.getImm32() != 0) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocal(instr.getImm32()));
|
||||
emit(REX_IMUL_RM);
|
||||
@ -428,16 +456,18 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
|
||||
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_INEG_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_NEG);
|
||||
emitByte(0xd8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
@ -449,7 +479,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_XOR_RM);
|
||||
@ -463,7 +494,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROR_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
@ -477,7 +509,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROL_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
@ -491,20 +524,22 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
|
||||
if (instr.src != instr.dst) {
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
emit(REX_XCHG);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSWAP_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
|
||||
emit(SHUFPD);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emitByte(1);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
emit(REX_ADDPD);
|
||||
@ -514,7 +549,7 @@ namespace RandomX {
|
||||
//emitByte(0xf8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
@ -522,7 +557,7 @@ namespace RandomX {
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
emit(REX_SUBPD);
|
||||
@ -532,7 +567,7 @@ namespace RandomX {
|
||||
//emitByte(0xf8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
@ -540,20 +575,20 @@ namespace RandomX {
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
emit(REX_XORPS);
|
||||
emitByte(0xc7 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
emit(REX_MULPD);
|
||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
@ -564,7 +599,7 @@ namespace RandomX {
|
||||
emitByte(0xe5 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
emit(REX_DIVPD);
|
||||
@ -573,7 +608,7 @@ namespace RandomX {
|
||||
emitByte(0xe5 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
@ -582,13 +617,13 @@ namespace RandomX {
|
||||
emitByte(0xe4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
emit(SQRTPD);
|
||||
emitByte(0xe4 + 9 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_CFROUND(Instruction& instr) {
|
||||
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.src);
|
||||
int rotate = (13 - (instr.getImm32() & 63)) & 63;
|
||||
@ -599,6 +634,28 @@ namespace RandomX {
|
||||
emit(AND_OR_MOV_LDMXCSR);
|
||||
}
|
||||
|
||||
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
|
||||
switch (((instr.mod >> 2) & 7) ^ invert)
|
||||
{
|
||||
case 0:
|
||||
return 0x76; //jbe
|
||||
case 1:
|
||||
return 0x77; //ja
|
||||
case 2:
|
||||
return 0x78; //js
|
||||
case 3:
|
||||
return 0x79; //jns
|
||||
case 4:
|
||||
return 0x70; //jo
|
||||
case 5:
|
||||
return 0x71; //jno
|
||||
case 6:
|
||||
return 0x7c; //jl
|
||||
case 7:
|
||||
return 0x7d; //jge
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint8_t condition(Instruction& instr) {
|
||||
switch ((instr.mod >> 2) & 7)
|
||||
{
|
||||
@ -623,7 +680,40 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_COND_R(Instruction& instr) {
|
||||
int JitCompilerX86::getConditionRegister() {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registerUsage[i] < min) {
|
||||
min = registerUsage[i];
|
||||
minIndex = i;
|
||||
}
|
||||
}
|
||||
return minIndex;
|
||||
}
|
||||
|
||||
void JitCompilerX86::handleCondition(Instruction& instr, int i) {
|
||||
const int shift = (instr.mod >> 5);
|
||||
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
|
||||
int reg = getConditionRegister();
|
||||
int target = registerUsage[reg] + 1;
|
||||
emit(REX_ADD_I);
|
||||
emitByte(0xc0 + reg);
|
||||
emit32(1 << shift);
|
||||
emit(REX_TEST);
|
||||
emitByte(0xc0 + reg);
|
||||
emit32(conditionMask);
|
||||
emit(JZ);
|
||||
emit32(instructionOffsets[target] - (codePos + 4));
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
emit(REX_CMP_R32I);
|
||||
emitByte(0xf8 + instr.src);
|
||||
@ -635,7 +725,10 @@ namespace RandomX {
|
||||
emitByte(0xc1 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_COND_M(Instruction& instr) {
|
||||
void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
genAddressReg(instr);
|
||||
emit(REX_CMP_M32I);
|
||||
@ -647,21 +740,21 @@ namespace RandomX {
|
||||
emitByte(0xc1 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISTORE(Instruction& instr) {
|
||||
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr);
|
||||
emit(REX_MOV_MR);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSTORE(Instruction& instr) {
|
||||
void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr, true);
|
||||
emit(MOVAPD);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_NOP(Instruction& instr) {
|
||||
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
|
||||
emitByte(0x90);
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ namespace RandomX {
|
||||
class Program;
|
||||
class JitCompilerX86;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&);
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||
|
||||
constexpr uint32_t CodeSize = 64 * 1024;
|
||||
|
||||
@ -47,17 +47,21 @@ namespace RandomX {
|
||||
size_t getCodeSize();
|
||||
private:
|
||||
static InstructionGeneratorX86 engine[256];
|
||||
std::vector<int32_t> instructionOffsets;
|
||||
int registerUsage[8];
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
|
||||
void generateProgramPrologue(Program&);
|
||||
void generateProgramEpilogue(Program&);
|
||||
int getConditionRegister();
|
||||
void genAddressReg(Instruction&, bool);
|
||||
void genAddressRegDst(Instruction&, bool);
|
||||
void genAddressImm(Instruction&);
|
||||
void genSIB(int scale, int index, int base);
|
||||
|
||||
void generateCode(Instruction&);
|
||||
void handleCondition(Instruction&, int);
|
||||
void generateCode(Instruction&, int);
|
||||
|
||||
void emitByte(uint8_t val) {
|
||||
code[codePos] = val;
|
||||
@ -92,43 +96,43 @@ namespace RandomX {
|
||||
codePos += N;
|
||||
}
|
||||
|
||||
void h_IADD_R(Instruction&);
|
||||
void h_IADD_M(Instruction&);
|
||||
void h_IADD_RC(Instruction&);
|
||||
void h_ISUB_R(Instruction&);
|
||||
void h_ISUB_M(Instruction&);
|
||||
void h_IMUL_9C(Instruction&);
|
||||
void h_IMUL_R(Instruction&);
|
||||
void h_IMUL_M(Instruction&);
|
||||
void h_IMULH_R(Instruction&);
|
||||
void h_IMULH_M(Instruction&);
|
||||
void h_ISMULH_R(Instruction&);
|
||||
void h_ISMULH_M(Instruction&);
|
||||
void h_IMUL_RCP(Instruction&);
|
||||
void h_ISDIV_C(Instruction&);
|
||||
void h_INEG_R(Instruction&);
|
||||
void h_IXOR_R(Instruction&);
|
||||
void h_IXOR_M(Instruction&);
|
||||
void h_IROR_R(Instruction&);
|
||||
void h_IROL_R(Instruction&);
|
||||
void h_ISWAP_R(Instruction&);
|
||||
void h_FSWAP_R(Instruction&);
|
||||
void h_FADD_R(Instruction&);
|
||||
void h_FADD_M(Instruction&);
|
||||
void h_FSUB_R(Instruction&);
|
||||
void h_FSUB_M(Instruction&);
|
||||
void h_FSCAL_R(Instruction&);
|
||||
void h_FMUL_R(Instruction&);
|
||||
void h_FMUL_M(Instruction&);
|
||||
void h_FDIV_R(Instruction&);
|
||||
void h_FDIV_M(Instruction&);
|
||||
void h_FSQRT_R(Instruction&);
|
||||
void h_COND_R(Instruction&);
|
||||
void h_COND_M(Instruction&);
|
||||
void h_CFROUND(Instruction&);
|
||||
void h_ISTORE(Instruction&);
|
||||
void h_FSTORE(Instruction&);
|
||||
void h_NOP(Instruction&);
|
||||
void h_IADD_R(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_IADD_RC(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_9C(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_ISDIV_C(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FMUL_M(Instruction&, int);
|
||||
void h_FDIV_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_COND_R(Instruction&, int);
|
||||
void h_COND_M(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_FSTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
};
|
||||
|
||||
}
|
@ -67,6 +67,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
|
||||
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
|
||||
|
||||
//How many register bits must be zero for a jump condition to be triggered
|
||||
#define RANDOMX_CONDITION_BITS 7
|
||||
|
||||
/*
|
||||
Instruction frequencies (per 256 opcodes)
|
||||
Total sum of frequencies must be 256
|
||||
|
@ -54,6 +54,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#define REP32(x) REP31(x) x,
|
||||
#define REP33(x) REP32(x) x,
|
||||
#define REP40(x) REP32(x) REP8(x)
|
||||
#define REP64(x) REP32(x) REP32(x)
|
||||
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
|
||||
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
|
||||
#define REP256(x) REP128(x) REP128(x)
|
||||
@ -95,6 +96,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
|
||||
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
|
||||
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
|
||||
#define REPCASE64(x) REPCASE32(x) REPCASE32(x)
|
||||
#define REPCASE128(x) REPCASE64(x) REPCASE64(x)
|
||||
#define REPCASENX(x,N) REPCASE##N(x)
|
||||
#define REPCASEN(x,N) REPCASENX(x,N)
|
||||
#define CASE_REP(x) REPCASEN(x, WT(x))
|
@ -303,7 +303,7 @@ int main(int argc, char** argv) {
|
||||
if (jit)
|
||||
vm = new RandomX::CompiledLightVirtualMachine();
|
||||
else
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes, async);
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
||||
}
|
||||
vm->setDataset(dataset, datasetSize);
|
||||
vms.push_back(vm);
|
||||
@ -340,7 +340,7 @@ int main(int argc, char** argv) {
|
||||
std::cout << "Calculated result: ";
|
||||
result.print(std::cout);
|
||||
if(programCount == 1000)
|
||||
std::cout << "Reference result: 84f37cc43cb21eabf1d5b9def462060cd24218290678dd80a8ea2f663892629e" << std::endl;
|
||||
std::cout << "Reference result: 9e636a04a2517f37d8ed40b67a7051e02a7577e878fbba5c4352996b2c653f90" << std::endl;
|
||||
if (!miningMode) {
|
||||
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
|
||||
}
|
||||
|
1617
src/program.inc
1617
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user