diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp
index 474b3bd..fd7ee06 100644
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@@ -18,6 +18,7 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
//#define TRACE
+#include <climits>
#include "AssemblyGeneratorX86.hpp"
#include "common.hpp"
#include "reciprocal.h"
@@ -45,9 +46,25 @@ namespace RandomX {
static const char* regDatasetAddr = "rdi";
static const char* regScratchpadAddr = "rsi";
+	//Picks the register that the next COND instruction uses as its jump counter.
+	//Returns the index (0-7) of the smallest entry of registerUsage; when several entries share the
+	//minimum, the lowest index wins. registerUsage[x] holds the index of the instruction that last
+	//marked register x as used (-1 after generateProgram resets it).
+	int AssemblyGeneratorX86::getConditionRegister() {
+		int min = INT_MAX;
+		int minIndex = 0; //defined result even if no entry compares below INT_MAX
+		for (unsigned i = 0; i < 8; ++i) {
+			if (registerUsage[i] < min) {
+				min = registerUsage[i];
+				minIndex = i;
+			}
+		}
+		return minIndex;
+	}
+
void AssemblyGeneratorX86::generateProgram(Program& prog) {
+ for (unsigned i = 0; i < 8; ++i) {
+ registerUsage[i] = -1;
+ }
asmCode.str(std::string()); //clear
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
+ asmCode << "randomx_isn_" << i << ":" << std::endl;
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
@@ -96,6 +113,7 @@ namespace RandomX {
//1 uOP
void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
}
@@ -107,6 +125,7 @@ namespace RandomX {
//2.75 uOP
void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
@@ -119,12 +138,14 @@ namespace RandomX {
//1 uOP
void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
traceint(instr);
}
//1 uOP
void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
}
@@ -136,6 +157,7 @@ namespace RandomX {
//2.75 uOP
void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
@@ -148,12 +170,14 @@ namespace RandomX {
//1 uOP
void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
traceint(instr);
}
//1 uOP
void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
}
@@ -165,6 +189,7 @@ namespace RandomX {
//2.75 uOP
void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
@@ -177,6 +202,7 @@ namespace RandomX {
//4 uOPs
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\tmul " << regR[instr.src] << std::endl;
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
@@ -185,6 +211,7 @@ namespace RandomX {
//5.75 uOPs
void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, "ecx");
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
@@ -200,6 +227,7 @@ namespace RandomX {
//4 uOPs
void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\timul " << regR[instr.src] << std::endl;
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
@@ -208,6 +236,7 @@ namespace RandomX {
//5.75 uOPs
void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, "ecx");
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
@@ -223,12 +252,14 @@ namespace RandomX {
//1 uOP
void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
asmCode << "\tneg " << regR[instr.dst] << std::endl;
traceint(instr);
}
//1 uOP
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
}
@@ -240,6 +271,7 @@ namespace RandomX {
//2.75 uOP
void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
@@ -252,6 +284,7 @@ namespace RandomX {
//1.75 uOPs
void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
@@ -264,6 +297,7 @@ namespace RandomX {
//1.75 uOPs
void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
@@ -277,6 +311,7 @@ namespace RandomX {
//2 uOPs
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
if (instr.getImm32() != 0) {
+ registerUsage[instr.dst] = i;
uint32_t divisor = instr.getImm32();
asmCode << "\tmov rax, " << reciprocal(instr.getImm32()) << std::endl;
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
@@ -295,6 +330,9 @@ namespace RandomX {
//2 uOPs
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
if (instr.src != instr.dst) {
+ //std::swap(registerUsage[instr.dst], registerUsage[instr.src]);
+ registerUsage[instr.dst] = i;
+ registerUsage[instr.src] = i;
asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
traceint(instr);
}
@@ -435,8 +473,23 @@ namespace RandomX {
}
}
+	//Writes the conditional-jump preamble shared by COND_R and COND_M to asmCode.
+	//The register chosen by getConditionRegister() is incremented by 1 << (instr.mod >> 5). If the
+	//RANDOMX_CONDITION_BITS bits starting at that bit position are all zero afterwards, the emitted
+	//code jumps to the label of the instruction that follows the last recorded use of that register.
+	//i is the index of the instruction being generated.
+	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
+		const int bitOffset = instr.mod >> 5;
+		const int counterStep = 1 << bitOffset;
+		const int testMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << bitOffset;
+		const int counterReg = getConditionRegister();
+		const int jumpTarget = registerUsage[counterReg] + 1;
+		asmCode << "\tadd " << regR[counterReg] << ", " << counterStep << std::endl
+			<< "\ttest " << regR[counterReg] << ", " << testMask << std::endl
+			<< "\tjz randomx_isn_" << jumpTarget << std::endl;
+		//mark all registers as used (this also covers the counter register itself)
+		for (int& lastUse : registerUsage) {
+			lastUse = i;
+		}
+	}
+
//4 uOPs
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
+ handleCondition(instr, i);
asmCode << "\txor ecx, ecx" << std::endl;
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
@@ -446,6 +499,7 @@ namespace RandomX {
//6 uOPs
void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) {
+ handleCondition(instr, i);
asmCode << "\txor ecx, ecx" << std::endl;
genAddressReg(instr);
asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl;
diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp
index 216e492..62a6081 100644
--- a/src/AssemblyGeneratorX86.hpp
+++ b/src/AssemblyGeneratorX86.hpp
@@ -38,10 +38,13 @@ namespace RandomX {
private:
static InstructionGenerator engine[256];
std::stringstream asmCode;
+ int registerUsage[8];
void genAddressReg(Instruction&, const char*);
void genAddressRegDst(Instruction&, int);
int32_t genAddressImm(Instruction&);
+ int getConditionRegister();
+ void handleCondition(Instruction&, int);
void generateCode(Instruction&, int);
diff --git a/src/Instruction.cpp b/src/Instruction.cpp
index f8d8507..7069926 100644
--- a/src/Instruction.cpp
+++ b/src/Instruction.cpp
@@ -302,13 +302,13 @@ namespace RandomX {
}
void Instruction::h_COND_R(std::ostream& os) const {
- os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << ")" << std::endl;
+ os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
}
void Instruction::h_COND_M(std::ostream& os) const {
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(";
genAddressReg(os);
- os << ", " << (int32_t)getImm32() << ")" << std::endl;
+ os << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
}
void Instruction::h_ISTORE(std::ostream& os) const {
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index 1fbe825..54dd7be 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -18,6 +18,7 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
//#define TRACE
//#define FPUCHECK
+#define RANDOMX_JUMP
#include "InterpretedVirtualMachine.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
@@ -45,25 +46,12 @@ constexpr bool fpuCheck = false;
namespace RandomX {
InterpretedVirtualMachine::~InterpretedVirtualMachine() {
- if (asyncWorker) {
- delete mem.ds.asyncWorker;
- }
+
}
void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
- if (asyncWorker) {
- if (softAes) {
- mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache);
- }
- else {
- mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache);
- }
- readDataset = &datasetReadLightAsync;
- }
- else {
- mem.ds = ds;
- readDataset = &datasetReadLight;
- }
+ mem.ds = ds;
+ readDataset = &datasetReadLight;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
}
@@ -75,14 +63,10 @@ namespace RandomX {
}
}
- template<int N>
void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- executeBytecode(N, r, f, e, a);
- executeBytecode<N + 1>(r, f, e, a);
- }
-
- template<>
- void InterpretedVirtualMachine::executeBytecode<RANDOMX_PROGRAM_SIZE>(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
+ executeBytecode(ic, r, f, e, a);
+ }
}
static void print(int_reg_t r) {
@@ -114,8 +98,9 @@ namespace RandomX {
return std::fpclassify(x) == FP_SUBNORMAL;
}
- FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- auto& ibc = byteCode[i];
+ FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ auto& ibc = byteCode[ic];
+ if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
//if(trace) printState(r, f, e, a);
switch (ibc.type)
{
@@ -234,10 +219,38 @@ namespace RandomX {
} break;
case InstructionType::COND_R: {
+#ifdef RANDOMX_JUMP
+ *ibc.creg += (1 << ibc.shift);
+ const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
+ if ((*ibc.creg & conditionMask) == 0) {
+#ifdef STATS
+ count_JUMP_taken++;
+#endif
+ ic = ibc.target;
+ break;
+ }
+#ifdef STATS
+ count_JUMP_not_taken++;
+#endif
+#endif
*ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0;
} break;
case InstructionType::COND_M: {
+#ifdef RANDOMX_JUMP
+ *ibc.creg += (1uLL << ibc.shift);
+ const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
+ if ((*ibc.creg & conditionMask) == 0) {
+#ifdef STATS
+ count_JUMP_taken++;
+#endif
+ ic = ibc.target;
+ break;
+ }
+#ifdef STATS
+ count_JUMP_not_taken++;
+#endif
+#endif
*ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0;
} break;
@@ -257,7 +270,6 @@ namespace RandomX {
UNREACHABLE;
}
if (trace) {
- std::cout << program(i);
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
print(*ibc.idst);
else //if(ibc.type >= 20 && ibc.type <= 30)
@@ -334,28 +346,15 @@ namespace RandomX {
std::cout << "-----------------------------------" << std::endl;
}
- executeBytecode<0>(r, f, e, a);
+ executeBytecode(r, f, e, a);
- if (asyncWorker) {
- ILightClientAsyncWorker* aw = mem.ds.asyncWorker;
- const uint64_t* datasetLine = aw->getBlock(datasetBase + mem.ma);
- for (int i = 0; i < RegistersCount; ++i)
- r[i] ^= datasetLine[i];
- mem.mx ^= r[readReg2] ^ r[readReg3];
- mem.mx &= CacheLineAlignMask; //align to cache line
- std::swap(mem.mx, mem.ma);
- aw->prepareBlock(datasetBase + mem.ma);
- }
- else {
- mem.mx ^= r[readReg2] ^ r[readReg3];
- //mem.mx &= CacheLineAlignMask;
- Cache& cache = mem.ds.cache;
- uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
- initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
- for (int i = 0; i < RegistersCount; ++i)
- r[i] ^= datasetLine[i];
- std::swap(mem.mx, mem.ma);
- }
+ mem.mx ^= r[readReg2] ^ r[readReg3];
+ Cache& cache = mem.ds.cache;
+ uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
+ initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
+ for (int i = 0; i < RegistersCount; ++i)
+ r[i] ^= datasetLine[i];
+ std::swap(mem.mx, mem.ma);
if (trace) {
std::cout << "iteration " << std::dec << ic << std::endl;
@@ -419,9 +418,25 @@ namespace RandomX {
_mm_store_pd(&reg.e[3].lo, e[3]);
}
+	//Returns the index (0-7) of the smallest entry of registerUsage; when several entries share the
+	//minimum, the lowest index wins. precompileProgram keeps in registerUsage[x] the index of the
+	//instruction that last marked register x as used (-1 before the first instruction), so the result
+	//is the register with the oldest recorded use.
+	static int getConditionRegister(int(&registerUsage)[8]) {
+		int min = INT_MAX;
+		int minIndex = 0; //defined result even if no entry compares below INT_MAX
+		for (unsigned i = 0; i < 8; ++i) {
+			if (registerUsage[i] < min) {
+				min = registerUsage[i];
+				minIndex = i;
+			}
+		}
+		return minIndex;
+	}
+
#include "instructionWeights.hpp"
void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ int registerUsage[8];
+ for (unsigned i = 0; i < 8; ++i) {
+ registerUsage[i] = -1;
+ }
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
auto& instr = program(i);
auto& ibc = byteCode[i];
@@ -438,6 +453,7 @@ namespace RandomX {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IADD_M) {
@@ -454,6 +470,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IADD_RC) {
@@ -463,6 +480,7 @@ namespace RandomX {
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.imm = signExtend2sCompl(instr.getImm32());
+ registerUsage[dst] = i; //use the reduced register number (the one indexing r[] above); the raw instr.dst field may exceed the 8-entry table
} break;
CASE_REP(ISUB_R) {
@@ -477,6 +495,7 @@ namespace RandomX {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(ISUB_M) {
@@ -493,6 +512,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IMUL_9C) {
@@ -500,6 +520,7 @@ namespace RandomX {
ibc.type = InstructionType::IMUL_9C;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
+ registerUsage[dst] = i; //use the reduced register number (the one indexing r[] above); the raw instr.dst field may exceed the 8-entry table
} break;
CASE_REP(IMUL_R) {
@@ -514,6 +535,7 @@ namespace RandomX {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IMUL_M) {
@@ -530,6 +552,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IMULH_R) {
@@ -538,6 +561,7 @@ namespace RandomX {
ibc.type = InstructionType::IMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
+ registerUsage[dst] = i; //use the reduced register number (the one indexing r[] above); the raw instr.dst field may exceed the 8-entry table
} break;
CASE_REP(IMULH_M) {
@@ -554,6 +578,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(ISMULH_R) {
@@ -562,6 +587,7 @@ namespace RandomX {
ibc.type = InstructionType::ISMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
+ registerUsage[dst] = i; //use the reduced register number (the one indexing r[] above); the raw instr.dst field may exceed the 8-entry table
} break;
CASE_REP(ISMULH_M) {
@@ -578,6 +604,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IMUL_RCP) {
@@ -588,6 +615,7 @@ namespace RandomX {
ibc.idst = &r[dst];
ibc.imm = reciprocal(divisor);
ibc.isrc = &ibc.imm;
+ registerUsage[dst] = i; //use the reduced register number (the one indexing r[] above); the raw instr.dst field may exceed the 8-entry table
}
else {
ibc.type = InstructionType::NOP;
@@ -598,6 +626,7 @@ namespace RandomX {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &r[dst];
+ registerUsage[dst] = i; //dst is instr.dst % RegistersCount (computed above); the raw instr.dst field may exceed the 8-entry table
} break;
CASE_REP(IXOR_R) {
@@ -612,6 +641,7 @@ namespace RandomX {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IXOR_M) {
@@ -628,6 +658,7 @@ namespace RandomX {
ibc.isrc = &ibc.imm;
ibc.memMask = ScratchpadL3Mask;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IROR_R) {
@@ -642,6 +673,7 @@ namespace RandomX {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(IROL_R) {
@@ -656,6 +688,7 @@ namespace RandomX {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
+ registerUsage[instr.dst] = i;
} break;
CASE_REP(ISWAP_R) {
@@ -665,6 +698,8 @@ namespace RandomX {
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.type = InstructionType::ISWAP_R;
+ registerUsage[dst] = i; //both swapped registers are modified; index with the reduced numbers used for r[] above,
+ registerUsage[src] = i; //because the raw instr.dst / instr.src fields may exceed the 8-entry table
}
else {
ibc.type = InstructionType::NOP;
@@ -751,6 +786,14 @@ namespace RandomX {
ibc.isrc = &r[src];
ibc.condition = (instr.mod >> 2) & 7;
ibc.imm = instr.getImm32();
+ //jump condition
+ int reg = getConditionRegister(registerUsage);
+ ibc.target = registerUsage[reg];
+ ibc.shift = (instr.mod >> 5);
+ ibc.creg = &r[reg];
+ for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
+ registerUsage[j] = i;
+ }
} break;
CASE_REP(COND_M) {
@@ -762,6 +805,14 @@ namespace RandomX {
ibc.condition = (instr.mod >> 2) & 7;
ibc.imm = instr.getImm32();
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
+ //jump condition
+ int reg = getConditionRegister(registerUsage);
+ ibc.target = registerUsage[reg];
+ ibc.shift = (instr.mod >> 5);
+ ibc.creg = &r[reg];
+ for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
+ registerUsage[j] = i;
+ }
} break;
CASE_REP(CFROUND) {
diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp
index b3c7f80..d6da7e3 100644
--- a/src/InterpretedVirtualMachine.hpp
+++ b/src/InterpretedVirtualMachine.hpp
@@ -52,9 +52,12 @@ namespace RandomX {
uint64_t imm;
int64_t simm;
};
- uint32_t condition;
+ int_reg_t* creg;
+ uint16_t condition;
+ int16_t target;
uint32_t memMask;
- uint32_t type;
+ uint16_t type;
+ uint16_t shift;
};
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
@@ -70,7 +73,7 @@ namespace RandomX {
void operator delete(void* ptr) {
_mm_free(ptr);
}
- InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
+ InterpretedVirtualMachine(bool soft) : softAes(soft) {}
~InterpretedVirtualMachine();
void setDataset(dataset_t ds, uint64_t size) override;
void initialize() override;
@@ -78,7 +81,7 @@ namespace RandomX {
private:
static InstructionHandler engine[256];
DatasetReadFunc readDataset;
- bool softAes, asyncWorker;
+ bool softAes;
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
#ifdef STATS
@@ -112,17 +115,13 @@ namespace RandomX {
int count_FPROUND = 0;
int count_JUMP_taken = 0;
int count_JUMP_not_taken = 0;
- int count_CALL_taken = 0;
- int count_CALL_not_taken = 0;
- int count_RET_stack_empty = 0;
- int count_RET_taken = 0;
int count_jump_taken[8] = { 0 };
int count_jump_not_taken[8] = { 0 };
int count_max_stack = 0;
int count_retdepth = 0;
int count_retdepth_max = 0;
int count_endstack = 0;
- int count_instructions[ProgramLength] = { 0 };
+ int count_instructions[RANDOMX_PROGRAM_SIZE] = { 0 };
int count_FADD_nop = 0;
int count_FADD_nop2 = 0;
int count_FSUB_nop = 0;
@@ -132,8 +131,7 @@ namespace RandomX {
int datasetAccess[256] = { 0 };
#endif
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
- template<int N>
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
- void executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+ void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
};
}
\ No newline at end of file
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index bb3e578..6ab9077 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -18,12 +18,15 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
#include
+#include <climits>
#include
#include "JitCompilerX86.hpp"
#include "Program.hpp"
#include "reciprocal.h"
#include "virtualMemory.hpp"
+#define RANDOMX_JUMP
+
namespace RandomX {
#if !defined(_M_X64) && !defined(__x86_64__)
@@ -174,6 +177,9 @@ namespace RandomX {
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
static const uint8_t CALL = 0xe8;
+ static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
+ static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
+ static const uint8_t JZ[] = { 0x0f, 0x84 };
size_t JitCompilerX86::getCodeSize() {
return codePos - prologueSize;
@@ -203,6 +209,12 @@ namespace RandomX {
}
void JitCompilerX86::generateProgramPrologue(Program& prog) {
+#ifdef RANDOMX_JUMP
+ instructionOffsets.clear();
+ for (unsigned i = 0; i < 8; ++i) {
+ registerUsage[i] = -1;
+ }
+#endif
auto addressRegisters = prog.getEntropy(12);
uint32_t readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1;
@@ -222,7 +234,7 @@ namespace RandomX {
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
- generateCode(instr);
+ generateCode(instr, i);
}
emit(REX_MOV_RR);
emitByte(0xc0 + readReg2);
@@ -241,9 +253,12 @@ namespace RandomX {
emitByte(0x90);
}
- void JitCompilerX86::generateCode(Instruction& instr) {
+ void JitCompilerX86::generateCode(Instruction& instr, int i) {
+#ifdef RANDOMX_JUMP
+ instructionOffsets.push_back(codePos);
+#endif
auto generator = engine[instr.opcode];
- (this->*generator)(instr);
+ (this->*generator)(instr, i);
}
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
@@ -269,7 +284,8 @@ namespace RandomX {
emit32(instr.getImm32() & ScratchpadL3Mask);
}
- void JitCompilerX86::h_IADD_R(Instruction& instr) {
+ void JitCompilerX86::h_IADD_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_ADD_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
@@ -281,7 +297,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IADD_M(Instruction& instr) {
+ void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_ADD_RM);
@@ -299,14 +316,16 @@ namespace RandomX {
emitByte((scale << 6) | (index << 3) | base);
}
- void JitCompilerX86::h_IADD_RC(Instruction& instr) {
+ void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
emit(REX_LEA);
emitByte(0x84 + 8 * instr.dst);
genSIB(0, instr.src, instr.dst);
emit32(instr.getImm32());
}
- void JitCompilerX86::h_ISUB_R(Instruction& instr) {
+ void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_SUB_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
@@ -318,7 +337,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_ISUB_M(Instruction& instr) {
+ void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_SUB_RM);
@@ -332,14 +352,16 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
+ void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
emit(REX_LEA);
emitByte(0x84 + 8 * instr.dst);
genSIB(3, instr.dst, instr.dst);
emit32(instr.getImm32());
}
- void JitCompilerX86::h_IMUL_R(Instruction& instr) {
+ void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_IMUL_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
@@ -351,7 +373,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IMUL_M(Instruction& instr) {
+ void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_IMUL_RM);
@@ -365,7 +388,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IMULH_R(Instruction& instr) {
+ void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
@@ -374,7 +398,8 @@ namespace RandomX {
emitByte(0xc2 + 8 * instr.dst);
}
- void JitCompilerX86::h_IMULH_M(Instruction& instr) {
+ void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, false);
emit(REX_MOV_RR64);
@@ -392,7 +417,8 @@ namespace RandomX {
emitByte(0xc2 + 8 * instr.dst);
}
- void JitCompilerX86::h_ISMULH_R(Instruction& instr) {
+ void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
@@ -401,7 +427,8 @@ namespace RandomX {
emitByte(0xc2 + 8 * instr.dst);
}
- void JitCompilerX86::h_ISMULH_M(Instruction& instr) {
+ void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, false);
emit(REX_MOV_RR64);
@@ -419,8 +446,9 @@ namespace RandomX {
emitByte(0xc2 + 8 * instr.dst);
}
- void JitCompilerX86::h_IMUL_RCP(Instruction& instr) {
+ void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
if (instr.getImm32() != 0) {
+ registerUsage[instr.dst] = i;
emit(MOV_RAX_I);
emit64(reciprocal(instr.getImm32()));
emit(REX_IMUL_RM);
@@ -428,16 +456,18 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
+ void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
}
- void JitCompilerX86::h_INEG_R(Instruction& instr) {
+ void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
emit(REX_NEG);
emitByte(0xd8 + instr.dst);
}
- void JitCompilerX86::h_IXOR_R(Instruction& instr) {
+ void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_XOR_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
@@ -449,7 +479,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IXOR_M(Instruction& instr) {
+ void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_XOR_RM);
@@ -463,7 +494,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IROR_R(Instruction& instr) {
+ void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR);
emitByte(0xc8 + instr.src);
@@ -477,7 +509,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_IROL_R(Instruction& instr) {
+ void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
+ registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR);
emitByte(0xc8 + instr.src);
@@ -491,20 +524,22 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
+ void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
if (instr.src != instr.dst) {
+ registerUsage[instr.dst] = i;
+ registerUsage[instr.src] = i;
emit(REX_XCHG);
emitByte(0xc0 + instr.src + 8 * instr.dst);
}
}
- void JitCompilerX86::h_FSWAP_R(Instruction& instr) {
+ void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
emit(SHUFPD);
emitByte(0xc0 + 9 * instr.dst);
emitByte(1);
}
- void JitCompilerX86::h_FADD_R(Instruction& instr) {
+ void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
emit(REX_ADDPD);
@@ -514,7 +549,7 @@ namespace RandomX {
//emitByte(0xf8 + instr.dst);
}
- void JitCompilerX86::h_FADD_M(Instruction& instr) {
+ void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
instr.dst %= 4;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
@@ -522,7 +557,7 @@ namespace RandomX {
emitByte(0xc4 + 8 * instr.dst);
}
- void JitCompilerX86::h_FSUB_R(Instruction& instr) {
+ void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
emit(REX_SUBPD);
@@ -532,7 +567,7 @@ namespace RandomX {
//emitByte(0xf8 + instr.dst);
}
- void JitCompilerX86::h_FSUB_M(Instruction& instr) {
+ void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
instr.dst %= 4;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
@@ -540,20 +575,20 @@ namespace RandomX {
emitByte(0xc4 + 8 * instr.dst);
}
- void JitCompilerX86::h_FSCAL_R(Instruction& instr) {
+ void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
instr.dst %= 4;
emit(REX_XORPS);
emitByte(0xc7 + 8 * instr.dst);
}
- void JitCompilerX86::h_FMUL_R(Instruction& instr) {
+ void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
emit(REX_MULPD);
emitByte(0xe0 + instr.src + 8 * instr.dst);
}
- void JitCompilerX86::h_FMUL_M(Instruction& instr) {
+ void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
instr.dst %= 4;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
@@ -564,7 +599,7 @@ namespace RandomX {
emitByte(0xe5 + 8 * instr.dst);
}
- void JitCompilerX86::h_FDIV_R(Instruction& instr) {
+ void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
instr.dst %= 4;
instr.src %= 4;
emit(REX_DIVPD);
@@ -573,7 +608,7 @@ namespace RandomX {
emitByte(0xe5 + 8 * instr.dst);
}
- void JitCompilerX86::h_FDIV_M(Instruction& instr) {
+ void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
instr.dst %= 4;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
@@ -582,13 +617,13 @@ namespace RandomX {
emitByte(0xe4 + 8 * instr.dst);
}
- void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
+ void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
instr.dst %= 4;
emit(SQRTPD);
emitByte(0xe4 + 9 * instr.dst);
}
- void JitCompilerX86::h_CFROUND(Instruction& instr) {
+ void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.src);
int rotate = (13 - (instr.getImm32() & 63)) & 63;
@@ -599,6 +634,28 @@ namespace RandomX {
emit(AND_OR_MOV_LDMXCSR);
}
+	//Maps the 3-bit condition field of an instruction (bits 2-4 of instr.mod) to a one-byte
+	//conditional-jump opcode; the mnemonic of each byte is given in the trailing comment.
+	//With invert = true the lowest bit of the field is flipped, which selects the neighbouring table
+	//entry (jbe<->ja, js<->jns, jo<->jno, jl<->jge).
+	static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
+		switch (((instr.mod >> 2) & 7) ^ (invert ? 1 : 0))
+		{
+			case 0:
+				return 0x76; //jbe
+			case 1:
+				return 0x77; //ja
+			case 2:
+				return 0x78; //js
+			case 3:
+				return 0x79; //jns
+			case 4:
+				return 0x70; //jo
+			case 5:
+				return 0x71; //jno
+			case 6:
+				return 0x7c; //jl
+			case 7:
+			default: //unreachable (selector is masked to 0-7); keeps every path returning a value
+				return 0x7d; //jge
+		}
+	}
+
static inline uint8_t condition(Instruction& instr) {
switch ((instr.mod >> 2) & 7)
{
@@ -623,7 +680,40 @@ namespace RandomX {
}
}
- void JitCompilerX86::h_COND_R(Instruction& instr) {
+	//Picks the register that the next COND instruction uses as its jump counter.
+	//Returns the index (0-7) of the smallest entry of registerUsage; when several entries share the
+	//minimum, the lowest index wins. registerUsage[x] holds the index of the instruction that last
+	//marked register x as used (-1 after generateProgramPrologue resets it).
+	int JitCompilerX86::getConditionRegister() {
+		int min = INT_MAX;
+		int minIndex = 0; //defined result even if no entry compares below INT_MAX
+		for (unsigned i = 0; i < 8; ++i) {
+			if (registerUsage[i] < min) {
+				min = registerUsage[i];
+				minIndex = i;
+			}
+		}
+		return minIndex;
+	}
+
+	//Emits the conditional-jump preamble shared by COND_R and COND_M.
+	//Byte sequence: REX_ADD_I + register byte + imm32 (1 << shift), REX_TEST + register byte + imm32
+	//(condition mask), JZ + 32-bit displacement to the recorded code offset of the instruction that
+	//follows the last recorded use of the chosen register. i is the index of the current instruction.
+	void JitCompilerX86::handleCondition(Instruction& instr, int i) {
+		const int bitOffset = instr.mod >> 5;
+		const int testMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << bitOffset;
+		const int counterReg = getConditionRegister();
+		const int jumpTarget = registerUsage[counterReg] + 1;
+
+		//increment the counter register
+		emit(REX_ADD_I);
+		emitByte(0xc0 + counterReg);
+		emit32(1 << bitOffset);
+
+		//test the selected bits of the counter register
+		emit(REX_TEST);
+		emitByte(0xc0 + counterReg);
+		emit32(testMask);
+
+		//displacement is measured from the end of its own 4-byte field, hence codePos + 4
+		emit(JZ);
+		emit32(instructionOffsets[jumpTarget] - (codePos + 4));
+
+		//mark all registers as used
+		for (int& lastUse : registerUsage) {
+			lastUse = i;
+		}
+	}
+
+ void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
+#ifdef RANDOMX_JUMP
+ handleCondition(instr, i);
+#endif
emit(XOR_ECX_ECX);
emit(REX_CMP_R32I);
emitByte(0xf8 + instr.src);
@@ -635,7 +725,10 @@ namespace RandomX {
emitByte(0xc1 + 8 * instr.dst);
}
- void JitCompilerX86::h_COND_M(Instruction& instr) {
+ void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
+#ifdef RANDOMX_JUMP
+ handleCondition(instr, i);
+#endif
emit(XOR_ECX_ECX);
genAddressReg(instr);
emit(REX_CMP_M32I);
@@ -647,21 +740,21 @@ namespace RandomX {
emitByte(0xc1 + 8 * instr.dst);
}
- void JitCompilerX86::h_ISTORE(Instruction& instr) {
+ //Emits a store of the integer source register to memory.
+ //The second parameter (program index of the instruction) is not used by this handler.
+ void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
+ //NOTE(review): genAddressRegDst presumably leaves the masked address in eax
+ //(program.inc shows "mov eax, r11d / and eax, 16376" before the store) - confirm against its definition
genAddressRegDst(instr);
emit(REX_MOV_MR);
+ //ModRM: mod 00, reg = instr.src, r/m = 100 (a SIB byte follows)
emitByte(0x04 + 8 * instr.src);
+ //SIB 0x06: base = rsi, index = rax, scale 1 -> [rsi+rax], cf. "mov qword ptr [rsi+rax], r11" in program.inc
emitByte(0x06);
}
- void JitCompilerX86::h_FSTORE(Instruction& instr) {
+ //Emits a MOVAPD-based store; same ModRM/SIB layout as h_ISTORE.
+ //The second parameter (program index of the instruction) is not used by this handler.
+ void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
+ //NOTE(review): the meaning of the 'true' flag of genAddressRegDst is not visible here - confirm against its definition
genAddressRegDst(instr, true);
emit(MOVAPD);
+ //ModRM: mod 00, reg = instr.src, r/m = 100 (a SIB byte follows)
emitByte(0x04 + 8 * instr.src);
+ //SIB 0x06: base = rsi, index = rax, scale 1 -> [rsi+rax]
emitByte(0x06);
}
- void JitCompilerX86::h_NOP(Instruction& instr) {
+ //Emits a single 0x90 byte (the one-byte x86 NOP); neither parameter is used.
+ void JitCompilerX86::h_NOP(Instruction& instr, int i) {
emitByte(0x90);
}
diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp
index 3c868c0..e127a40 100644
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@@ -29,7 +29,7 @@ namespace RandomX {
class Program;
class JitCompilerX86;
- typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&);
+ typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
constexpr uint32_t CodeSize = 64 * 1024;
@@ -47,17 +47,21 @@ namespace RandomX {
size_t getCodeSize();
private:
static InstructionGeneratorX86 engine[256];
+ //Code positions indexed by program instruction; handleCondition reads them to compute jump displacements.
+ std::vector<int32_t> instructionOffsets;
+ int registerUsage[8];
uint8_t* code;
int32_t codePos;
void generateProgramPrologue(Program&);
void generateProgramEpilogue(Program&);
+ int getConditionRegister();
void genAddressReg(Instruction&, bool);
void genAddressRegDst(Instruction&, bool);
void genAddressImm(Instruction&);
void genSIB(int scale, int index, int base);
- void generateCode(Instruction&);
+ void handleCondition(Instruction&, int);
+ void generateCode(Instruction&, int);
void emitByte(uint8_t val) {
code[codePos] = val;
@@ -92,43 +96,43 @@ namespace RandomX {
codePos += N;
}
- void h_IADD_R(Instruction&);
- void h_IADD_M(Instruction&);
- void h_IADD_RC(Instruction&);
- void h_ISUB_R(Instruction&);
- void h_ISUB_M(Instruction&);
- void h_IMUL_9C(Instruction&);
- void h_IMUL_R(Instruction&);
- void h_IMUL_M(Instruction&);
- void h_IMULH_R(Instruction&);
- void h_IMULH_M(Instruction&);
- void h_ISMULH_R(Instruction&);
- void h_ISMULH_M(Instruction&);
- void h_IMUL_RCP(Instruction&);
- void h_ISDIV_C(Instruction&);
- void h_INEG_R(Instruction&);
- void h_IXOR_R(Instruction&);
- void h_IXOR_M(Instruction&);
- void h_IROR_R(Instruction&);
- void h_IROL_R(Instruction&);
- void h_ISWAP_R(Instruction&);
- void h_FSWAP_R(Instruction&);
- void h_FADD_R(Instruction&);
- void h_FADD_M(Instruction&);
- void h_FSUB_R(Instruction&);
- void h_FSUB_M(Instruction&);
- void h_FSCAL_R(Instruction&);
- void h_FMUL_R(Instruction&);
- void h_FMUL_M(Instruction&);
- void h_FDIV_R(Instruction&);
- void h_FDIV_M(Instruction&);
- void h_FSQRT_R(Instruction&);
- void h_COND_R(Instruction&);
- void h_COND_M(Instruction&);
- void h_CFROUND(Instruction&);
- void h_ISTORE(Instruction&);
- void h_FSTORE(Instruction&);
- void h_NOP(Instruction&);
+ void h_IADD_R(Instruction&, int);
+ void h_IADD_M(Instruction&, int);
+ void h_IADD_RC(Instruction&, int);
+ void h_ISUB_R(Instruction&, int);
+ void h_ISUB_M(Instruction&, int);
+ void h_IMUL_9C(Instruction&, int);
+ void h_IMUL_R(Instruction&, int);
+ void h_IMUL_M(Instruction&, int);
+ void h_IMULH_R(Instruction&, int);
+ void h_IMULH_M(Instruction&, int);
+ void h_ISMULH_R(Instruction&, int);
+ void h_ISMULH_M(Instruction&, int);
+ void h_IMUL_RCP(Instruction&, int);
+ void h_ISDIV_C(Instruction&, int);
+ void h_INEG_R(Instruction&, int);
+ void h_IXOR_R(Instruction&, int);
+ void h_IXOR_M(Instruction&, int);
+ void h_IROR_R(Instruction&, int);
+ void h_IROL_R(Instruction&, int);
+ void h_ISWAP_R(Instruction&, int);
+ void h_FSWAP_R(Instruction&, int);
+ void h_FADD_R(Instruction&, int);
+ void h_FADD_M(Instruction&, int);
+ void h_FSUB_R(Instruction&, int);
+ void h_FSUB_M(Instruction&, int);
+ void h_FSCAL_R(Instruction&, int);
+ void h_FMUL_R(Instruction&, int);
+ void h_FMUL_M(Instruction&, int);
+ void h_FDIV_R(Instruction&, int);
+ void h_FDIV_M(Instruction&, int);
+ void h_FSQRT_R(Instruction&, int);
+ void h_COND_R(Instruction&, int);
+ void h_COND_M(Instruction&, int);
+ void h_CFROUND(Instruction&, int);
+ void h_ISTORE(Instruction&, int);
+ void h_FSTORE(Instruction&, int);
+ void h_NOP(Instruction&, int);
};
}
\ No newline at end of file
diff --git a/src/configuration.h b/src/configuration.h
index 146b329..4c30b59 100644
--- a/src/configuration.h
+++ b/src/configuration.h
@@ -67,6 +67,9 @@ along with RandomX. If not, see.
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
+//How many register bits must be zero for a jump condition to be triggered
+#define RANDOMX_CONDITION_BITS 7
+
/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256
diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp
index 7fceb0b..8c9f566 100644
--- a/src/instructionWeights.hpp
+++ b/src/instructionWeights.hpp
@@ -54,6 +54,7 @@ along with RandomX. If not, see.
#define REP32(x) REP31(x) x,
#define REP33(x) REP32(x) x,
#define REP40(x) REP32(x) REP8(x)
+#define REP64(x) REP32(x) REP32(x)
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
#define REP256(x) REP128(x) REP128(x)
@@ -95,6 +96,8 @@ along with RandomX. If not, see.
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
+#define REPCASE64(x) REPCASE32(x) REPCASE32(x)
+#define REPCASE128(x) REPCASE64(x) REPCASE64(x)
#define REPCASENX(x,N) REPCASE##N(x)
#define REPCASEN(x,N) REPCASENX(x,N)
#define CASE_REP(x) REPCASEN(x, WT(x))
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index fc37f57..740ec28 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -303,7 +303,7 @@ int main(int argc, char** argv) {
if (jit)
vm = new RandomX::CompiledLightVirtualMachine();
else
- vm = new RandomX::InterpretedVirtualMachine(softAes, async);
+ vm = new RandomX::InterpretedVirtualMachine(softAes);
}
vm->setDataset(dataset, datasetSize);
vms.push_back(vm);
@@ -340,7 +340,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(programCount == 1000)
- std::cout << "Reference result: 84f37cc43cb21eabf1d5b9def462060cd24218290678dd80a8ea2f663892629e" << std::endl;
+ std::cout << "Reference result: 9e636a04a2517f37d8ed40b67a7051e02a7577e878fbba5c4352996b2c653f90" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
}
diff --git a/src/program.inc b/src/program.inc
index 4171a54..46d8093 100644
--- a/src/program.inc
+++ b/src/program.inc
@@ -1,708 +1,895 @@
- ; IMULH_R r1, r0
- mov rax, r9
- mul r8
- mov r9, rdx
- ; IMULH_R r4, r5
- mov rax, r12
- mul r13
- mov r12, rdx
- ; FMUL_R e0, a1
- mulpd xmm4, xmm9
- ; IMUL_9C r6, 933674225
- lea r14, [r14+r14*8+933674225]
- ; IROR_R r7, r6
- mov ecx, r14d
- ror r15, cl
- ; FSQRT_R e1
- sqrtpd xmm5, xmm5
- ; IADD_R r1, r0
- add r9, r8
- ; FSCAL_R f1
- xorps xmm1, xmm15
- ; IMUL_R r6, r5
- imul r14, r13
- ; FSCAL_R f3
- xorps xmm3, xmm15
- ; IADD_M r5, L1[r0]
- mov eax, r8d
- and eax, 16376
- add r13, qword ptr [rsi+rax]
- ; IMUL_RCP r0, 3332750793
- mov rax, 11886301652177618669
- imul r8, rax
- ; ISTORE L1[r3], r0
- mov eax, r11d
- and eax, 16376
- mov qword ptr [rsi+rax], r8
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; ISUB_R r1, r3
- sub r9, r11
- ; ISMULH_R r4, r1
- mov rax, r12
- imul r9
- mov r12, rdx
- ; IADD_RC r3, r0, 1262539428
- lea r11, [r11+r8+1262539428]
- ; FSWAP_R e1
- shufpd xmm5, xmm5, 1
- ; FMUL_R e1, a3
- mulpd xmm5, xmm11
- ; FMUL_R e3, a3
- mulpd xmm7, xmm11
- ; ISWAP_R r0, r2
- xchg r8, r10
- ; COND_R r5, of(r4, 137305269)
- xor ecx, ecx
- cmp r12d, 137305269
- seto cl
- add r13, rcx
- ; IMUL_R r6, r4
- imul r14, r12
- ; FMUL_R e3, a0
- mulpd xmm7, xmm8
- ; FSCAL_R f0
- xorps xmm0, xmm15
- ; FADD_R f1, a0
- addpd xmm1, xmm8
- ; IADD_R r6, r3
- add r14, r11
- ; ISMULH_M r1, L3[777112]
- mov rax, r9
- imul qword ptr [rsi+777112]
- mov r9, rdx
- ; FADD_R f1, a1
- addpd xmm1, xmm9
- ; FSUB_M f2, L2[r3]
- mov eax, r11d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm2, xmm12
- ; IMUL_R r5, r7
- imul r13, r15
- ; ISUB_M r1, L1[r3]
- mov eax, r11d
- and eax, 16376
- sub r9, qword ptr [rsi+rax]
- ; IXOR_M r1, L1[r6]
- mov eax, r14d
- and eax, 16376
- xor r9, qword ptr [rsi+rax]
- ; COND_R r2, ns(r3, 1727033430)
- xor ecx, ecx
- cmp r11d, 1727033430
- setns cl
- add r10, rcx
- ; FADD_R f3, a1
- addpd xmm3, xmm9
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; IADD_R r5, -1048707993
- add r13, -1048707993
- ; COND_R r2, ge(r5, -1016934677)
- xor ecx, ecx
- cmp r13d, -1016934677
- setge cl
- add r10, rcx
- ; FSUB_R f2, a3
- subpd xmm2, xmm11
- ; ISUB_M r1, L2[r4]
- mov eax, r12d
- and eax, 262136
- sub r9, qword ptr [rsi+rax]
- ; IMUL_R r5, r3
- imul r13, r11
- ; FSUB_R f1, a3
- subpd xmm1, xmm11
- ; IROR_R r1, r3
- mov ecx, r11d
- ror r9, cl
- ; FADD_R f3, a2
- addpd xmm3, xmm10
- ; ISUB_R r0, -28376526
- sub r8, -28376526
- ; IROR_R r6, r0
- mov ecx, r8d
- ror r14, cl
- ; FADD_R f1, a0
- addpd xmm1, xmm8
- ; FMUL_R e1, a0
- mulpd xmm5, xmm8
- ; IXOR_R r2, r4
- xor r10, r12
- ; FSUB_M f1, L1[r2]
- mov eax, r10d
+randomx_isn_0:
+ ; IROR_R r3, 30
+ ror r11, 30
+randomx_isn_1:
+ ; FSUB_M f1, L1[r7]
+ mov eax, r15d
and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
subpd xmm1, xmm12
- ; FSWAP_R f3
- shufpd xmm3, xmm3, 1
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; ISUB_R r7, r6
- sub r15, r14
- ; FADD_R f3, a1
- addpd xmm3, xmm9
- ; ISUB_R r1, r7
- sub r9, r15
- ; IADD_M r5, L2[r7]
- mov eax, r15d
- and eax, 262136
- add r13, qword ptr [rsi+rax]
- ; IADD_RC r1, r3, 145589392
- lea r9, [r9+r11+145589392]
- ; FADD_R f2, a1
- addpd xmm2, xmm9
- ; FSUB_R f1, a1
- subpd xmm1, xmm9
- ; FADD_M f0, L1[r3]
- mov eax, r11d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm0, xmm12
- ; FADD_R f3, a1
- addpd xmm3, xmm9
- ; FSUB_R f0, a3
- subpd xmm0, xmm11
- ; FMUL_R e2, a2
- mulpd xmm6, xmm10
- ; FADD_R f2, a1
- addpd xmm2, xmm9
- ; IXOR_R r7, r4
- xor r15, r12
- ; FSUB_R f1, a3
- subpd xmm1, xmm11
- ; IMUL_RCP r0, 3339947118
- mov rax, 11860691159940745144
- imul r8, rax
- ; FSCAL_R f2
- xorps xmm2, xmm15
- ; IMUL_9C r0, 850304074
- lea r8, [r8+r8*8+850304074]
- ; IADD_R r2, r4
- add r10, r12
- ; IADD_R r0, -1929760745
- add r8, -1929760745
- ; ISTORE L2[r4], r7
- mov eax, r12d
- and eax, 262136
- mov qword ptr [rsi+rax], r15
- ; IROR_R r2, r7
+randomx_isn_2:
+ ; IXOR_R r3, 1860630909
+ xor r11, 1860630909
+randomx_isn_3:
+ ; IROR_R r5, r7
mov ecx, r15d
- ror r10, cl
- ; FMUL_R e1, a1
- mulpd xmm5, xmm9
+ ror r13, cl
+randomx_isn_4:
+ ; IXOR_R r3, r4
+ xor r11, r12
+randomx_isn_5:
+ ; IROR_R r4, r0
+ mov ecx, r8d
+ ror r12, cl
+randomx_isn_6:
; FSQRT_R e3
sqrtpd xmm7, xmm7
- ; IXOR_R r0, -1150923249
- xor r8, -1150923249
- ; IMUL_9C r7, 586146619
- lea r15, [r15+r15*8+586146619]
- ; FSWAP_R f2
- shufpd xmm2, xmm2, 1
- ; FSUB_M f3, L1[r6]
- mov eax, r14d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm3, xmm12
- ; IXOR_R r0, 292938237
- xor r8, 292938237
- ; COND_R r6, no(r6, -2142285576)
- xor ecx, ecx
- cmp r14d, -2142285576
- setno cl
- add r14, rcx
- ; IMUL_RCP r3, 670137279
- mov rax, 14778345608621248183
- imul r11, rax
- ; ISTORE L1[r1], r5
- mov eax, r9d
- and eax, 16376
- mov qword ptr [rsi+rax], r13
- ; COND_R r3, sg(r1, 1638220289)
- xor ecx, ecx
- cmp r9d, 1638220289
- sets cl
- add r11, rcx
- ; IXOR_R r4, r2
- xor r12, r10
- ; COND_R r2, be(r2, 1131588253)
- xor ecx, ecx
- cmp r10d, 1131588253
- setbe cl
- add r10, rcx
- ; IMULH_R r3, r1
- mov rax, r11
- mul r9
- mov r11, rdx
- ; COND_R r3, sg(r6, 1528901692)
- xor ecx, ecx
- cmp r14d, 1528901692
- sets cl
- add r11, rcx
- ; IMUL_M r6, L2[r4]
- mov eax, r12d
- and eax, 262136
- imul r14, qword ptr [rsi+rax]
- ; ISMULH_M r1, L1[r2]
- mov ecx, r10d
- and ecx, 16376
- mov rax, r9
- imul qword ptr [rsi+rcx]
- mov r9, rdx
- ; ISUB_M r5, L1[r4]
- mov eax, r12d
- and eax, 16376
- sub r13, qword ptr [rsi+rax]
- ; IMUL_RCP r1, 1612208358
- mov rax, 12285658072842024305
- imul r9, rax
- ; COND_R r2, lt(r6, -1712049035)
- xor ecx, ecx
- cmp r14d, -1712049035
- setl cl
- add r10, rcx
- ; IMUL_RCP r2, 2888266520
- mov rax, 13715521397634789187
- imul r10, rax
- ; IADD_M r1, L2[r6]
- mov eax, r14d
- and eax, 262136
- add r9, qword ptr [rsi+rax]
- ; FMUL_R e0, a3
- mulpd xmm4, xmm11
- ; ISTORE L1[r7], r1
- mov eax, r15d
- and eax, 16376
- mov qword ptr [rsi+rax], r9
+randomx_isn_7:
; ISTORE L1[r0], r3
mov eax, r8d
and eax, 16376
mov qword ptr [rsi+rax], r11
- ; FSUB_R f0, a1
- subpd xmm0, xmm9
- ; FADD_R f2, a2
- addpd xmm2, xmm10
+randomx_isn_8:
+ ; FMUL_R e1, a1
+ mulpd xmm5, xmm9
+randomx_isn_9:
; FMUL_R e0, a1
mulpd xmm4, xmm9
- ; FMUL_R e2, a0
- mulpd xmm6, xmm8
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; IROR_R r5, 21
- ror r13, 21
- ; FSQRT_R e1
- sqrtpd xmm5, xmm5
+randomx_isn_10:
+ ; IMUL_M r2, L1[r1]
+ mov eax, r9d
+ and eax, 16376
+ imul r10, qword ptr [rsi+rax]
+randomx_isn_11:
; ISTORE L1[r3], r1
mov eax, r11d
and eax, 16376
mov qword ptr [rsi+rax], r9
- ; IMUL_9C r2, -290275273
- lea r10, [r10+r10*8-290275273]
- ; ISUB_M r7, L1[r3]
- mov eax, r11d
- and eax, 16376
- sub r15, qword ptr [rsi+rax]
- ; IMUL_R r6, 1301522739
- imul r14, 1301522739
- ; ISWAP_R r2, r4
- xchg r10, r12
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; IMUL_9C r2, 877307769
- lea r10, [r10+r10*8+877307769]
- ; IMUL_R r0, r3
- imul r8, r11
- ; IMUL_9C r0, 1293318220
- lea r8, [r8+r8*8+1293318220]
- ; FSQRT_R e0
- sqrtpd xmm4, xmm4
- ; ISTORE L1[r0], r2
- mov eax, r8d
- and eax, 16376
- mov qword ptr [rsi+rax], r10
- ; IMUL_RCP r5, 2071364883
- mov rax, 9562313618003962461
- imul r13, rax
- ; FMUL_R e1, a2
- mulpd xmm5, xmm10
- ; FSUB_R f1, a3
- subpd xmm1, xmm11
- ; FSUB_R f0, a1
- subpd xmm0, xmm9
- ; IMULH_R r6, r1
- mov rax, r14
- mul r9
- mov r14, rdx
- ; ISTORE L1[r6], r5
- mov eax, r14d
- and eax, 16376
- mov qword ptr [rsi+rax], r13
- ; ISTORE L2[r1], r2
- mov eax, r9d
- and eax, 262136
- mov qword ptr [rsi+rax], r10
- ; ISUB_M r1, L2[r4]
- mov eax, r12d
- and eax, 262136
- sub r9, qword ptr [rsi+rax]
- ; IADD_M r7, L1[r6]
- mov eax, r14d
- and eax, 16376
- add r15, qword ptr [rsi+rax]
- ; IADD_RC r2, r0, -1705364403
- lea r10, [r10+r8-1705364403]
- ; ISTORE L1[r6], r5
- mov eax, r14d
- and eax, 16376
- mov qword ptr [rsi+rax], r13
- ; FSUB_M f0, L1[r5]
- mov eax, r13d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm0, xmm12
- ; IXOR_R r1, r3
- xor r9, r11
- ; FADD_R f2, a0
- addpd xmm2, xmm8
- ; FSCAL_R f2
- xorps xmm2, xmm15
- ; ISUB_R r6, -789651909
- sub r14, -789651909
- ; COND_R r4, sg(r1, -1404926795)
- xor ecx, ecx
- cmp r9d, -1404926795
- sets cl
- add r12, rcx
- ; FSCAL_R f2
- xorps xmm2, xmm15
- ; ISUB_R r6, r7
- sub r14, r15
- ; IXOR_R r5, r2
- xor r13, r10
- ; IROR_R r6, r5
- mov ecx, r13d
- ror r14, cl
- ; FSUB_R f1, a2
- subpd xmm1, xmm10
- ; IMUL_M r4, L1[r5]
- mov eax, r13d
- and eax, 16376
- imul r12, qword ptr [rsi+rax]
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; FSWAP_R e1
- shufpd xmm5, xmm5, 1
- ; IADD_RC r6, r5, 1744830258
- lea r14, [r14+r13+1744830258]
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; ISUB_R r7, r0
- sub r15, r8
- ; FSUB_R f1, a3
- subpd xmm1, xmm11
- ; IMUL_9C r4, 241775739
- lea r12, [r12+r12*8+241775739]
- ; FADD_R f0, a0
- addpd xmm0, xmm8
- ; IMUL_R r4, r3
- imul r12, r11
- ; IMUL_RCP r4, 2389176791
- mov rax, 16580640414036304271
- imul r12, rax
- ; FSCAL_R f1
- xorps xmm1, xmm15
- ; FSUB_R f2, a1
- subpd xmm2, xmm9
- ; ISTORE L2[r2], r0
- mov eax, r10d
- and eax, 262136
- mov qword ptr [rsi+rax], r8
- ; IXOR_M r5, L1[r7]
- mov eax, r15d
- and eax, 16376
- xor r13, qword ptr [rsi+rax]
- ; IMULH_M r4, L1[r1]
- mov ecx, r9d
- and ecx, 16376
- mov rax, r12
- mul qword ptr [rsi+rcx]
- mov r12, rdx
- ; FMUL_R e2, a1
- mulpd xmm6, xmm9
- ; IXOR_R r0, r5
- xor r8, r13
- ; IROR_R r0, r7
- mov ecx, r15d
- ror r8, cl
- ; IADD_RC r6, r5, 472588845
- lea r14, [r14+r13+472588845]
- ; FADD_R f0, a0
- addpd xmm0, xmm8
- ; FSCAL_R f0
- xorps xmm0, xmm15
- ; IROR_R r2, r1
- mov ecx, r9d
- ror r10, cl
- ; IADD_RC r2, r1, 1968510355
- lea r10, [r10+r9+1968510355]
- ; FMUL_R e0, a0
- mulpd xmm4, xmm8
- ; ISUB_R r7, r1
- sub r15, r9
- ; IADD_RC r4, r7, 1111936914
- lea r12, [r12+r15+1111936914]
- ; IADD_RC r7, r3, 373642756
- lea r15, [r15+r11+373642756]
- ; FSUB_R f0, a0
- subpd xmm0, xmm8
- ; IMUL_RCP r6, 3388328460
- mov rax, 11691334451422153092
- imul r14, rax
- ; FSWAP_R e1
- shufpd xmm5, xmm5, 1
- ; IADD_RC r7, r5, -644292398
- lea r15, [r15+r13-644292398]
- ; IMUL_9C r7, -1398596563
- lea r15, [r15+r15*8-1398596563]
- ; FADD_R f0, a3
- addpd xmm0, xmm11
- ; FDIV_M e1, L1[r5]
- mov eax, r13d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm13
- orps xmm12, xmm14
- divpd xmm5, xmm12
- ; IXOR_M r2, L1[r5]
- mov eax, r13d
- and eax, 16376
- xor r10, qword ptr [rsi+rax]
- ; IADD_R r5, r6
- add r13, r14
- ; IROR_R r4, r0
- mov ecx, r8d
- ror r12, cl
- ; IXOR_R r0, r6
- xor r8, r14
- ; IMUL_RCP r1, 1035942442
- mov rax, 9559913671615977868
- imul r9, rax
- ; IMUL_9C r1, 105267179
- lea r9, [r9+r9*8+105267179]
- ; IMUL_M r1, L1[r2]
- mov eax, r10d
- and eax, 16376
- imul r9, qword ptr [rsi+rax]
- ; COND_R r6, be(r7, 1344676209)
- xor ecx, ecx
- cmp r15d, 1344676209
- setbe cl
- add r14, rcx
- ; IADD_R r6, r1
- add r14, r9
- ; IROR_R r5, r1
- mov ecx, r9d
- ror r13, cl
- ; ISMULH_R r0, r6
- mov rax, r8
- imul r14
- mov r8, rdx
- ; IXOR_R r6, r7
- xor r14, r15
- ; FSUB_R f1, a3
- subpd xmm1, xmm11
- ; IMUL_9C r1, 1991866007
- lea r9, [r9+r9*8+1991866007]
- ; IMUL_RCP r2, 4139294400
- mov rax, 9570249764581173254
- imul r10, rax
- ; FSWAP_R f0
- shufpd xmm0, xmm0, 1
- ; ISUB_R r5, r2
- sub r13, r10
- ; COND_R r6, lt(r1, -834783176)
- xor ecx, ecx
- cmp r9d, -834783176
- setl cl
- add r14, rcx
- ; ISTORE L2[r7], r3
- mov eax, r15d
- and eax, 262136
- mov qword ptr [rsi+rax], r11
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; FSCAL_R f1
- xorps xmm1, xmm15
+randomx_isn_12:
; IMUL_R r7, r4
imul r15, r12
- ; IMUL_RCP r4, 3027698566
- mov rax, 13083892069700893994
- imul r12, rax
- ; IMULH_M r2, L1[r3]
+randomx_isn_13:
+ ; FSQRT_R e2
+ sqrtpd xmm6, xmm6
+randomx_isn_14:
+ ; FSQRT_R e2
+ sqrtpd xmm6, xmm6
+randomx_isn_15:
+ ; IADD_R r6, r2
+ add r14, r10
+randomx_isn_16:
+ ; FSUB_M f2, L1[r1]
+ mov eax, r9d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm2, xmm12
+randomx_isn_17:
+ ; IROR_R r4, r3
mov ecx, r11d
- and ecx, 16376
- mov rax, r10
- mul qword ptr [rsi+rcx]
- mov r10, rdx
- ; IADD_M r6, L1[r1]
- mov eax, r9d
+ ror r12, cl
+randomx_isn_18:
+ ; ISTORE L1[r4], r4
+ mov eax, r12d
and eax, 16376
- add r14, qword ptr [rsi+rax]
- ; IMUL_M r3, L1[r1]
- mov eax, r9d
+ mov qword ptr [rsi+rax], r12
+randomx_isn_19:
+ ; FMUL_R e1, a2
+ mulpd xmm5, xmm10
+randomx_isn_20:
+ ; COND_R r6, of(r3, 1593588996), 1
+ add r8, 2
+ test r8, 254
+ jz randomx_isn_0
+ xor ecx, ecx
+ cmp r11d, 1593588996
+ seto cl
+ add r14, rcx
+randomx_isn_21:
+ ; IXOR_M r7, L1[r2]
+ mov eax, r10d
and eax, 16376
- imul r11, qword ptr [rsi+rax]
+ xor r15, qword ptr [rsi+rax]
+randomx_isn_22:
+ ; IXOR_M r1, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ xor r9, qword ptr [rsi+rax]
+randomx_isn_23:
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+randomx_isn_24:
+ ; COND_R r6, no(r0, 149087159), 6
+ add r8, 64
+ test r8, 8128
+ jz randomx_isn_21
+ xor ecx, ecx
+ cmp r8d, 149087159
+ setno cl
+ add r14, rcx
+randomx_isn_25:
+ ; FADD_R f3, a0
+ addpd xmm3, xmm8
+randomx_isn_26:
+ ; IADD_R r7, r0
+ add r15, r8
+randomx_isn_27:
+ ; IMUL_R r2, r3
+ imul r10, r11
+randomx_isn_28:
+ ; IADD_R r5, r7
+ add r13, r15
+randomx_isn_29:
+ ; ISTORE L2[r6], r2
+ mov eax, r14d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r10
+randomx_isn_30:
; ISTORE L1[r7], r5
mov eax, r15d
and eax, 16376
mov qword ptr [rsi+rax], r13
- ; IADD_RC r3, r1, -183791073
- lea r11, [r11+r9-183791073]
- ; IMUL_9C r6, 1353963989
- lea r14, [r14+r14*8+1353963989]
- ; ISUB_R r2, r3
- sub r10, r11
- ; IMUL_R r2, r1
- imul r10, r9
- ; IMULH_R r6, r4
- mov rax, r14
- mul r12
- mov r14, rdx
- ; ISMULH_R r6, r4
- mov rax, r14
- imul r12
- mov r14, rdx
- ; IADD_R r7, r4
- add r15, r12
- ; FMUL_R e3, a1
- mulpd xmm7, xmm9
+randomx_isn_31:
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+randomx_isn_32:
+ ; IMUL_R r3, r5
+ imul r11, r13
+randomx_isn_33:
+ ; IROR_R r1, 20
+ ror r9, 20
+randomx_isn_34:
+ ; FSCAL_R f1
+ xorps xmm1, xmm15
+randomx_isn_35:
+ ; IMUL_R r6, 835132161
+ imul r14, 835132161
+randomx_isn_36:
+ ; IADD_M r3, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ add r11, qword ptr [rsi+rax]
+randomx_isn_37:
+ ; IMUL_9C r6, 1885029796
+ lea r14, [r14+r14*8+1885029796]
+randomx_isn_38:
+ ; FSCAL_R f2
+ xorps xmm2, xmm15
+randomx_isn_39:
+ ; ISUB_M r5, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ sub r13, qword ptr [rsi+rax]
+randomx_isn_40:
+ ; IMUL_R r7, r2
+ imul r15, r10
+randomx_isn_41:
+ ; FMUL_R e1, a0
+ mulpd xmm5, xmm8
+randomx_isn_42:
+ ; IXOR_R r5, r0
+ xor r13, r8
+randomx_isn_43:
+ ; FSWAP_R e0
+ shufpd xmm4, xmm4, 1
+randomx_isn_44:
; FADD_R f1, a2
addpd xmm1, xmm10
- ; IADD_R r5, r6
- add r13, r14
- ; IADD_RC r4, r0, -1810659257
- lea r12, [r12+r8-1810659257]
- ; IROR_R r2, r5
- mov ecx, r13d
+randomx_isn_45:
+ ; ISTORE L1[r0], r5
+ mov eax, r8d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r13
+randomx_isn_46:
+ ; IADD_M r0, L2[r7]
+ mov eax, r15d
+ and eax, 262136
+ add r8, qword ptr [rsi+rax]
+randomx_isn_47:
+ ; IXOR_R r5, r2
+ xor r13, r10
+randomx_isn_48:
+ ; FSUB_R f3, a3
+ subpd xmm3, xmm11
+randomx_isn_49:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_50:
+ ; FSUB_R f3, a0
+ subpd xmm3, xmm8
+randomx_isn_51:
+ ; COND_R r2, be(r3, -1975981803), 7
+ add r12, 128
+ test r12, 16256
+ jz randomx_isn_25
+ xor ecx, ecx
+ cmp r11d, -1975981803
+ setbe cl
+ add r10, rcx
+randomx_isn_52:
+ ; IADD_RC r1, r1, 878232328
+ lea r9, [r9+r9+878232328]
+randomx_isn_53:
+ ; FSUB_R f2, a0
+ subpd xmm2, xmm8
+randomx_isn_54:
+ ; COND_R r5, ns(r1, 1917049931), 6
+ add r8, 64
+ test r8, 8128
+ jz randomx_isn_52
+ xor ecx, ecx
+ cmp r9d, 1917049931
+ setns cl
+ add r13, rcx
+randomx_isn_55:
+ ; IXOR_R r2, r3
+ xor r10, r11
+randomx_isn_56:
+ ; FSCAL_R f0
+ xorps xmm0, xmm15
+randomx_isn_57:
+ ; IMUL_R r5, r1
+ imul r13, r9
+randomx_isn_58:
+ ; IADD_R r5, r1
+ add r13, r9
+randomx_isn_59:
+ ; FMUL_R e2, a2
+ mulpd xmm6, xmm10
+randomx_isn_60:
+ ; IROR_R r2, r6
+ mov ecx, r14d
ror r10, cl
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; FSWAP_R e2
- shufpd xmm6, xmm6, 1
- ; FADD_M f0, L1[r2]
+randomx_isn_61:
+ ; IADD_RC r0, r3, 553576025
+ lea r8, [r8+r11+553576025]
+randomx_isn_62:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_63:
+ ; IMUL_9C r6, -1165860156
+ lea r14, [r14+r14*8-1165860156]
+randomx_isn_64:
+ ; IMUL_9C r5, -1323706896
+ lea r13, [r13+r13*8-1323706896]
+randomx_isn_65:
+ ; IMUL_RCP r5, 2362240456
+ mov rax, 16769707400664451577
+ imul r13, rax
+randomx_isn_66:
+ ; ISUB_R r4, 841292629
+ sub r12, 841292629
+randomx_isn_67:
+ ; IADD_M r4, L1[r6]
+ mov eax, r14d
+ and eax, 16376
+ add r12, qword ptr [rsi+rax]
+randomx_isn_68:
+ ; FSUB_M f3, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm3, xmm12
+randomx_isn_69:
+ ; IADD_RC r6, r4, -1863144764
+ lea r14, [r14+r12-1863144764]
+randomx_isn_70:
+ ; FSUB_M f1, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+randomx_isn_71:
+ ; FSWAP_R e1
+ shufpd xmm5, xmm5, 1
+randomx_isn_72:
+ ; FADD_R f2, a0
+ addpd xmm2, xmm8
+randomx_isn_73:
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+randomx_isn_74:
+ ; COND_R r6, ns(r3, -1200328848), 2
+ add r9, 4
+ test r9, 508
+ jz randomx_isn_55
+ xor ecx, ecx
+ cmp r11d, -1200328848
+ setns cl
+ add r14, rcx
+randomx_isn_75:
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+randomx_isn_76:
+ ; FDIV_M e3, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ andps xmm12, xmm13
+ orps xmm12, xmm14
+ divpd xmm7, xmm12
+randomx_isn_77:
+ ; FADD_R f1, a2
+ addpd xmm1, xmm10
+randomx_isn_78:
+ ; FMUL_R e2, a1
+ mulpd xmm6, xmm9
+randomx_isn_79:
+ ; IADD_RC r3, r1, -919815727
+ lea r11, [r11+r9-919815727]
+randomx_isn_80:
+ ; ISTORE L1[r2], r4
mov eax, r10d
and eax, 16376
+ mov qword ptr [rsi+rax], r12
+randomx_isn_81:
+ ; IMULH_R r3, r0
+ mov rax, r11
+ mul r8
+ mov r11, rdx
+randomx_isn_82:
+ ; IXOR_R r2, r6
+ xor r10, r14
+randomx_isn_83:
+ ; FSUB_R f0, a2
+ subpd xmm0, xmm10
+randomx_isn_84:
+ ; ISMULH_R r1, r7
+ mov rax, r9
+ imul r15
+ mov r9, rdx
+randomx_isn_85:
+ ; FSCAL_R f0
+ xorps xmm0, xmm15
+randomx_isn_86:
+ ; ISUB_R r7, r3
+ sub r15, r11
+randomx_isn_87:
+ ; IXOR_R r4, r2
+ xor r12, r10
+randomx_isn_88:
+ ; IMUL_R r1, r3
+ imul r9, r11
+randomx_isn_89:
+ ; COND_M r2, no(L1[r0], -122257389), 6
+ add r8, 64
+ test r8, 8128
+ jz randomx_isn_75
+ xor ecx, ecx
+ mov eax, r8d
+ and eax, 16376
+ cmp dword ptr [rsi+rax], -122257389
+ setno cl
+ add r10, rcx
+randomx_isn_90:
+ ; ISTORE L1[r5], r7
+ mov eax, r13d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r15
+randomx_isn_91:
+ ; ISTORE L1[r6], r5
+ mov eax, r14d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r13
+randomx_isn_92:
+ ; FSUB_R f2, a0
+ subpd xmm2, xmm8
+randomx_isn_93:
+ ; FADD_R f0, a1
+ addpd xmm0, xmm9
+randomx_isn_94:
+ ; IXOR_R r6, r1
+ xor r14, r9
+randomx_isn_95:
+ ; ISUB_M r0, L3[910032]
+ sub r8, qword ptr [rsi+910032]
+randomx_isn_96:
+ ; FSWAP_R e3
+ shufpd xmm7, xmm7, 1
+randomx_isn_97:
+ ; IMUL_M r4, L1[r2]
+ mov eax, r10d
+ and eax, 16376
+ imul r12, qword ptr [rsi+rax]
+randomx_isn_98:
+ ; IMUL_9C r0, 2144355962
+ lea r8, [r8+r8*8+2144355962]
+randomx_isn_99:
+ ; IMULH_R r1, r5
+ mov rax, r9
+ mul r13
+ mov r9, rdx
+randomx_isn_100:
+ ; ISTORE L1[r7], r3
+ mov eax, r15d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+randomx_isn_101:
+ ; ISWAP_R r0, r0
+randomx_isn_102:
+ ; IMUL_R r2, r7
+ imul r10, r15
+randomx_isn_103:
+ ; ISUB_R r2, -1777504751
+ sub r10, -1777504751
+randomx_isn_104:
+ ; ISTORE L2[r6], r7
+ mov eax, r14d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r15
+randomx_isn_105:
+ ; FADD_R f3, a1
+ addpd xmm3, xmm9
+randomx_isn_106:
+ ; FSUB_R f2, a2
+ subpd xmm2, xmm10
+randomx_isn_107:
+ ; ISMULH_R r6, r5
+ mov rax, r14
+ imul r13
+ mov r14, rdx
+randomx_isn_108:
+ ; IADD_M r7, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ add r15, qword ptr [rsi+rax]
+randomx_isn_109:
+ ; IMUL_R r6, r5
+ imul r14, r13
+randomx_isn_110:
+ ; IMUL_R r5, r1
+ imul r13, r9
+randomx_isn_111:
+ ; FADD_M f2, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm2, xmm12
+randomx_isn_112:
+ ; IADD_R r0, r3
+ add r8, r11
+randomx_isn_113:
+ ; IADD_RC r3, r4, -1138304368
+ lea r11, [r11+r12-1138304368]
+randomx_isn_114:
+ ; IADD_M r2, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ add r10, qword ptr [rsi+rax]
+randomx_isn_115:
+ ; IMUL_M r7, L1[r2]
+ mov eax, r10d
+ and eax, 16376
+ imul r15, qword ptr [rsi+rax]
+randomx_isn_116:
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+randomx_isn_117:
+ ; FSUB_R f2, a2
+ subpd xmm2, xmm10
+randomx_isn_118:
+ ; IADD_R r2, 160326201
+ add r10, 160326201
+randomx_isn_119:
+ ; ISUB_M r7, L3[1780152]
+ sub r15, qword ptr [rsi+1780152]
+randomx_isn_120:
+ ; IADD_R r4, r1
+ add r12, r9
+randomx_isn_121:
+ ; IADD_R r4, r7
+ add r12, r15
+randomx_isn_122:
+ ; FSUB_R f0, a1
+ subpd xmm0, xmm9
+randomx_isn_123:
+ ; FSQRT_R e0
+ sqrtpd xmm4, xmm4
+randomx_isn_124:
+ ; FSUB_R f2, a2
+ subpd xmm2, xmm10
+randomx_isn_125:
+ ; ISMULH_M r2, L2[r1]
+ mov ecx, r9d
+ and ecx, 262136
+ mov rax, r10
+ imul qword ptr [rsi+rcx]
+ mov r10, rdx
+randomx_isn_126:
+ ; FSUB_M f2, L2[r2]
+ mov eax, r10d
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm2, xmm12
+randomx_isn_127:
+ ; IMUL_R r2, r4
+ imul r10, r12
+randomx_isn_128:
+ ; FSUB_R f3, a1
+ subpd xmm3, xmm9
+randomx_isn_129:
+ ; IADD_RC r1, r2, 697183462
+ lea r9, [r9+r10+697183462]
+randomx_isn_130:
+ ; FSUB_R f1, a1
+ subpd xmm1, xmm9
+randomx_isn_131:
+ ; IMUL_M r2, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ imul r10, qword ptr [rsi+rax]
+randomx_isn_132:
+ ; IXOR_M r5, L3[1438200]
+ xor r13, qword ptr [rsi+1438200]
+randomx_isn_133:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_134:
+ ; IROR_R r5, r1
+ mov ecx, r9d
+ ror r13, cl
+randomx_isn_135:
+ ; FMUL_R e1, a2
+ mulpd xmm5, xmm10
+randomx_isn_136:
+ ; ISUB_M r3, L2[r6]
+ mov eax, r14d
+ and eax, 262136
+ sub r11, qword ptr [rsi+rax]
+randomx_isn_137:
+ ; IADD_RC r4, r1, -1660063210
+ lea r12, [r12+r9-1660063210]
+randomx_isn_138:
+ ; ISTORE L1[r0], r0
+ mov eax, r8d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r8
+randomx_isn_139:
+ ; FADD_M f0, L1[r5]
+ mov eax, r13d
+ and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
addpd xmm0, xmm12
- ; IADD_R r0, 52817665
- add r8, 52817665
- ; IMUL_RCP r6, 3388141601
- mov rax, 11691979238837063231
- imul r14, rax
- ; IMUL_RCP r3, 1356467790
- mov rax, 14601924774465956466
- imul r11, rax
- ; IADD_RC r7, r4, -2056421852
- lea r15, [r15+r12-2056421852]
- ; FSUB_M f1, L2[r4]
- mov eax, r12d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm1, xmm12
- ; ISWAP_R r1, r5
- xchg r9, r13
- ; ISTORE L2[r3], r5
+randomx_isn_140:
+ ; ISUB_M r7, L1[r3]
mov eax, r11d
- and eax, 262136
- mov qword ptr [rsi+rax], r13
- ; FMUL_R e0, a3
- mulpd xmm4, xmm11
- ; IADD_RC r1, r4, -129008866
- lea r9, [r9+r12-129008866]
- ; COND_R r6, no(r4, 311828213)
- xor ecx, ecx
- cmp r12d, 311828213
- setno cl
- add r14, rcx
- ; FSWAP_R e2
- shufpd xmm6, xmm6, 1
- ; IADD_RC r2, r2, 498744396
- lea r10, [r10+r10+498744396]
- ; IADD_RC r2, r3, 1515945097
- lea r10, [r10+r11+1515945097]
- ; FMUL_R e0, a2
- mulpd xmm4, xmm10
- ; ISTORE L2[r5], r7
- mov eax, r13d
- and eax, 262136
- mov qword ptr [rsi+rax], r15
- ; IMUL_M r7, L2[r0]
- mov eax, r8d
- and eax, 262136
- imul r15, qword ptr [rsi+rax]
- ; IADD_R r0, r2
- add r8, r10
- ; IADD_RC r7, r3, 1081450346
- lea r15, [r15+r11+1081450346]
- ; FADD_R f1, a3
- addpd xmm1, xmm11
- ; FSCAL_R f3
- xorps xmm3, xmm15
- ; FADD_M f3, L2[r7]
- mov eax, r15d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm3, xmm12
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; COND_M r2, of(L1[r5], -255033167)
+ and eax, 16376
+ sub r15, qword ptr [rsi+rax]
+randomx_isn_141:
+ ; IROR_R r3, r2
+ mov ecx, r10d
+ ror r11, cl
+randomx_isn_142:
+ ; FADD_R f1, a0
+ addpd xmm1, xmm8
+randomx_isn_143:
+ ; COND_R r5, ge(r1, 880467599), 2
+ add r14, 4
+ test r14, 508
+ jz randomx_isn_110
xor ecx, ecx
+ cmp r9d, 880467599
+ setge cl
+ add r13, rcx
+randomx_isn_144:
+ ; FSUB_M f1, L1[r5]
mov eax, r13d
and eax, 16376
- cmp dword ptr [rsi+rax], -255033167
- seto cl
- add r10, rcx
- ; FSUB_R f1, a1
- subpd xmm1, xmm9
- ; IADD_R r2, r5
- add r10, r13
- ; FSQRT_R e2
- sqrtpd xmm6, xmm6
- ; IMUL_9C r2, 1521722302
- lea r10, [r10+r10*8+1521722302]
- ; FADD_R f0, a3
- addpd xmm0, xmm11
- ; ISUB_R r0, r5
- sub r8, r13
- ; FADD_R f2, a0
- addpd xmm2, xmm8
- ; ISWAP_R r6, r0
- xchg r14, r8
- ; IADD_RC r1, r4, -693164762
- lea r9, [r9+r12-693164762]
- ; FDIV_M e0, L2[r2]
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+randomx_isn_145:
+ ; ISUB_R r5, r3
+ sub r13, r11
+randomx_isn_146:
+ ; IADD_RC r0, r3, 1228198394
+ lea r8, [r8+r11+1228198394]
+randomx_isn_147:
+ ; IADD_RC r1, r3, 1747766580
+ lea r9, [r9+r11+1747766580]
+randomx_isn_148:
+ ; FSQRT_R e1
+ sqrtpd xmm5, xmm5
+randomx_isn_149:
+ ; IADD_R r4, r3
+ add r12, r11
+randomx_isn_150:
+ ; FADD_M f1, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm1, xmm12
+randomx_isn_151:
+ ; IADD_RC r1, r0, 1063245428
+ lea r9, [r9+r8+1063245428]
+randomx_isn_152:
+ ; FSUB_R f1, a0
+ subpd xmm1, xmm8
+randomx_isn_153:
+ ; FSUB_R f0, a1
+ subpd xmm0, xmm9
+randomx_isn_154:
+ ; IMUL_R r2, r6
+ imul r10, r14
+randomx_isn_155:
+ ; CFROUND r3, 46
+ mov rax, r11
+ rol rax, 31
+ and eax, 24576
+ or eax, 40896
+ mov dword ptr [rsp-8], eax
+ ldmxcsr dword ptr [rsp-8]
+randomx_isn_156:
+ ; FSUB_R f3, a2
+ subpd xmm3, xmm10
+randomx_isn_157:
+ ; ISTORE L1[r1], r1
+ mov eax, r9d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r9
+randomx_isn_158:
+ ; ISTORE L1[r6], r4
+ mov eax, r14d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r12
+randomx_isn_159:
+ ; IADD_M r7, L1[r2]
mov eax, r10d
- and eax, 262136
+ and eax, 16376
+ add r15, qword ptr [rsi+rax]
+randomx_isn_160:
+ ; IMUL_RCP r7, 2040763167
+ mov rax, 9705702723791900149
+ imul r15, rax
+randomx_isn_161:
+ ; FADD_R f3, a3
+ addpd xmm3, xmm11
+randomx_isn_162:
+ ; IADD_RC r6, r4, -783948693
+ lea r14, [r14+r12-783948693]
+randomx_isn_163:
+ ; ISWAP_R r3, r5
+ xchg r11, r13
+randomx_isn_164:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_165:
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+randomx_isn_166:
+ ; IROR_R r5, r3
+ mov ecx, r11d
+ ror r13, cl
+randomx_isn_167:
+ ; IMUL_9C r2, 805006473
+ lea r10, [r10+r10*8+805006473]
+randomx_isn_168:
+ ; FDIV_M e0, L1[r4]
+ mov eax, r12d
+ and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
andps xmm12, xmm13
orps xmm12, xmm14
divpd xmm4, xmm12
- ; IMUL_9C r4, -1849458799
- lea r12, [r12+r12*8-1849458799]
- ; IADD_RC r1, r4, -651820510
- lea r9, [r9+r12-651820510]
- ; IMULH_R r6, r6
- mov rax, r14
- mul r14
- mov r14, rdx
- ; FSUB_M f3, L2[r0]
+randomx_isn_169:
+ ; IMUL_9C r3, 1773188989
+ lea r11, [r11+r11*8+1773188989]
+randomx_isn_170:
+ ; FADD_R f0, a3
+ addpd xmm0, xmm11
+randomx_isn_171:
+ ; FADD_R f1, a0
+ addpd xmm1, xmm8
+randomx_isn_172:
+ ; ISTORE L1[r7], r6
+ mov eax, r15d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r14
+randomx_isn_173:
+ ; FSUB_M f0, L1[r7]
+ mov eax, r15d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm0, xmm12
+randomx_isn_174:
+ ; ISWAP_R r3, r0
+ xchg r11, r8
+randomx_isn_175:
+ ; IMULH_R r0, r3
+ mov rax, r8
+ mul r11
+ mov r8, rdx
+randomx_isn_176:
+ ; IMUL_M r2, L3[1439696]
+ imul r10, qword ptr [rsi+1439696]
+randomx_isn_177:
+ ; IMUL_M r3, L3[232968]
+ imul r11, qword ptr [rsi+232968]
+randomx_isn_178:
+ ; IADD_RC r5, r3, -2108568616
+ lea r13, [r13+r11-2108568616]
+randomx_isn_179:
+ ; IADD_RC r3, r4, 1322108729
+ lea r11, [r11+r12+1322108729]
+randomx_isn_180:
+ ; FADD_R f3, a1
+ addpd xmm3, xmm9
+randomx_isn_181:
+ ; FSQRT_R e3
+ sqrtpd xmm7, xmm7
+randomx_isn_182:
+ ; FMUL_R e2, a2
+ mulpd xmm6, xmm10
+randomx_isn_183:
+ ; IADD_M r6, L2[r2]
+ mov eax, r10d
+ and eax, 262136
+ add r14, qword ptr [rsi+rax]
+randomx_isn_184:
+ ; FADD_R f2, a3
+ addpd xmm2, xmm11
+randomx_isn_185:
+ ; FSWAP_R f3
+ shufpd xmm3, xmm3, 1
+randomx_isn_186:
+ ; FSCAL_R f3
+ xorps xmm3, xmm15
+randomx_isn_187:
+ ; IADD_RC r6, r6, -914790425
+ lea r14, [r14+r14-914790425]
+randomx_isn_188:
+ ; FSCAL_R f2
+ xorps xmm2, xmm15
+randomx_isn_189:
+ ; IMUL_M r4, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ imul r12, qword ptr [rsi+rax]
+randomx_isn_190:
+ ; FSUB_M f2, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm2, xmm12
+randomx_isn_191:
+ ; IMUL_M r4, L2[r3]
+ mov eax, r11d
+ and eax, 262136
+ imul r12, qword ptr [rsi+rax]
+randomx_isn_192:
+ ; ISUB_M r7, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ sub r15, qword ptr [rsi+rax]
+randomx_isn_193:
+ ; ISTORE L1[r1], r1
+ mov eax, r9d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r9
+randomx_isn_194:
+ ; ISTORE L1[r3], r4
+ mov eax, r11d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r12
+randomx_isn_195:
+ ; FMUL_R e2, a2
+ mulpd xmm6, xmm10
+randomx_isn_196:
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+randomx_isn_197:
+ ; FADD_M f2, L2[r0]
mov eax, r8d
and eax, 262136
cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm3, xmm12
- ; FSUB_R f0, a2
- subpd xmm0, xmm10
+ addpd xmm2, xmm12
+randomx_isn_198:
+ ; FADD_R f1, a2
+ addpd xmm1, xmm10
+randomx_isn_199:
+ ; FSUB_R f3, a3
+ subpd xmm3, xmm11
+randomx_isn_200:
+ ; IADD_RC r2, r5, 248917123
+ lea r10, [r10+r13+248917123]
+randomx_isn_201:
+ ; IMUL_9C r6, 376384700
+ lea r14, [r14+r14*8+376384700]
+randomx_isn_202:
+ ; ISWAP_R r3, r6
+ xchg r11, r14
+randomx_isn_203:
+ ; ISTORE L1[r1], r3
+ mov eax, r9d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+randomx_isn_204:
+ ; IMUL_R r6, r1
+ imul r14, r9
+randomx_isn_205:
+ ; ISUB_R r7, r5
+ sub r15, r13
+randomx_isn_206:
+ ; IADD_R r3, r5
+ add r11, r13
+randomx_isn_207:
+ ; FSCAL_R f1
+ xorps xmm1, xmm15
+randomx_isn_208:
+ ; IADD_R r6, r3
+ add r14, r11
+randomx_isn_209:
+ ; FSUB_M f0, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm0, xmm12
+randomx_isn_210:
+ ; FSWAP_R e2
+ shufpd xmm6, xmm6, 1
+randomx_isn_211:
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+randomx_isn_212:
+ ; IMUL_M r0, L1[r1]
+ mov eax, r9d
+ and eax, 16376
+ imul r8, qword ptr [rsi+rax]
+randomx_isn_213:
+ ; FSUB_M f2, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm2, xmm12
+randomx_isn_214:
+ ; FMUL_R e1, a2
+ mulpd xmm5, xmm10
+randomx_isn_215:
+ ; FADD_R f3, a1
+ addpd xmm3, xmm9
+randomx_isn_216:
+ ; IXOR_M r4, L2[r1]
+ mov eax, r9d
+ and eax, 262136
+ xor r12, qword ptr [rsi+rax]
+randomx_isn_217:
+ ; IMUL_M r6, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ imul r14, qword ptr [rsi+rax]
+randomx_isn_218:
+ ; FSCAL_R f2
+ xorps xmm2, xmm15
+randomx_isn_219:
+ ; FADD_M f3, L1[r7]
+ mov eax, r15d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm3, xmm12
+randomx_isn_220:
+ ; FSUB_R f0, a0
+ subpd xmm0, xmm8
+randomx_isn_221:
+ ; IMUL_R r1, r0
+ imul r9, r8
+randomx_isn_222:
+ ; IADD_M r1, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ add r9, qword ptr [rsi+rax]
+randomx_isn_223:
+ ; FSCAL_R f2
+ xorps xmm2, xmm15
+randomx_isn_224:
+ ; IADD_R r5, r4
+ add r13, r12
+randomx_isn_225:
+ ; ISTORE L2[r2], r1
+ mov eax, r10d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r9
+randomx_isn_226:
+ ; ISUB_R r6, -791575725
+ sub r14, -791575725
+randomx_isn_227:
; FDIV_M e3, L1[r0]
mov eax, r8d
and eax, 16376
@@ -710,11 +897,107 @@
andps xmm12, xmm13
orps xmm12, xmm14
divpd xmm7, xmm12
- ; IADD_M r3, L1[r7]
+randomx_isn_228:
+ ; IXOR_R r7, r1
+ xor r15, r9
+randomx_isn_229:
+ ; ISWAP_R r0, r6
+ xchg r8, r14
+randomx_isn_230:
+ ; IADD_M r2, L1[r7]
mov eax, r15d
and eax, 16376
- add r11, qword ptr [rsi+rax]
- ; IXOR_M r2, L2[r6]
- mov eax, r14d
- and eax, 262136
- xor r10, qword ptr [rsi+rax]
+ add r10, qword ptr [rsi+rax]
+randomx_isn_231:
+ ; FMUL_R e1, a0
+ mulpd xmm5, xmm8
+randomx_isn_232:
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+randomx_isn_233:
+ ; FMUL_R e0, a2
+ mulpd xmm4, xmm10
+randomx_isn_234:
+ ; IADD_RC r2, r7, 1435646464
+ lea r10, [r10+r15+1435646464]
+randomx_isn_235:
+ ; ISWAP_R r7, r6
+ xchg r15, r14
+randomx_isn_236:
+ ; FMUL_R e3, a2
+ mulpd xmm7, xmm10
+randomx_isn_237:
+ ; FSUB_R f1, a3
+ subpd xmm1, xmm11
+randomx_isn_238:
+ ; IADD_R r4, r2
+ add r12, r10
+randomx_isn_239:
+ ; IMUL_RCP r7, 3065786637
+ mov rax, 12921343181238534701
+ imul r15, rax
+randomx_isn_240:
+ ; IMUL_R r5, r7
+ imul r13, r15
+randomx_isn_241:
+ ; IROR_R r6, r5
+ mov ecx, r13d
+ ror r14, cl
+randomx_isn_242:
+ ; IMUL_R r6, r4
+ imul r14, r12
+randomx_isn_243:
+ ; FSUB_R f0, a3
+ subpd xmm0, xmm11
+randomx_isn_244:
+ ; FADD_M f1, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm1, xmm12
+randomx_isn_245:
+ ; INEG_R r3
+ neg r11
+randomx_isn_246:
+ ; IMUL_9C r7, 1938400676
+ lea r15, [r15+r15*8+1938400676]
+randomx_isn_247:
+ ; COND_M r2, be(L1[r5], -8545330), 2
+ add r9, 4
+ test r9, 508
+ jz randomx_isn_223
+ xor ecx, ecx
+ mov eax, r13d
+ and eax, 16376
+ cmp dword ptr [rsi+rax], -8545330
+ setbe cl
+ add r10, rcx
+randomx_isn_248:
+ ; ISTORE L1[r0], r5
+ mov eax, r8d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r13
+randomx_isn_249:
+ ; IADD_RC r6, r5, 2052724836
+ lea r14, [r14+r13+2052724836]
+randomx_isn_250:
+ ; FADD_R f3, a0
+ addpd xmm3, xmm8
+randomx_isn_251:
+ ; IADD_R r0, -221201557
+ add r8, -221201557
+randomx_isn_252:
+ ; ISUB_M r4, L1[r2]
+ mov eax, r10d
+ and eax, 16376
+ sub r12, qword ptr [rsi+rax]
+randomx_isn_253:
+ ; IADD_RC r5, r4, 256175395
+ lea r13, [r13+r12+256175395]
+randomx_isn_254:
+ ; IADD_RC r6, r7, 1119815512
+ lea r14, [r14+r15+1119815512]
+randomx_isn_255:
+ ; IROR_R r7, r3
+ mov ecx, r11d
+ ror r15, cl