From 174754cb2b6795ca9b48f2d986e8a93d5c57c6cd Mon Sep 17 00:00:00 2001 From: tevador Date: Sun, 17 Mar 2019 23:09:11 +0100 Subject: [PATCH] Added branches - ASM and JIT only --- src/AssemblyGeneratorX86.cpp | 54 ++ src/AssemblyGeneratorX86.hpp | 3 + src/JitCompilerX86.cpp | 174 +++- src/JitCompilerX86.hpp | 82 +- src/configuration.h | 3 + src/instructionWeights.hpp | 3 + src/program.inc | 1617 ++++++++++++++++++++-------------- 7 files changed, 1190 insertions(+), 746 deletions(-) diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 474b3bd..fd7ee06 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -18,6 +18,7 @@ along with RandomX. If not, see. */ //#define TRACE +#include #include "AssemblyGeneratorX86.hpp" #include "common.hpp" #include "reciprocal.h" @@ -45,9 +46,25 @@ namespace RandomX { static const char* regDatasetAddr = "rdi"; static const char* regScratchpadAddr = "rsi"; + int AssemblyGeneratorX86::getConditionRegister() { + int min = INT_MAX; + int minIndex; + for (unsigned i = 0; i < 8; ++i) { + if (registerUsage[i] < min) { + min = registerUsage[i]; + minIndex = i; + } + } + return minIndex; + } + void AssemblyGeneratorX86::generateProgram(Program& prog) { + for (unsigned i = 0; i < 8; ++i) { + registerUsage[i] = -1; + } asmCode.str(std::string()); //clear for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { + asmCode << "randomx_isn_" << i << ":" << std::endl; Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; @@ -96,6 +113,7 @@ namespace RandomX { //1 uOP void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -107,6 +125,7 @@ namespace RandomX { //2.75 uOP void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; @@ -119,12 +138,14 @@ namespace RandomX { //1 uOP void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) { + registerUsage[instr.dst] = i; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -136,6 +157,7 @@ namespace RandomX { //2.75 uOP void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; @@ -148,12 +170,14 @@ namespace RandomX { //1 uOP void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) { + registerUsage[instr.dst] = i; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -165,6 +189,7 @@ namespace RandomX { //2.75 uOP void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; @@ -177,6 +202,7 @@ namespace RandomX { //4 uOPs void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\tmul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; @@ -185,6 +211,7 @@ namespace RandomX { //5.75 uOPs void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; @@ -200,6 +227,7 @@ namespace RandomX { //4 uOPs void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\timul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; @@ -208,6 +236,7 @@ namespace RandomX { //5.75 uOPs void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; @@ -223,12 +252,14 @@ namespace RandomX { //1 uOP void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; asmCode << "\tneg " << regR[instr.dst] << std::endl; traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; } @@ -240,6 +271,7 @@ namespace RandomX { //2.75 uOP void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; @@ -252,6 +284,7 @@ namespace RandomX { //1.75 uOPs void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl; @@ -264,6 +297,7 @@ namespace RandomX { //1.75 uOPs void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl; @@ -277,6 +311,7 @@ namespace RandomX { //2 uOPs void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { if (instr.getImm32() != 0) { + registerUsage[instr.dst] = i; uint32_t divisor = instr.getImm32(); asmCode << "\tmov rax, " << reciprocal(instr.getImm32()) << std::endl; asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; @@ -295,6 +330,9 @@ namespace RandomX { //2 uOPs void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { + //std::swap(registerUsage[instr.dst], registerUsage[instr.src]); + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; traceint(instr); } @@ -435,8 +473,23 @@ namespace RandomX { } } + void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { + const int shift = (instr.mod >> 5); + const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; + int reg = getConditionRegister(); + int target = registerUsage[reg] + 1; + registerUsage[reg] = i; + asmCode << "\tadd " << regR[reg] << ", " << (1 << shift) << std::endl; + asmCode << "\ttest " << regR[reg] << ", " << conditionMask << std::endl; + asmCode << "\tjz randomx_isn_" << target << std::endl; + for (unsigned j = 0; j < 8; ++j) { //mark all registers as used + registerUsage[j] = i; + } + } + //4 uOPs void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { + handleCondition(instr, i); asmCode << "\txor ecx, ecx" << std::endl; asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "\tset" << condition(instr) << " cl" << std::endl; @@ -446,6 +499,7 @@ namespace RandomX { //6 uOPs void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) { + handleCondition(instr, i); asmCode << "\txor ecx, ecx" << std::endl; genAddressReg(instr); asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl; diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index 216e492..62a6081 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -38,10 +38,13 @@ namespace RandomX { private: static InstructionGenerator engine[256]; std::stringstream asmCode; + int registerUsage[8]; void genAddressReg(Instruction&, const char*); void genAddressRegDst(Instruction&, int); int32_t genAddressImm(Instruction&); + int getConditionRegister(); + void handleCondition(Instruction&, int); void generateCode(Instruction&, int); diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 6d9ed69..fc307a3 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -18,12 +18,15 @@ along with RandomX. If not, see. */ #include +#include #include #include "JitCompilerX86.hpp" #include "Program.hpp" #include "reciprocal.h" #include "virtualMemory.hpp" +#define RANDOMX_JUMP + namespace RandomX { #if !defined(_M_X64) && !defined(__x86_64__) @@ -168,6 +171,9 @@ namespace RandomX { static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; + static const uint8_t REX_ADD_I[] = { 0x49, 0x83 }; + static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; + static const uint8_t JZ[] = { 0x0f, 0x84 }; size_t JitCompilerX86::getCodeSize() { return codePos - prologueSize; @@ -180,6 +186,12 @@ namespace RandomX { } void JitCompilerX86::generateProgram(Program& prog) { +#ifdef RANDOMX_JUMP + instructionOffsets.clear(); + for (unsigned i = 0; i < 8; ++i) { + registerUsage[i] = -1; + } +#endif auto addressRegisters = prog.getEntropy(12); uint32_t readReg0 = 0 + (addressRegisters & 1); addressRegisters >>= 1; @@ -199,7 +211,7 @@ namespace RandomX { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr); + generateCode(instr, i); } emit(REX_MOV_RR); emitByte(0xc0 + readReg2); @@ -217,9 +229,12 @@ namespace RandomX { emitByte(0x90); } - void JitCompilerX86::generateCode(Instruction& instr) { + void JitCompilerX86::generateCode(Instruction& instr, int i) { +#ifdef RANDOMX_JUMP + instructionOffsets.push_back(codePos); +#endif auto generator = engine[instr.opcode]; - (this->*generator)(instr); + (this->*generator)(instr, i); } void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { @@ -245,7 +260,8 @@ namespace RandomX { emit32(instr.getImm32() & ScratchpadL3Mask); } - void JitCompilerX86::h_IADD_R(Instruction& instr) { + void JitCompilerX86::h_IADD_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_ADD_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -257,7 +273,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IADD_M(Instruction& instr) { + void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_ADD_RM); @@ -275,14 +292,16 @@ namespace RandomX { emitByte((scale << 6) | (index << 3) | base); } - void JitCompilerX86::h_IADD_RC(Instruction& instr) { + void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) { + registerUsage[instr.dst] = i; emit(REX_LEA); emitByte(0x84 + 8 * instr.dst); genSIB(0, instr.src, instr.dst); emit32(instr.getImm32()); } - void JitCompilerX86::h_ISUB_R(Instruction& instr) { + void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_SUB_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -294,7 +313,8 @@ namespace RandomX { } } - void JitCompilerX86::h_ISUB_M(Instruction& instr) { + void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_SUB_RM); @@ -308,14 +328,16 @@ namespace RandomX { } } - void JitCompilerX86::h_IMUL_9C(Instruction& instr) { + void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) { + registerUsage[instr.dst] = i; emit(REX_LEA); emitByte(0x84 + 8 * instr.dst); genSIB(3, instr.dst, instr.dst); emit32(instr.getImm32()); } - void JitCompilerX86::h_IMUL_R(Instruction& instr) { + void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_IMUL_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -327,7 +349,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IMUL_M(Instruction& instr) { + void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_IMUL_RM); @@ -341,7 +364,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IMULH_R(Instruction& instr) { + void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -350,7 +374,8 @@ namespace RandomX { emitByte(0xc2 + 8 * instr.dst); } - void JitCompilerX86::h_IMULH_M(Instruction& instr) { + void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, false); emit(REX_MOV_RR64); @@ -368,7 +393,8 @@ namespace RandomX { emitByte(0xc2 + 8 * instr.dst); } - void JitCompilerX86::h_ISMULH_R(Instruction& instr) { + void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -377,7 +403,8 @@ namespace RandomX { emitByte(0xc2 + 8 * instr.dst); } - void JitCompilerX86::h_ISMULH_M(Instruction& instr) { + void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, false); emit(REX_MOV_RR64); @@ -395,8 +422,9 @@ namespace RandomX { emitByte(0xc2 + 8 * instr.dst); } - void JitCompilerX86::h_IMUL_RCP(Instruction& instr) { + void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { if (instr.getImm32() != 0) { + registerUsage[instr.dst] = i; emit(MOV_RAX_I); emit64(reciprocal(instr.getImm32())); emit(REX_IMUL_RM); @@ -404,16 +432,18 @@ namespace RandomX { } } - void JitCompilerX86::h_ISDIV_C(Instruction& instr) { + void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) { } - void JitCompilerX86::h_INEG_R(Instruction& instr) { + void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; emit(REX_NEG); emitByte(0xd8 + instr.dst); } - void JitCompilerX86::h_IXOR_R(Instruction& instr) { + void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_XOR_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); @@ -425,7 +455,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IXOR_M(Instruction& instr) { + void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); emit(REX_XOR_RM); @@ -439,7 +470,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IROR_R(Instruction& instr) { + void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_MOV_RR); emitByte(0xc8 + instr.src); @@ -453,7 +485,8 @@ namespace RandomX { } } - void JitCompilerX86::h_IROL_R(Instruction& instr) { + void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { emit(REX_MOV_RR); emitByte(0xc8 + instr.src); @@ -467,20 +500,22 @@ namespace RandomX { } } - void JitCompilerX86::h_ISWAP_R(Instruction& instr) { + void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; emit(REX_XCHG); emitByte(0xc0 + instr.src + 8 * instr.dst); } } - void JitCompilerX86::h_FSWAP_R(Instruction& instr) { + void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { emit(SHUFPD); emitByte(0xc0 + 9 * instr.dst); emitByte(1); } - void JitCompilerX86::h_FADD_R(Instruction& instr) { + void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; emit(REX_ADDPD); @@ -490,7 +525,7 @@ namespace RandomX { //emitByte(0xf8 + instr.dst); } - void JitCompilerX86::h_FADD_M(Instruction& instr) { + void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); emit(REX_CVTDQ2PD_XMM12); @@ -498,7 +533,7 @@ namespace RandomX { emitByte(0xc4 + 8 * instr.dst); } - void JitCompilerX86::h_FSUB_R(Instruction& instr) { + void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; emit(REX_SUBPD); @@ -508,7 +543,7 @@ namespace RandomX { //emitByte(0xf8 + instr.dst); } - void JitCompilerX86::h_FSUB_M(Instruction& instr) { + void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); emit(REX_CVTDQ2PD_XMM12); @@ -516,20 +551,20 @@ namespace RandomX { emitByte(0xc4 + 8 * instr.dst); } - void JitCompilerX86::h_FSCAL_R(Instruction& instr) { + void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { instr.dst %= 4; emit(REX_XORPS); emitByte(0xc7 + 8 * instr.dst); } - void JitCompilerX86::h_FMUL_R(Instruction& instr) { + void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; emit(REX_MULPD); emitByte(0xe0 + instr.src + 8 * instr.dst); } - void JitCompilerX86::h_FMUL_M(Instruction& instr) { + void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); emit(REX_CVTDQ2PD_XMM12); @@ -540,7 +575,7 @@ namespace RandomX { emitByte(0xe5 + 8 * instr.dst); } - void JitCompilerX86::h_FDIV_R(Instruction& instr) { + void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; emit(REX_DIVPD); @@ -549,7 +584,7 @@ namespace RandomX { emitByte(0xe5 + 8 * instr.dst); } - void JitCompilerX86::h_FDIV_M(Instruction& instr) { + void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); emit(REX_CVTDQ2PD_XMM12); @@ -558,13 +593,13 @@ namespace RandomX { emitByte(0xe4 + 8 * instr.dst); } - void JitCompilerX86::h_FSQRT_R(Instruction& instr) { + void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { instr.dst %= 4; emit(SQRTPD); emitByte(0xe4 + 9 * instr.dst); } - void JitCompilerX86::h_CFROUND(Instruction& instr) { + void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { emit(REX_MOV_RR64); emitByte(0xc0 + instr.src); int rotate = (13 - (instr.getImm32() & 63)) & 63; @@ -575,6 +610,28 @@ namespace RandomX { emit(AND_OR_MOV_LDMXCSR); } + static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { + switch (((instr.mod >> 2) & 7) ^ invert) + { + case 0: + return 0x76; //jbe + case 1: + return 0x77; //ja + case 2: + return 0x78; //js + case 3: + return 0x79; //jns + case 4: + return 0x70; //jo + case 5: + return 0x71; //jno + case 6: + return 0x7c; //jl + case 7: + return 0x7d; //jge + } + } + static inline uint8_t condition(Instruction& instr) { switch ((instr.mod >> 2) & 7) { @@ -599,7 +656,41 @@ namespace RandomX { } } - void JitCompilerX86::h_COND_R(Instruction& instr) { + int JitCompilerX86::getConditionRegister() { + int min = INT_MAX; + int minIndex; + for (unsigned i = 0; i < 8; ++i) { + if (registerUsage[i] < min) { + min = registerUsage[i]; + minIndex = i; + } + } + return minIndex; + } + + void JitCompilerX86::handleCondition(Instruction& instr, int i) { + const int shift = (instr.mod >> 5); + const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; + int reg = getConditionRegister(); + int target = registerUsage[reg] + 1; + registerUsage[reg] = i; + emit(REX_ADD_I); + emitByte(0xc0 + reg); + emitByte(1 << shift); + emit(REX_TEST); + emitByte(0xc0 + reg); + emit32(conditionMask); + emit(JZ); + emit32(instructionOffsets[target] - (codePos + 4)); + for (unsigned j = 0; j < 8; ++j) { //mark all registers as used + registerUsage[j] = i; + } + } + + void JitCompilerX86::h_COND_R(Instruction& instr, int i) { +#ifdef RANDOMX_JUMP + handleCondition(instr, i); +#endif emit(XOR_ECX_ECX); emit(REX_CMP_R32I); emitByte(0xf8 + instr.src); @@ -611,7 +702,10 @@ namespace RandomX { emitByte(0xc1 + 8 * instr.dst); } - void JitCompilerX86::h_COND_M(Instruction& instr) { + void JitCompilerX86::h_COND_M(Instruction& instr, int i) { +#ifdef RANDOMX_JUMP + handleCondition(instr, i); +#endif emit(XOR_ECX_ECX); genAddressReg(instr); emit(REX_CMP_M32I); @@ -623,21 +717,21 @@ namespace RandomX { emitByte(0xc1 + 8 * instr.dst); } - void JitCompilerX86::h_ISTORE(Instruction& instr) { + void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); emit(REX_MOV_MR); emitByte(0x04 + 8 * instr.src); emitByte(0x06); } - void JitCompilerX86::h_FSTORE(Instruction& instr) { + void JitCompilerX86::h_FSTORE(Instruction& instr, int i) { genAddressRegDst(instr, true); emit(MOVAPD); emitByte(0x04 + 8 * instr.src); emitByte(0x06); } - void JitCompilerX86::h_NOP(Instruction& instr) { + void JitCompilerX86::h_NOP(Instruction& instr, int i) { emitByte(0x90); } diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index fed3a8a..3de0776 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -29,7 +29,7 @@ namespace RandomX { class Program; class JitCompilerX86; - typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&); + typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); constexpr uint32_t CodeSize = 64 * 1024; @@ -46,15 +46,19 @@ namespace RandomX { size_t getCodeSize(); private: static InstructionGeneratorX86 engine[256]; + std::vector instructionOffsets; + int registerUsage[8]; uint8_t* code; int32_t codePos; + int getConditionRegister(); void genAddressReg(Instruction&, bool); void genAddressRegDst(Instruction&, bool); void genAddressImm(Instruction&); void genSIB(int scale, int index, int base); - void generateCode(Instruction&); + void handleCondition(Instruction&, int); + void generateCode(Instruction&, int); void emitByte(uint8_t val) { code[codePos] = val; @@ -89,43 +93,43 @@ namespace RandomX { codePos += N; } - void h_IADD_R(Instruction&); - void h_IADD_M(Instruction&); - void h_IADD_RC(Instruction&); - void h_ISUB_R(Instruction&); - void h_ISUB_M(Instruction&); - void h_IMUL_9C(Instruction&); - void h_IMUL_R(Instruction&); - void h_IMUL_M(Instruction&); - void h_IMULH_R(Instruction&); - void h_IMULH_M(Instruction&); - void h_ISMULH_R(Instruction&); - void h_ISMULH_M(Instruction&); - void h_IMUL_RCP(Instruction&); - void h_ISDIV_C(Instruction&); - void h_INEG_R(Instruction&); - void h_IXOR_R(Instruction&); - void h_IXOR_M(Instruction&); - void h_IROR_R(Instruction&); - void h_IROL_R(Instruction&); - void h_ISWAP_R(Instruction&); - void h_FSWAP_R(Instruction&); - void h_FADD_R(Instruction&); - void h_FADD_M(Instruction&); - void h_FSUB_R(Instruction&); - void h_FSUB_M(Instruction&); - void h_FSCAL_R(Instruction&); - void h_FMUL_R(Instruction&); - void h_FMUL_M(Instruction&); - void h_FDIV_R(Instruction&); - void h_FDIV_M(Instruction&); - void h_FSQRT_R(Instruction&); - void h_COND_R(Instruction&); - void h_COND_M(Instruction&); - void h_CFROUND(Instruction&); - void h_ISTORE(Instruction&); - void h_FSTORE(Instruction&); - void h_NOP(Instruction&); + void h_IADD_R(Instruction&, int); + void h_IADD_M(Instruction&, int); + void h_IADD_RC(Instruction&, int); + void h_ISUB_R(Instruction&, int); + void h_ISUB_M(Instruction&, int); + void h_IMUL_9C(Instruction&, int); + void h_IMUL_R(Instruction&, int); + void h_IMUL_M(Instruction&, int); + void h_IMULH_R(Instruction&, int); + void h_IMULH_M(Instruction&, int); + void h_ISMULH_R(Instruction&, int); + void h_ISMULH_M(Instruction&, int); + void h_IMUL_RCP(Instruction&, int); + void h_ISDIV_C(Instruction&, int); + void h_INEG_R(Instruction&, int); + void h_IXOR_R(Instruction&, int); + void h_IXOR_M(Instruction&, int); + void h_IROR_R(Instruction&, int); + void h_IROL_R(Instruction&, int); + void h_ISWAP_R(Instruction&, int); + void h_FSWAP_R(Instruction&, int); + void h_FADD_R(Instruction&, int); + void h_FADD_M(Instruction&, int); + void h_FSUB_R(Instruction&, int); + void h_FSUB_M(Instruction&, int); + void h_FSCAL_R(Instruction&, int); + void h_FMUL_R(Instruction&, int); + void h_FMUL_M(Instruction&, int); + void h_FDIV_R(Instruction&, int); + void h_FDIV_M(Instruction&, int); + void h_FSQRT_R(Instruction&, int); + void h_COND_R(Instruction&, int); + void h_COND_M(Instruction&, int); + void h_CFROUND(Instruction&, int); + void h_ISTORE(Instruction&, int); + void h_FSTORE(Instruction&, int); + void h_NOP(Instruction&, int); }; } \ No newline at end of file diff --git a/src/configuration.h b/src/configuration.h index 146b329..4c30b59 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -67,6 +67,9 @@ along with RandomX. If not, see. //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. #define RANDOMX_SCRATCHPAD_L1 (16 * 1024) +//How many register bits must be zero for a jump condition to be triggered +#define RANDOMX_CONDITION_BITS 7 + /* Instruction frequencies (per 256 opcodes) Total sum of frequencies must be 256 diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp index 7fceb0b..8c9f566 100644 --- a/src/instructionWeights.hpp +++ b/src/instructionWeights.hpp @@ -54,6 +54,7 @@ along with RandomX. If not, see. #define REP32(x) REP31(x) x, #define REP33(x) REP32(x) x, #define REP40(x) REP32(x) REP8(x) +#define REP64(x) REP32(x) REP32(x) #define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) #define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) #define REP256(x) REP128(x) REP128(x) @@ -95,6 +96,8 @@ along with RandomX. If not, see. #define REPCASE30(x) REPCASE29(x) case __COUNTER__: #define REPCASE31(x) REPCASE30(x) case __COUNTER__: #define REPCASE32(x) REPCASE31(x) case __COUNTER__: +#define REPCASE64(x) REPCASE32(x) REPCASE32(x) +#define REPCASE128(x) REPCASE64(x) REPCASE64(x) #define REPCASENX(x,N) REPCASE##N(x) #define REPCASEN(x,N) REPCASENX(x,N) #define CASE_REP(x) REPCASEN(x, WT(x)) \ No newline at end of file diff --git a/src/program.inc b/src/program.inc index 4171a54..b12dc13 100644 --- a/src/program.inc +++ b/src/program.inc @@ -1,708 +1,895 @@ - ; IMULH_R r1, r0 - mov rax, r9 - mul r8 - mov r9, rdx - ; IMULH_R r4, r5 - mov rax, r12 - mul r13 - mov r12, rdx - ; FMUL_R e0, a1 - mulpd xmm4, xmm9 - ; IMUL_9C r6, 933674225 - lea r14, [r14+r14*8+933674225] - ; IROR_R r7, r6 - mov ecx, r14d - ror r15, cl - ; FSQRT_R e1 - sqrtpd xmm5, xmm5 - ; IADD_R r1, r0 - add r9, r8 - ; FSCAL_R f1 - xorps xmm1, xmm15 - ; IMUL_R r6, r5 - imul r14, r13 - ; FSCAL_R f3 - xorps xmm3, xmm15 - ; IADD_M r5, L1[r0] - mov eax, r8d - and eax, 16376 - add r13, qword ptr [rsi+rax] - ; IMUL_RCP r0, 3332750793 - mov rax, 11886301652177618669 - imul r8, rax - ; ISTORE L1[r3], r0 - mov eax, r11d - and eax, 16376 - mov qword ptr [rsi+rax], r8 - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; ISUB_R r1, r3 - sub r9, r11 - ; ISMULH_R r4, r1 - mov rax, r12 - imul r9 - mov r12, rdx - ; IADD_RC r3, r0, 1262539428 - lea r11, [r11+r8+1262539428] - ; FSWAP_R e1 - shufpd xmm5, xmm5, 1 - ; FMUL_R e1, a3 - mulpd xmm5, xmm11 - ; FMUL_R e3, a3 - mulpd xmm7, xmm11 - ; ISWAP_R r0, r2 - xchg r8, r10 - ; COND_R r5, of(r4, 137305269) - xor ecx, ecx - cmp r12d, 137305269 - seto cl - add r13, rcx - ; IMUL_R r6, r4 - imul r14, r12 - ; FMUL_R e3, a0 - mulpd xmm7, xmm8 - ; FSCAL_R f0 - xorps xmm0, xmm15 - ; FADD_R f1, a0 - addpd xmm1, xmm8 - ; IADD_R r6, r3 - add r14, r11 - ; ISMULH_M r1, L3[777112] - mov rax, r9 - imul qword ptr [rsi+777112] - mov r9, rdx - ; FADD_R f1, a1 - addpd xmm1, xmm9 - ; FSUB_M f2, L2[r3] - mov eax, r11d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm2, xmm12 - ; IMUL_R r5, r7 - imul r13, r15 - ; ISUB_M r1, L1[r3] - mov eax, r11d - and eax, 16376 - sub r9, qword ptr [rsi+rax] - ; IXOR_M r1, L1[r6] - mov eax, r14d - and eax, 16376 - xor r9, qword ptr [rsi+rax] - ; COND_R r2, ns(r3, 1727033430) - xor ecx, ecx - cmp r11d, 1727033430 - setns cl - add r10, rcx - ; FADD_R f3, a1 - addpd xmm3, xmm9 - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; IADD_R r5, -1048707993 - add r13, -1048707993 - ; COND_R r2, ge(r5, -1016934677) - xor ecx, ecx - cmp r13d, -1016934677 - setge cl - add r10, rcx - ; FSUB_R f2, a3 - subpd xmm2, xmm11 - ; ISUB_M r1, L2[r4] - mov eax, r12d - and eax, 262136 - sub r9, qword ptr [rsi+rax] - ; IMUL_R r5, r3 - imul r13, r11 - ; FSUB_R f1, a3 - subpd xmm1, xmm11 - ; IROR_R r1, r3 - mov ecx, r11d - ror r9, cl - ; FADD_R f3, a2 - addpd xmm3, xmm10 - ; ISUB_R r0, -28376526 - sub r8, -28376526 - ; IROR_R r6, r0 - mov ecx, r8d - ror r14, cl - ; FADD_R f1, a0 - addpd xmm1, xmm8 - ; FMUL_R e1, a0 - mulpd xmm5, xmm8 - ; IXOR_R r2, r4 - xor r10, r12 - ; FSUB_M f1, L1[r2] - mov eax, r10d +randomx_isn_0: + ; IROR_R r3, 30 + ror r11, 30 +randomx_isn_1: + ; FSUB_M f1, L1[r7] + mov eax, r15d and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] subpd xmm1, xmm12 - ; FSWAP_R f3 - shufpd xmm3, xmm3, 1 - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; ISUB_R r7, r6 - sub r15, r14 - ; FADD_R f3, a1 - addpd xmm3, xmm9 - ; ISUB_R r1, r7 - sub r9, r15 - ; IADD_M r5, L2[r7] - mov eax, r15d - and eax, 262136 - add r13, qword ptr [rsi+rax] - ; IADD_RC r1, r3, 145589392 - lea r9, [r9+r11+145589392] - ; FADD_R f2, a1 - addpd xmm2, xmm9 - ; FSUB_R f1, a1 - subpd xmm1, xmm9 - ; FADD_M f0, L1[r3] - mov eax, r11d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm0, xmm12 - ; FADD_R f3, a1 - addpd xmm3, xmm9 - ; FSUB_R f0, a3 - subpd xmm0, xmm11 - ; FMUL_R e2, a2 - mulpd xmm6, xmm10 - ; FADD_R f2, a1 - addpd xmm2, xmm9 - ; IXOR_R r7, r4 - xor r15, r12 - ; FSUB_R f1, a3 - subpd xmm1, xmm11 - ; IMUL_RCP r0, 3339947118 - mov rax, 11860691159940745144 - imul r8, rax - ; FSCAL_R f2 - xorps xmm2, xmm15 - ; IMUL_9C r0, 850304074 - lea r8, [r8+r8*8+850304074] - ; IADD_R r2, r4 - add r10, r12 - ; IADD_R r0, -1929760745 - add r8, -1929760745 - ; ISTORE L2[r4], r7 - mov eax, r12d - and eax, 262136 - mov qword ptr [rsi+rax], r15 - ; IROR_R r2, r7 +randomx_isn_2: + ; IXOR_R r3, 1860630909 + xor r11, 1860630909 +randomx_isn_3: + ; IROR_R r5, r7 mov ecx, r15d - ror r10, cl - ; FMUL_R e1, a1 - mulpd xmm5, xmm9 + ror r13, cl +randomx_isn_4: + ; IXOR_R r3, r4 + xor r11, r12 +randomx_isn_5: + ; IROR_R r4, r0 + mov ecx, r8d + ror r12, cl +randomx_isn_6: ; FSQRT_R e3 sqrtpd xmm7, xmm7 - ; IXOR_R r0, -1150923249 - xor r8, -1150923249 - ; IMUL_9C r7, 586146619 - lea r15, [r15+r15*8+586146619] - ; FSWAP_R f2 - shufpd xmm2, xmm2, 1 - ; FSUB_M f3, L1[r6] - mov eax, r14d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm3, xmm12 - ; IXOR_R r0, 292938237 - xor r8, 292938237 - ; COND_R r6, no(r6, -2142285576) - xor ecx, ecx - cmp r14d, -2142285576 - setno cl - add r14, rcx - ; IMUL_RCP r3, 670137279 - mov rax, 14778345608621248183 - imul r11, rax - ; ISTORE L1[r1], r5 - mov eax, r9d - and eax, 16376 - mov qword ptr [rsi+rax], r13 - ; COND_R r3, sg(r1, 1638220289) - xor ecx, ecx - cmp r9d, 1638220289 - sets cl - add r11, rcx - ; IXOR_R r4, r2 - xor r12, r10 - ; COND_R r2, be(r2, 1131588253) - xor ecx, ecx - cmp r10d, 1131588253 - setbe cl - add r10, rcx - ; IMULH_R r3, r1 - mov rax, r11 - mul r9 - mov r11, rdx - ; COND_R r3, sg(r6, 1528901692) - xor ecx, ecx - cmp r14d, 1528901692 - sets cl - add r11, rcx - ; IMUL_M r6, L2[r4] - mov eax, r12d - and eax, 262136 - imul r14, qword ptr [rsi+rax] - ; ISMULH_M r1, L1[r2] - mov ecx, r10d - and ecx, 16376 - mov rax, r9 - imul qword ptr [rsi+rcx] - mov r9, rdx - ; ISUB_M r5, L1[r4] - mov eax, r12d - and eax, 16376 - sub r13, qword ptr [rsi+rax] - ; IMUL_RCP r1, 1612208358 - mov rax, 12285658072842024305 - imul r9, rax - ; COND_R r2, lt(r6, -1712049035) - xor ecx, ecx - cmp r14d, -1712049035 - setl cl - add r10, rcx - ; IMUL_RCP r2, 2888266520 - mov rax, 13715521397634789187 - imul r10, rax - ; IADD_M r1, L2[r6] - mov eax, r14d - and eax, 262136 - add r9, qword ptr [rsi+rax] - ; FMUL_R e0, a3 - mulpd xmm4, xmm11 - ; ISTORE L1[r7], r1 - mov eax, r15d - and eax, 16376 - mov qword ptr [rsi+rax], r9 +randomx_isn_7: ; ISTORE L1[r0], r3 mov eax, r8d and eax, 16376 mov qword ptr [rsi+rax], r11 - ; FSUB_R f0, a1 - subpd xmm0, xmm9 - ; FADD_R f2, a2 - addpd xmm2, xmm10 +randomx_isn_8: + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 +randomx_isn_9: ; FMUL_R e0, a1 mulpd xmm4, xmm9 - ; FMUL_R e2, a0 - mulpd xmm6, xmm8 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; IROR_R r5, 21 - ror r13, 21 - ; FSQRT_R e1 - sqrtpd xmm5, xmm5 +randomx_isn_10: + ; IMUL_M r2, L1[r1] + mov eax, r9d + and eax, 16376 + imul r10, qword ptr [rsi+rax] +randomx_isn_11: ; ISTORE L1[r3], r1 mov eax, r11d and eax, 16376 mov qword ptr [rsi+rax], r9 - ; IMUL_9C r2, -290275273 - lea r10, [r10+r10*8-290275273] - ; ISUB_M r7, L1[r3] - mov eax, r11d - and eax, 16376 - sub r15, qword ptr [rsi+rax] - ; IMUL_R r6, 1301522739 - imul r14, 1301522739 - ; ISWAP_R r2, r4 - xchg r10, r12 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; IMUL_9C r2, 877307769 - lea r10, [r10+r10*8+877307769] - ; IMUL_R r0, r3 - imul r8, r11 - ; IMUL_9C r0, 1293318220 - lea r8, [r8+r8*8+1293318220] - ; FSQRT_R e0 - sqrtpd xmm4, xmm4 - ; ISTORE L1[r0], r2 - mov eax, r8d - and eax, 16376 - mov qword ptr [rsi+rax], r10 - ; IMUL_RCP r5, 2071364883 - mov rax, 9562313618003962461 - imul r13, rax - ; FMUL_R e1, a2 - mulpd xmm5, xmm10 - ; FSUB_R f1, a3 - subpd xmm1, xmm11 - ; FSUB_R f0, a1 - subpd xmm0, xmm9 - ; IMULH_R r6, r1 - mov rax, r14 - mul r9 - mov r14, rdx - ; ISTORE L1[r6], r5 - mov eax, r14d - and eax, 16376 - mov qword ptr [rsi+rax], r13 - ; ISTORE L2[r1], r2 - mov eax, r9d - and eax, 262136 - mov qword ptr [rsi+rax], r10 - ; ISUB_M r1, L2[r4] - mov eax, r12d - and eax, 262136 - sub r9, qword ptr [rsi+rax] - ; IADD_M r7, L1[r6] - mov eax, r14d - and eax, 16376 - add r15, qword ptr [rsi+rax] - ; IADD_RC r2, r0, -1705364403 - lea r10, [r10+r8-1705364403] - ; ISTORE L1[r6], r5 - mov eax, r14d - and eax, 16376 - mov qword ptr [rsi+rax], r13 - ; FSUB_M f0, L1[r5] - mov eax, r13d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm0, xmm12 - ; IXOR_R r1, r3 - xor r9, r11 - ; FADD_R f2, a0 - addpd xmm2, xmm8 - ; FSCAL_R f2 - xorps xmm2, xmm15 - ; ISUB_R r6, -789651909 - sub r14, -789651909 - ; COND_R r4, sg(r1, -1404926795) - xor ecx, ecx - cmp r9d, -1404926795 - sets cl - add r12, rcx - ; FSCAL_R f2 - xorps xmm2, xmm15 - ; ISUB_R r6, r7 - sub r14, r15 - ; IXOR_R r5, r2 - xor r13, r10 - ; IROR_R r6, r5 - mov ecx, r13d - ror r14, cl - ; FSUB_R f1, a2 - subpd xmm1, xmm10 - ; IMUL_M r4, L1[r5] - mov eax, r13d - and eax, 16376 - imul r12, qword ptr [rsi+rax] - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; FSWAP_R e1 - shufpd xmm5, xmm5, 1 - ; IADD_RC r6, r5, 1744830258 - lea r14, [r14+r13+1744830258] - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; ISUB_R r7, r0 - sub r15, r8 - ; FSUB_R f1, a3 - subpd xmm1, xmm11 - ; IMUL_9C r4, 241775739 - lea r12, [r12+r12*8+241775739] - ; FADD_R f0, a0 - addpd xmm0, xmm8 - ; IMUL_R r4, r3 - imul r12, r11 - ; IMUL_RCP r4, 2389176791 - mov rax, 16580640414036304271 - imul r12, rax - ; FSCAL_R f1 - xorps xmm1, xmm15 - ; FSUB_R f2, a1 - subpd xmm2, xmm9 - ; ISTORE L2[r2], r0 - mov eax, r10d - and eax, 262136 - mov qword ptr [rsi+rax], r8 - ; IXOR_M r5, L1[r7] - mov eax, r15d - and eax, 16376 - xor r13, qword ptr [rsi+rax] - ; IMULH_M r4, L1[r1] - mov ecx, r9d - and ecx, 16376 - mov rax, r12 - mul qword ptr [rsi+rcx] - mov r12, rdx - ; FMUL_R e2, a1 - mulpd xmm6, xmm9 - ; IXOR_R r0, r5 - xor r8, r13 - ; IROR_R r0, r7 - mov ecx, r15d - ror r8, cl - ; IADD_RC r6, r5, 472588845 - lea r14, [r14+r13+472588845] - ; FADD_R f0, a0 - addpd xmm0, xmm8 - ; FSCAL_R f0 - xorps xmm0, xmm15 - ; IROR_R r2, r1 - mov ecx, r9d - ror r10, cl - ; IADD_RC r2, r1, 1968510355 - lea r10, [r10+r9+1968510355] - ; FMUL_R e0, a0 - mulpd xmm4, xmm8 - ; ISUB_R r7, r1 - sub r15, r9 - ; IADD_RC r4, r7, 1111936914 - lea r12, [r12+r15+1111936914] - ; IADD_RC r7, r3, 373642756 - lea r15, [r15+r11+373642756] - ; FSUB_R f0, a0 - subpd xmm0, xmm8 - ; IMUL_RCP r6, 3388328460 - mov rax, 11691334451422153092 - imul r14, rax - ; FSWAP_R e1 - shufpd xmm5, xmm5, 1 - ; IADD_RC r7, r5, -644292398 - lea r15, [r15+r13-644292398] - ; IMUL_9C r7, -1398596563 - lea r15, [r15+r15*8-1398596563] - ; FADD_R f0, a3 - addpd xmm0, xmm11 - ; FDIV_M e1, L1[r5] - mov eax, r13d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - andps xmm12, xmm13 - orps xmm12, xmm14 - divpd xmm5, xmm12 - ; IXOR_M r2, L1[r5] - mov eax, r13d - and eax, 16376 - xor r10, qword ptr [rsi+rax] - ; IADD_R r5, r6 - add r13, r14 - ; IROR_R r4, r0 - mov ecx, r8d - ror r12, cl - ; IXOR_R r0, r6 - xor r8, r14 - ; IMUL_RCP r1, 1035942442 - mov rax, 9559913671615977868 - imul r9, rax - ; IMUL_9C r1, 105267179 - lea r9, [r9+r9*8+105267179] - ; IMUL_M r1, L1[r2] - mov eax, r10d - and eax, 16376 - imul r9, qword ptr [rsi+rax] - ; COND_R r6, be(r7, 1344676209) - xor ecx, ecx - cmp r15d, 1344676209 - setbe cl - add r14, rcx - ; IADD_R r6, r1 - add r14, r9 - ; IROR_R r5, r1 - mov ecx, r9d - ror r13, cl - ; ISMULH_R r0, r6 - mov rax, r8 - imul r14 - mov r8, rdx - ; IXOR_R r6, r7 - xor r14, r15 - ; FSUB_R f1, a3 - subpd xmm1, xmm11 - ; IMUL_9C r1, 1991866007 - lea r9, [r9+r9*8+1991866007] - ; IMUL_RCP r2, 4139294400 - mov rax, 9570249764581173254 - imul r10, rax - ; FSWAP_R f0 - shufpd xmm0, xmm0, 1 - ; ISUB_R r5, r2 - sub r13, r10 - ; COND_R r6, lt(r1, -834783176) - xor ecx, ecx - cmp r9d, -834783176 - setl cl - add r14, rcx - ; ISTORE L2[r7], r3 - mov eax, r15d - and eax, 262136 - mov qword ptr [rsi+rax], r11 - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; FSCAL_R f1 - xorps xmm1, xmm15 +randomx_isn_12: ; IMUL_R r7, r4 imul r15, r12 - ; IMUL_RCP r4, 3027698566 - mov rax, 13083892069700893994 - imul r12, rax - ; IMULH_M r2, L1[r3] +randomx_isn_13: + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 +randomx_isn_14: + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 +randomx_isn_15: + ; IADD_R r6, r2 + add r14, r10 +randomx_isn_16: + ; FSUB_M f2, L1[r1] + mov eax, r9d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm2, xmm12 +randomx_isn_17: + ; IROR_R r4, r3 mov ecx, r11d - and ecx, 16376 - mov rax, r10 - mul qword ptr [rsi+rcx] - mov r10, rdx - ; IADD_M r6, L1[r1] - mov eax, r9d + ror r12, cl +randomx_isn_18: + ; ISTORE L1[r4], r4 + mov eax, r12d and eax, 16376 - add r14, qword ptr [rsi+rax] - ; IMUL_M r3, L1[r1] - mov eax, r9d + mov qword ptr [rsi+rax], r12 +randomx_isn_19: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_20: + ; COND_R r6, of(r3, 1593588996) + add r8, 2 + test r8, 2 + jz randomx_isn_0 + xor ecx, ecx + cmp r11d, 1593588996 + seto cl + add r14, rcx +randomx_isn_21: + ; IXOR_M r7, L1[r2] + mov eax, r10d and eax, 16376 - imul r11, qword ptr [rsi+rax] + xor r15, qword ptr [rsi+rax] +randomx_isn_22: + ; IXOR_M r1, L1[r0] + mov eax, r8d + and eax, 16376 + xor r9, qword ptr [rsi+rax] +randomx_isn_23: + ; FMUL_R e2, a0 + mulpd xmm6, xmm8 +randomx_isn_24: + ; COND_R r6, no(r0, 149087159) + add r8, 64 + test r8, 64 + jz randomx_isn_21 + xor ecx, ecx + cmp r8d, 149087159 + setno cl + add r14, rcx +randomx_isn_25: + ; FADD_R f3, a0 + addpd xmm3, xmm8 +randomx_isn_26: + ; IADD_R r7, r0 + add r15, r8 +randomx_isn_27: + ; IMUL_R r2, r3 + imul r10, r11 +randomx_isn_28: + ; IADD_R r5, r7 + add r13, r15 +randomx_isn_29: + ; ISTORE L2[r6], r2 + mov eax, r14d + and eax, 262136 + mov qword ptr [rsi+rax], r10 +randomx_isn_30: ; ISTORE L1[r7], r5 mov eax, r15d and eax, 16376 mov qword ptr [rsi+rax], r13 - ; IADD_RC r3, r1, -183791073 - lea r11, [r11+r9-183791073] - ; IMUL_9C r6, 1353963989 - lea r14, [r14+r14*8+1353963989] - ; ISUB_R r2, r3 - sub r10, r11 - ; IMUL_R r2, r1 - imul r10, r9 - ; IMULH_R r6, r4 - mov rax, r14 - mul r12 - mov r14, rdx - ; ISMULH_R r6, r4 - mov rax, r14 - imul r12 - mov r14, rdx - ; IADD_R r7, r4 - add r15, r12 - ; FMUL_R e3, a1 - mulpd xmm7, xmm9 +randomx_isn_31: + ; FSUB_R f1, a2 + subpd xmm1, xmm10 +randomx_isn_32: + ; IMUL_R r3, r5 + imul r11, r13 +randomx_isn_33: + ; IROR_R r1, 20 + ror r9, 20 +randomx_isn_34: + ; FSCAL_R f1 + xorps xmm1, xmm15 +randomx_isn_35: + ; IMUL_R r6, 835132161 + imul r14, 835132161 +randomx_isn_36: + ; IADD_M r3, L1[r4] + mov eax, r12d + and eax, 16376 + add r11, qword ptr [rsi+rax] +randomx_isn_37: + ; IMUL_9C r6, 1885029796 + lea r14, [r14+r14*8+1885029796] +randomx_isn_38: + ; FSCAL_R f2 + xorps xmm2, xmm15 +randomx_isn_39: + ; ISUB_M r5, L1[r0] + mov eax, r8d + and eax, 16376 + sub r13, qword ptr [rsi+rax] +randomx_isn_40: + ; IMUL_R r7, r2 + imul r15, r10 +randomx_isn_41: + ; FMUL_R e1, a0 + mulpd xmm5, xmm8 +randomx_isn_42: + ; IXOR_R r5, r0 + xor r13, r8 +randomx_isn_43: + ; FSWAP_R e0 + shufpd xmm4, xmm4, 1 +randomx_isn_44: ; FADD_R f1, a2 addpd xmm1, xmm10 - ; IADD_R r5, r6 - add r13, r14 - ; IADD_RC r4, r0, -1810659257 - lea r12, [r12+r8-1810659257] - ; IROR_R r2, r5 - mov ecx, r13d +randomx_isn_45: + ; ISTORE L1[r0], r5 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r13 +randomx_isn_46: + ; IADD_M r0, L2[r7] + mov eax, r15d + and eax, 262136 + add r8, qword ptr [rsi+rax] +randomx_isn_47: + ; IXOR_R r5, r2 + xor r13, r10 +randomx_isn_48: + ; FSUB_R f3, a3 + subpd xmm3, xmm11 +randomx_isn_49: + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 +randomx_isn_50: + ; FSUB_R f3, a0 + subpd xmm3, xmm8 +randomx_isn_51: + ; COND_R r2, be(r3, -1975981803) + add r12, 128 + test r12, 128 + jz randomx_isn_25 + xor ecx, ecx + cmp r11d, -1975981803 + setbe cl + add r10, rcx +randomx_isn_52: + ; IADD_RC r1, r1, 878232328 + lea r9, [r9+r9+878232328] +randomx_isn_53: + ; FSUB_R f2, a0 + subpd xmm2, xmm8 +randomx_isn_54: + ; COND_R r5, ns(r1, 1917049931) + add r8, 64 + test r8, 64 + jz randomx_isn_52 + xor ecx, ecx + cmp r9d, 1917049931 + setns cl + add r13, rcx +randomx_isn_55: + ; IXOR_R r2, r3 + xor r10, r11 +randomx_isn_56: + ; FSCAL_R f0 + xorps xmm0, xmm15 +randomx_isn_57: + ; IMUL_R r5, r1 + imul r13, r9 +randomx_isn_58: + ; IADD_R r5, r1 + add r13, r9 +randomx_isn_59: + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 +randomx_isn_60: + ; IROR_R r2, r6 + mov ecx, r14d ror r10, cl - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; FSWAP_R e2 - shufpd xmm6, xmm6, 1 - ; FADD_M f0, L1[r2] +randomx_isn_61: + ; IADD_RC r0, r3, 553576025 + lea r8, [r8+r11+553576025] +randomx_isn_62: + ; FSQRT_R e3 + sqrtpd xmm7, xmm7 +randomx_isn_63: + ; IMUL_9C r6, -1165860156 + lea r14, [r14+r14*8-1165860156] +randomx_isn_64: + ; IMUL_9C r5, -1323706896 + lea r13, [r13+r13*8-1323706896] +randomx_isn_65: + ; IMUL_RCP r5, 2362240456 + mov rax, 16769707400664451577 + imul r13, rax +randomx_isn_66: + ; ISUB_R r4, 841292629 + sub r12, 841292629 +randomx_isn_67: + ; IADD_M r4, L1[r6] + mov eax, r14d + and eax, 16376 + add r12, qword ptr [rsi+rax] +randomx_isn_68: + ; FSUB_M f3, L1[r4] + mov eax, r12d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm3, xmm12 +randomx_isn_69: + ; IADD_RC r6, r4, -1863144764 + lea r14, [r14+r12-1863144764] +randomx_isn_70: + ; FSUB_M f1, L1[r5] + mov eax, r13d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm1, xmm12 +randomx_isn_71: + ; FSWAP_R e1 + shufpd xmm5, xmm5, 1 +randomx_isn_72: + ; FADD_R f2, a0 + addpd xmm2, xmm8 +randomx_isn_73: + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 +randomx_isn_74: + ; COND_R r6, ns(r3, -1200328848) + add r9, 4 + test r9, 4 + jz randomx_isn_55 + xor ecx, ecx + cmp r11d, -1200328848 + setns cl + add r14, rcx +randomx_isn_75: + ; FMUL_R e0, a3 + mulpd xmm4, xmm11 +randomx_isn_76: + ; FDIV_M e3, L1[r4] + mov eax, r12d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm13 + orps xmm12, xmm14 + divpd xmm7, xmm12 +randomx_isn_77: + ; FADD_R f1, a2 + addpd xmm1, xmm10 +randomx_isn_78: + ; FMUL_R e2, a1 + mulpd xmm6, xmm9 +randomx_isn_79: + ; IADD_RC r3, r1, -919815727 + lea r11, [r11+r9-919815727] +randomx_isn_80: + ; ISTORE L1[r2], r4 mov eax, r10d and eax, 16376 + mov qword ptr [rsi+rax], r12 +randomx_isn_81: + ; IMULH_R r3, r0 + mov rax, r11 + mul r8 + mov r11, rdx +randomx_isn_82: + ; IXOR_R r2, r6 + xor r10, r14 +randomx_isn_83: + ; FSUB_R f0, a2 + subpd xmm0, xmm10 +randomx_isn_84: + ; ISMULH_R r1, r7 + mov rax, r9 + imul r15 + mov r9, rdx +randomx_isn_85: + ; FSCAL_R f0 + xorps xmm0, xmm15 +randomx_isn_86: + ; ISUB_R r7, r3 + sub r15, r11 +randomx_isn_87: + ; IXOR_R r4, r2 + xor r12, r10 +randomx_isn_88: + ; IMUL_R r1, r3 + imul r9, r11 +randomx_isn_89: + ; COND_M r2, no(L1[r0], -122257389) + add r8, 64 + test r8, 64 + jz randomx_isn_75 + xor ecx, ecx + mov eax, r8d + and eax, 16376 + cmp dword ptr [rsi+rax], -122257389 + setno cl + add r10, rcx +randomx_isn_90: + ; ISTORE L1[r5], r7 + mov eax, r13d + and eax, 16376 + mov qword ptr [rsi+rax], r15 +randomx_isn_91: + ; ISTORE L1[r6], r5 + mov eax, r14d + and eax, 16376 + mov qword ptr [rsi+rax], r13 +randomx_isn_92: + ; FSUB_R f2, a0 + subpd xmm2, xmm8 +randomx_isn_93: + ; FADD_R f0, a1 + addpd xmm0, xmm9 +randomx_isn_94: + ; IXOR_R r6, r1 + xor r14, r9 +randomx_isn_95: + ; ISUB_M r0, L3[910032] + sub r8, qword ptr [rsi+910032] +randomx_isn_96: + ; FSWAP_R e3 + shufpd xmm7, xmm7, 1 +randomx_isn_97: + ; IMUL_M r4, L1[r2] + mov eax, r10d + and eax, 16376 + imul r12, qword ptr [rsi+rax] +randomx_isn_98: + ; IMUL_9C r0, 2144355962 + lea r8, [r8+r8*8+2144355962] +randomx_isn_99: + ; IMULH_R r1, r5 + mov rax, r9 + mul r13 + mov r9, rdx +randomx_isn_100: + ; ISTORE L1[r7], r3 + mov eax, r15d + and eax, 16376 + mov qword ptr [rsi+rax], r11 +randomx_isn_101: + ; ISWAP_R r0, r0 +randomx_isn_102: + ; IMUL_R r2, r7 + imul r10, r15 +randomx_isn_103: + ; ISUB_R r2, -1777504751 + sub r10, -1777504751 +randomx_isn_104: + ; ISTORE L2[r6], r7 + mov eax, r14d + and eax, 262136 + mov qword ptr [rsi+rax], r15 +randomx_isn_105: + ; FADD_R f3, a1 + addpd xmm3, xmm9 +randomx_isn_106: + ; FSUB_R f2, a2 + subpd xmm2, xmm10 +randomx_isn_107: + ; ISMULH_R r6, r5 + mov rax, r14 + imul r13 + mov r14, rdx +randomx_isn_108: + ; IADD_M r7, L1[r0] + mov eax, r8d + and eax, 16376 + add r15, qword ptr [rsi+rax] +randomx_isn_109: + ; IMUL_R r6, r5 + imul r14, r13 +randomx_isn_110: + ; IMUL_R r5, r1 + imul r13, r9 +randomx_isn_111: + ; FADD_M f2, L1[r0] + mov eax, r8d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm2, xmm12 +randomx_isn_112: + ; IADD_R r0, r3 + add r8, r11 +randomx_isn_113: + ; IADD_RC r3, r4, -1138304368 + lea r11, [r11+r12-1138304368] +randomx_isn_114: + ; IADD_M r2, L1[r4] + mov eax, r12d + and eax, 16376 + add r10, qword ptr [rsi+rax] +randomx_isn_115: + ; IMUL_M r7, L1[r2] + mov eax, r10d + and eax, 16376 + imul r15, qword ptr [rsi+rax] +randomx_isn_116: + ; FADD_R f1, a3 + addpd xmm1, xmm11 +randomx_isn_117: + ; FSUB_R f2, a2 + subpd xmm2, xmm10 +randomx_isn_118: + ; IADD_R r2, 160326201 + add r10, 160326201 +randomx_isn_119: + ; ISUB_M r7, L3[1780152] + sub r15, qword ptr [rsi+1780152] +randomx_isn_120: + ; IADD_R r4, r1 + add r12, r9 +randomx_isn_121: + ; IADD_R r4, r7 + add r12, r15 +randomx_isn_122: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_123: + ; FSQRT_R e0 + sqrtpd xmm4, xmm4 +randomx_isn_124: + ; FSUB_R f2, a2 + subpd xmm2, xmm10 +randomx_isn_125: + ; ISMULH_M r2, L2[r1] + mov ecx, r9d + and ecx, 262136 + mov rax, r10 + imul qword ptr [rsi+rcx] + mov r10, rdx +randomx_isn_126: + ; FSUB_M f2, L2[r2] + mov eax, r10d + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm2, xmm12 +randomx_isn_127: + ; IMUL_R r2, r4 + imul r10, r12 +randomx_isn_128: + ; FSUB_R f3, a1 + subpd xmm3, xmm9 +randomx_isn_129: + ; IADD_RC r1, r2, 697183462 + lea r9, [r9+r10+697183462] +randomx_isn_130: + ; FSUB_R f1, a1 + subpd xmm1, xmm9 +randomx_isn_131: + ; IMUL_M r2, L1[r3] + mov eax, r11d + and eax, 16376 + imul r10, qword ptr [rsi+rax] +randomx_isn_132: + ; IXOR_M r5, L3[1438200] + xor r13, qword ptr [rsi+1438200] +randomx_isn_133: + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 +randomx_isn_134: + ; IROR_R r5, r1 + mov ecx, r9d + ror r13, cl +randomx_isn_135: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_136: + ; ISUB_M r3, L2[r6] + mov eax, r14d + and eax, 262136 + sub r11, qword ptr [rsi+rax] +randomx_isn_137: + ; IADD_RC r4, r1, -1660063210 + lea r12, [r12+r9-1660063210] +randomx_isn_138: + ; ISTORE L1[r0], r0 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r8 +randomx_isn_139: + ; FADD_M f0, L1[r5] + mov eax, r13d + and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] addpd xmm0, xmm12 - ; IADD_R r0, 52817665 - add r8, 52817665 - ; IMUL_RCP r6, 3388141601 - mov rax, 11691979238837063231 - imul r14, rax - ; IMUL_RCP r3, 1356467790 - mov rax, 14601924774465956466 - imul r11, rax - ; IADD_RC r7, r4, -2056421852 - lea r15, [r15+r12-2056421852] - ; FSUB_M f1, L2[r4] - mov eax, r12d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm1, xmm12 - ; ISWAP_R r1, r5 - xchg r9, r13 - ; ISTORE L2[r3], r5 +randomx_isn_140: + ; ISUB_M r7, L1[r3] mov eax, r11d - and eax, 262136 - mov qword ptr [rsi+rax], r13 - ; FMUL_R e0, a3 - mulpd xmm4, xmm11 - ; IADD_RC r1, r4, -129008866 - lea r9, [r9+r12-129008866] - ; COND_R r6, no(r4, 311828213) - xor ecx, ecx - cmp r12d, 311828213 - setno cl - add r14, rcx - ; FSWAP_R e2 - shufpd xmm6, xmm6, 1 - ; IADD_RC r2, r2, 498744396 - lea r10, [r10+r10+498744396] - ; IADD_RC r2, r3, 1515945097 - lea r10, [r10+r11+1515945097] - ; FMUL_R e0, a2 - mulpd xmm4, xmm10 - ; ISTORE L2[r5], r7 - mov eax, r13d - and eax, 262136 - mov qword ptr [rsi+rax], r15 - ; IMUL_M r7, L2[r0] - mov eax, r8d - and eax, 262136 - imul r15, qword ptr [rsi+rax] - ; IADD_R r0, r2 - add r8, r10 - ; IADD_RC r7, r3, 1081450346 - lea r15, [r15+r11+1081450346] - ; FADD_R f1, a3 - addpd xmm1, xmm11 - ; FSCAL_R f3 - xorps xmm3, xmm15 - ; FADD_M f3, L2[r7] - mov eax, r15d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm3, xmm12 - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; COND_M r2, of(L1[r5], -255033167) + and eax, 16376 + sub r15, qword ptr [rsi+rax] +randomx_isn_141: + ; IROR_R r3, r2 + mov ecx, r10d + ror r11, cl +randomx_isn_142: + ; FADD_R f1, a0 + addpd xmm1, xmm8 +randomx_isn_143: + ; COND_R r5, ge(r1, 880467599) + add r14, 4 + test r14, 4 + jz randomx_isn_110 xor ecx, ecx + cmp r9d, 880467599 + setge cl + add r13, rcx +randomx_isn_144: + ; FSUB_M f1, L1[r5] mov eax, r13d and eax, 16376 - cmp dword ptr [rsi+rax], -255033167 - seto cl - add r10, rcx - ; FSUB_R f1, a1 - subpd xmm1, xmm9 - ; IADD_R r2, r5 - add r10, r13 - ; FSQRT_R e2 - sqrtpd xmm6, xmm6 - ; IMUL_9C r2, 1521722302 - lea r10, [r10+r10*8+1521722302] - ; FADD_R f0, a3 - addpd xmm0, xmm11 - ; ISUB_R r0, r5 - sub r8, r13 - ; FADD_R f2, a0 - addpd xmm2, xmm8 - ; ISWAP_R r6, r0 - xchg r14, r8 - ; IADD_RC r1, r4, -693164762 - lea r9, [r9+r12-693164762] - ; FDIV_M e0, L2[r2] + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm1, xmm12 +randomx_isn_145: + ; ISUB_R r5, r3 + sub r13, r11 +randomx_isn_146: + ; IADD_RC r0, r3, 1228198394 + lea r8, [r8+r11+1228198394] +randomx_isn_147: + ; IADD_RC r1, r3, 1747766580 + lea r9, [r9+r11+1747766580] +randomx_isn_148: + ; FSQRT_R e1 + sqrtpd xmm5, xmm5 +randomx_isn_149: + ; IADD_R r4, r3 + add r12, r11 +randomx_isn_150: + ; FADD_M f1, L1[r0] + mov eax, r8d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm1, xmm12 +randomx_isn_151: + ; IADD_RC r1, r0, 1063245428 + lea r9, [r9+r8+1063245428] +randomx_isn_152: + ; FSUB_R f1, a0 + subpd xmm1, xmm8 +randomx_isn_153: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_154: + ; IMUL_R r2, r6 + imul r10, r14 +randomx_isn_155: + ; CFROUND r3, 46 + mov rax, r11 + rol rax, 31 + and eax, 24576 + or eax, 40896 + mov dword ptr [rsp-8], eax + ldmxcsr dword ptr [rsp-8] +randomx_isn_156: + ; FSUB_R f3, a2 + subpd xmm3, xmm10 +randomx_isn_157: + ; ISTORE L1[r1], r1 + mov eax, r9d + and eax, 16376 + mov qword ptr [rsi+rax], r9 +randomx_isn_158: + ; ISTORE L1[r6], r4 + mov eax, r14d + and eax, 16376 + mov qword ptr [rsi+rax], r12 +randomx_isn_159: + ; IADD_M r7, L1[r2] mov eax, r10d - and eax, 262136 + and eax, 16376 + add r15, qword ptr [rsi+rax] +randomx_isn_160: + ; IMUL_RCP r7, 2040763167 + mov rax, 9705702723791900149 + imul r15, rax +randomx_isn_161: + ; FADD_R f3, a3 + addpd xmm3, xmm11 +randomx_isn_162: + ; IADD_RC r6, r4, -783948693 + lea r14, [r14+r12-783948693] +randomx_isn_163: + ; ISWAP_R r3, r5 + xchg r11, r13 +randomx_isn_164: + ; FSQRT_R e3 + sqrtpd xmm7, xmm7 +randomx_isn_165: + ; FSUB_R f1, a2 + subpd xmm1, xmm10 +randomx_isn_166: + ; IROR_R r5, r3 + mov ecx, r11d + ror r13, cl +randomx_isn_167: + ; IMUL_9C r2, 805006473 + lea r10, [r10+r10*8+805006473] +randomx_isn_168: + ; FDIV_M e0, L1[r4] + mov eax, r12d + and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] andps xmm12, xmm13 orps xmm12, xmm14 divpd xmm4, xmm12 - ; IMUL_9C r4, -1849458799 - lea r12, [r12+r12*8-1849458799] - ; IADD_RC r1, r4, -651820510 - lea r9, [r9+r12-651820510] - ; IMULH_R r6, r6 - mov rax, r14 - mul r14 - mov r14, rdx - ; FSUB_M f3, L2[r0] +randomx_isn_169: + ; IMUL_9C r3, 1773188989 + lea r11, [r11+r11*8+1773188989] +randomx_isn_170: + ; FADD_R f0, a3 + addpd xmm0, xmm11 +randomx_isn_171: + ; FADD_R f1, a0 + addpd xmm1, xmm8 +randomx_isn_172: + ; ISTORE L1[r7], r6 + mov eax, r15d + and eax, 16376 + mov qword ptr [rsi+rax], r14 +randomx_isn_173: + ; FSUB_M f0, L1[r7] + mov eax, r15d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm0, xmm12 +randomx_isn_174: + ; ISWAP_R r3, r0 + xchg r11, r8 +randomx_isn_175: + ; IMULH_R r0, r3 + mov rax, r8 + mul r11 + mov r8, rdx +randomx_isn_176: + ; IMUL_M r2, L3[1439696] + imul r10, qword ptr [rsi+1439696] +randomx_isn_177: + ; IMUL_M r3, L3[232968] + imul r11, qword ptr [rsi+232968] +randomx_isn_178: + ; IADD_RC r5, r3, -2108568616 + lea r13, [r13+r11-2108568616] +randomx_isn_179: + ; IADD_RC r3, r4, 1322108729 + lea r11, [r11+r12+1322108729] +randomx_isn_180: + ; FADD_R f3, a1 + addpd xmm3, xmm9 +randomx_isn_181: + ; FSQRT_R e3 + sqrtpd xmm7, xmm7 +randomx_isn_182: + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 +randomx_isn_183: + ; IADD_M r6, L2[r2] + mov eax, r10d + and eax, 262136 + add r14, qword ptr [rsi+rax] +randomx_isn_184: + ; FADD_R f2, a3 + addpd xmm2, xmm11 +randomx_isn_185: + ; FSWAP_R f3 + shufpd xmm3, xmm3, 1 +randomx_isn_186: + ; FSCAL_R f3 + xorps xmm3, xmm15 +randomx_isn_187: + ; IADD_RC r6, r6, -914790425 + lea r14, [r14+r14-914790425] +randomx_isn_188: + ; FSCAL_R f2 + xorps xmm2, xmm15 +randomx_isn_189: + ; IMUL_M r4, L1[r5] + mov eax, r13d + and eax, 16376 + imul r12, qword ptr [rsi+rax] +randomx_isn_190: + ; FSUB_M f2, L1[r3] + mov eax, r11d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm2, xmm12 +randomx_isn_191: + ; IMUL_M r4, L2[r3] + mov eax, r11d + and eax, 262136 + imul r12, qword ptr [rsi+rax] +randomx_isn_192: + ; ISUB_M r7, L1[r3] + mov eax, r11d + and eax, 16376 + sub r15, qword ptr [rsi+rax] +randomx_isn_193: + ; ISTORE L1[r1], r1 + mov eax, r9d + and eax, 16376 + mov qword ptr [rsi+rax], r9 +randomx_isn_194: + ; ISTORE L1[r3], r4 + mov eax, r11d + and eax, 16376 + mov qword ptr [rsi+rax], r12 +randomx_isn_195: + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 +randomx_isn_196: + ; FMUL_R e2, a0 + mulpd xmm6, xmm8 +randomx_isn_197: + ; FADD_M f2, L2[r0] mov eax, r8d and eax, 262136 cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm3, xmm12 - ; FSUB_R f0, a2 - subpd xmm0, xmm10 + addpd xmm2, xmm12 +randomx_isn_198: + ; FADD_R f1, a2 + addpd xmm1, xmm10 +randomx_isn_199: + ; FSUB_R f3, a3 + subpd xmm3, xmm11 +randomx_isn_200: + ; IADD_RC r2, r5, 248917123 + lea r10, [r10+r13+248917123] +randomx_isn_201: + ; IMUL_9C r6, 376384700 + lea r14, [r14+r14*8+376384700] +randomx_isn_202: + ; ISWAP_R r3, r6 + xchg r11, r14 +randomx_isn_203: + ; ISTORE L1[r1], r3 + mov eax, r9d + and eax, 16376 + mov qword ptr [rsi+rax], r11 +randomx_isn_204: + ; IMUL_R r6, r1 + imul r14, r9 +randomx_isn_205: + ; ISUB_R r7, r5 + sub r15, r13 +randomx_isn_206: + ; IADD_R r3, r5 + add r11, r13 +randomx_isn_207: + ; FSCAL_R f1 + xorps xmm1, xmm15 +randomx_isn_208: + ; IADD_R r6, r3 + add r14, r11 +randomx_isn_209: + ; FSUB_M f0, L1[r4] + mov eax, r12d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm0, xmm12 +randomx_isn_210: + ; FSWAP_R e2 + shufpd xmm6, xmm6, 1 +randomx_isn_211: + ; FMUL_R e2, a3 + mulpd xmm6, xmm11 +randomx_isn_212: + ; IMUL_M r0, L1[r1] + mov eax, r9d + and eax, 16376 + imul r8, qword ptr [rsi+rax] +randomx_isn_213: + ; FSUB_M f2, L1[r5] + mov eax, r13d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm2, xmm12 +randomx_isn_214: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_215: + ; FADD_R f3, a1 + addpd xmm3, xmm9 +randomx_isn_216: + ; IXOR_M r4, L2[r1] + mov eax, r9d + and eax, 262136 + xor r12, qword ptr [rsi+rax] +randomx_isn_217: + ; IMUL_M r6, L1[r5] + mov eax, r13d + and eax, 16376 + imul r14, qword ptr [rsi+rax] +randomx_isn_218: + ; FSCAL_R f2 + xorps xmm2, xmm15 +randomx_isn_219: + ; FADD_M f3, L1[r7] + mov eax, r15d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm3, xmm12 +randomx_isn_220: + ; FSUB_R f0, a0 + subpd xmm0, xmm8 +randomx_isn_221: + ; IMUL_R r1, r0 + imul r9, r8 +randomx_isn_222: + ; IADD_M r1, L1[r0] + mov eax, r8d + and eax, 16376 + add r9, qword ptr [rsi+rax] +randomx_isn_223: + ; FSCAL_R f2 + xorps xmm2, xmm15 +randomx_isn_224: + ; IADD_R r5, r4 + add r13, r12 +randomx_isn_225: + ; ISTORE L2[r2], r1 + mov eax, r10d + and eax, 262136 + mov qword ptr [rsi+rax], r9 +randomx_isn_226: + ; ISUB_R r6, -791575725 + sub r14, -791575725 +randomx_isn_227: ; FDIV_M e3, L1[r0] mov eax, r8d and eax, 16376 @@ -710,11 +897,107 @@ andps xmm12, xmm13 orps xmm12, xmm14 divpd xmm7, xmm12 - ; IADD_M r3, L1[r7] +randomx_isn_228: + ; IXOR_R r7, r1 + xor r15, r9 +randomx_isn_229: + ; ISWAP_R r0, r6 + xchg r8, r14 +randomx_isn_230: + ; IADD_M r2, L1[r7] mov eax, r15d and eax, 16376 - add r11, qword ptr [rsi+rax] - ; IXOR_M r2, L2[r6] - mov eax, r14d - and eax, 262136 - xor r10, qword ptr [rsi+rax] + add r10, qword ptr [rsi+rax] +randomx_isn_231: + ; FMUL_R e1, a0 + mulpd xmm5, xmm8 +randomx_isn_232: + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 +randomx_isn_233: + ; FMUL_R e0, a2 + mulpd xmm4, xmm10 +randomx_isn_234: + ; IADD_RC r2, r7, 1435646464 + lea r10, [r10+r15+1435646464] +randomx_isn_235: + ; ISWAP_R r7, r6 + xchg r15, r14 +randomx_isn_236: + ; FMUL_R e3, a2 + mulpd xmm7, xmm10 +randomx_isn_237: + ; FSUB_R f1, a3 + subpd xmm1, xmm11 +randomx_isn_238: + ; IADD_R r4, r2 + add r12, r10 +randomx_isn_239: + ; IMUL_RCP r7, 3065786637 + mov rax, 12921343181238534701 + imul r15, rax +randomx_isn_240: + ; IMUL_R r5, r7 + imul r13, r15 +randomx_isn_241: + ; IROR_R r6, r5 + mov ecx, r13d + ror r14, cl +randomx_isn_242: + ; IMUL_R r6, r4 + imul r14, r12 +randomx_isn_243: + ; FSUB_R f0, a3 + subpd xmm0, xmm11 +randomx_isn_244: + ; FADD_M f1, L1[r0] + mov eax, r8d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm1, xmm12 +randomx_isn_245: + ; INEG_R r3 + neg r11 +randomx_isn_246: + ; IMUL_9C r7, 1938400676 + lea r15, [r15+r15*8+1938400676] +randomx_isn_247: + ; COND_M r2, be(L1[r5], -8545330) + add r9, 4 + test r9, 4 + jz randomx_isn_223 + xor ecx, ecx + mov eax, r13d + and eax, 16376 + cmp dword ptr [rsi+rax], -8545330 + setbe cl + add r10, rcx +randomx_isn_248: + ; ISTORE L1[r0], r5 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r13 +randomx_isn_249: + ; IADD_RC r6, r5, 2052724836 + lea r14, [r14+r13+2052724836] +randomx_isn_250: + ; FADD_R f3, a0 + addpd xmm3, xmm8 +randomx_isn_251: + ; IADD_R r0, -221201557 + add r8, -221201557 +randomx_isn_252: + ; ISUB_M r4, L1[r2] + mov eax, r10d + and eax, 16376 + sub r12, qword ptr [rsi+rax] +randomx_isn_253: + ; IADD_RC r5, r4, 256175395 + lea r13, [r13+r12+256175395] +randomx_isn_254: + ; IADD_RC r6, r7, 1119815512 + lea r14, [r14+r15+1119815512] +randomx_isn_255: + ; IROR_R r7, r3 + mov ecx, r11d + ror r15, cl