mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
89aba80925
* Blake2Generator::getInt32 renamed to getUInt32 to avoid confusion * isPowerOf2 renamed to isZeroOrPowerOf2 to avoid confusion * added asserts to validate the input/output size of AES functions * fixed possible overflow in JitCompilerX86::getCodeSize (unused function)
611 lines
23 KiB
C++
611 lines
23 KiB
C++
/*
|
|
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
|
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
* Neither the name of the copyright holder nor the
|
|
names of its contributors may be used to endorse or promote products
|
|
derived from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <climits>
|
|
#include "assembly_generator_x86.hpp"
|
|
#include "common.hpp"
|
|
#include "reciprocal.h"
|
|
#include "program.hpp"
|
|
#include "superscalar.hpp"
|
|
|
|
namespace randomx {
|
|
|
|
static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
|
static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
|
static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
|
static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
|
|
static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
|
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
|
|
|
static const char* tempRegx = "xmm12";
|
|
static const char* mantissaMaskReg = "xmm13";
|
|
static const char* exponentMaskReg = "xmm14";
|
|
static const char* scaleMaskReg = "xmm15";
|
|
static const char* regIc = "rbx";
|
|
static const char* regIc32 = "ebx";
|
|
static const char* regIc8 = "bl";
|
|
static const char* regScratchpadAddr = "rsi";
|
|
|
|
void AssemblyGeneratorX86::generateProgram(Program& prog) {
|
|
for (unsigned i = 0; i < RegistersCount; ++i) {
|
|
registerUsage[i] = -1;
|
|
}
|
|
asmCode.str(std::string()); //clear
|
|
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
|
asmCode << "randomx_isn_" << i << ":" << std::endl;
|
|
Instruction& instr = prog(i);
|
|
instr.src %= RegistersCount;
|
|
instr.dst %= RegistersCount;
|
|
generateCode(instr, i);
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
|
|
asmCode.str(std::string()); //clear
|
|
#ifdef RANDOMX_ALIGN
|
|
asmCode << "ALIGN 16" << std::endl;
|
|
#endif
|
|
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
|
Instruction& instr = prog(i);
|
|
switch ((SuperscalarInstructionType)instr.opcode)
|
|
{
|
|
case SuperscalarInstructionType::ISUB_R:
|
|
asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_R:
|
|
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IADD_RS:
|
|
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IMUL_R:
|
|
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IROR_C:
|
|
asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IADD_C7:
|
|
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_C7:
|
|
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IADD_C8:
|
|
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
#ifdef RANDOMX_ALIGN
|
|
asmCode << "nop" << std::endl;
|
|
#endif
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_C8:
|
|
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
#ifdef RANDOMX_ALIGN
|
|
asmCode << "nop" << std::endl;
|
|
#endif
|
|
break;
|
|
case SuperscalarInstructionType::IADD_C9:
|
|
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
#ifdef RANDOMX_ALIGN
|
|
asmCode << "xchg ax, ax ;nop" << std::endl;
|
|
#endif
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_C9:
|
|
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
#ifdef RANDOMX_ALIGN
|
|
asmCode << "xchg ax, ax ;nop" << std::endl;
|
|
#endif
|
|
break;
|
|
case SuperscalarInstructionType::IMULH_R:
|
|
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "mul " << regR[instr.src] << std::endl;
|
|
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::ISMULH_R:
|
|
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "imul " << regR[instr.src] << std::endl;
|
|
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IMUL_RCP:
|
|
asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl;
|
|
asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
|
|
break;
|
|
default:
|
|
UNREACHABLE;
|
|
}
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
|
|
asmCode.str(std::string()); //clear
|
|
asmCode << "#include <stdint.h>" << std::endl;
|
|
asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
|
|
asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
|
|
asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
|
|
asmCode << " return ((__int128)a * b) >> 64;" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " #define HAVE_MULH" << std::endl;
|
|
asmCode << " #define HAVE_SMULH" << std::endl;
|
|
asmCode << "#endif" << std::endl;
|
|
asmCode << "#if defined(_MSC_VER)" << std::endl;
|
|
asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl;
|
|
asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
|
|
asmCode << " #include <intrin.h>" << std::endl;
|
|
asmCode << " #include <stdlib.h>" << std::endl;
|
|
asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
|
|
asmCode << " return _rotr64(x, c);" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " #define HAVE_ROTR" << std::endl;
|
|
asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
|
|
asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
|
|
asmCode << " return __umulh(a, b);" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " #define HAVE_MULH" << std::endl;
|
|
asmCode << " #endif" << std::endl;
|
|
asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
|
|
asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
|
|
asmCode << " int64_t hi;" << std::endl;
|
|
asmCode << " _mul128(a, b, &hi);" << std::endl;
|
|
asmCode << " return hi;" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " #define HAVE_SMULH" << std::endl;
|
|
asmCode << " #endif" << std::endl;
|
|
asmCode << "#endif" << std::endl;
|
|
asmCode << "#ifndef HAVE_ROTR" << std::endl;
|
|
asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
|
|
asmCode << " return (a >> b) | (a << (64 - b));" << std::endl;
|
|
asmCode << " }" << std::endl;
|
|
asmCode << " #define HAVE_ROTR" << std::endl;
|
|
asmCode << "#endif" << std::endl;
|
|
asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
|
|
asmCode << " #error \"Required functions are not defined\"" << std::endl;
|
|
asmCode << "#endif" << std::endl;
|
|
asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
|
|
asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
|
|
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
|
Instruction& instr = prog(i);
|
|
switch ((SuperscalarInstructionType)instr.opcode)
|
|
{
|
|
case SuperscalarInstructionType::ISUB_R:
|
|
asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_R:
|
|
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IADD_RS:
|
|
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IMUL_R:
|
|
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IROR_C:
|
|
asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IADD_C7:
|
|
case SuperscalarInstructionType::IADD_C8:
|
|
case SuperscalarInstructionType::IADD_C9:
|
|
asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IXOR_C7:
|
|
case SuperscalarInstructionType::IXOR_C8:
|
|
case SuperscalarInstructionType::IXOR_C9:
|
|
asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IMULH_R:
|
|
asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::ISMULH_R:
|
|
asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
|
break;
|
|
case SuperscalarInstructionType::IMUL_RCP:
|
|
asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl;
|
|
break;
|
|
default:
|
|
UNREACHABLE;
|
|
}
|
|
}
|
|
asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
|
|
asmCode << "}" << std::endl;
|
|
}
|
|
|
|
void AssemblyGeneratorX86::traceint(Instruction& instr) {
|
|
if (trace) {
|
|
asmCode << "\tpush " << regR[instr.dst] << std::endl;
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::traceflt(Instruction& instr) {
|
|
if (trace) {
|
|
asmCode << "\tpush 0" << std::endl;
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::tracenop(Instruction& instr) {
|
|
if (trace) {
|
|
asmCode << "\tpush 0" << std::endl;
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
|
asmCode << "\t; " << instr;
|
|
auto generator = engine[instr.opcode];
|
|
(this->*generator)(instr, i);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
|
asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
|
asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
|
}
|
|
|
|
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
|
|
asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
|
int mask;
|
|
if (instr.getModCond() < StoreL3Condition) {
|
|
mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
|
|
}
|
|
else {
|
|
mask = ScratchpadL3Mask;
|
|
}
|
|
asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl;
|
|
}
|
|
|
|
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
|
|
return (int32_t)instr.getImm32() & ScratchpadL3Mask;
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if(instr.dst == RegisterNeedsDisplacement)
|
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
|
else
|
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr);
|
|
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tsub " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr);
|
|
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\timul " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr);
|
|
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\tmul " << regR[instr.src] << std::endl;
|
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr, "ecx");
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\timul " << regR[instr.src] << std::endl;
|
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr, "ecx");
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
|
asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
asmCode << "\tneg " << regR[instr.dst] << std::endl;
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\txor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
genAddressReg(instr);
|
|
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
|
asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\tror " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
|
|
registerUsage[instr.dst] = i;
|
|
if (instr.src != instr.dst) {
|
|
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
|
asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
|
|
}
|
|
else {
|
|
asmCode << "\trol " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
|
|
}
|
|
traceint(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
|
|
uint64_t divisor = instr.getImm32();
|
|
if (!isZeroOrPowerOf2(divisor)) {
|
|
registerUsage[instr.dst] = i;
|
|
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
|
|
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
|
|
traceint(instr);
|
|
}
|
|
else {
|
|
tracenop(instr);
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
|
|
if (instr.src != instr.dst) {
|
|
registerUsage[instr.dst] = i;
|
|
registerUsage[instr.src] = i;
|
|
asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
|
traceint(instr);
|
|
}
|
|
else {
|
|
tracenop(instr);
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) {
|
|
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
instr.src %= RegisterCountFlt;
|
|
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
genAddressReg(instr);
|
|
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
instr.src %= RegisterCountFlt;
|
|
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
genAddressReg(instr);
|
|
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
instr.src %= RegisterCountFlt;
|
|
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
genAddressReg(instr);
|
|
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
|
asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl;
|
|
asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl;
|
|
asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
|
|
instr.dst %= RegisterCountFlt;
|
|
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
|
|
traceflt(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
|
|
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
|
|
int rotate = (13 - (instr.getImm32() & 63)) & 63;
|
|
if (rotate != 0)
|
|
asmCode << "\trol rax, " << rotate << std::endl;
|
|
asmCode << "\tand eax, 24576" << std::endl;
|
|
asmCode << "\tor eax, 40896" << std::endl;
|
|
asmCode << "\tpush rax" << std::endl;
|
|
asmCode << "\tldmxcsr dword ptr [rsp]" << std::endl;
|
|
asmCode << "\tpop rax" << std::endl;
|
|
tracenop(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
|
|
int reg = instr.dst;
|
|
int target = registerUsage[reg] + 1;
|
|
int shift = instr.getModCond() + ConditionOffset;
|
|
int32_t imm = instr.getImm32() | (1L << shift);
|
|
if (ConditionOffset > 0 || shift > 0)
|
|
imm &= ~(1L << (shift - 1));
|
|
asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
|
|
asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
|
|
asmCode << "\tjz randomx_isn_" << target << std::endl;
|
|
//mark all registers as used
|
|
for (unsigned j = 0; j < RegistersCount; ++j) {
|
|
registerUsage[j] = i;
|
|
}
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
|
|
genAddressRegDst(instr);
|
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl;
|
|
tracenop(instr);
|
|
}
|
|
|
|
void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) {
|
|
asmCode << "\tnop" << std::endl;
|
|
tracenop(instr);
|
|
}
|
|
|
|
#include "instruction_weights.hpp"
|
|
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
|
|
|
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
|
INST_HANDLE(IADD_RS)
|
|
INST_HANDLE(IADD_M)
|
|
INST_HANDLE(ISUB_R)
|
|
INST_HANDLE(ISUB_M)
|
|
INST_HANDLE(IMUL_R)
|
|
INST_HANDLE(IMUL_M)
|
|
INST_HANDLE(IMULH_R)
|
|
INST_HANDLE(IMULH_M)
|
|
INST_HANDLE(ISMULH_R)
|
|
INST_HANDLE(ISMULH_M)
|
|
INST_HANDLE(IMUL_RCP)
|
|
INST_HANDLE(INEG_R)
|
|
INST_HANDLE(IXOR_R)
|
|
INST_HANDLE(IXOR_M)
|
|
INST_HANDLE(IROR_R)
|
|
INST_HANDLE(IROL_R)
|
|
INST_HANDLE(ISWAP_R)
|
|
INST_HANDLE(FSWAP_R)
|
|
INST_HANDLE(FADD_R)
|
|
INST_HANDLE(FADD_M)
|
|
INST_HANDLE(FSUB_R)
|
|
INST_HANDLE(FSUB_M)
|
|
INST_HANDLE(FSCAL_R)
|
|
INST_HANDLE(FMUL_R)
|
|
INST_HANDLE(FDIV_M)
|
|
INST_HANDLE(FSQRT_R)
|
|
INST_HANDLE(CBRANCH)
|
|
INST_HANDLE(CFROUND)
|
|
INST_HANDLE(ISTORE)
|
|
INST_HANDLE(NOP)
|
|
};
|
|
} |