mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Random accesses - JIT compiler
This commit is contained in:
parent
b71e0eec65
commit
d1a808643d
7
makefile
7
makefile
@ -11,7 +11,7 @@ SRCDIR=src
|
||||
OBJDIR=obj
|
||||
LDFLAGS=-lpthread
|
||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o)
|
||||
ifeq ($(PLATFORM),x86_64)
|
||||
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
|
||||
endif
|
||||
@ -60,7 +60,7 @@ $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) |
|
||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
||||
|
||||
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_r.inc read_f.inc)) | $(OBJDIR)
|
||||
# Static (hand-written) part of the x86-64 JIT. Every .inc file pulled in by
# JitCompilerX86-static.S is listed as a prerequisite so that editing any of
# them rebuilds the object; program_transform_address.inc is included by the
# .S file and was previously missing from this list.
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read.inc transform_address.inc)) | $(OBJDIR)
|
||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
||||
|
||||
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
|
||||
@ -87,6 +87,9 @@ $(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
||||
$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/VirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/virtualMemory.o: $(addprefix $(SRCDIR)/,virtualMemory.cpp virtualMemory.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/virtualMemory.cpp -o $@
|
||||
|
||||
$(OBJDIR)/t1ha2.o: $(addprefix $(SRCDIR)/t1ha/,t1ha2.c t1ha.h t1ha_bits.h) | $(OBJDIR)
|
||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/t1ha/t1ha2.c -o $@
|
||||
|
||||
|
@ -169,11 +169,12 @@ namespace RandomX {
|
||||
asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::gencr(Instruction& instr) {
|
||||
void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) {
|
||||
switch (instr.locc & 7)
|
||||
{
|
||||
case 0:
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
if(rax)
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||
@ -186,7 +187,8 @@ namespace RandomX {
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
if (rax)
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
@ -197,9 +199,9 @@ namespace RandomX {
|
||||
return;
|
||||
|
||||
default:
|
||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl;
|
||||
if (trace) {
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rax" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -208,7 +210,7 @@ namespace RandomX {
|
||||
void AssemblyGeneratorX86::gencf(Instruction& instr, bool move = true) {
|
||||
if(move)
|
||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
const char* store = (instr.locc & 8) ? "movhpd" : "movlpd";
|
||||
const char* store = (instr.locc & 128) ? "movhpd" : "movlpd";
|
||||
switch (instr.locc & 7)
|
||||
{
|
||||
case 4:
|
||||
@ -463,14 +465,13 @@ namespace RandomX {
|
||||
|
||||
// FPROUND: emits assembly text that sets the SSE rounding mode from operand A.
//
// Emitted sequence (after genar() has produced the A operand; the following
// code reads it from rax):
//   mov rcx, rax            - keep an unmodified copy of A for the C store
//   shl eax, 13             - move the low bits of A up to bit 13
//   and eax, 24576          - 0x6000: keep only bits 13-14
//   or  eax, 40896          - 0x9FC0: OR in the fixed remaining bits
//   mov [rsp - 8], eax / ldmxcsr [rsp - 8]
//                           - spill below the stack pointer and load MXCSR
// NOTE(review): per the Intel SDM, bits 13-14 of MXCSR are the rounding-control
// field and 0x9FC0 = FZ | all exception masks | DAZ - confirm this is the
// intended fixed configuration.
//
// gencr(instr, false) then stores the result taking the value from rcx
// instead of rax, because eax was overwritten while building MXCSR.
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
//asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tshl eax, 13" << std::endl;
|
||||
//asmCode << "\tand rcx, -2048" << std::endl;
|
||||
asmCode << "\tand eax, 24576" << std::endl;
|
||||
//asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
asmCode << "\tor eax, 40896" << std::endl;
|
||||
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
||||
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
|
||||
gencr(instr, false); // rax == false: the value to store is in rcx
|
||||
}
|
||||
|
||||
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
|
||||
|
@ -44,7 +44,7 @@ namespace RandomX {
|
||||
void genbr1(Instruction&);
|
||||
void genbr132(Instruction&);
|
||||
void genbf(Instruction&, const char*);
|
||||
void gencr(Instruction&);
|
||||
void gencr(Instruction&, bool);
|
||||
void gencf(Instruction&, bool);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
|
@ -47,8 +47,8 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::execute() {
|
||||
executeProgram(reg, mem, scratchpad, readDataset);
|
||||
//compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||
//executeProgram(reg, mem, scratchpad, readDataset);
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||
#ifdef TRACEVM
|
||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||
|
@ -197,6 +197,17 @@ namespace RandomX {
|
||||
#define ALU_RETIRE(x) x(a, b, c); \
|
||||
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
|
||||
|
||||
#define CHECK_NOP_FPDIV(b, c)
|
||||
#ifndef STATS
|
||||
#define CHECK_NOP_FPADD(b, c)
|
||||
#define CHECK_NOP_FPSUB(b, c)
|
||||
#define CHECK_NOP_FPMUL(b, c)
|
||||
#else
|
||||
#define CHECK_NOP_FPADD(b, c) bool loeq = (b.lo.u64 == c.lo.u64); bool hieq = (b.hi.u64 == c.hi.u64); count_FPADD_nop += loeq + hieq; if(loeq && hieq) count_FPADD_nop2++;
|
||||
#define CHECK_NOP_FPSUB(b, c) bool loeq = ((b.lo.u64 & INT64_MAX) == (c.lo.u64 & INT64_MAX)); bool hieq = ((b.hi.u64 & INT64_MAX) == (c.hi.u64 & INT64_MAX)); count_FPSUB_nop += loeq + hieq; if(loeq && hieq) count_FPSUB_nop2++;
|
||||
#define CHECK_NOP_FPMUL(b, c) bool loeq = (b.lo.u64 == c.lo.u64); bool hieq = (b.hi.u64 == c.hi.u64); count_FPMUL_nop += loeq + hieq; if(loeq && hieq) count_FPMUL_nop2++;
|
||||
#endif
|
||||
|
||||
#define FPU_RETIRE(x) x(a, b, c); \
|
||||
writecf(inst, c); \
|
||||
if(trace) { \
|
||||
@ -248,8 +259,10 @@ namespace RandomX {
|
||||
INC_COUNT(x) \
|
||||
convertible_t a = loada(inst); \
|
||||
fpu_reg_t& b = reg.f[inst.regb % RegistersCount]; \
|
||||
fpu_reg_t btemp = b; \
|
||||
fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
|
||||
FPU_RETIRE(x) \
|
||||
CHECK_NOP_##x(btemp, c) \
|
||||
}
|
||||
|
||||
#define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
||||
|
@ -83,6 +83,12 @@ namespace RandomX {
|
||||
int count_retdepth_max = 0;
|
||||
int count_endstack = 0;
|
||||
int count_instructions[ProgramLength] = { 0 };
|
||||
int count_FPADD_nop = 0;
|
||||
int count_FPADD_nop2 = 0;
|
||||
int count_FPSUB_nop = 0;
|
||||
int count_FPSUB_nop2 = 0;
|
||||
int count_FPMUL_nop = 0;
|
||||
int count_FPMUL_nop2 = 0;
|
||||
#endif
|
||||
|
||||
convertible_t loada(Instruction&);
|
||||
|
@ -29,9 +29,12 @@
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_begin)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_program_read_r)
|
||||
.global DECL(randomx_program_read_f)
|
||||
.global DECL(randomx_program_read_l1)
|
||||
.global DECL(randomx_program_read_l2)
|
||||
.global DECL(randomx_program_end)
|
||||
.global DECL(randomx_program_transform)
|
||||
|
||||
#define db .byte
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_prologue):
|
||||
@ -45,14 +48,26 @@ DECL(randomx_program_begin):
|
||||
DECL(randomx_program_epilogue):
|
||||
#include "asm/program_epilogue_linux.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_read_r):
|
||||
#include "asm/program_read_r.inc"
|
||||
#define scratchpad_mask and ecx, 2040
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_read_f):
|
||||
#include "asm/program_read_f.inc"
|
||||
DECL(randomx_program_read_l1):
|
||||
#include "asm/program_read.inc"
|
||||
|
||||
#undef scratchpad_mask
|
||||
|
||||
#define scratchpad_mask and ecx, 32760
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_read_l2):
|
||||
#include "asm/program_read.inc"
|
||||
|
||||
#undef scratchpad_mask
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_end):
|
||||
nop
|
||||
nop
|
||||
|
||||
.align 8
|
||||
DECL(randomx_program_transform):
|
||||
#include "asm/program_transform_address.inc"
|
||||
|
@ -20,9 +20,11 @@ _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_begin
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_program_read_r
|
||||
PUBLIC randomx_program_read_f
|
||||
PUBLIC randomx_program_read_l1
|
||||
PUBLIC randomx_program_read_l2
|
||||
PUBLIC randomx_program_end
|
||||
PUBLIC randomx_program_transform
|
||||
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_prologue PROC
|
||||
@ -39,21 +41,34 @@ randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_win64.inc
|
||||
randomx_program_epilogue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_read_r PROC
|
||||
include asm/program_read_r.inc
|
||||
randomx_program_read_r ENDP
|
||||
scratchpad_mask MACRO
|
||||
and ecx, 2040
|
||||
ENDM
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_read_f PROC
|
||||
include asm/program_read_f.inc
|
||||
randomx_program_read_f ENDP
|
||||
randomx_program_read_l1 PROC
|
||||
include asm/program_read.inc
|
||||
randomx_program_read_l1 ENDP
|
||||
|
||||
scratchpad_mask MACRO
|
||||
and ecx, 32760
|
||||
ENDM
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_read_l2 PROC
|
||||
include asm/program_read.inc
|
||||
randomx_program_read_l2 ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_end PROC
|
||||
nop
|
||||
randomx_program_end ENDP
|
||||
|
||||
ALIGN 8
|
||||
randomx_program_transform PROC
|
||||
include asm/program_transform_address.inc
|
||||
randomx_program_transform ENDP
|
||||
|
||||
_RANDOMX_JITX86_STATIC ENDS
|
||||
|
||||
END
|
@ -21,7 +21,8 @@ extern "C" {
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_begin();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_program_read_r();
|
||||
void randomx_program_read_f();
|
||||
void randomx_program_transform();
|
||||
void randomx_program_read_l1();
|
||||
void randomx_program_read_l2();
|
||||
void randomx_program_end();
|
||||
}
|
@ -48,12 +48,12 @@ namespace RandomX {
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
rax -> temporary
|
||||
rbx -> MemoryRegisters& memory
|
||||
rbx -> "ic"
|
||||
rcx -> temporary
|
||||
rdx -> temporary
|
||||
rsi -> convertible_t* scratchpad
|
||||
rdi -> "ic" (instruction counter)
|
||||
rbp -> beginning of VM stack
|
||||
rdi -> beginning of VM stack
|
||||
rbp -> "ma", "mx"
|
||||
rsp -> end of VM stack
|
||||
r8 -> "r0"
|
||||
r9 -> "r1"
|
||||
@ -82,7 +82,8 @@ namespace RandomX {
|
||||
| saved registers
|
||||
|
|
||||
v
|
||||
[rbp] RegisterFile& registerFile
|
||||
[rdi+8] RegisterFile& registerFile
|
||||
[rdi] uint8_t* dataset
|
||||
|
|
||||
|
|
||||
| VM stack
|
||||
@ -97,18 +98,19 @@ namespace RandomX {
|
||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||
const uint8_t* codeProgramBegin = (uint8_t*)&randomx_program_begin;
|
||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||
const uint8_t* codeReadDatasetR = (uint8_t*)&randomx_program_read_r;
|
||||
const uint8_t* codeReadDatasetF = (uint8_t*)&randomx_program_read_f;
|
||||
const uint8_t* codeReadDatasetL1 = (uint8_t*)&randomx_program_read_l1;
|
||||
const uint8_t* codeReadDatasetL2 = (uint8_t*)&randomx_program_read_l2;
|
||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||
const uint32_t* addressTransformations = (uint32_t*)&randomx_program_transform;
|
||||
|
||||
const int32_t prologueSize = codeProgramBegin - codePrologue;
|
||||
const int32_t epilogueSize = codeReadDatasetR - codeEpilogue;
|
||||
const int32_t readDatasetRSize = codeReadDatasetF - codeReadDatasetR;
|
||||
const int32_t readDatasetFSize = codeProgramEnd - codeReadDatasetF;
|
||||
const int32_t epilogueSize = codeReadDatasetL1 - codeEpilogue;
|
||||
const int32_t readDatasetL1Size = codeReadDatasetL2 - codeReadDatasetL1;
|
||||
const int32_t readDatasetL2Size = codeProgramEnd - codeReadDatasetL2;
|
||||
|
||||
const int32_t readDatasetFOffset = CodeSize - readDatasetFSize;
|
||||
const int32_t readDatasetROffset = readDatasetFOffset - readDatasetRSize;
|
||||
const int32_t epilogueOffset = readDatasetROffset - epilogueSize;
|
||||
const int32_t readDatasetL2Offset = CodeSize - readDatasetL2Size;
|
||||
const int32_t readDatasetL1Offset = readDatasetL2Offset - readDatasetL1Size;
|
||||
const int32_t epilogueOffset = readDatasetL1Offset - epilogueSize;
|
||||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
#ifdef _WIN32
|
||||
@ -121,9 +123,9 @@ namespace RandomX {
|
||||
throw std::runtime_error("mmap failed");
|
||||
#endif
|
||||
memcpy(code, codePrologue, prologueSize);
|
||||
memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize - epilogueSize, codeEpilogue, epilogueSize);
|
||||
memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize, codeReadDatasetR, readDatasetRSize);
|
||||
memcpy(code + CodeSize - readDatasetFSize, codeReadDatasetF, readDatasetFSize);
|
||||
memcpy(code + CodeSize - epilogueSize - readDatasetL1Size - readDatasetL2Size, codeEpilogue, epilogueSize);
|
||||
memcpy(code + CodeSize - readDatasetL1Size - readDatasetL2Size, codeReadDatasetL1, readDatasetL1Size);
|
||||
memcpy(code + CodeSize - readDatasetL2Size, codeReadDatasetL2, readDatasetL2Size);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Pcg32& gen) {
|
||||
@ -140,12 +142,33 @@ namespace RandomX {
|
||||
emitByte(0xe9);
|
||||
emit(instructionOffsets[0] - (codePos + 4));
|
||||
fixCallOffsets();
|
||||
uint32_t transformL1 = addressTransformations[gen.getUniform(0, TransformationCount - 1)];
|
||||
uint32_t transformL2 = addressTransformations[gen.getUniform(0, TransformationCount - 1)];
|
||||
*reinterpret_cast<uint32_t*>(code + readDatasetL1Offset + 1) = transformL1;
|
||||
*reinterpret_cast<uint32_t*>(code + readDatasetL2Offset + 1) = transformL2;
|
||||
}
|
||||
|
||||
// Emits the machine-code preamble common to every VM instruction and then
// dispatches to the opcode-specific generator taken from engine[].
//
// The start offset of the instruction is recorded in instructionOffsets
// before anything is emitted.
//
// Byte-level notes below assume emit() writes its argument in little-endian
// order (NOTE(review): emit() is not visible here - confirm).
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
instructionOffsets.push_back(codePos);
|
||||
emit(0x840fcfff); //dec edi; jz <epilogue> (FF CF encodes edi, not edx; variant with the counter in rdi)
|
||||
emit(0x840fcbff); //dec ebx; jz <epilogue> (variant with the instruction counter "ic" in rbx)
|
||||
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative), measured from the end of this 4-byte field
|
||||
emit(uint16_t(0x8149)); //xor (REX.WB + opcode 0x81; ModRM byte follows)
|
||||
emitByte(0xf0 + (instr.rega % RegistersCount)); //ModRM: /6 (xor), register selected by rega
|
||||
emit(instr.addra); //imm32 operand of the xor
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega
|
||||
emit(0x753fc3f6); //test bl,0x3f; jne
|
||||
emit(uint16_t(0xe805)); //jne displacement = 5 (skips the call below); 0xE8 = call rel32 opcode
|
||||
if (instr.loca & 3) { //A.LOC.W
|
||||
emit(readDatasetL1Offset - (codePos + 4)); //call target: L1 read routine
|
||||
}
|
||||
else {
|
||||
emit(readDatasetL2Offset - (codePos + 4)); //call target: L2 read routine
|
||||
}
|
||||
if ((instr.loca & 192) == 0) { //A.LOC.X
|
||||
emit(uint16_t(0x3348));
|
||||
emitByte(0xe9); //xor rbp, rcx
|
||||
}
|
||||
auto generator = engine[instr.opcode]; // pointer-to-member handler for this opcode
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
@ -157,73 +180,26 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void JitCompilerX86::genar(Instruction& instr) {
|
||||
emit(uint16_t(0x8149)); //xor
|
||||
emitByte(0xf0 + (instr.rega % RegistersCount));
|
||||
emit(instr.addra);
|
||||
switch (instr.loca & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega
|
||||
emitByte(0xe8); //call
|
||||
emit(readDatasetROffset - (codePos + 4));
|
||||
return;
|
||||
|
||||
case 4:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
|
||||
emitByte(0x25); //and
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
emit(0xc6048b48); // mov rax,QWORD PTR [rsi+rax*8]
|
||||
return;
|
||||
|
||||
default:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
|
||||
emitByte(0x25); //and
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
emit(0xc6048b48); // mov rax,QWORD PTR [rsi+rax*8]
|
||||
return;
|
||||
emit(uint16_t(0xe181)); //and ecx,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
}
|
||||
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
|
||||
}
|
||||
|
||||
void JitCompilerX86::genaf(Instruction& instr) {
|
||||
emit(uint16_t(0x8149)); //xor
|
||||
emitByte(0xf0 + (instr.rega % RegistersCount));
|
||||
emit(instr.addra);
|
||||
switch (instr.loca & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega
|
||||
emitByte(0xe8); //call
|
||||
emit(readDatasetFOffset - (codePos + 4));
|
||||
return;
|
||||
|
||||
case 4:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
|
||||
emitByte(0x25); //and
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
emitByte(0xf3);
|
||||
emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8]
|
||||
return;
|
||||
|
||||
default:
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
|
||||
emitByte(0x25); //and
|
||||
emit(uint16_t(0xe181)); //and ecx,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
emitByte(0xf3);
|
||||
emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8]
|
||||
return;
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
}
|
||||
emitByte(0xf3);
|
||||
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
|
||||
}
|
||||
|
||||
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||
@ -274,8 +250,13 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
|
||||
void JitCompilerX86::scratchpadStoreR(Instruction& instr, uint32_t scratchpadSize) {
|
||||
emit(0x41c88b48); //mov rcx, rax; REX
|
||||
void JitCompilerX86::scratchpadStoreR(Instruction& instr, uint32_t scratchpadSize, bool rax) {
|
||||
if (rax) {
|
||||
emit(0x41c88b48); //mov rcx, rax; REX
|
||||
}
|
||||
else {
|
||||
emitByte(0x41);
|
||||
}
|
||||
emitByte(0x8b); // mov
|
||||
emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc
|
||||
emitByte(0x35); // xor eax
|
||||
@ -285,22 +266,27 @@ namespace RandomX {
|
||||
emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx
|
||||
}
|
||||
|
||||
void JitCompilerX86::gencr(Instruction& instr) {
|
||||
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
||||
switch (instr.locc & 7)
|
||||
{
|
||||
case 0:
|
||||
scratchpadStoreR(instr, ScratchpadL2);
|
||||
scratchpadStoreR(instr, ScratchpadL2, rax);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
scratchpadStoreR(instr, ScratchpadL1);
|
||||
scratchpadStoreR(instr, ScratchpadL1, rax);
|
||||
break;
|
||||
|
||||
default:
|
||||
emit(uint16_t(0x8b4c)); //mov
|
||||
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
||||
if (rax) {
|
||||
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
||||
}
|
||||
else {
|
||||
emitByte(0xc1 + 8 * (instr.regc % RegistersCount)); //regc, rcx
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -322,29 +308,21 @@ namespace RandomX {
|
||||
emitByte(0xc6);
|
||||
}
|
||||
|
||||
void JitCompilerX86::gencf(Instruction& instr, bool alwaysLow = false) {
|
||||
void JitCompilerX86::gencf(Instruction& instr) {
|
||||
int regc = (instr.regc % RegistersCount);
|
||||
if (!alwaysLow) {
|
||||
if (regc <= 1) {
|
||||
emitByte(0x44); //REX
|
||||
}
|
||||
emit(uint16_t(0x280f)); //movaps
|
||||
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
||||
if (regc <= 1) {
|
||||
emitByte(0x44); //REX
|
||||
}
|
||||
switch (instr.locc & 7)
|
||||
emit(uint16_t(0x280f)); //movaps
|
||||
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
||||
if (instr.locc & 4) //C.LOC.R
|
||||
{
|
||||
case 4:
|
||||
scratchpadStoreF(instr, regc, ScratchpadL2, !alwaysLow && (instr.locc & 8));
|
||||
break;
|
||||
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
scratchpadStoreF(instr, regc, ScratchpadL1, !alwaysLow && (instr.locc & 8));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
if (instr.locc & 3) { //C.LOC.W
|
||||
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //whole scratchpad
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -596,24 +574,11 @@ namespace RandomX {
|
||||
|
||||
// FPROUND: emits machine code that derives a new MXCSR value from the A
// operand (read from rax after genar()) and loads it with ldmxcsr.
//
// The commented emit() constants that follow spell out, as a byte stream:
//   mov rcx, rax ; shl eax, 13 ; and eax, 0x6000 ; or eax, 0x9fc0 ;
//   mov [rsp-8], eax ; ldmxcsr [rsp-8]
// Instructions straddle the 8-byte emit() boundaries, so each trailing comment
// names the instructions that START in that constant. Decoding assumes emit()
// writes little-endian (NOTE(review): emit() is not visible here - confirm).
void JitCompilerX86::h_FPROUND(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
emit(0x81480de0c1c88b48);
|
||||
emit(0x600025fffff800e1);
|
||||
emit(uint16_t(0x0000));
|
||||
emitByte(0xf2);
|
||||
int regc = (instr.regc % RegistersCount);
|
||||
if (regc <= 1) {
|
||||
emitByte(0x4c); //REX
|
||||
}
|
||||
else {
|
||||
emitByte(0x48); //REX
|
||||
}
|
||||
emit(uint16_t(0x2a0f));
|
||||
emitByte(0xc1 + 8 * regc);
|
||||
emitByte(0x0d);
|
||||
emit(0xf824448900009fc0);
|
||||
emit(0x2454ae0f); //ldmxcsr DWORD PTR [rsp-0x8]
|
||||
emit(0x00250de0c1c88b48); //mov rcx,rax; shl eax,0xd (last two bytes begin "and eax, imm32")
|
||||
emit(0x00009fc00d000060); //and eax,0x6000; or eax,0x9fc0
|
||||
emit(0x2454ae0ff8244489); //mov DWORD PTR [rsp-0x8],eax; ldmxcsr DWORD PTR [rsp-0x8] (disp8 follows)
|
||||
emitByte(0xf8); //disp8 = -8, completes the ldmxcsr memory operand
|
||||
gencf(instr, true);
|
||||
gencr(instr, false); //result in rcx
|
||||
}
|
||||
|
||||
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
|
||||
@ -670,7 +635,7 @@ namespace RandomX {
|
||||
if ((instr.locc & 7) <= 3) {
|
||||
crlen = 17;
|
||||
}
|
||||
emit(0x74e53b48); //cmp rsp, rbp; je
|
||||
emit(0x74e73b48); //cmp rsp, rdi; je
|
||||
emitByte(11 + crlen);
|
||||
emitByte(0x48);
|
||||
emit(0x08244433); //xor rax,QWORD PTR [rsp+0x8]
|
||||
|
@ -64,10 +64,10 @@ namespace RandomX {
|
||||
void genbr1(Instruction&, uint16_t, uint16_t);
|
||||
void genbr132(Instruction&, uint16_t, uint8_t);
|
||||
void genbf(Instruction&, uint8_t);
|
||||
void scratchpadStoreR(Instruction&, uint32_t);
|
||||
void scratchpadStoreR(Instruction&, uint32_t, bool);
|
||||
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
|
||||
void gencr(Instruction&);
|
||||
void gencf(Instruction&, bool);
|
||||
void gencr(Instruction&, bool);
|
||||
void gencf(Instruction&);
|
||||
void generateCode(Instruction&, int);
|
||||
void fixCallOffsets();
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
;# unroll VM stack
|
||||
mov rsp, rbp
|
||||
mov rsp, rdi
|
||||
|
||||
;# save VM register values
|
||||
pop rcx
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
|
@ -7,9 +7,11 @@
|
||||
push r15
|
||||
|
||||
;# function arguments
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rbx, rsi ;# MemoryRegisters& memory
|
||||
mov rsi, rdx ;# convertible_t* scratchpad
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov rax, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, rdx ;# convertible_t* scratchpad
|
||||
mov rcx, rdi
|
||||
|
||||
#include "program_prologue_load.inc"
|
||||
|
@ -1,5 +1,5 @@
|
||||
mov rbp, rsp ;# beginning of VM stack
|
||||
mov rdi, 1048577 ;# number of VM instructions to execute + 1
|
||||
mov rdi, rsp ;# beginning of VM stack
|
||||
mov ebx, 1048577 ;# number of VM instructions to execute + 1
|
||||
|
||||
xorps xmm10, xmm10
|
||||
cmpeqpd xmm10, xmm10
|
||||
|
@ -15,9 +15,11 @@
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbx, rdx ;# MemoryRegisters& memory
|
||||
mov rsi, r8 ;# convertible_t* scratchpad
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rax, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, r8 ;# convertible_t* scratchpad
|
||||
|
||||
include program_prologue_load.inc
|
||||
|
||||
|
32
src/asm/program_read.inc
Normal file
32
src/asm/program_read.inc
Normal file
@ -0,0 +1,32 @@
|
||||
;# Dataset read routine body, included once per scratchpad level by the
;# static JIT assembly files. The includer must define "scratchpad_mask"
;# before including this file; it is expected to AND ecx down to a
;# cache-line-aligned scratchpad index.
;#
;# Register use as read from the code below:
;#   rcx   in: address value supplied by the caller; preserved via push/pop
;#   rdi   in: points at the stack slot holding the dataset base pointer
;#   rsi   in: scratchpad base
;#   rbp   in/out: packed "ma"/"mx" pair (two 32-bit halves, swapped on exit)
;#   rax, rdx  clobbered
;#
;# The 4 bytes after "push rcx" are a placeholder that the JIT compiler
;# overwrites (at routine offset +1) with an address transformation.
push rcx ;# preserve rcx (caller's address value)
|
||||
db 0, 0, 0, 0 ;# TransformAddress placeholder
|
||||
mov rax, qword ptr [rdi] ;# load the dataset address
|
||||
xor rbp, rcx ;# modify "mx"
|
||||
;# prefetch cacheline "mx"
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rax+rdx]
|
||||
;# read cacheline "ma"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
scratchpad_mask ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||
lea rax, [rax+rdx] ;# dataset cache line
|
||||
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||
xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
|
||||
mov rdx, qword ptr [rax+8]
|
||||
xor qword ptr [rcx+8], rdx
|
||||
mov rdx, qword ptr [rax+16]
|
||||
xor qword ptr [rcx+16], rdx
|
||||
mov rdx, qword ptr [rax+24]
|
||||
xor qword ptr [rcx+24], rdx
|
||||
mov rdx, qword ptr [rax+32]
|
||||
xor qword ptr [rcx+32], rdx
|
||||
mov rdx, qword ptr [rax+40]
|
||||
xor qword ptr [rcx+40], rdx
|
||||
mov rdx, qword ptr [rax+48]
|
||||
xor qword ptr [rcx+48], rdx
|
||||
mov rdx, qword ptr [rax+56]
|
||||
xor qword ptr [rcx+56], rdx
|
||||
pop rcx ;# restore rcx
|
||||
ret
|
@ -1,13 +0,0 @@
|
||||
mov edx, dword ptr [rbx] ;# ma
|
||||
mov rax, qword ptr [rbx+8] ;# dataset
|
||||
cvtdq2pd xmm0, qword ptr [rax+rdx]
|
||||
add dword ptr [rbx], 8
|
||||
xor ecx, dword ptr [rbx+4] ;# mx
|
||||
mov dword ptr [rbx+4], ecx
|
||||
test ecx, 65528
|
||||
jne short rx_read_dataset_f_ret
|
||||
and ecx, -8
|
||||
mov dword ptr [rbx], ecx
|
||||
prefetcht0 byte ptr [rax+rcx]
|
||||
rx_read_dataset_f_ret:
|
||||
ret 0
|
@ -1,13 +0,0 @@
|
||||
mov eax, dword ptr [rbx] ;# ma
|
||||
mov rdx, qword ptr [rbx+8] ;# dataset
|
||||
mov rax, qword ptr [rdx+rax]
|
||||
add dword ptr [rbx], 8
|
||||
xor ecx, dword ptr [rbx+4] ;# mx
|
||||
mov dword ptr [rbx+4], ecx
|
||||
test ecx, 65528
|
||||
jne short rx_read_dataset_r_ret
|
||||
and ecx, -8
|
||||
mov dword ptr [rbx], ecx
|
||||
prefetcht0 byte ptr [rdx+rcx]
|
||||
rx_read_dataset_r_ret:
|
||||
ret 0
|
@ -77,6 +77,7 @@ namespace RandomX {
|
||||
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
||||
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 16 / sizeof(convertible_t);
|
||||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
|
||||
constexpr uint32_t TransformationCount = 90;
|
||||
constexpr int RegistersCount = 8;
|
||||
|
||||
class Cache;
|
||||
|
@ -158,10 +158,14 @@ executeProgram PROC
|
||||
pslldq xmm7, 8
|
||||
cvtsi2sd xmm7, qword ptr [rcx+112]
|
||||
|
||||
; program body
|
||||
jmp program_begin
|
||||
|
||||
; program body
|
||||
ALIGN 64
|
||||
program_begin:
|
||||
include program.inc
|
||||
|
||||
ALIGN 64
|
||||
rx_finish:
|
||||
; unroll the stack
|
||||
mov rsp, rdi
|
||||
|
@ -277,10 +277,6 @@ int main(int argc, char** argv) {
|
||||
if(programCount == 1000)
|
||||
std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl;
|
||||
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
||||
/*if (threadCount == 1 && !compiled) {
|
||||
auto ivm = (RandomX::InterpretedVirtualMachine*)vms[0];
|
||||
std::cout << ivm->getProgam();
|
||||
}*/
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << "ERROR: " << e.what() << std::endl;
|
||||
|
245
src/program.inc
245
src/program.inc
@ -76,11 +76,13 @@ rx_body_3:
|
||||
xor rbp, rcx
|
||||
and ecx, 2047
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov r8, rcx
|
||||
|
||||
rx_i_4: ;MULH_64
|
||||
dec ebx
|
||||
@ -153,7 +155,7 @@ rx_body_7:
|
||||
mov eax, r14d
|
||||
xor eax, 057c8c41bh
|
||||
and eax, 32767
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_8: ;SHL_64
|
||||
dec ebx
|
||||
@ -218,7 +220,7 @@ rx_body_11:
|
||||
mov eax, r12d
|
||||
xor eax, 0852d40d8h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
movlpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_12: ;CALL
|
||||
dec ebx
|
||||
@ -355,7 +357,7 @@ rx_body_18:
|
||||
mov eax, r11d
|
||||
xor eax, 0869baa81h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_19: ;FPSUB
|
||||
dec ebx
|
||||
@ -372,7 +374,7 @@ rx_body_19:
|
||||
subpd xmm0, xmm8
|
||||
movaps xmm7, xmm0
|
||||
|
||||
rx_i_20: ;FPMUL
|
||||
rx_i_20: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r13, 0ecca967dh
|
||||
@ -383,15 +385,12 @@ rx_i_20: ;FPMUL
|
||||
rx_body_20:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm2
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm2
|
||||
movaps xmm7, xmm0
|
||||
mov eax, r15d
|
||||
xor eax, 0aad81365h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm7
|
||||
movlpd qword ptr [rsi + rax * 8], xmm7
|
||||
|
||||
rx_i_21: ;FPADD
|
||||
dec ebx
|
||||
@ -482,7 +481,7 @@ rx_body_25:
|
||||
mov eax, r14d
|
||||
xor eax, 0baf5c2d4h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_26: ;IMUL_32
|
||||
dec ebx
|
||||
@ -580,7 +579,7 @@ rx_body_31:
|
||||
mov eax, r14d
|
||||
xor eax, 01e2da792h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_32: ;XOR_64
|
||||
dec ebx
|
||||
@ -668,7 +667,7 @@ rx_body_36:
|
||||
andps xmm0, xmm1
|
||||
movaps xmm7, xmm0
|
||||
|
||||
rx_i_37: ;FPMUL
|
||||
rx_i_37: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r12, 0d0706601h
|
||||
@ -679,10 +678,7 @@ rx_i_37: ;FPMUL
|
||||
rx_body_37:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm2
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm2
|
||||
movaps xmm9, xmm0
|
||||
mov eax, r9d
|
||||
xor eax, 0bca81c78h
|
||||
@ -764,7 +760,7 @@ taken_call_41:
|
||||
push rax
|
||||
call rx_i_127
|
||||
|
||||
rx_i_42: ;FPSUB
|
||||
rx_i_42: ;FPADD
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r15, 0bc1de9f6h
|
||||
@ -776,7 +772,7 @@ rx_body_42:
|
||||
xor rbp, rcx
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
subpd xmm0, xmm6
|
||||
addpd xmm0, xmm6
|
||||
movaps xmm6, xmm0
|
||||
|
||||
rx_i_43: ;SUB_64
|
||||
@ -887,7 +883,7 @@ rx_body_48:
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_49: ;FPMUL
|
||||
rx_i_49: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r8, 0f96c6a45h
|
||||
@ -898,10 +894,7 @@ rx_i_49: ;FPMUL
|
||||
rx_body_49:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm3
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm3
|
||||
movaps xmm5, xmm0
|
||||
|
||||
rx_i_50: ;OR_32
|
||||
@ -1018,7 +1011,7 @@ rx_body_55:
|
||||
mov eax, r11d
|
||||
xor eax, 07c79cddh
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_56: ;AND_64
|
||||
dec ebx
|
||||
@ -1144,7 +1137,7 @@ taken_call_61:
|
||||
push rax
|
||||
call rx_i_120
|
||||
|
||||
rx_i_62: ;FPMUL
|
||||
rx_i_62: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r15, 0c3089414h
|
||||
@ -1155,17 +1148,14 @@ rx_i_62: ;FPMUL
|
||||
rx_body_62:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm8
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm8
|
||||
movaps xmm2, xmm0
|
||||
mov eax, r10d
|
||||
xor eax, 05c4789e3h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm2
|
||||
movhpd qword ptr [rsi + rax * 8], xmm2
|
||||
|
||||
rx_i_63: ;FPMUL
|
||||
rx_i_63: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r9, 065cf272eh
|
||||
@ -1176,10 +1166,7 @@ rx_i_63: ;FPMUL
|
||||
rx_body_63:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm7
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm7
|
||||
movaps xmm8, xmm0
|
||||
|
||||
rx_i_64: ;SUB_64
|
||||
@ -1253,7 +1240,7 @@ taken_call_67:
|
||||
push rax
|
||||
call rx_i_79
|
||||
|
||||
rx_i_68: ;FPSUB
|
||||
rx_i_68: ;FPADD
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r13, 03aa5c3a4h
|
||||
@ -1264,7 +1251,7 @@ rx_i_68: ;FPSUB
|
||||
rx_body_68:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
subpd xmm0, xmm2
|
||||
addpd xmm0, xmm2
|
||||
movaps xmm4, xmm0
|
||||
mov eax, r12d
|
||||
xor eax, 03c51ef39h
|
||||
@ -1354,11 +1341,16 @@ rx_i_73: ;FPROUND
|
||||
rx_body_73:
|
||||
and ecx, 32767
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov eax, r10d
|
||||
xor eax, 040624270h
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_74: ;MUL_64
|
||||
dec ebx
|
||||
@ -1722,7 +1714,7 @@ rx_body_93:
|
||||
mov eax, r10d
|
||||
xor eax, 07e48a0d8h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm2
|
||||
movhpd qword ptr [rsi + rax * 8], xmm2
|
||||
|
||||
rx_i_94: ;RET
|
||||
dec ebx
|
||||
@ -1830,7 +1822,7 @@ rx_body_99:
|
||||
mov eax, r12d
|
||||
xor eax, 04c21df83h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm4
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_100: ;ADD_64
|
||||
dec ebx
|
||||
@ -1955,7 +1947,7 @@ rx_body_106:
|
||||
mov eax, r12d
|
||||
xor eax, 03cb2505h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
movlpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_107: ;CALL
|
||||
dec ebx
|
||||
@ -1999,7 +1991,7 @@ rx_body_108:
|
||||
mov eax, r9d
|
||||
xor eax, 0678b65beh
|
||||
and eax, 32767
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_109: ;FPADD
|
||||
dec ebx
|
||||
@ -2207,7 +2199,7 @@ rx_body_120:
|
||||
addpd xmm0, xmm4
|
||||
movaps xmm8, xmm0
|
||||
|
||||
rx_i_121: ;FPMUL
|
||||
rx_i_121: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r9, 03ab8f73h
|
||||
@ -2218,10 +2210,7 @@ rx_i_121: ;FPMUL
|
||||
rx_body_121:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm5
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm5
|
||||
movaps xmm8, xmm0
|
||||
|
||||
rx_i_122: ;RET
|
||||
@ -2813,7 +2802,7 @@ rx_body_153:
|
||||
mov eax, r8d
|
||||
xor eax, 09111c981h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm8
|
||||
movhpd qword ptr [rsi + rax * 8], xmm8
|
||||
|
||||
rx_i_154: ;MUL_32
|
||||
dec ebx
|
||||
@ -3196,11 +3185,13 @@ rx_i_174: ;FPROUND
|
||||
rx_body_174:
|
||||
and ecx, 2047
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov r14, rcx
|
||||
|
||||
rx_i_175: ;SAR_64
|
||||
dec ebx
|
||||
@ -3431,7 +3422,7 @@ rx_body_187:
|
||||
andps xmm0, xmm1
|
||||
movaps xmm5, xmm0
|
||||
|
||||
rx_i_188: ;FPMUL
|
||||
rx_i_188: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r9, 04659becbh
|
||||
@ -3443,10 +3434,7 @@ rx_body_188:
|
||||
xor rbp, rcx
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm3
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm3
|
||||
movaps xmm4, xmm0
|
||||
|
||||
rx_i_189: ;FPROUND
|
||||
@ -3460,11 +3448,16 @@ rx_i_189: ;FPROUND
|
||||
rx_body_189:
|
||||
and ecx, 2047
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov eax, r13d
|
||||
xor eax, 0e6f1a3b7h
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_190: ;RET
|
||||
dec ebx
|
||||
@ -3761,7 +3754,7 @@ rx_body_205:
|
||||
andps xmm0, xmm1
|
||||
movaps xmm5, xmm0
|
||||
|
||||
rx_i_206: ;FPMUL
|
||||
rx_i_206: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r11, 0e836a177h
|
||||
@ -3773,10 +3766,7 @@ rx_body_206:
|
||||
xor rbp, rcx
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm7
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm7
|
||||
movaps xmm4, xmm0
|
||||
|
||||
rx_i_207: ;AND_32
|
||||
@ -4085,7 +4075,7 @@ rx_body_223:
|
||||
mov eax, r10d
|
||||
xor eax, 07fca59eeh
|
||||
and eax, 32767
|
||||
movlpd qword ptr [rsi + rax * 8], xmm2
|
||||
movhpd qword ptr [rsi + rax * 8], xmm2
|
||||
|
||||
rx_i_224: ;SAR_64
|
||||
dec ebx
|
||||
@ -4171,7 +4161,7 @@ rx_body_227:
|
||||
mov eax, r11d
|
||||
xor eax, 0aabe2a0ah
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_228: ;CALL
|
||||
dec ebx
|
||||
@ -4313,11 +4303,16 @@ rx_i_234: ;FPROUND
|
||||
rx_body_234:
|
||||
and ecx, 2047
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov eax, r12d
|
||||
xor eax, 04d2e9e7dh
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_235: ;IMUL_32
|
||||
dec ebx
|
||||
@ -4438,7 +4433,7 @@ rx_body_241:
|
||||
mov eax, r15d
|
||||
xor eax, 0bc2423ebh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm7
|
||||
movlpd qword ptr [rsi + rax * 8], xmm7
|
||||
|
||||
rx_i_242: ;MULH_64
|
||||
dec ebx
|
||||
@ -4734,7 +4729,7 @@ rx_body_257:
|
||||
mov eax, r11d
|
||||
xor eax, 0373b1b6fh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_258: ;MUL_32
|
||||
dec ebx
|
||||
@ -4771,7 +4766,7 @@ rx_body_259:
|
||||
addpd xmm0, xmm9
|
||||
movaps xmm3, xmm0
|
||||
|
||||
rx_i_260: ;FPMUL
|
||||
rx_i_260: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r13, 0f94e9fa9h
|
||||
@ -4783,10 +4778,7 @@ rx_body_260:
|
||||
xor rbp, rcx
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm5
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm5
|
||||
movaps xmm9, xmm0
|
||||
|
||||
rx_i_261: ;FPSQRT
|
||||
@ -4806,7 +4798,7 @@ rx_body_261:
|
||||
mov eax, r11d
|
||||
xor eax, 0745a48e9h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_262: ;OR_32
|
||||
dec ebx
|
||||
@ -5044,7 +5036,7 @@ rx_body_274:
|
||||
mov eax, r14d
|
||||
xor eax, 06a2b2b5bh
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_275: ;OR_64
|
||||
dec ebx
|
||||
@ -5121,7 +5113,7 @@ rx_body_278:
|
||||
mov eax, r12d
|
||||
xor eax, 02d00ad10h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm4
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_279: ;FPSUB
|
||||
dec ebx
|
||||
@ -5139,7 +5131,7 @@ rx_body_279:
|
||||
mov eax, r9d
|
||||
xor eax, 0475ade01h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_280: ;AND_64
|
||||
dec ebx
|
||||
@ -5210,7 +5202,7 @@ rx_body_283:
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_284: ;FPSUB
|
||||
rx_i_284: ;FPADD
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r15, 0e68f36ach
|
||||
@ -5222,7 +5214,7 @@ rx_body_284:
|
||||
xor rbp, rcx
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
subpd xmm0, xmm6
|
||||
addpd xmm0, xmm6
|
||||
movaps xmm9, xmm0
|
||||
mov eax, r9d
|
||||
xor eax, 0936f2960h
|
||||
@ -5313,7 +5305,7 @@ rx_body_289:
|
||||
andps xmm0, xmm1
|
||||
movaps xmm8, xmm0
|
||||
|
||||
rx_i_290: ;FPMUL
|
||||
rx_i_290: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r15, 060665748h
|
||||
@ -5324,10 +5316,7 @@ rx_i_290: ;FPMUL
|
||||
rx_body_290:
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm8
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm8
|
||||
movaps xmm9, xmm0
|
||||
|
||||
rx_i_291: ;RET
|
||||
@ -5531,7 +5520,7 @@ rx_body_301:
|
||||
mov eax, r15d
|
||||
xor eax, 0433cf2d6h
|
||||
and eax, 32767
|
||||
movlpd qword ptr [rsi + rax * 8], xmm7
|
||||
movhpd qword ptr [rsi + rax * 8], xmm7
|
||||
|
||||
rx_i_302: ;ADD_64
|
||||
dec ebx
|
||||
@ -5937,7 +5926,7 @@ rx_body_324:
|
||||
mov eax, r9d
|
||||
xor eax, 0944856d4h
|
||||
and eax, 32767
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_325: ;SHL_64
|
||||
dec ebx
|
||||
@ -6076,7 +6065,7 @@ rx_body_332:
|
||||
mov eax, r11d
|
||||
xor eax, 0116c919eh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_333: ;XOR_64
|
||||
dec ebx
|
||||
@ -6222,7 +6211,7 @@ rx_body_341:
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_342: ;FPMUL
|
||||
rx_i_342: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r9, 09ccc7abah
|
||||
@ -6233,10 +6222,7 @@ rx_i_342: ;FPMUL
|
||||
rx_body_342:
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm2
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm2
|
||||
movaps xmm3, xmm0
|
||||
|
||||
rx_i_343: ;SHR_64
|
||||
@ -6258,7 +6244,7 @@ rx_body_343:
|
||||
and eax, 32767
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_344: ;FPMUL
|
||||
rx_i_344: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r10, 03ef9bcc4h
|
||||
@ -6269,10 +6255,7 @@ rx_i_344: ;FPMUL
|
||||
rx_body_344:
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm6
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm6
|
||||
movaps xmm5, xmm0
|
||||
|
||||
rx_i_345: ;MULH_64
|
||||
@ -6343,7 +6326,7 @@ rx_body_348:
|
||||
mov eax, r9d
|
||||
xor eax, 039c35461h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_349: ;XOR_32
|
||||
dec ebx
|
||||
@ -6413,9 +6396,9 @@ rx_body_352:
|
||||
mov eax, r10d
|
||||
xor eax, 03bf686f2h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm2
|
||||
movlpd qword ptr [rsi + rax * 8], xmm2
|
||||
|
||||
rx_i_353: ;FPMUL
|
||||
rx_i_353: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r13, 02e65278bh
|
||||
@ -6426,15 +6409,12 @@ rx_i_353: ;FPMUL
|
||||
rx_body_353:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm2
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm2
|
||||
movaps xmm7, xmm0
|
||||
mov eax, r15d
|
||||
xor eax, 0b3c9f7aeh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm7
|
||||
movlpd qword ptr [rsi + rax * 8], xmm7
|
||||
|
||||
rx_i_354: ;MULH_64
|
||||
dec ebx
|
||||
@ -6535,7 +6515,7 @@ rx_body_359:
|
||||
mov eax, r12d
|
||||
xor eax, 0f16b9be3h
|
||||
and eax, 32767
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
movlpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_360: ;FPMUL
|
||||
dec ebx
|
||||
@ -6570,7 +6550,7 @@ rx_body_361:
|
||||
mov eax, r14d
|
||||
xor eax, 0ad0b81f5h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_362: ;SUB_64
|
||||
dec ebx
|
||||
@ -6726,7 +6706,7 @@ rx_body_370:
|
||||
mov eax, r14d
|
||||
xor eax, 0a120e0edh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_371: ;FPADD
|
||||
dec ebx
|
||||
@ -6948,7 +6928,7 @@ rx_body_383:
|
||||
mov eax, r13d
|
||||
xor eax, 0c9f5cc22h
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm5
|
||||
movlpd qword ptr [rsi + rax * 8], xmm5
|
||||
|
||||
rx_i_384: ;SHR_64
|
||||
dec ebx
|
||||
@ -7256,7 +7236,7 @@ rx_body_400:
|
||||
and eax, 32767
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_401: ;FPMUL
|
||||
rx_i_401: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r13, 032e81f25h
|
||||
@ -7267,15 +7247,12 @@ rx_i_401: ;FPMUL
|
||||
rx_body_401:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm4
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm4
|
||||
movaps xmm6, xmm0
|
||||
mov eax, r14d
|
||||
xor eax, 03ea60344h
|
||||
and eax, 32767
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_402: ;RET
|
||||
dec ebx
|
||||
@ -7382,13 +7359,15 @@ rx_i_406: ;FPROUND
|
||||
rx_body_406:
|
||||
and ecx, 32767
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
mov rcx, rax
|
||||
shl eax, 13
|
||||
and eax, 24576
|
||||
or eax, 40896
|
||||
mov dword ptr [rsp - 8], eax
|
||||
ldmxcsr dword ptr [rsp - 8]
|
||||
mov r9, rcx
|
||||
|
||||
rx_i_407: ;FPMUL
|
||||
rx_i_407: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r14, 09699566fh
|
||||
@ -7400,10 +7379,7 @@ rx_body_407:
|
||||
xor rbp, rcx
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm9
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm9
|
||||
movaps xmm8, xmm0
|
||||
|
||||
rx_i_408: ;MUL_64
|
||||
@ -7493,7 +7469,7 @@ rx_body_412:
|
||||
mov eax, r11d
|
||||
xor eax, 0bbd2640ah
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm3
|
||||
movlpd qword ptr [rsi + rax * 8], xmm3
|
||||
|
||||
rx_i_413: ;FPDIV
|
||||
dec ebx
|
||||
@ -7704,7 +7680,7 @@ rx_body_424:
|
||||
mov eax, r9d
|
||||
xor eax, 0565ae8aah
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_425: ;IMUL_32
|
||||
dec ebx
|
||||
@ -7887,7 +7863,7 @@ rx_body_434:
|
||||
mov eax, r9d
|
||||
xor eax, 08c1cfc74h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm9
|
||||
movhpd qword ptr [rsi + rax * 8], xmm9
|
||||
|
||||
rx_i_435: ;MUL_64
|
||||
dec ebx
|
||||
@ -8068,7 +8044,7 @@ not_taken_ret_443:
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_444: ;FPMUL
|
||||
rx_i_444: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r8, 042455dd8h
|
||||
@ -8079,15 +8055,12 @@ rx_i_444: ;FPMUL
|
||||
rx_body_444:
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm7
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm7
|
||||
movaps xmm5, xmm0
|
||||
mov eax, r13d
|
||||
xor eax, 0ce416070h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm5
|
||||
movhpd qword ptr [rsi + rax * 8], xmm5
|
||||
|
||||
rx_i_445: ;ADD_64
|
||||
dec ebx
|
||||
@ -8128,7 +8101,7 @@ rx_body_446:
|
||||
and eax, 2047
|
||||
mov qword ptr [rsi + rax * 8], rcx
|
||||
|
||||
rx_i_447: ;FPSUB
|
||||
rx_i_447: ;FPADD
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r8, 01596d0e8h
|
||||
@ -8139,12 +8112,12 @@ rx_i_447: ;FPSUB
|
||||
rx_body_447:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
subpd xmm0, xmm7
|
||||
addpd xmm0, xmm7
|
||||
movaps xmm5, xmm0
|
||||
mov eax, r13d
|
||||
xor eax, 0b384d4afh
|
||||
and eax, 2047
|
||||
movhpd qword ptr [rsi + rax * 8], xmm5
|
||||
movlpd qword ptr [rsi + rax * 8], xmm5
|
||||
|
||||
rx_i_448: ;FPSUB
|
||||
dec ebx
|
||||
@ -8668,7 +8641,7 @@ rx_body_477:
|
||||
mov eax, r14d
|
||||
xor eax, 0e81fc7a6h
|
||||
and eax, 2047
|
||||
movlpd qword ptr [rsi + rax * 8], xmm6
|
||||
movhpd qword ptr [rsi + rax * 8], xmm6
|
||||
|
||||
rx_i_478: ;MUL_64
|
||||
dec ebx
|
||||
@ -9143,7 +9116,7 @@ rx_body_504:
|
||||
and eax, 32767
|
||||
movhpd qword ptr [rsi + rax * 8], xmm4
|
||||
|
||||
rx_i_505: ;FPMUL
|
||||
rx_i_505: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r12, 032c0a28ah
|
||||
@ -9154,17 +9127,14 @@ rx_i_505: ;FPMUL
|
||||
rx_body_505:
|
||||
and ecx, 32767
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm4
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm4
|
||||
movaps xmm8, xmm0
|
||||
mov eax, r8d
|
||||
xor eax, 021b54eaeh
|
||||
and eax, 32767
|
||||
movlpd qword ptr [rsi + rax * 8], xmm8
|
||||
movhpd qword ptr [rsi + rax * 8], xmm8
|
||||
|
||||
rx_i_506: ;FPMUL
|
||||
rx_i_506: ;FPSUB
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r9, 0a973d58ch
|
||||
@ -9175,10 +9145,7 @@ rx_i_506: ;FPMUL
|
||||
rx_body_506:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
mulpd xmm0, xmm9
|
||||
movaps xmm1, xmm0
|
||||
cmpeqpd xmm1, xmm1
|
||||
andps xmm0, xmm1
|
||||
subpd xmm0, xmm9
|
||||
movaps xmm3, xmm0
|
||||
|
||||
rx_i_507: ;RET
|
||||
@ -9238,7 +9205,7 @@ taken_call_509:
|
||||
push rax
|
||||
call rx_i_42
|
||||
|
||||
rx_i_510: ;FPSUB
|
||||
rx_i_510: ;FPADD
|
||||
dec ebx
|
||||
jz rx_finish
|
||||
xor r8, 0db65513ch
|
||||
@ -9249,7 +9216,7 @@ rx_i_510: ;FPSUB
|
||||
rx_body_510:
|
||||
and ecx, 2047
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
subpd xmm0, xmm2
|
||||
addpd xmm0, xmm2
|
||||
movaps xmm9, xmm0
|
||||
|
||||
rx_i_511: ;ROL_64
|
||||
|
@ -74,21 +74,21 @@ void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
|
||||
}
|
||||
#endif
|
||||
|
||||
void* allocExecutableMemory(size_t bytes) {
|
||||
void* allocExecutableMemory(std::size_t bytes) {
|
||||
void* mem;
|
||||
#ifdef _WIN32
|
||||
mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
|
||||
if (mem == nullptr)
|
||||
throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
|
||||
#else
|
||||
mem = mmap(nullptr, CodeSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (mem == MAP_FAILED)
|
||||
throw std::runtime_error("allocExecutableMemory - mmap failed");
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
void* allocLargePagesMemory(size_t bytes) {
|
||||
void* allocLargePagesMemory(std::size_t bytes) {
|
||||
void* mem;
|
||||
#ifdef _WIN32
|
||||
setPrivilege("SeLockMemoryPrivilege", 1);
|
||||
|
@ -19,5 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
#pragma once
|
||||
|
||||
void* allocExecutableMemory(size_t);
|
||||
void* allocLargePagesMemory(size_t);
|
||||
#include <cstddef>
|
||||
|
||||
void* allocExecutableMemory(std::size_t);
|
||||
void* allocLargePagesMemory(std::size_t);
|
Loading…
Reference in New Issue
Block a user