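Replace js with jz after the loop-counter decrement (counter now starts at InstructionCount + 1, so the iteration count is unchanged) to enable dec/jcc macro-op fusion on Intel CPUs (~1% speed-up)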

This commit is contained in:
tevador 2018-12-28 14:18:41 +01:00
parent 76b6b05cf2
commit a09bee8d60
3 changed files with 7 additions and 7 deletions

View File

@ -49,7 +49,7 @@ namespace RandomX {
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl; asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
asmCode << "\tdec edi" << std::endl; asmCode << "\tdec edi" << std::endl;
asmCode << "\tjs rx_finish" << std::endl; asmCode << "\tjz rx_finish" << std::endl;
auto generator = engine[instr.opcode]; auto generator = engine[instr.opcode];
(this->*generator)(instr, i); (this->*generator)(instr, i);
} }

View File

@ -81,10 +81,10 @@ namespace RandomX {
*/ */
constexpr uint8_t ic3 = (InstructionCount >> 24); constexpr uint8_t ic3 = ((InstructionCount + 1) >> 24);
constexpr uint8_t ic2 = (InstructionCount >> 16); constexpr uint8_t ic2 = ((InstructionCount + 1) >> 16);
constexpr uint8_t ic1 = (InstructionCount >> 8); constexpr uint8_t ic1 = ((InstructionCount + 1) >> 8);
constexpr uint8_t ic0 = (InstructionCount >> 0); constexpr uint8_t ic0 = ((InstructionCount + 1) >> 0);
const uint8_t prologue[] = { const uint8_t prologue[] = {
0x53, //push rbx 0x53, //push rbx
@ -245,7 +245,7 @@ namespace RandomX {
void JitCompilerX86::generateCode(Instruction& instr, int i) { void JitCompilerX86::generateCode(Instruction& instr, int i) {
instructionOffsets.push_back(codePos); instructionOffsets.push_back(codePos);
emit(0x880fcfff); //dec edi; js <epilogue> emit(0x840fcfff); //dec edi; jz <epilogue>
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative) emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
gena(instr); gena(instr);
auto generator = engine[instr.opcode]; auto generator = engine[instr.opcode];

View File

@ -84,7 +84,7 @@ executeProgram PROC
push r9 push r9
mov rbp, rsp ; beginning of VM stack mov rbp, rsp ; beginning of VM stack
mov rdi, 1048576 ; number of VM instructions to execute mov rdi, 1048577 ; number of VM instructions to execute + 1
; load VM register values ; load VM register values
mov r8, qword ptr [rcx+0] mov r8, qword ptr [rcx+0]