Added magic division to JIT compiler

New B operand selection rules
This commit is contained in:
tevador 2019-01-11 16:53:52 +01:00
parent 451dfc5730
commit 2756bcdcfe
9 changed files with 1237 additions and 1136 deletions

View File

@ -83,10 +83,10 @@ The `B.LOC.L` flag determines the B operand. It can be either a register or imme
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL| |`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|----|--------|----|------|----|---| |----|--------|----|------|----|---|
|0|register|register|register|register|register| |0|register|`imm8`|`imm32`|register|register|
|1|`imm32`|register|register|register|register| |1|`imm32`|register|register|register|register|
|2|`imm32`|`imm8`|register|register|register| |2|`imm32`|`imm8`|register|register|register|
|3|`imm32`|`imm8`|`imm32`|register|register| |3|`imm32`|register|register|register|register|
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand. Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.

View File

@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>. along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/ */
//#define TRACE //#define TRACE
//#define MAGIC_DIVISION #define MAGIC_DIVISION
#include "AssemblyGeneratorX86.hpp" #include "AssemblyGeneratorX86.hpp"
#include "Pcg32.hpp" #include "Pcg32.hpp"
#include "common.hpp" #include "common.hpp"
@ -64,108 +64,61 @@ namespace RandomX {
(this->*generator)(instr, i); (this->*generator)(instr, i);
} }
void AssemblyGeneratorX86::genar(Instruction& instr, int i) { void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl; asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl; asmCode << "\tjnz short rx_body_" << i << std::endl;
switch (instr.loca & 3) if (instr.loca & 3) {
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_read_l1" << std::endl; asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl; asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0) if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl; asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break; }
default: //3 else {
asmCode << "\tcall rx_read_l2" << std::endl; asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl; asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0) if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl; asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl; asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
} }
}
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
gena(instr, i);
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl; asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
} }
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) { void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; gena(instr, i);
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
switch (instr.loca & 3)
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break;
default: //3
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
}
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl; asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
} }
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) { void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
switch (instr.locb & 7) if (instr.locb & 1) {
{
case 0:
case 1:
case 2:
case 3:
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl; asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
asmCode << "\t" << instrx86 << " rax, cl" << std::endl; asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
return; } else {
default:
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;; asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
return;
} }
} }
void AssemblyGeneratorX86::genbr1(Instruction& instr) { void AssemblyGeneratorX86::genbia(Instruction& instr) {
switch (instr.locb & 7) if (instr.locb & 3) {
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
asmCode << regR[instr.regb % RegistersCount] << std::endl; asmCode << regR[instr.regb % RegistersCount] << std::endl;
return; } else {
default:
asmCode << instr.imm32 << std::endl;; asmCode << instr.imm32 << std::endl;;
return;
} }
} }
void AssemblyGeneratorX86::genbr132(Instruction& instr) { void AssemblyGeneratorX86::genbia32(Instruction& instr) {
switch (instr.locb & 7) if (instr.locb & 3) {
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
asmCode << regR32[instr.regb % RegistersCount] << std::endl; asmCode << regR32[instr.regb % RegistersCount] << std::endl;
return; }
default: else {
asmCode << instr.imm32 << std::endl;; asmCode << instr.imm32 << std::endl;;
return;
} }
} }
@ -241,28 +194,28 @@ namespace RandomX {
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tadd rax, "; asmCode << "\tadd rax, ";
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) { void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tadd eax, "; asmCode << "\tadd eax, ";
genbr132(instr); genbia32(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tsub rax, "; asmCode << "\tsub rax, ";
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) { void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tsub eax, "; asmCode << "\tsub eax, ";
genbr132(instr); genbia32(instr);
gencr(instr); gencr(instr);
} }
@ -272,14 +225,14 @@ namespace RandomX {
if ((instr.locb & 7) >= 6) { if ((instr.locb & 7) >= 6) {
asmCode << "rax, "; asmCode << "rax, ";
} }
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tmov rcx, "; asmCode << "\tmov rcx, ";
genbr1(instr); genbia(instr);
asmCode << "\tmul rcx" << std::endl; asmCode << "\tmul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl; asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr); gencr(instr);
@ -289,7 +242,7 @@ namespace RandomX {
genar(instr, i); genar(instr, i);
asmCode << "\tmov ecx, eax" << std::endl; asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, "; asmCode << "\tmov eax, ";
genbr132(instr); genbia32(instr);
asmCode << "\timul rax, rcx" << std::endl; asmCode << "\timul rax, rcx" << std::endl;
gencr(instr); gencr(instr);
} }
@ -310,7 +263,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tmov rcx, "; asmCode << "\tmov rcx, ";
genbr1(instr); genbia(instr);
asmCode << "\timul rcx" << std::endl; asmCode << "\timul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl; asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr); gencr(instr);
@ -318,7 +271,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
if ((instr.locb & 7) >= 6) { if (instr.locb & 3) {
#ifdef MAGIC_DIVISION #ifdef MAGIC_DIVISION
if (instr.imm32 != 0) { if (instr.imm32 != 0) {
uint32_t divisor = instr.imm32; uint32_t divisor = instr.imm32;
@ -373,8 +326,8 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
if (instr.locb & 3) {
#ifdef MAGIC_DIVISION #ifdef MAGIC_DIVISION
if ((instr.locb & 7) >= 6) {
int64_t divisor = instr.imm32; int64_t divisor = instr.imm32;
asmCode << "\t; magic divide by " << divisor << std::endl; asmCode << "\t; magic divide by " << divisor << std::endl;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) { if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
@ -394,9 +347,10 @@ namespace RandomX {
asmCode << "\tadd rax, rcx" << std::endl; asmCode << "\tadd rax, rcx" << std::endl;
asmCode << "\tsar rax, " << shift << std::endl; asmCode << "\tsar rax, " << shift << std::endl;
} }
if(negative) if (negative)
asmCode << "\tneg rax" << std::endl; asmCode << "\tneg rax" << std::endl;
} else if(divisor != 0) { }
else if (divisor != 0) {
magics_info mi = compute_signed_magic_info(divisor); magics_info mi = compute_signed_magic_info(divisor);
if ((divisor >= 0) != (mi.multiplier >= 0)) if ((divisor >= 0) != (mi.multiplier >= 0))
asmCode << "\tmov rcx, rax" << std::endl; asmCode << "\tmov rcx, rax" << std::endl;
@ -422,16 +376,20 @@ namespace RandomX {
asmCode << "\tsets dl" << std::endl; asmCode << "\tsets dl" << std::endl;
asmCode << "\tadd rax, rdx" << std::endl; asmCode << "\tadd rax, rdx" << std::endl;
} }
#else
asmCode << "\tmov edx, " << instr.imm32 << std::endl;
#endif
} }
else { else {
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
#ifndef MAGIC_DIVISION
}
#endif #endif
asmCode << "\tmov edx, ";
genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl; asmCode << "\tcmp edx, -1" << std::endl;
asmCode << "\tjne short safe_idiv_" << i << std::endl; asmCode << "\tjne short body_idiv_" << i << std::endl;
asmCode << "\tneg rax" << std::endl; asmCode << "\tneg rax" << std::endl;
asmCode << "\tjmp short result_idiv_" << i << std::endl; asmCode << "\tjmp short result_idiv_" << i << std::endl;
asmCode << "safe_idiv_" << i << ":" << std::endl; asmCode << "body_idiv_" << i << ":" << std::endl;
asmCode << "\tmov ecx, 1" << std::endl; asmCode << "\tmov ecx, 1" << std::endl;
asmCode << "\ttest edx, edx" << std::endl; asmCode << "\ttest edx, edx" << std::endl;
asmCode << "\tcmovne ecx, edx" << std::endl; asmCode << "\tcmovne ecx, edx" << std::endl;
@ -448,72 +406,72 @@ namespace RandomX {
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tand rax, "; asmCode << "\tand rax, ";
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) { void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tand eax, "; asmCode << "\tand eax, ";
genbr132(instr); genbia32(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tor rax, "; asmCode << "\tor rax, ";
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) { void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\tor eax, "; asmCode << "\tor eax, ";
genbr132(instr); genbia32(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\txor rax, "; asmCode << "\txor rax, ";
genbr1(instr); genbia(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) { void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
asmCode << "\txor eax, "; asmCode << "\txor eax, ";
genbr132(instr); genbia32(instr);
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
genbr0(instr, "shl"); genbiashift(instr, "shl");
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
genbr0(instr, "shr"); genbiashift(instr, "shr");
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
genbr0(instr, "sar"); genbiashift(instr, "sar");
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
genbr0(instr, "rol"); genbiashift(instr, "rol");
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
genar(instr, i); genar(instr, i);
genbr0(instr, "ror"); genbiashift(instr, "ror");
gencr(instr); gencr(instr);
} }

View File

@ -38,11 +38,12 @@ namespace RandomX {
static InstructionGenerator engine[256]; static InstructionGenerator engine[256];
std::stringstream asmCode; std::stringstream asmCode;
void gena(Instruction&, int);
void genar(Instruction&, int); void genar(Instruction&, int);
void genaf(Instruction&, int); void genaf(Instruction&, int);
void genbr0(Instruction&, const char*); void genbiashift(Instruction&, const char*);
void genbr1(Instruction&); void genbia(Instruction&);
void genbr132(Instruction&); void genbia32(Instruction&);
void genbf(Instruction&, const char*); void genbf(Instruction&, const char*);
void gencr(Instruction&, bool); void gencr(Instruction&, bool);
void gencf(Instruction&, bool); void gencf(Instruction&, bool);

View File

@ -17,10 +17,14 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>. along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/ */
//#define MAGIC_DIVISION
#include "JitCompilerX86.hpp" #include "JitCompilerX86.hpp"
#include "Pcg32.hpp" #include "Pcg32.hpp"
#include <cstring> #include <cstring>
#include <stdexcept> #include <stdexcept>
#ifdef MAGIC_DIVISION
#include "divideByConstantCodegen.h"
#endif
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
@ -152,6 +156,17 @@ namespace RandomX {
instructionOffsets.push_back(codePos); instructionOffsets.push_back(codePos);
emit(0x840fcbff); //dec ebx; jz <epilogue> emit(0x840fcbff); //dec ebx; jz <epilogue>
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative) emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
auto generator = engine[instr.opcode];
(this->*generator)(instr, i);
}
void JitCompilerX86::fixCallOffsets() {
for (CallOffset& co : callOffsets) {
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
}
}
void JitCompilerX86::gena(Instruction& instr) {
emit(uint16_t(0x8149)); //xor emit(uint16_t(0x8149)); //xor
emitByte(0xf0 + (instr.rega % RegistersCount)); emitByte(0xf0 + (instr.rega % RegistersCount));
emit(instr.addra); emit(instr.addra);
@ -169,41 +184,28 @@ namespace RandomX {
emit(uint16_t(0x3348)); emit(uint16_t(0x3348));
emitByte(0xe9); //xor rbp, rcx emitByte(0xe9); //xor rbp, rcx
} }
auto generator = engine[instr.opcode]; emit(uint16_t(0xe181)); //and ecx,
(this->*generator)(instr, i); if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
} }
else {
void JitCompilerX86::fixCallOffsets() { emit(ScratchpadL2 - 1); //whole scratchpad
for (CallOffset& co : callOffsets) {
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
} }
} }
void JitCompilerX86::genar(Instruction& instr) { void JitCompilerX86::genar(Instruction& instr) {
emit(uint16_t(0xe181)); //and ecx, gena(instr);
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //whole scratchpad
}
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8] emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
} }
void JitCompilerX86::genaf(Instruction& instr) { void JitCompilerX86::genaf(Instruction& instr) {
emit(uint16_t(0xe181)); //and ecx, gena(instr);
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //whole scratchpad
}
emitByte(0xf3); emitByte(0xf3);
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8] emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
} }
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) { void JitCompilerX86::genbiashift(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if ((instr.locb & 7) <= 3) { if (instr.locb & 1) {
emit(uint16_t(0x8b49)); //mov emit(uint16_t(0x8b49)); //mov
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
emitByte(0x48); //REX.W emitByte(0x48); //REX.W
@ -216,8 +218,8 @@ namespace RandomX {
} }
} }
void JitCompilerX86::genbr1(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) { void JitCompilerX86::genbia(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if ((instr.locb & 7) <= 5) { if (instr.locb & 3) {
emit(opcodeReg); // xxx rax, r64 emit(opcodeReg); // xxx rax, r64
emitByte(0xc0 + (instr.regb % RegistersCount)); emitByte(0xc0 + (instr.regb % RegistersCount));
} }
@ -227,8 +229,8 @@ namespace RandomX {
} }
} }
void JitCompilerX86::genbr132(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) { void JitCompilerX86::genbia32(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
if ((instr.locb & 7) <= 5) { if (instr.locb & 3) {
emit(opcodeReg); // xxx eax, r32 emit(opcodeReg); // xxx eax, r32
emitByte(0xc0 + (instr.regb % RegistersCount)); emitByte(0xc0 + (instr.regb % RegistersCount));
} }
@ -328,25 +330,25 @@ namespace RandomX {
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) { void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr1(instr, 0x0349, 0x0548); genbia(instr, 0x0349, 0x0548);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) { void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr132(instr, 0x0341, 0x05); genbia32(instr, 0x0341, 0x05);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) { void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr1(instr, 0x2b49, 0x2d48); genbia(instr, 0x2b49, 0x2d48);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) { void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr132(instr, 0x2b41, 0x2d); genbia32(instr, 0x2b41, 0x2d);
gencr(instr); gencr(instr);
} }
@ -435,104 +437,209 @@ namespace RandomX {
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) { void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
genar(instr); genar(instr);
if ((instr.locb & 7) <= 5) { if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
if (instr.imm32 != 0) {
uint32_t divisor = instr.imm32;
if (divisor & (divisor - 1)) {
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
if (mi.pre_shift > 0) {
if (mi.pre_shift == 1) {
emitByte(0x48);
emit(uint16_t(0xe8d1)); //shr rax,1
}
else {
emit(0x00e8c148 | (mi.pre_shift << 24)); //shr rax, pre_shift
}
}
if (mi.increment) {
emit(0x00d8834801c08348); //add rax,1; sbb rax,0
}
emit(uint16_t(0xb948)); //movabs rcx, multiplier
emit(mi.multiplier);
emit(0x48e1f748); //mul rcx; REX
emit(uint16_t(0xc28b)); //mov rax,rdx
if (mi.post_shift > 0)
emit(0x00e8c148 | (mi.post_shift << 24)); //shr rax, post_shift
}
else { //divisor is a power of two
int shift = 0;
while (divisor >>= 1)
++shift;
if (shift > 0)
emit(0x00e8c148 | (shift << 24)); //shr rax, shift
}
}
#else
emitByte(0xb9); //mov ecx, imm32
emit(instr.imm32 != 0 ? instr.imm32 : 1);
#endif
}
else {
emitByte(0xb9); //mov ecx, 1 emitByte(0xb9); //mov ecx, 1
emit(1); emit(1);
emit(uint16_t(0x8b41)); //mov edx, r32 emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount)); emitByte(0xd0 + (instr.regb % RegistersCount));
emit(0x450fd285); //test edx, edx; cmovne ecx,edx emit(0x450fd285); //test edx, edx; cmovne ecx,edx
emitByte(0xca); emitByte(0xca);
} #ifdef MAGIC_DIVISION
else {
emitByte(0xb9); //mov ecx, imm32
emit(instr.imm32 != 0 ? instr.imm32 : 1);
}
emit(0xf748d233); //xor edx,edx; div rcx emit(0xf748d233); //xor edx,edx; div rcx
emitByte(0xf1); emitByte(0xf1);
#endif
}
#ifndef MAGIC_DIVISION
emit(0xf748d233); //xor edx,edx; div rcx
emitByte(0xf1);
#endif
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) { void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr); genar(instr);
if ((instr.locb & 7) <= 5) { if (instr.locb & 3) {
emit(uint16_t(0x8b41)); //mov edx, r32 #ifdef MAGIC_DIVISION
emitByte(0xd0 + (instr.regb % RegistersCount)); int64_t divisor = instr.imm32;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
// +/- power of two
bool negative = divisor < 0;
if (negative)
divisor = -divisor;
int shift = 0;
uint64_t unsignedDivisor = divisor;
while (unsignedDivisor >>= 1)
++shift;
if (shift > 0) {
emitByte(0x48);
emit(uint16_t(0xc88b)); //mov rcx, rax
emit(0x3ff9c148); //sar rcx, 63
uint32_t mask = (1ULL << shift) - 1;
emit(uint16_t(0xe181)); //and ecx, mask
emit(mask);
emitByte(0x48);
emit(uint16_t(0xc103)); //add rax, rcx
emit(0x00f8c148 | (shift << 24)); //sar rax, shift
}
if (negative) {
emitByte(0x48);
emit(uint16_t(0xd8f7)); //neg rax
}
}
else if (divisor != 0) {
magics_info mi = compute_signed_magic_info(divisor);
if ((divisor >= 0) != (mi.multiplier >= 0)) {
emitByte(0x48);
emit(uint16_t(0xc88b)); //mov rcx, rax
}
emit(uint16_t(0xba48)); //movabs rdx, multiplier
emit(mi.multiplier);
emit(0xd233c28b48eaf748); //imul rdx; mov rax,rdx; xor edx,edx
bool haveSF = false;
if (divisor > 0 && mi.multiplier < 0) {
emitByte(0x48);
emit(uint16_t(0xc103)); //add rax, rcx
haveSF = true;
}
if (divisor < 0 && mi.multiplier > 0) {
emitByte(0x48);
emit(uint16_t(0xc12b)); //sub rax, rcx
haveSF = true;
}
if (mi.shift > 0) {
emit(0x00f8c148 | (mi.shift << 24)); //sar rax, shift
haveSF = true;
}
if (!haveSF) {
emitByte(0x48);
emit(uint16_t(0x85c0));
}
emit(0x48c2980f); //sets dl; add rax, rdx
emit(uint16_t(0xc203));
}
#else
emitByte(0xba); // mov edx, imm32
emit(instr.imm32);
#endif
} }
else { else {
emitByte(0xba); // xxx edx, imm32 emit(uint16_t(0x8b41)); //mov edx, r32
emit(instr.imm32); emitByte(0xd0 + (instr.regb % RegistersCount));
#ifndef MAGIC_DIVISION
} }
#endif
emit(0xc88b480b75fffa83); emit(0xc88b480b75fffa83);
emit(0x1274c9ff48c1d148); emit(0x1274c9ff48c1d148);
emit(0x0fd28500000001b9); emit(0x0fd28500000001b9);
emit(0x489948c96348ca45); emit(0x489948c96348ca45);
emit(uint16_t(0xf9f7)); //idiv rcx emit(uint16_t(0xf9f7)); //idiv rcx
#ifdef MAGIC_DIVISION
}
#endif
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_AND_64(Instruction& instr, int i) { void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr1(instr, 0x2349, 0x2548); genbia(instr, 0x2349, 0x2548);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_AND_32(Instruction& instr, int i) { void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr132(instr, 0x2341, 0x25); genbia32(instr, 0x2341, 0x25);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_OR_64(Instruction& instr, int i) { void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr1(instr, 0x0b49, 0x0d48); genbia(instr, 0x0b49, 0x0d48);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_OR_32(Instruction& instr, int i) { void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr132(instr, 0x0b41, 0x0d); genbia32(instr, 0x0b41, 0x0d);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) { void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr1(instr, 0x3349, 0x3548); genbia(instr, 0x3349, 0x3548);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) { void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr132(instr, 0x3341, 0x35); genbia32(instr, 0x3341, 0x35);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) { void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr0(instr, 0xe0d3, 0xe0c1); genbiashift(instr, 0xe0d3, 0xe0c1);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) { void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr0(instr, 0xe8d3, 0xe8c1); genbiashift(instr, 0xe8d3, 0xe8c1);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) { void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr0(instr, 0xf8d3, 0xf8c1); genbiashift(instr, 0xf8d3, 0xf8c1);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) { void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr0(instr, 0xc0d3, 0xc0c1); genbiashift(instr, 0xc0d3, 0xc0c1);
gencr(instr); gencr(instr);
} }
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) { void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
genar(instr); genar(instr);
genbr0(instr, 0xc8d3, 0xc8c1); genbiashift(instr, 0xc8d3, 0xc8c1);
gencr(instr); gencr(instr);
} }

View File

@ -58,11 +58,12 @@ namespace RandomX {
std::vector<int32_t> instructionOffsets; std::vector<int32_t> instructionOffsets;
std::vector<CallOffset> callOffsets; std::vector<CallOffset> callOffsets;
void gena(Instruction&);
void genar(Instruction&); void genar(Instruction&);
void genaf(Instruction&); void genaf(Instruction&);
void genbr0(Instruction&, uint16_t, uint16_t); void genbiashift(Instruction&, uint16_t, uint16_t);
void genbr1(Instruction&, uint16_t, uint16_t); void genbia(Instruction&, uint16_t, uint16_t);
void genbr132(Instruction&, uint16_t, uint8_t); void genbia32(Instruction&, uint16_t, uint8_t);
void genbf(Instruction&, uint8_t); void genbf(Instruction&, uint8_t);
void scratchpadStoreR(Instruction&, uint32_t, bool); void scratchpadStoreR(Instruction&, uint32_t, bool);
void scratchpadStoreF(Instruction&, int, uint32_t, bool); void scratchpadStoreF(Instruction&, int, uint32_t, bool);

View File

@ -11,10 +11,10 @@
#include "divideByConstantCodegen.h" #include "divideByConstantCodegen.h"
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) { struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits) {
//The numerator must fit in a uint //The numerator must fit in an unsigned_type
assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT); assert(num_bits > 0 && num_bits <= sizeof(unsigned_type) * CHAR_BIT);
// D must be larger than zero and not a power of 2 // D must be larger than zero and not a power of 2
assert(D & (D - 1)); assert(D & (D - 1));
@ -22,29 +22,29 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// The eventual result // The eventual result
struct magicu_info result; struct magicu_info result;
// Bits in a uint // Bits in an unsigned_type
const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT; const unsigned UINT_BITS = sizeof(unsigned_type) * CHAR_BIT;
// The extra shift implicit in the difference between UINT_BITS and num_bits // The extra shift implicit in the difference between UINT_BITS and num_bits
const unsigned extra_shift = UINT_BITS - num_bits; const unsigned extra_shift = UINT_BITS - num_bits;
// The initial power of 2 is one less than the first one that can possibly work // The initial power of 2 is one less than the first one that can possibly work
const uint initial_power_of_2 = (uint)1 << (UINT_BITS - 1); const unsigned_type initial_power_of_2 = (unsigned_type)1 << (UINT_BITS - 1);
// The remainder and quotient of our power of 2 divided by d // The remainder and quotient of our power of 2 divided by d
uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D; unsigned_type quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
// ceil(log_2 D) // ceil(log_2 D)
unsigned ceil_log_2_D; unsigned ceil_log_2_D;
// The magic info for the variant "round down" algorithm // The magic info for the variant "round down" algorithm
uint down_multiplier = 0; unsigned_type down_multiplier = 0;
unsigned down_exponent = 0; unsigned down_exponent = 0;
int has_magic_down = 0; int has_magic_down = 0;
// Compute ceil(log_2 D) // Compute ceil(log_2 D)
ceil_log_2_D = 0; ceil_log_2_D = 0;
uint tmp; unsigned_type tmp;
for (tmp = D; tmp > 0; tmp >>= 1) for (tmp = D; tmp > 0; tmp >>= 1)
ceil_log_2_D += 1; ceil_log_2_D += 1;
@ -67,11 +67,11 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// We're done if this exponent works for the round_up algorithm. // We're done if this exponent works for the round_up algorithm.
// Note that exponent may be larger than the maximum shift supported, // Note that exponent may be larger than the maximum shift supported,
// so the check for >= ceil_log_2_D is critical. // so the check for >= ceil_log_2_D is critical.
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift))) if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((unsigned_type)1 << (exponent + extra_shift)))
break; break;
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm // Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
if (!has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) { if (!has_magic_down && remainder <= ((unsigned_type)1 << (exponent + extra_shift))) {
has_magic_down = 1; has_magic_down = 1;
down_multiplier = quotient; down_multiplier = quotient;
down_exponent = exponent; down_exponent = exponent;
@ -96,7 +96,7 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
else { else {
// Even divisor, so use a prefix-shifted dividend // Even divisor, so use a prefix-shifted dividend
unsigned pre_shift = 0; unsigned pre_shift = 0;
uint shifted_D = D; unsigned_type shifted_D = D;
while ((shifted_D & 1) == 0) { while ((shifted_D & 1) == 0) {
shifted_D >>= 1; shifted_D >>= 1;
pre_shift += 1; pre_shift += 1;
@ -108,34 +108,34 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
return result; return result;
} }
struct magics_info compute_signed_magic_info(sint D) { struct magics_info compute_signed_magic_info(signed_type D) {
// D must not be zero and must not be a power of 2 (or its negative) // D must not be zero and must not be a power of 2 (or its negative)
assert(D != 0 && (D & -D) != D && (D & -D) != -D); assert(D != 0 && (D & -D) != D && (D & -D) != -D);
// Our result // Our result
struct magics_info result; struct magics_info result;
// Bits in an sint // Bits in a signed_type
const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT; const unsigned SINT_BITS = sizeof(signed_type) * CHAR_BIT;
// Absolute value of D (we know D is not the most negative value since that's a power of 2) // Absolute value of D (we know D is not the most negative value since that's a power of 2)
const uint abs_d = (D < 0 ? -D : D); const unsigned_type abs_d = (D < 0 ? -D : D);
// The initial power of 2 is one less than the first one that can possibly work // The initial power of 2 is one less than the first one that can possibly work
// "two31" in Warren // "two31" in Warren
unsigned exponent = SINT_BITS - 1; unsigned exponent = SINT_BITS - 1;
const uint initial_power_of_2 = (uint)1 << exponent; const unsigned_type initial_power_of_2 = (unsigned_type)1 << exponent;
// Compute the absolute value of our "test numerator," // Compute the absolute value of our "test numerator,"
// which is the largest dividend whose remainder with d is d-1. // which is the largest dividend whose remainder with d is d-1.
// This is called anc in Warren. // This is called anc in Warren.
const uint tmp = initial_power_of_2 + (D < 0); const unsigned_type tmp = initial_power_of_2 + (D < 0);
const uint abs_test_numer = tmp - 1 - tmp % abs_d; const unsigned_type abs_test_numer = tmp - 1 - tmp % abs_d;
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren) // Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer; unsigned_type quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d; unsigned_type quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
uint delta; unsigned_type delta;
// Begin our loop // Begin our loop
do { do {

View File

@ -24,11 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
extern "C" { extern "C" {
#endif #endif
typedef uint64_t uint; typedef uint64_t unsigned_type;
typedef int64_t sint; typedef int64_t signed_type;
/* Computes "magic info" for performing signed division by a fixed integer D. /* Computes "magic info" for performing signed division by a fixed integer D.
The type 'sint' is assumed to be defined as a signed integer type large enough The type 'signed_type' is assumed to be defined as a signed integer type large enough
to hold both the dividend and the divisor. to hold both the dividend and the divisor.
Here >> is arithmetic (signed) shift, and >>> is logical shift. Here >> is arithmetic (signed) shift, and >>> is logical shift.
@ -55,17 +55,17 @@ extern "C" {
*/ */
struct magics_info { struct magics_info {
sint multiplier; // the "magic number" multiplier signed_type multiplier; // the "magic number" multiplier
unsigned shift; // shift for the dividend after multiplying unsigned shift; // shift for the dividend after multiplying
}; };
struct magics_info compute_signed_magic_info(sint D); struct magics_info compute_signed_magic_info(signed_type D);
/* Computes "magic info" for performing unsigned division by a fixed positive integer D. /* Computes "magic info" for performing unsigned division by a fixed positive integer D.
The type 'uint' is assumed to be defined as an unsigned integer type large enough The type 'unsigned_type' is assumed to be defined as an unsigned integer type large enough
to hold both the dividend and the divisor. num_bits can be set appropriately if n is to hold both the dividend and the divisor. num_bits can be set appropriately if n is
known to be smaller than the largest uint; if this is not known then pass known to be smaller than the largest unsigned_type; if this is not known then pass
(sizeof(uint) * CHAR_BIT) for num_bits. (sizeof(unsigned_type) * CHAR_BIT) for num_bits.
Assume we have a hardware register of width UINT_BITS, a known constant D which is Assume we have a hardware register of width UINT_BITS, a known constant D which is
not zero and not a power of 2, and a variable n of width num_bits (which may be not zero and not a power of 2, and a variable n of width num_bits (which may be
@ -105,12 +105,12 @@ extern "C" {
*/ */
struct magicu_info { struct magicu_info {
uint multiplier; // the "magic number" multiplier unsigned_type multiplier; // the "magic number" multiplier
unsigned pre_shift; // shift for the dividend before multiplying unsigned pre_shift; // shift for the dividend before multiplying
unsigned post_shift; //shift for the dividend after multiplying unsigned post_shift; //shift for the dividend after multiplying
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
}; };
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits); struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits);
#if defined(__cplusplus) #if defined(__cplusplus)
} }

View File

@ -19,17 +19,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once #pragma once
#define WT_ADD_64 15 #define WT_ADD_64 12
#define WT_ADD_32 2 #define WT_ADD_32 2
#define WT_SUB_64 15 #define WT_SUB_64 12
#define WT_SUB_32 2 #define WT_SUB_32 2
#define WT_MUL_64 23 #define WT_MUL_64 23
#define WT_MULH_64 10 #define WT_MULH_64 10
#define WT_MUL_32 15 #define WT_MUL_32 15
#define WT_IMUL_32 15 #define WT_IMUL_32 15
#define WT_IMULH_64 6 #define WT_IMULH_64 6
#define WT_DIV_64 1 #define WT_DIV_64 4
#define WT_IDIV_64 1 #define WT_IDIV_64 4
#define WT_AND_64 4 #define WT_AND_64 4
#define WT_AND_32 2 #define WT_AND_32 2
#define WT_OR_64 4 #define WT_OR_64 4

File diff suppressed because it is too large Load Diff