mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Added magic division to JIT compiler
New B operand selection rules
This commit is contained in:
parent
451dfc5730
commit
2756bcdcfe
@ -83,10 +83,10 @@ The `B.LOC.L` flag determines the B operand. It can be either a register or imme
|
|||||||
|
|
||||||
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|
||||||
|----|--------|----|------|----|---|
|
|----|--------|----|------|----|---|
|
||||||
|0|register|register|register|register|register|
|
|0|register|`imm8`|`imm32`|register|register|
|
||||||
|1|`imm32`|register|register|register|register|
|
|1|`imm32`|register|register|register|register|
|
||||||
|2|`imm32`|`imm8`|register|register|register|
|
|2|`imm32`|`imm8`|register|register|register|
|
||||||
|3|`imm32`|`imm8`|`imm32`|register|register|
|
|3|`imm32`|register|register|register|register|
|
||||||
|
|
||||||
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.
|
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
//#define TRACE
|
//#define TRACE
|
||||||
//#define MAGIC_DIVISION
|
#define MAGIC_DIVISION
|
||||||
#include "AssemblyGeneratorX86.hpp"
|
#include "AssemblyGeneratorX86.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
@ -64,108 +64,61 @@ namespace RandomX {
|
|||||||
(this->*generator)(instr, i);
|
(this->*generator)(instr, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||||
switch (instr.loca & 3)
|
if (instr.loca & 3) {
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
if ((instr.loca & 192) == 0)
|
if ((instr.loca & 192) == 0)
|
||||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
break;
|
}
|
||||||
default: //3
|
else {
|
||||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
if ((instr.loca & 192) == 0)
|
if ((instr.loca & 192) == 0)
|
||||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||||
|
gena(instr, i);
|
||||||
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
gena(instr, i);
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
|
||||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
|
||||||
switch (instr.loca & 3)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
|
||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
|
||||||
if((instr.loca & 192) == 0)
|
|
||||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
|
||||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
break;
|
|
||||||
default: //3
|
|
||||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
|
||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
|
||||||
if ((instr.loca & 192) == 0)
|
|
||||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
|
||||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
|
void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
|
||||||
switch (instr.locb & 7)
|
if (instr.locb & 1) {
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
|
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
|
||||||
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
|
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
|
||||||
return;
|
} else {
|
||||||
default:
|
|
||||||
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
|
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbr1(Instruction& instr) {
|
void AssemblyGeneratorX86::genbia(Instruction& instr) {
|
||||||
switch (instr.locb & 7)
|
if (instr.locb & 3) {
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
case 4:
|
|
||||||
case 5:
|
|
||||||
asmCode << regR[instr.regb % RegistersCount] << std::endl;
|
asmCode << regR[instr.regb % RegistersCount] << std::endl;
|
||||||
return;
|
} else {
|
||||||
default:
|
|
||||||
asmCode << instr.imm32 << std::endl;;
|
asmCode << instr.imm32 << std::endl;;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbr132(Instruction& instr) {
|
void AssemblyGeneratorX86::genbia32(Instruction& instr) {
|
||||||
switch (instr.locb & 7)
|
if (instr.locb & 3) {
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
case 4:
|
|
||||||
case 5:
|
|
||||||
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
|
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
|
||||||
return;
|
}
|
||||||
default:
|
else {
|
||||||
asmCode << instr.imm32 << std::endl;;
|
asmCode << instr.imm32 << std::endl;;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -241,28 +194,28 @@ namespace RandomX {
|
|||||||
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tadd rax, ";
|
asmCode << "\tadd rax, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tadd eax, ";
|
asmCode << "\tadd eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tsub rax, ";
|
asmCode << "\tsub rax, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tsub eax, ";
|
asmCode << "\tsub eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -272,14 +225,14 @@ namespace RandomX {
|
|||||||
if ((instr.locb & 7) >= 6) {
|
if ((instr.locb & 7) >= 6) {
|
||||||
asmCode << "rax, ";
|
asmCode << "rax, ";
|
||||||
}
|
}
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tmov rcx, ";
|
asmCode << "\tmov rcx, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
asmCode << "\tmul rcx" << std::endl;
|
asmCode << "\tmul rcx" << std::endl;
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
asmCode << "\tmov rax, rdx" << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
@ -289,7 +242,7 @@ namespace RandomX {
|
|||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tmov ecx, eax" << std::endl;
|
asmCode << "\tmov ecx, eax" << std::endl;
|
||||||
asmCode << "\tmov eax, ";
|
asmCode << "\tmov eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
asmCode << "\timul rax, rcx" << std::endl;
|
asmCode << "\timul rax, rcx" << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
@ -310,7 +263,7 @@ namespace RandomX {
|
|||||||
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tmov rcx, ";
|
asmCode << "\tmov rcx, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
asmCode << "\timul rcx" << std::endl;
|
asmCode << "\timul rcx" << std::endl;
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
asmCode << "\tmov rax, rdx" << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
@ -318,7 +271,7 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
if ((instr.locb & 7) >= 6) {
|
if (instr.locb & 3) {
|
||||||
#ifdef MAGIC_DIVISION
|
#ifdef MAGIC_DIVISION
|
||||||
if (instr.imm32 != 0) {
|
if (instr.imm32 != 0) {
|
||||||
uint32_t divisor = instr.imm32;
|
uint32_t divisor = instr.imm32;
|
||||||
@ -373,8 +326,8 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
|
if (instr.locb & 3) {
|
||||||
#ifdef MAGIC_DIVISION
|
#ifdef MAGIC_DIVISION
|
||||||
if ((instr.locb & 7) >= 6) {
|
|
||||||
int64_t divisor = instr.imm32;
|
int64_t divisor = instr.imm32;
|
||||||
asmCode << "\t; magic divide by " << divisor << std::endl;
|
asmCode << "\t; magic divide by " << divisor << std::endl;
|
||||||
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||||
@ -394,9 +347,10 @@ namespace RandomX {
|
|||||||
asmCode << "\tadd rax, rcx" << std::endl;
|
asmCode << "\tadd rax, rcx" << std::endl;
|
||||||
asmCode << "\tsar rax, " << shift << std::endl;
|
asmCode << "\tsar rax, " << shift << std::endl;
|
||||||
}
|
}
|
||||||
if(negative)
|
if (negative)
|
||||||
asmCode << "\tneg rax" << std::endl;
|
asmCode << "\tneg rax" << std::endl;
|
||||||
} else if(divisor != 0) {
|
}
|
||||||
|
else if (divisor != 0) {
|
||||||
magics_info mi = compute_signed_magic_info(divisor);
|
magics_info mi = compute_signed_magic_info(divisor);
|
||||||
if ((divisor >= 0) != (mi.multiplier >= 0))
|
if ((divisor >= 0) != (mi.multiplier >= 0))
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
@ -422,16 +376,20 @@ namespace RandomX {
|
|||||||
asmCode << "\tsets dl" << std::endl;
|
asmCode << "\tsets dl" << std::endl;
|
||||||
asmCode << "\tadd rax, rdx" << std::endl;
|
asmCode << "\tadd rax, rdx" << std::endl;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
asmCode << "\tmov edx, " << instr.imm32 << std::endl;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
||||||
|
#ifndef MAGIC_DIVISION
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
asmCode << "\tmov edx, ";
|
|
||||||
genbr132(instr);
|
|
||||||
asmCode << "\tcmp edx, -1" << std::endl;
|
asmCode << "\tcmp edx, -1" << std::endl;
|
||||||
asmCode << "\tjne short safe_idiv_" << i << std::endl;
|
asmCode << "\tjne short body_idiv_" << i << std::endl;
|
||||||
asmCode << "\tneg rax" << std::endl;
|
asmCode << "\tneg rax" << std::endl;
|
||||||
asmCode << "\tjmp short result_idiv_" << i << std::endl;
|
asmCode << "\tjmp short result_idiv_" << i << std::endl;
|
||||||
asmCode << "safe_idiv_" << i << ":" << std::endl;
|
asmCode << "body_idiv_" << i << ":" << std::endl;
|
||||||
asmCode << "\tmov ecx, 1" << std::endl;
|
asmCode << "\tmov ecx, 1" << std::endl;
|
||||||
asmCode << "\ttest edx, edx" << std::endl;
|
asmCode << "\ttest edx, edx" << std::endl;
|
||||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
asmCode << "\tcmovne ecx, edx" << std::endl;
|
||||||
@ -448,72 +406,72 @@ namespace RandomX {
|
|||||||
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tand rax, ";
|
asmCode << "\tand rax, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tand eax, ";
|
asmCode << "\tand eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tor rax, ";
|
asmCode << "\tor rax, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tor eax, ";
|
asmCode << "\tor eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\txor rax, ";
|
asmCode << "\txor rax, ";
|
||||||
genbr1(instr);
|
genbia(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\txor eax, ";
|
asmCode << "\txor eax, ";
|
||||||
genbr132(instr);
|
genbia32(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
genbr0(instr, "shl");
|
genbiashift(instr, "shl");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
genbr0(instr, "shr");
|
genbiashift(instr, "shr");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
genbr0(instr, "sar");
|
genbiashift(instr, "sar");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
genbr0(instr, "rol");
|
genbiashift(instr, "rol");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
genbr0(instr, "ror");
|
genbiashift(instr, "ror");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,11 +38,12 @@ namespace RandomX {
|
|||||||
static InstructionGenerator engine[256];
|
static InstructionGenerator engine[256];
|
||||||
std::stringstream asmCode;
|
std::stringstream asmCode;
|
||||||
|
|
||||||
|
void gena(Instruction&, int);
|
||||||
void genar(Instruction&, int);
|
void genar(Instruction&, int);
|
||||||
void genaf(Instruction&, int);
|
void genaf(Instruction&, int);
|
||||||
void genbr0(Instruction&, const char*);
|
void genbiashift(Instruction&, const char*);
|
||||||
void genbr1(Instruction&);
|
void genbia(Instruction&);
|
||||||
void genbr132(Instruction&);
|
void genbia32(Instruction&);
|
||||||
void genbf(Instruction&, const char*);
|
void genbf(Instruction&, const char*);
|
||||||
void gencr(Instruction&, bool);
|
void gencr(Instruction&, bool);
|
||||||
void gencf(Instruction&, bool);
|
void gencf(Instruction&, bool);
|
||||||
|
@ -17,10 +17,14 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//#define MAGIC_DIVISION
|
||||||
#include "JitCompilerX86.hpp"
|
#include "JitCompilerX86.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#ifdef MAGIC_DIVISION
|
||||||
|
#include "divideByConstantCodegen.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
@ -152,6 +156,17 @@ namespace RandomX {
|
|||||||
instructionOffsets.push_back(codePos);
|
instructionOffsets.push_back(codePos);
|
||||||
emit(0x840fcbff); //dec ebx; jz <epilogue>
|
emit(0x840fcbff); //dec ebx; jz <epilogue>
|
||||||
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
|
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
|
||||||
|
auto generator = engine[instr.opcode];
|
||||||
|
(this->*generator)(instr, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitCompilerX86::fixCallOffsets() {
|
||||||
|
for (CallOffset& co : callOffsets) {
|
||||||
|
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitCompilerX86::gena(Instruction& instr) {
|
||||||
emit(uint16_t(0x8149)); //xor
|
emit(uint16_t(0x8149)); //xor
|
||||||
emitByte(0xf0 + (instr.rega % RegistersCount));
|
emitByte(0xf0 + (instr.rega % RegistersCount));
|
||||||
emit(instr.addra);
|
emit(instr.addra);
|
||||||
@ -169,41 +184,28 @@ namespace RandomX {
|
|||||||
emit(uint16_t(0x3348));
|
emit(uint16_t(0x3348));
|
||||||
emitByte(0xe9); //xor rbp, rcx
|
emitByte(0xe9); //xor rbp, rcx
|
||||||
}
|
}
|
||||||
auto generator = engine[instr.opcode];
|
emit(uint16_t(0xe181)); //and ecx,
|
||||||
(this->*generator)(instr, i);
|
if (instr.loca & 3) {
|
||||||
|
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
void JitCompilerX86::fixCallOffsets() {
|
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||||
for (CallOffset& co : callOffsets) {
|
|
||||||
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genar(Instruction& instr) {
|
void JitCompilerX86::genar(Instruction& instr) {
|
||||||
emit(uint16_t(0xe181)); //and ecx,
|
gena(instr);
|
||||||
if (instr.loca & 3) {
|
|
||||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
|
||||||
}
|
|
||||||
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
|
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genaf(Instruction& instr) {
|
void JitCompilerX86::genaf(Instruction& instr) {
|
||||||
emit(uint16_t(0xe181)); //and ecx,
|
gena(instr);
|
||||||
if (instr.loca & 3) {
|
|
||||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
|
||||||
}
|
|
||||||
emitByte(0xf3);
|
emitByte(0xf3);
|
||||||
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
|
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
void JitCompilerX86::genbiashift(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||||
if ((instr.locb & 7) <= 3) {
|
if (instr.locb & 1) {
|
||||||
emit(uint16_t(0x8b49)); //mov
|
emit(uint16_t(0x8b49)); //mov
|
||||||
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
|
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
|
||||||
emitByte(0x48); //REX.W
|
emitByte(0x48); //REX.W
|
||||||
@ -216,8 +218,8 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genbr1(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
void JitCompilerX86::genbia(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||||
if ((instr.locb & 7) <= 5) {
|
if (instr.locb & 3) {
|
||||||
emit(opcodeReg); // xxx rax, r64
|
emit(opcodeReg); // xxx rax, r64
|
||||||
emitByte(0xc0 + (instr.regb % RegistersCount));
|
emitByte(0xc0 + (instr.regb % RegistersCount));
|
||||||
}
|
}
|
||||||
@ -227,8 +229,8 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genbr132(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
|
void JitCompilerX86::genbia32(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
|
||||||
if ((instr.locb & 7) <= 5) {
|
if (instr.locb & 3) {
|
||||||
emit(opcodeReg); // xxx eax, r32
|
emit(opcodeReg); // xxx eax, r32
|
||||||
emitByte(0xc0 + (instr.regb % RegistersCount));
|
emitByte(0xc0 + (instr.regb % RegistersCount));
|
||||||
}
|
}
|
||||||
@ -328,25 +330,25 @@ namespace RandomX {
|
|||||||
|
|
||||||
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr1(instr, 0x0349, 0x0548);
|
genbia(instr, 0x0349, 0x0548);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
|
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr132(instr, 0x0341, 0x05);
|
genbia32(instr, 0x0341, 0x05);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr1(instr, 0x2b49, 0x2d48);
|
genbia(instr, 0x2b49, 0x2d48);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
|
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr132(instr, 0x2b41, 0x2d);
|
genbia32(instr, 0x2b41, 0x2d);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -435,104 +437,209 @@ namespace RandomX {
|
|||||||
|
|
||||||
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
if ((instr.locb & 7) <= 5) {
|
if (instr.locb & 3) {
|
||||||
|
#ifdef MAGIC_DIVISION
|
||||||
|
if (instr.imm32 != 0) {
|
||||||
|
uint32_t divisor = instr.imm32;
|
||||||
|
if (divisor & (divisor - 1)) {
|
||||||
|
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
|
||||||
|
if (mi.pre_shift > 0) {
|
||||||
|
if (mi.pre_shift == 1) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xe8d1)); //shr rax,1
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
emit(0x00e8c148 | (mi.pre_shift << 24)); //shr rax, pre_shift
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (mi.increment) {
|
||||||
|
emit(0x00d8834801c08348); //add rax,1; sbb rax,0
|
||||||
|
}
|
||||||
|
emit(uint16_t(0xb948)); //movabs rcx, multiplier
|
||||||
|
emit(mi.multiplier);
|
||||||
|
emit(0x48e1f748); //mul rcx; REX
|
||||||
|
emit(uint16_t(0xc28b)); //mov rax,rdx
|
||||||
|
if (mi.post_shift > 0)
|
||||||
|
emit(0x00e8c148 | (mi.post_shift << 24)); //shr rax, post_shift
|
||||||
|
}
|
||||||
|
else { //divisor is a power of two
|
||||||
|
int shift = 0;
|
||||||
|
while (divisor >>= 1)
|
||||||
|
++shift;
|
||||||
|
if (shift > 0)
|
||||||
|
emit(0x00e8c148 | (shift << 24)); //shr rax, shift
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
emitByte(0xb9); //mov ecx, imm32
|
||||||
|
emit(instr.imm32 != 0 ? instr.imm32 : 1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else {
|
||||||
emitByte(0xb9); //mov ecx, 1
|
emitByte(0xb9); //mov ecx, 1
|
||||||
emit(1);
|
emit(1);
|
||||||
emit(uint16_t(0x8b41)); //mov edx, r32
|
emit(uint16_t(0x8b41)); //mov edx, r32
|
||||||
emitByte(0xd0 + (instr.regb % RegistersCount));
|
emitByte(0xd0 + (instr.regb % RegistersCount));
|
||||||
emit(0x450fd285); //test edx, edx; cmovne ecx,edx
|
emit(0x450fd285); //test edx, edx; cmovne ecx,edx
|
||||||
emitByte(0xca);
|
emitByte(0xca);
|
||||||
}
|
#ifdef MAGIC_DIVISION
|
||||||
else {
|
|
||||||
emitByte(0xb9); //mov ecx, imm32
|
|
||||||
emit(instr.imm32 != 0 ? instr.imm32 : 1);
|
|
||||||
}
|
|
||||||
emit(0xf748d233); //xor edx,edx; div rcx
|
emit(0xf748d233); //xor edx,edx; div rcx
|
||||||
emitByte(0xf1);
|
emitByte(0xf1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#ifndef MAGIC_DIVISION
|
||||||
|
emit(0xf748d233); //xor edx,edx; div rcx
|
||||||
|
emitByte(0xf1);
|
||||||
|
#endif
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
if ((instr.locb & 7) <= 5) {
|
if (instr.locb & 3) {
|
||||||
emit(uint16_t(0x8b41)); //mov edx, r32
|
#ifdef MAGIC_DIVISION
|
||||||
emitByte(0xd0 + (instr.regb % RegistersCount));
|
int64_t divisor = instr.imm32;
|
||||||
|
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||||
|
// +/- power of two
|
||||||
|
bool negative = divisor < 0;
|
||||||
|
if (negative)
|
||||||
|
divisor = -divisor;
|
||||||
|
int shift = 0;
|
||||||
|
uint64_t unsignedDivisor = divisor;
|
||||||
|
while (unsignedDivisor >>= 1)
|
||||||
|
++shift;
|
||||||
|
if (shift > 0) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc88b)); //mov rcx, rax
|
||||||
|
emit(0x3ff9c148); //sar rcx, 63
|
||||||
|
uint32_t mask = (1ULL << shift) - 1;
|
||||||
|
emit(uint16_t(0xe181)); //and ecx, mask
|
||||||
|
emit(mask);
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc103)); //add rax, rcx
|
||||||
|
emit(0x00f8c148 | (shift << 24)); //sar rax, shift
|
||||||
|
}
|
||||||
|
if (negative) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xd8f7)); //neg rax
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (divisor != 0) {
|
||||||
|
magics_info mi = compute_signed_magic_info(divisor);
|
||||||
|
if ((divisor >= 0) != (mi.multiplier >= 0)) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc88b)); //mov rcx, rax
|
||||||
|
}
|
||||||
|
emit(uint16_t(0xba48)); //movabs rdx, multiplier
|
||||||
|
emit(mi.multiplier);
|
||||||
|
emit(0xd233c28b48eaf748); //imul rdx; mov rax,rdx; xor edx,edx
|
||||||
|
bool haveSF = false;
|
||||||
|
if (divisor > 0 && mi.multiplier < 0) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc103)); //add rax, rcx
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (divisor < 0 && mi.multiplier > 0) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc12b)); //sub rax, rcx
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (mi.shift > 0) {
|
||||||
|
emit(0x00f8c148 | (mi.shift << 24)); //sar rax, shift
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (!haveSF) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0x85c0));
|
||||||
|
}
|
||||||
|
emit(0x48c2980f); //sets dl; add rax, rdx
|
||||||
|
emit(uint16_t(0xc203));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
emitByte(0xba); // mov edx, imm32
|
||||||
|
emit(instr.imm32);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
emitByte(0xba); // xxx edx, imm32
|
emit(uint16_t(0x8b41)); //mov edx, r32
|
||||||
emit(instr.imm32);
|
emitByte(0xd0 + (instr.regb % RegistersCount));
|
||||||
|
#ifndef MAGIC_DIVISION
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
emit(0xc88b480b75fffa83);
|
emit(0xc88b480b75fffa83);
|
||||||
emit(0x1274c9ff48c1d148);
|
emit(0x1274c9ff48c1d148);
|
||||||
emit(0x0fd28500000001b9);
|
emit(0x0fd28500000001b9);
|
||||||
emit(0x489948c96348ca45);
|
emit(0x489948c96348ca45);
|
||||||
emit(uint16_t(0xf9f7)); //idiv rcx
|
emit(uint16_t(0xf9f7)); //idiv rcx
|
||||||
|
#ifdef MAGIC_DIVISION
|
||||||
|
}
|
||||||
|
#endif
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr1(instr, 0x2349, 0x2548);
|
genbia(instr, 0x2349, 0x2548);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
|
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr132(instr, 0x2341, 0x25);
|
genbia32(instr, 0x2341, 0x25);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr1(instr, 0x0b49, 0x0d48);
|
genbia(instr, 0x0b49, 0x0d48);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
|
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr132(instr, 0x0b41, 0x0d);
|
genbia32(instr, 0x0b41, 0x0d);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr1(instr, 0x3349, 0x3548);
|
genbia(instr, 0x3349, 0x3548);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
|
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr132(instr, 0x3341, 0x35);
|
genbia32(instr, 0x3341, 0x35);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr0(instr, 0xe0d3, 0xe0c1);
|
genbiashift(instr, 0xe0d3, 0xe0c1);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr0(instr, 0xe8d3, 0xe8c1);
|
genbiashift(instr, 0xe8d3, 0xe8c1);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr0(instr, 0xf8d3, 0xf8c1);
|
genbiashift(instr, 0xf8d3, 0xf8c1);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr0(instr, 0xc0d3, 0xc0c1);
|
genbiashift(instr, 0xc0d3, 0xc0c1);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
|
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
genbr0(instr, 0xc8d3, 0xc8c1);
|
genbiashift(instr, 0xc8d3, 0xc8c1);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,11 +58,12 @@ namespace RandomX {
|
|||||||
std::vector<int32_t> instructionOffsets;
|
std::vector<int32_t> instructionOffsets;
|
||||||
std::vector<CallOffset> callOffsets;
|
std::vector<CallOffset> callOffsets;
|
||||||
|
|
||||||
|
void gena(Instruction&);
|
||||||
void genar(Instruction&);
|
void genar(Instruction&);
|
||||||
void genaf(Instruction&);
|
void genaf(Instruction&);
|
||||||
void genbr0(Instruction&, uint16_t, uint16_t);
|
void genbiashift(Instruction&, uint16_t, uint16_t);
|
||||||
void genbr1(Instruction&, uint16_t, uint16_t);
|
void genbia(Instruction&, uint16_t, uint16_t);
|
||||||
void genbr132(Instruction&, uint16_t, uint8_t);
|
void genbia32(Instruction&, uint16_t, uint8_t);
|
||||||
void genbf(Instruction&, uint8_t);
|
void genbf(Instruction&, uint8_t);
|
||||||
void scratchpadStoreR(Instruction&, uint32_t, bool);
|
void scratchpadStoreR(Instruction&, uint32_t, bool);
|
||||||
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
|
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
|
||||||
|
@ -11,10 +11,10 @@
|
|||||||
|
|
||||||
#include "divideByConstantCodegen.h"
|
#include "divideByConstantCodegen.h"
|
||||||
|
|
||||||
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits) {
|
||||||
|
|
||||||
//The numerator must fit in a uint
|
//The numerator must fit in an unsigned_type
|
||||||
assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);
|
assert(num_bits > 0 && num_bits <= sizeof(unsigned_type) * CHAR_BIT);
|
||||||
|
|
||||||
// D must be larger than zero and not a power of 2
|
// D must be larger than zero and not a power of 2
|
||||||
assert(D & (D - 1));
|
assert(D & (D - 1));
|
||||||
@ -22,29 +22,29 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||||||
// The eventual result
|
// The eventual result
|
||||||
struct magicu_info result;
|
struct magicu_info result;
|
||||||
|
|
||||||
// Bits in a uint
|
// Bits in an unsigned_type
|
||||||
const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;
|
const unsigned UINT_BITS = sizeof(unsigned_type) * CHAR_BIT;
|
||||||
|
|
||||||
// The extra shift implicit in the difference between UINT_BITS and num_bits
|
// The extra shift implicit in the difference between UINT_BITS and num_bits
|
||||||
const unsigned extra_shift = UINT_BITS - num_bits;
|
const unsigned extra_shift = UINT_BITS - num_bits;
|
||||||
|
|
||||||
// The initial power of 2 is one less than the first one that can possibly work
|
// The initial power of 2 is one less than the first one that can possibly work
|
||||||
const uint initial_power_of_2 = (uint)1 << (UINT_BITS - 1);
|
const unsigned_type initial_power_of_2 = (unsigned_type)1 << (UINT_BITS - 1);
|
||||||
|
|
||||||
// The remainder and quotient of our power of 2 divided by d
|
// The remainder and quotient of our power of 2 divided by d
|
||||||
uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
|
unsigned_type quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
|
||||||
|
|
||||||
// ceil(log_2 D)
|
// ceil(log_2 D)
|
||||||
unsigned ceil_log_2_D;
|
unsigned ceil_log_2_D;
|
||||||
|
|
||||||
// The magic info for the variant "round down" algorithm
|
// The magic info for the variant "round down" algorithm
|
||||||
uint down_multiplier = 0;
|
unsigned_type down_multiplier = 0;
|
||||||
unsigned down_exponent = 0;
|
unsigned down_exponent = 0;
|
||||||
int has_magic_down = 0;
|
int has_magic_down = 0;
|
||||||
|
|
||||||
// Compute ceil(log_2 D)
|
// Compute ceil(log_2 D)
|
||||||
ceil_log_2_D = 0;
|
ceil_log_2_D = 0;
|
||||||
uint tmp;
|
unsigned_type tmp;
|
||||||
for (tmp = D; tmp > 0; tmp >>= 1)
|
for (tmp = D; tmp > 0; tmp >>= 1)
|
||||||
ceil_log_2_D += 1;
|
ceil_log_2_D += 1;
|
||||||
|
|
||||||
@ -67,11 +67,11 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||||||
// We're done if this exponent works for the round_up algorithm.
|
// We're done if this exponent works for the round_up algorithm.
|
||||||
// Note that exponent may be larger than the maximum shift supported,
|
// Note that exponent may be larger than the maximum shift supported,
|
||||||
// so the check for >= ceil_log_2_D is critical.
|
// so the check for >= ceil_log_2_D is critical.
|
||||||
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
|
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((unsigned_type)1 << (exponent + extra_shift)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
|
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
|
||||||
if (!has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
|
if (!has_magic_down && remainder <= ((unsigned_type)1 << (exponent + extra_shift))) {
|
||||||
has_magic_down = 1;
|
has_magic_down = 1;
|
||||||
down_multiplier = quotient;
|
down_multiplier = quotient;
|
||||||
down_exponent = exponent;
|
down_exponent = exponent;
|
||||||
@ -96,7 +96,7 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||||||
else {
|
else {
|
||||||
// Even divisor, so use a prefix-shifted dividend
|
// Even divisor, so use a prefix-shifted dividend
|
||||||
unsigned pre_shift = 0;
|
unsigned pre_shift = 0;
|
||||||
uint shifted_D = D;
|
unsigned_type shifted_D = D;
|
||||||
while ((shifted_D & 1) == 0) {
|
while ((shifted_D & 1) == 0) {
|
||||||
shifted_D >>= 1;
|
shifted_D >>= 1;
|
||||||
pre_shift += 1;
|
pre_shift += 1;
|
||||||
@ -108,34 +108,34 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct magics_info compute_signed_magic_info(sint D) {
|
struct magics_info compute_signed_magic_info(signed_type D) {
|
||||||
// D must not be zero and must not be a power of 2 (or its negative)
|
// D must not be zero and must not be a power of 2 (or its negative)
|
||||||
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
|
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
|
||||||
|
|
||||||
// Our result
|
// Our result
|
||||||
struct magics_info result;
|
struct magics_info result;
|
||||||
|
|
||||||
// Bits in an sint
|
// Bits in a signed_type
|
||||||
const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;
|
const unsigned SINT_BITS = sizeof(signed_type) * CHAR_BIT;
|
||||||
|
|
||||||
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
|
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
|
||||||
const uint abs_d = (D < 0 ? -D : D);
|
const unsigned_type abs_d = (D < 0 ? -D : D);
|
||||||
|
|
||||||
// The initial power of 2 is one less than the first one that can possibly work
|
// The initial power of 2 is one less than the first one that can possibly work
|
||||||
// "two31" in Warren
|
// "two31" in Warren
|
||||||
unsigned exponent = SINT_BITS - 1;
|
unsigned exponent = SINT_BITS - 1;
|
||||||
const uint initial_power_of_2 = (uint)1 << exponent;
|
const unsigned_type initial_power_of_2 = (unsigned_type)1 << exponent;
|
||||||
|
|
||||||
// Compute the absolute value of our "test numerator,"
|
// Compute the absolute value of our "test numerator,"
|
||||||
// which is the largest dividend whose remainder with d is d-1.
|
// which is the largest dividend whose remainder with d is d-1.
|
||||||
// This is called anc in Warren.
|
// This is called anc in Warren.
|
||||||
const uint tmp = initial_power_of_2 + (D < 0);
|
const unsigned_type tmp = initial_power_of_2 + (D < 0);
|
||||||
const uint abs_test_numer = tmp - 1 - tmp % abs_d;
|
const unsigned_type abs_test_numer = tmp - 1 - tmp % abs_d;
|
||||||
|
|
||||||
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
|
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
|
||||||
uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
|
unsigned_type quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
|
||||||
uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
|
unsigned_type quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
|
||||||
uint delta;
|
unsigned_type delta;
|
||||||
|
|
||||||
// Begin our loop
|
// Begin our loop
|
||||||
do {
|
do {
|
||||||
|
@ -24,11 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef uint64_t uint;
|
typedef uint64_t unsigned_type;
|
||||||
typedef int64_t sint;
|
typedef int64_t signed_type;
|
||||||
|
|
||||||
/* Computes "magic info" for performing signed division by a fixed integer D.
|
/* Computes "magic info" for performing signed division by a fixed integer D.
|
||||||
The type 'sint' is assumed to be defined as a signed integer type large enough
|
The type 'signed_type' is assumed to be defined as a signed integer type large enough
|
||||||
to hold both the dividend and the divisor.
|
to hold both the dividend and the divisor.
|
||||||
Here >> is arithmetic (signed) shift, and >>> is logical shift.
|
Here >> is arithmetic (signed) shift, and >>> is logical shift.
|
||||||
|
|
||||||
@ -55,17 +55,17 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
struct magics_info {
|
struct magics_info {
|
||||||
sint multiplier; // the "magic number" multiplier
|
signed_type multiplier; // the "magic number" multiplier
|
||||||
unsigned shift; // shift for the dividend after multiplying
|
unsigned shift; // shift for the dividend after multiplying
|
||||||
};
|
};
|
||||||
struct magics_info compute_signed_magic_info(sint D);
|
struct magics_info compute_signed_magic_info(signed_type D);
|
||||||
|
|
||||||
|
|
||||||
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
|
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
|
||||||
The type 'uint' is assumed to be defined as an unsigned integer type large enough
|
The type 'unsigned_type' is assumed to be defined as an unsigned integer type large enough
|
||||||
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
|
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
|
||||||
known to be smaller than the largest uint; if this is not known then pass
|
known to be smaller than the largest unsigned_type; if this is not known then pass
|
||||||
(sizeof(uint) * CHAR_BIT) for num_bits.
|
(sizeof(unsigned_type) * CHAR_BIT) for num_bits.
|
||||||
|
|
||||||
Assume we have a hardware register of width UINT_BITS, a known constant D which is
|
Assume we have a hardware register of width UINT_BITS, a known constant D which is
|
||||||
not zero and not a power of 2, and a variable n of width num_bits (which may be
|
not zero and not a power of 2, and a variable n of width num_bits (which may be
|
||||||
@ -105,12 +105,12 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
struct magicu_info {
|
struct magicu_info {
|
||||||
uint multiplier; // the "magic number" multiplier
|
unsigned_type multiplier; // the "magic number" multiplier
|
||||||
unsigned pre_shift; // shift for the dividend before multiplying
|
unsigned pre_shift; // shift for the dividend before multiplying
|
||||||
unsigned post_shift; //shift for the dividend after multiplying
|
unsigned post_shift; //shift for the dividend after multiplying
|
||||||
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
|
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
|
||||||
};
|
};
|
||||||
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);
|
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits);
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
}
|
}
|
||||||
|
@ -19,17 +19,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define WT_ADD_64 15
|
#define WT_ADD_64 12
|
||||||
#define WT_ADD_32 2
|
#define WT_ADD_32 2
|
||||||
#define WT_SUB_64 15
|
#define WT_SUB_64 12
|
||||||
#define WT_SUB_32 2
|
#define WT_SUB_32 2
|
||||||
#define WT_MUL_64 23
|
#define WT_MUL_64 23
|
||||||
#define WT_MULH_64 10
|
#define WT_MULH_64 10
|
||||||
#define WT_MUL_32 15
|
#define WT_MUL_32 15
|
||||||
#define WT_IMUL_32 15
|
#define WT_IMUL_32 15
|
||||||
#define WT_IMULH_64 6
|
#define WT_IMULH_64 6
|
||||||
#define WT_DIV_64 1
|
#define WT_DIV_64 4
|
||||||
#define WT_IDIV_64 1
|
#define WT_IDIV_64 4
|
||||||
#define WT_AND_64 4
|
#define WT_AND_64 4
|
||||||
#define WT_AND_32 2
|
#define WT_AND_32 2
|
||||||
#define WT_OR_64 4
|
#define WT_OR_64 4
|
||||||
|
1882
src/program.inc
1882
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user