mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-05 06:38:53 +00:00
Inlined calls for memory read
This commit is contained in:
parent
6519fed4d1
commit
2f6a599ff6
@ -59,37 +59,55 @@ namespace RandomX {
|
|||||||
(this->*generator)(instr, i);
|
(this->*generator)(instr, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genar(Instruction& instr) {
|
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||||
|
asmCode << "\ttest ebp, 63" << std::endl;
|
||||||
|
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||||
switch (instr.loca & 3)
|
switch (instr.loca & 3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
asmCode << "\tcall rx_readint_l1" << std::endl;
|
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||||
return;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
|
asmCode << "\txor rdi, rcx" << std::endl;
|
||||||
|
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
|
break;
|
||||||
default: //3
|
default: //3
|
||||||
asmCode << "\tcall rx_readint_l2" << std::endl;
|
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||||
return;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
|
asmCode << "\txor rdi, rcx" << std::endl;
|
||||||
|
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
asmCode << "\tmov rax, qword ptr [rsi+rcx*8]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genaf(Instruction& instr) {
|
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||||
|
asmCode << "\ttest ebp, 63" << std::endl;
|
||||||
|
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||||
switch (instr.loca & 3)
|
switch (instr.loca & 3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
asmCode << "\tcall rx_readfloat_l1" << std::endl;
|
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||||
return;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
|
asmCode << "\txor rdi, rcx" << std::endl;
|
||||||
|
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
|
break;
|
||||||
default: //3
|
default: //3
|
||||||
asmCode << "\tcall rx_readfloat_l2" << std::endl;
|
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||||
return;
|
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||||
|
asmCode << "\txor rdi, rcx" << std::endl;
|
||||||
|
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi+rcx*8]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
|
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
|
||||||
@ -209,35 +227,35 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tadd rax, ";
|
asmCode << "\tadd rax, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tadd eax, ";
|
asmCode << "\tadd eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tsub rax, ";
|
asmCode << "\tsub rax, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tsub eax, ";
|
asmCode << "\tsub eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\timul rax, ";
|
asmCode << "\timul rax, ";
|
||||||
if ((instr.locb & 7) >= 6) {
|
if ((instr.locb & 7) >= 6) {
|
||||||
asmCode << "rax, ";
|
asmCode << "rax, ";
|
||||||
@ -247,7 +265,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tmov rcx, ";
|
asmCode << "\tmov rcx, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
asmCode << "\tmul rcx" << std::endl;
|
asmCode << "\tmul rcx" << std::endl;
|
||||||
@ -256,7 +274,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tmov ecx, eax" << std::endl;
|
asmCode << "\tmov ecx, eax" << std::endl;
|
||||||
asmCode << "\tmov eax, ";
|
asmCode << "\tmov eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
@ -265,7 +283,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tmovsxd rcx, eax" << std::endl;
|
asmCode << "\tmovsxd rcx, eax" << std::endl;
|
||||||
if ((instr.locb & 7) >= 6) {
|
if ((instr.locb & 7) >= 6) {
|
||||||
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
|
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
|
||||||
@ -278,7 +296,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tmov rcx, ";
|
asmCode << "\tmov rcx, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
asmCode << "\timul rcx" << std::endl;
|
asmCode << "\timul rcx" << std::endl;
|
||||||
@ -287,7 +305,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
if ((instr.locb & 7) >= 6) {
|
if ((instr.locb & 7) >= 6) {
|
||||||
if (instr.imm32 == 0) {
|
if (instr.imm32 == 0) {
|
||||||
asmCode << "\tmov ecx, 1" << std::endl;
|
asmCode << "\tmov ecx, 1" << std::endl;
|
||||||
@ -308,7 +326,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tmov edx, ";
|
asmCode << "\tmov edx, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
asmCode << "\tcmp edx, -1" << std::endl;
|
asmCode << "\tcmp edx, -1" << std::endl;
|
||||||
@ -329,91 +347,91 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tand rax, ";
|
asmCode << "\tand rax, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tand eax, ";
|
asmCode << "\tand eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tor rax, ";
|
asmCode << "\tor rax, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tor eax, ";
|
asmCode << "\tor eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\txor rax, ";
|
asmCode << "\txor rax, ";
|
||||||
genbr1(instr);
|
genbr1(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\txor eax, ";
|
asmCode << "\txor eax, ";
|
||||||
genbr132(instr);
|
genbr132(instr);
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
genbr0(instr, "shl");
|
genbr0(instr, "shl");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
genbr0(instr, "shr");
|
genbr0(instr, "shr");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
genbr0(instr, "sar");
|
genbr0(instr, "sar");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
genbr0(instr, "rol");
|
genbr0(instr, "rol");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
genbr0(instr, "ror");
|
genbr0(instr, "ror");
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
|
||||||
genaf(instr);
|
genaf(instr, i);
|
||||||
genbf(instr, "addpd");
|
genbf(instr, "addpd");
|
||||||
gencf(instr);
|
gencf(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
|
||||||
genaf(instr);
|
genaf(instr, i);
|
||||||
genbf(instr, "subpd");
|
genbf(instr, "subpd");
|
||||||
gencf(instr);
|
gencf(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
|
||||||
genaf(instr);
|
genaf(instr, i);
|
||||||
genbf(instr, "mulpd");
|
genbf(instr, "mulpd");
|
||||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
||||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
||||||
@ -422,7 +440,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
|
||||||
genaf(instr);
|
genaf(instr, i);
|
||||||
genbf(instr, "divpd");
|
genbf(instr, "divpd");
|
||||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
||||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
||||||
@ -431,14 +449,14 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
|
||||||
genaf(instr);
|
genaf(instr, i);
|
||||||
asmCode << "\tandps xmm0, xmm10" << std::endl;
|
asmCode << "\tandps xmm0, xmm10" << std::endl;
|
||||||
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
|
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
|
||||||
gencf(instr);
|
gencf(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
//asmCode << "\tmov rcx, rax" << std::endl;
|
//asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
asmCode << "\tshl eax, 13" << std::endl;
|
asmCode << "\tshl eax, 13" << std::endl;
|
||||||
//asmCode << "\tand rcx, -2048" << std::endl;
|
//asmCode << "\tand rcx, -2048" << std::endl;
|
||||||
@ -472,7 +490,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
||||||
asmCode << "\t" << jumpCondition(instr);
|
asmCode << "\t" << jumpCondition(instr);
|
||||||
asmCode << " short taken_call_" << i << std::endl;
|
asmCode << " short taken_call_" << i << std::endl;
|
||||||
@ -487,7 +505,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr, i);
|
||||||
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
||||||
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
||||||
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
||||||
|
@ -38,8 +38,8 @@ namespace RandomX {
|
|||||||
static InstructionGenerator engine[256];
|
static InstructionGenerator engine[256];
|
||||||
std::stringstream asmCode;
|
std::stringstream asmCode;
|
||||||
|
|
||||||
void genar(Instruction&);
|
void genar(Instruction&, int);
|
||||||
void genaf(Instruction&);
|
void genaf(Instruction&, int);
|
||||||
void genbr0(Instruction&, const char*);
|
void genbr0(Instruction&, const char*);
|
||||||
void genbr1(Instruction&);
|
void genbr1(Instruction&);
|
||||||
void genbr132(Instruction&);
|
void genbr132(Instruction&);
|
||||||
|
@ -98,7 +98,7 @@ namespace RandomX {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct MemoryRegisters {
|
struct MemoryRegisters {
|
||||||
addr_t ma, mx;
|
addr_t mx, ma;
|
||||||
dataset_t ds;
|
dataset_t ds;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -82,7 +82,7 @@ executeProgram PROC
|
|||||||
|
|
||||||
; function arguments
|
; function arguments
|
||||||
push rcx ; RegisterFile& registerFile
|
push rcx ; RegisterFile& registerFile
|
||||||
mov edi, dword ptr [rdx] ; "mx"
|
mov rdi, qword ptr [rdx] ; "mx", "ma"
|
||||||
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
||||||
push rax
|
push rax
|
||||||
mov rsi, r8 ; convertible_t* scratchpad
|
mov rsi, r8 ; convertible_t* scratchpad
|
||||||
@ -216,7 +216,7 @@ TransformAddress MACRO reg32, reg64
|
|||||||
;xor reg32, -8 ;# C = all except 0 to 7
|
;xor reg32, -8 ;# C = all except 0 to 7
|
||||||
ENDM
|
ENDM
|
||||||
|
|
||||||
ReadMemoryRandom MACRO spmask, float
|
ReadMemoryRandom MACRO spmask
|
||||||
;# IN ecx = random 32-bit address
|
;# IN ecx = random 32-bit address
|
||||||
;# OUT rax = 64-bit integer return value
|
;# OUT rax = 64-bit integer return value
|
||||||
;# OUT xmm0 = 128-bit floating point return value
|
;# OUT xmm0 = 128-bit floating point return value
|
||||||
@ -225,19 +225,6 @@ ReadMemoryRandom MACRO spmask, float
|
|||||||
;# GLOBAL rsi = address of the scratchpad
|
;# GLOBAL rsi = address of the scratchpad
|
||||||
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
|
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
|
||||||
;# MODIFY rcx, rdx
|
;# MODIFY rcx, rdx
|
||||||
LOCAL L_prefetch_read, L_return
|
|
||||||
test ebp, 63
|
|
||||||
jz short L_prefetch_read ;# "ic" divisible by 64 -> prefetch + read
|
|
||||||
xor rdi, rcx ;# randomize "mx"
|
|
||||||
L_return:
|
|
||||||
and ecx, spmask ;# limit address to the specified scratchpad size
|
|
||||||
IF float
|
|
||||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
|
||||||
ELSE
|
|
||||||
mov rax, qword ptr [rsi+rcx*8]
|
|
||||||
ENDIF
|
|
||||||
ret
|
|
||||||
L_prefetch_read:
|
|
||||||
; prefetch cacheline "mx"
|
; prefetch cacheline "mx"
|
||||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||||
and rdi, -64 ;# align "mx" to the start of a cache line
|
and rdi, -64 ;# align "mx" to the start of a cache line
|
||||||
@ -249,34 +236,6 @@ L_prefetch_read:
|
|||||||
push rcx
|
push rcx
|
||||||
TransformAddress ecx, rcx ;# TransformAddress function
|
TransformAddress ecx, rcx ;# TransformAddress function
|
||||||
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||||
call rx_read_dataset
|
|
||||||
pop rcx
|
|
||||||
jmp short L_return
|
|
||||||
ENDM
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
rx_readint_l1:
|
|
||||||
ReadMemoryRandom 2047, 0
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
rx_readint_l2:
|
|
||||||
ReadMemoryRandom 32767, 0
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
rx_readfloat_l1:
|
|
||||||
ReadMemoryRandom 2047, 1
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
rx_readfloat_l2:
|
|
||||||
ReadMemoryRandom 32767, 1
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
rx_read_dataset:
|
|
||||||
;# IN rax = dataset address
|
|
||||||
;# IN ecx = scratchpad index - must be divisible by 8
|
|
||||||
;# IN edx = dataset index - must be divisible by 64
|
|
||||||
;# GLOBAL rsi = address of the scratchpad
|
|
||||||
;# MODIFY rax, rcx, rdx
|
|
||||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||||
lea rax, [rax+rdx] ;# dataset cache line
|
lea rax, [rax+rdx] ;# dataset cache line
|
||||||
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||||
@ -295,7 +254,18 @@ rx_read_dataset:
|
|||||||
xor qword ptr [rcx+48], rdx
|
xor qword ptr [rcx+48], rdx
|
||||||
mov rdx, qword ptr [rax+56]
|
mov rdx, qword ptr [rax+56]
|
||||||
xor qword ptr [rcx+56], rdx
|
xor qword ptr [rcx+56], rdx
|
||||||
|
pop rcx
|
||||||
ret
|
ret
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_read_l1:
|
||||||
|
ReadMemoryRandom 2047
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_read_l2:
|
||||||
|
ReadMemoryRandom 32767
|
||||||
|
|
||||||
executeProgram ENDP
|
executeProgram ENDP
|
||||||
|
|
||||||
_RANDOMX_EXECUTE_PROGRAM ENDS
|
_RANDOMX_EXECUTE_PROGRAM ENDS
|
||||||
|
4096
src/program.inc
4096
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user