mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 05:38:54 +00:00
ASM code generator for "small" programs that fit into the uOP cache
This commit is contained in:
parent
bd0dba88a8
commit
d2cb086221
@ -30,12 +30,20 @@ namespace RandomX {
|
|||||||
|
|
||||||
static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||||
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||||
|
static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" };
|
||||||
|
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||||
|
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||||
|
|
||||||
|
static const char* regA4 = "xmm12";
|
||||||
|
static const char* dblMin = "xmm13";
|
||||||
|
static const char* absMask = "xmm14";
|
||||||
|
static const char* signMask = "xmm15";
|
||||||
static const char* regMx = "rbp";
|
static const char* regMx = "rbp";
|
||||||
static const char* regIc = "ebx";
|
static const char* regIc = "rbx";
|
||||||
|
static const char* regIc32 = "ebx";
|
||||||
static const char* regIc8 = "bl";
|
static const char* regIc8 = "bl";
|
||||||
static const char* regStackBeginAddr = "rdi";
|
static const char* regDatasetAddr = "rdi";
|
||||||
static const char* regScratchpadAddr = "rsi";
|
static const char* regScratchpadAddr = "rsi";
|
||||||
|
|
||||||
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
||||||
@ -49,226 +57,217 @@ namespace RandomX {
|
|||||||
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
|
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
|
||||||
*(((uint32_t*)&instr) + j) = gen();
|
*(((uint32_t*)&instr) + j) = gen();
|
||||||
}
|
}
|
||||||
|
instr.src %= RegistersCount;
|
||||||
|
instr.dst %= RegistersCount;
|
||||||
generateCode(instr, i);
|
generateCode(instr, i);
|
||||||
asmCode << std::endl;
|
//asmCode << std::endl;
|
||||||
}
|
}
|
||||||
if(ProgramLength > 0)
|
|
||||||
asmCode << "\tjmp rx_i_0" << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
||||||
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
|
asmCode << "\t; " << instr;
|
||||||
asmCode << "\tdec " << regIc << std::endl;
|
|
||||||
asmCode << "\tjz rx_finish" << std::endl;
|
|
||||||
auto generator = engine[instr.opcode];
|
auto generator = engine[instr.opcode];
|
||||||
(this->*generator)(instr, i);
|
(this->*generator)(instr, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl;
|
||||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
||||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
}
|
||||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
|
||||||
asmCode << "\tcall rx_read" << std::endl;
|
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
|
||||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
return instr.imm32 & ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||||
if ((instr.loca & 192) == 0)
|
}
|
||||||
asmCode << "\txor " << regMx << ", rax" << std::endl;
|
|
||||||
if (instr.loca & 15) {
|
//1 uOP
|
||||||
if (instr.loca & 3) {
|
void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
}
|
asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||||
else {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
asmCode << "\tadd " << regR[instr.dst] << ", " << instr.imm32 << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
//2.75 uOP
|
||||||
gena(instr, i);
|
void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rax*8]" << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
}
|
genAddressReg(instr);
|
||||||
|
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
|
||||||
gena(instr, i);
|
|
||||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rax*8]" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
|
|
||||||
if (instr.locb & 1) {
|
|
||||||
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
|
|
||||||
} else {
|
|
||||||
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbia(Instruction& instr) {
|
|
||||||
if (instr.locb & 3) {
|
|
||||||
asmCode << regR[instr.regb % RegistersCount] << std::endl;
|
|
||||||
} else {
|
|
||||||
asmCode << instr.imm32 << std::endl;;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbia32(Instruction& instr) {
|
|
||||||
if (instr.locb & 3) {
|
|
||||||
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
asmCode << instr.imm32 << std::endl;;
|
asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genbf(Instruction& instr, const char* instrx86) {
|
//1 uOP
|
||||||
asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
|
void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) {
|
||||||
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << instr.imm32 << std::noshowpos << "]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::gencr(Instruction& instr, bool rax = true) {
|
//1 uOP
|
||||||
if (instr.locc & 16) { //write to register
|
void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", " << (rax ? "rax" : "rcx") << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
if (trace) {
|
asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << (rax ? "rax" : "rcx") << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else { //write to scratchpad
|
else {
|
||||||
if (rax)
|
asmCode << "\tsub " << regR[instr.dst] << ", " << instr.imm32 << std::endl;
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
|
||||||
if (instr.locc & 15) {
|
|
||||||
if (instr.locc & 3) {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
|
||||||
if (trace) {
|
|
||||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], rcx" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::gencf(Instruction& instr, bool move = true) {
|
//2.75 uOP
|
||||||
if(move)
|
void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
|
||||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
const char* store = (instr.locc & 128) ? "movhpd" : "movlpd";
|
genAddressReg(instr);
|
||||||
if (instr.locc & 16) { //write to scratchpad
|
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
|
||||||
if (instr.locc & 15) {
|
|
||||||
if (instr.locc & 3) {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL3 - 1) << std::endl;
|
|
||||||
}
|
|
||||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
|
||||||
}
|
}
|
||||||
if (trace) {
|
else {
|
||||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + " << regIc << " * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) {
|
||||||
asmCode << "\tadd rax, ";
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << instr.imm32 << std::noshowpos << "]" << std::endl;
|
||||||
genbia(instr);
|
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tadd eax, ";
|
if (instr.src != instr.dst) {
|
||||||
genbia32(instr);
|
asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||||
gencr(instr);
|
}
|
||||||
}
|
else {
|
||||||
|
asmCode << "\timul " << regR[instr.dst] << ", " << instr.imm32 << std::endl;
|
||||||
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
asmCode << "\tsub rax, ";
|
|
||||||
genbia(instr);
|
|
||||||
gencr(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
asmCode << "\tsub eax, ";
|
|
||||||
genbia32(instr);
|
|
||||||
gencr(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
asmCode << "\timul rax, ";
|
|
||||||
if ((instr.locb & 3) == 0) {
|
|
||||||
asmCode << "rax, ";
|
|
||||||
}
|
}
|
||||||
genbia(instr);
|
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
//2.75 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov rcx, ";
|
if (instr.src != instr.dst) {
|
||||||
genbia(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tmul rcx" << std::endl;
|
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
}
|
||||||
gencr(instr);
|
else {
|
||||||
|
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
|
//4 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov ecx, eax" << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
asmCode << "\tmov eax, ";
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
genbia32(instr);
|
asmCode << "\tmul " << regR[instr.src] << std::endl;
|
||||||
asmCode << "\timul rax, rcx" << std::endl;
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
gencr(instr);
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tmov eax, " << instr.imm32 << std::endl;
|
||||||
|
asmCode << "\tmul " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\tadd " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
|
//5.75 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
|
||||||
asmCode << "\tmovsxd rcx, eax" << std::endl;
|
if (instr.src != instr.dst) {
|
||||||
if ((instr.locb & 3) == 0) {
|
genAddressReg(instr, "ecx");
|
||||||
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\tmul qword ptr [rsi+rcx]" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\tmul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
|
}
|
||||||
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
//4 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\timul " << regR[instr.src] << std::endl;
|
||||||
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
|
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
|
||||||
|
asmCode << "\timul " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\tadd " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//5.75 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
genAddressReg(instr, "ecx");
|
||||||
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\timul qword ptr [rsi+rcx]" << std::endl;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
asmCode << "\tmovsxd rax, " << regR32[instr.regb % RegistersCount] << std::endl;
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
asmCode << "\timul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
}
|
}
|
||||||
asmCode << "\timul rax, rcx" << std::endl;
|
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov rcx, ";
|
asmCode << "\tneg " << regR[instr.dst] << std::endl;
|
||||||
genbia(instr);
|
|
||||||
asmCode << "\timul rcx" << std::endl;
|
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
|
||||||
if (instr.locb & 3) {
|
if (instr.src != instr.dst) {
|
||||||
#ifdef MAGIC_DIVISION
|
asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||||
if (instr.imm32 != 0) {
|
}
|
||||||
uint32_t divisor = instr.imm32;
|
else {
|
||||||
asmCode << "\t; magic divide by " << divisor << std::endl;
|
asmCode << "\txor " << regR[instr.dst] << ", " << instr.imm32 << std::endl;
|
||||||
if (divisor & (divisor - 1)) {
|
}
|
||||||
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
|
}
|
||||||
|
|
||||||
|
//2.75 uOP
|
||||||
|
void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
genAddressReg(instr);
|
||||||
|
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//1.75 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
||||||
|
asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tror " << regR[instr.dst] << ", " << (instr.imm32 & 63) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//1.75 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
|
||||||
|
asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\trol " << regR[instr.dst] << ", " << (instr.imm32 & 63) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//~6 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_IDIV_C(Instruction& instr, int i) {
|
||||||
|
if (instr.imm32 != 0) {
|
||||||
|
uint32_t divisor = instr.imm32;
|
||||||
|
if (divisor & (divisor - 1)) {
|
||||||
|
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
|
||||||
|
if (mi.pre_shift == 0 && !mi.increment) {
|
||||||
|
asmCode << "\tmov rax, " << mi.multiplier << std::endl;
|
||||||
|
asmCode << "\tmul " << regR[instr.dst] << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
if (mi.pre_shift > 0)
|
if (mi.pre_shift > 0)
|
||||||
asmCode << "\tshr rax, " << mi.pre_shift << std::endl;
|
asmCode << "\tshr rax, " << mi.pre_shift << std::endl;
|
||||||
if (mi.increment) {
|
if (mi.increment) {
|
||||||
@ -277,326 +276,249 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
asmCode << "\tmov rcx, " << mi.multiplier << std::endl;
|
asmCode << "\tmov rcx, " << mi.multiplier << std::endl;
|
||||||
asmCode << "\tmul rcx" << std::endl;
|
asmCode << "\tmul rcx" << std::endl;
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
|
||||||
if (mi.post_shift > 0)
|
|
||||||
asmCode << "\tshr rax, " << mi.post_shift << std::endl;
|
|
||||||
}
|
|
||||||
else { //divisor is a power of two
|
|
||||||
int shift = 0;
|
|
||||||
while (divisor >>= 1)
|
|
||||||
++shift;
|
|
||||||
if(shift > 0)
|
|
||||||
asmCode << "\tshr rax, " << shift << std::endl;
|
|
||||||
}
|
}
|
||||||
|
if (mi.post_shift > 0)
|
||||||
|
asmCode << "\tshr rdx, " << mi.post_shift << std::endl;
|
||||||
|
asmCode << "\tadd " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
}
|
}
|
||||||
#else
|
else { //divisor is a power of two
|
||||||
if (instr.imm32 == 0) {
|
|
||||||
asmCode << "\tmov ecx, 1" << std::endl;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tmov ecx, " << instr.imm32 << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asmCode << "\tmov ecx, 1" << std::endl;
|
|
||||||
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\ttest edx, edx" << std::endl;
|
|
||||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
|
||||||
#ifdef MAGIC_DIVISION
|
|
||||||
asmCode << "\txor edx, edx" << std::endl;
|
|
||||||
asmCode << "\tdiv rcx" << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#ifndef MAGIC_DIVISION
|
|
||||||
asmCode << "\txor edx, edx" << std::endl;
|
|
||||||
asmCode << "\tdiv rcx" << std::endl;
|
|
||||||
#endif
|
|
||||||
gencr(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
if (instr.locb & 3) {
|
|
||||||
#ifdef MAGIC_DIVISION
|
|
||||||
int64_t divisor = instr.imm32;
|
|
||||||
asmCode << "\t; magic divide by " << divisor << std::endl;
|
|
||||||
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
|
||||||
// +/- power of two
|
|
||||||
bool negative = divisor < 0;
|
|
||||||
if (negative)
|
|
||||||
divisor = -divisor;
|
|
||||||
int shift = 0;
|
int shift = 0;
|
||||||
uint64_t unsignedDivisor = divisor;
|
while (divisor >>= 1)
|
||||||
while (unsignedDivisor >>= 1)
|
|
||||||
++shift;
|
++shift;
|
||||||
if (shift > 0) {
|
if(shift > 0)
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
asmCode << "\tshr " << regR[instr.dst] << ", " << shift << std::endl;
|
||||||
asmCode << "\tsar rcx, 63" << std::endl;
|
|
||||||
uint32_t mask = (1ULL << shift) + 0xFFFFFFFF;
|
|
||||||
asmCode << "\tand ecx, 0" << std::hex << mask << std::dec << "h" << std::endl;
|
|
||||||
asmCode << "\tadd rax, rcx" << std::endl;
|
|
||||||
asmCode << "\tsar rax, " << shift << std::endl;
|
|
||||||
}
|
|
||||||
if (negative)
|
|
||||||
asmCode << "\tneg rax" << std::endl;
|
|
||||||
}
|
}
|
||||||
else if (divisor != 0) {
|
}
|
||||||
magics_info mi = compute_signed_magic_info(divisor);
|
}
|
||||||
if ((divisor >= 0) != (mi.multiplier >= 0))
|
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
//~8.5 uOPs
|
||||||
asmCode << "\tmov rdx, " << mi.multiplier << std::endl;
|
void AssemblyGeneratorX86::h_ISDIV_C(Instruction& instr, int i) {
|
||||||
asmCode << "\timul rdx" << std::endl;
|
int64_t divisor = instr.imm32;
|
||||||
asmCode << "\tmov rax, rdx" << std::endl;
|
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||||
asmCode << "\txor edx, edx" << std::endl;
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
bool haveSF = false;
|
// +/- power of two
|
||||||
if (divisor > 0 && mi.multiplier < 0) {
|
bool negative = divisor < 0;
|
||||||
asmCode << "\tadd rax, rcx" << std::endl;
|
if (negative)
|
||||||
haveSF = true;
|
divisor = -divisor;
|
||||||
}
|
int shift = 0;
|
||||||
if (divisor < 0 && mi.multiplier > 0) {
|
uint64_t unsignedDivisor = divisor;
|
||||||
asmCode << "\tsub rax, rcx" << std::endl;
|
while (unsignedDivisor >>= 1)
|
||||||
haveSF = true;
|
++shift;
|
||||||
}
|
if (shift > 0) {
|
||||||
if (mi.shift > 0) {
|
asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
asmCode << "\tsar rax, " << mi.shift << std::endl;
|
asmCode << "\tsar rcx, 63" << std::endl;
|
||||||
haveSF = true;
|
uint32_t mask = (1ULL << shift) + 0xFFFFFFFF;
|
||||||
}
|
asmCode << "\tand ecx, 0" << std::hex << mask << std::dec << "h" << std::endl;
|
||||||
if (!haveSF)
|
asmCode << "\tadd rax, rcx" << std::endl;
|
||||||
asmCode << "\ttest rax, rax" << std::endl;
|
asmCode << "\tsar rax, " << shift << std::endl;
|
||||||
asmCode << "\tsets dl" << std::endl;
|
|
||||||
asmCode << "\tadd rax, rdx" << std::endl;
|
|
||||||
}
|
}
|
||||||
#else
|
if (negative)
|
||||||
asmCode << "\tmov edx, " << instr.imm32 << std::endl;
|
asmCode << "\tneg rax" << std::endl;
|
||||||
#endif
|
asmCode << "\tadd " << regR[instr.dst] << ", rax" << std::endl;
|
||||||
}
|
}
|
||||||
else {
|
else if (divisor != 0) {
|
||||||
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
magics_info mi = compute_signed_magic_info(divisor);
|
||||||
#ifndef MAGIC_DIVISION
|
asmCode << "\tmov rax, " << mi.multiplier << std::endl;
|
||||||
|
asmCode << "\timul " << regR[instr.dst] << std::endl;
|
||||||
|
//asmCode << "\tmov rax, rdx" << std::endl;
|
||||||
|
asmCode << "\txor eax, eax" << std::endl;
|
||||||
|
bool haveSF = false;
|
||||||
|
if (divisor > 0 && mi.multiplier < 0) {
|
||||||
|
asmCode << "\tadd rdx, " << regR[instr.dst] << std::endl;
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (divisor < 0 && mi.multiplier > 0) {
|
||||||
|
asmCode << "\tsub rdx, " << regR[instr.dst] << std::endl;
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (mi.shift > 0) {
|
||||||
|
asmCode << "\tsar rdx, " << mi.shift << std::endl;
|
||||||
|
haveSF = true;
|
||||||
|
}
|
||||||
|
if (!haveSF)
|
||||||
|
asmCode << "\ttest rdx, rdx" << std::endl;
|
||||||
|
asmCode << "\tsets al" << std::endl;
|
||||||
|
asmCode << "\tadd rdx, rax" << std::endl;
|
||||||
|
asmCode << "\tadd " << regR[instr.dst] << ", rdx" << std::endl;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
asmCode << "\tcmp edx, -1" << std::endl;
|
|
||||||
asmCode << "\tjne short body_idiv_" << i << std::endl;
|
|
||||||
asmCode << "\tneg rax" << std::endl;
|
|
||||||
asmCode << "\tjmp short result_idiv_" << i << std::endl;
|
|
||||||
asmCode << "body_idiv_" << i << ":" << std::endl;
|
|
||||||
asmCode << "\tmov ecx, 1" << std::endl;
|
|
||||||
asmCode << "\ttest edx, edx" << std::endl;
|
|
||||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
|
||||||
asmCode << "\tmovsxd rcx, ecx" << std::endl;
|
|
||||||
asmCode << "\tcqo" << std::endl;
|
|
||||||
asmCode << "\tidiv rcx" << std::endl;
|
|
||||||
asmCode << "result_idiv_" << i << ":" << std::endl;
|
|
||||||
#ifdef MAGIC_DIVISION
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
//1 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPSWAP_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tand rax, ";
|
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
||||||
genbia(instr);
|
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPADD_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tand eax, ";
|
instr.dst %= 4;
|
||||||
genbia32(instr);
|
instr.src %= 4;
|
||||||
gencr(instr);
|
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
//5 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPADD_M(Instruction& instr, int i) {
|
||||||
asmCode << "\tor rax, ";
|
instr.dst %= 4;
|
||||||
genbia(instr);
|
genAddressReg(instr);
|
||||||
gencr(instr);
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\taddpd " << regF[instr.dst] << ", xmm12" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPSUB_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tor eax, ";
|
instr.dst %= 4;
|
||||||
genbia32(instr);
|
instr.src %= 4;
|
||||||
gencr(instr);
|
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
//5 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPSUB_M(Instruction& instr, int i) {
|
||||||
asmCode << "\txor rax, ";
|
instr.dst %= 4;
|
||||||
genbia(instr);
|
genAddressReg(instr);
|
||||||
gencr(instr);
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\tsubpd " << regF[instr.dst] << ", xmm12" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPNEG_R(Instruction& instr, int i) {
|
||||||
asmCode << "\txor eax, ";
|
instr.dst %= 4;
|
||||||
genbia32(instr);
|
asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl;
|
||||||
gencr(instr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
//1 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPMUL_R(Instruction& instr, int i) {
|
||||||
genbiashift(instr, "shl");
|
instr.dst %= 4;
|
||||||
gencr(instr);
|
instr.src %= 4;
|
||||||
|
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
//6 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPMUL_M(Instruction& instr, int i) {
|
||||||
genbiashift(instr, "shr");
|
instr.dst %= 4;
|
||||||
gencr(instr);
|
genAddressReg(instr);
|
||||||
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\tmulpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
||||||
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
//2 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPDIV_R(Instruction& instr, int i) {
|
||||||
genbiashift(instr, "sar");
|
instr.dst %= 4;
|
||||||
gencr(instr);
|
instr.src %= 4;
|
||||||
|
asmCode << "\tdivpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
//6 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPDIV_M(Instruction& instr, int i) {
|
||||||
genbiashift(instr, "rol");
|
instr.dst %= 4;
|
||||||
gencr(instr);
|
genAddressReg(instr);
|
||||||
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
||||||
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
//1 uOP
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_FPSQRT_R(Instruction& instr, int i) {
|
||||||
genbiashift(instr, "ror");
|
instr.dst %= 4;
|
||||||
gencr(instr);
|
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
|
//6 uOPs
|
||||||
genaf(instr, i);
|
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
|
||||||
genbf(instr, "addpd");
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
gencf(instr);
|
int rotate = (13 - (instr.alt & 63)) & 63;
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
|
|
||||||
genaf(instr, i);
|
|
||||||
genbf(instr, "subpd");
|
|
||||||
gencf(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
|
|
||||||
genaf(instr, i);
|
|
||||||
genbf(instr, "mulpd");
|
|
||||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
|
||||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
|
||||||
asmCode << "\tandps xmm0, xmm1" << std::endl;
|
|
||||||
gencf(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
|
|
||||||
genaf(instr, i);
|
|
||||||
genbf(instr, "divpd");
|
|
||||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
|
||||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
|
||||||
asmCode << "\tandps xmm0, xmm1" << std::endl;
|
|
||||||
gencf(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
|
|
||||||
genaf(instr, i);
|
|
||||||
asmCode << "\tandps xmm0, xmm10" << std::endl;
|
|
||||||
asmCode << "\tsqrtpd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
|
||||||
gencf(instr, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
|
||||||
int rotate = (13 - (instr.imm8 & 63)) & 63;
|
|
||||||
if (rotate != 0)
|
if (rotate != 0)
|
||||||
asmCode << "\trol rax, " << rotate << std::endl;
|
asmCode << "\trol rax, " << rotate << std::endl;
|
||||||
asmCode << "\tand eax, 24576" << std::endl;
|
asmCode << "\tand eax, 24576" << std::endl;
|
||||||
asmCode << "\tor eax, 40896" << std::endl;
|
asmCode << "\tor eax, 40896" << std::endl;
|
||||||
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
asmCode << "\tmov dword ptr [rsp-8], eax" << std::endl;
|
||||||
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
|
asmCode << "\tldmxcsr dword ptr [rsp-8]" << std::endl;
|
||||||
gencr(instr, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
|
static inline const char* condition(Instruction& instr, bool invert = false) {
|
||||||
switch ((instr.locb & 7) ^ invert)
|
switch (((instr.alt >> 2) & 7) ^ invert)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
return "jbe";
|
return "be";
|
||||||
case 1:
|
case 1:
|
||||||
return "ja";
|
return "a";
|
||||||
case 2:
|
case 2:
|
||||||
return "js";
|
return "s";
|
||||||
case 3:
|
case 3:
|
||||||
return "jns";
|
return "ns";
|
||||||
case 4:
|
case 4:
|
||||||
return "jo";
|
return "o";
|
||||||
case 5:
|
case 5:
|
||||||
return "jno";
|
return "no";
|
||||||
case 6:
|
case 6:
|
||||||
return "jl";
|
return "l";
|
||||||
case 7:
|
case 7:
|
||||||
return "jge";
|
return "ge";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_JUMP(Instruction& instr, int i) {
|
//4 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
|
||||||
gencr(instr);
|
asmCode << "\txor ecx, ecx" << std::endl;
|
||||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
asmCode << "\tcmp " << regR32[instr.src] << ", " << instr.imm32 << std::endl;
|
||||||
asmCode << "\t" << jumpCondition(instr);
|
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
|
||||||
asmCode << " rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
|
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
//6 uOPs
|
||||||
genar(instr, i);
|
void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) {
|
||||||
gencr(instr);
|
asmCode << "\txor ecx, ecx" << std::endl;
|
||||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
genAddressReg(instr);
|
||||||
asmCode << "\t" << jumpCondition(instr, true);
|
asmCode << "\tcmp dword ptr [rsi+rax], " << instr.imm32 << std::endl;
|
||||||
asmCode << " short rx_i_" << wrapInstr(i + 1) << std::endl;
|
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
|
||||||
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
|
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
|
||||||
}
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
|
||||||
genar(instr, i);
|
|
||||||
gencr(instr);
|
|
||||||
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
|
||||||
asmCode << "\tje short rx_i_" << wrapInstr(i + 1) << std::endl;
|
|
||||||
asmCode << "\tret" << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
||||||
|
|
||||||
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
||||||
INST_HANDLE(ADD_64)
|
//Integer
|
||||||
INST_HANDLE(ADD_32)
|
INST_HANDLE(IADD_R)
|
||||||
INST_HANDLE(SUB_64)
|
INST_HANDLE(IADD_M)
|
||||||
INST_HANDLE(SUB_32)
|
INST_HANDLE(IADD_RC)
|
||||||
INST_HANDLE(MUL_64)
|
INST_HANDLE(ISUB_R)
|
||||||
INST_HANDLE(MULH_64)
|
INST_HANDLE(ISUB_M)
|
||||||
INST_HANDLE(MUL_32)
|
INST_HANDLE(IMUL_9C)
|
||||||
INST_HANDLE(IMUL_32)
|
INST_HANDLE(IMUL_R)
|
||||||
INST_HANDLE(IMULH_64)
|
INST_HANDLE(IMUL_M)
|
||||||
INST_HANDLE(DIV_64)
|
INST_HANDLE(IMULH_R)
|
||||||
INST_HANDLE(IDIV_64)
|
INST_HANDLE(IMULH_M)
|
||||||
INST_HANDLE(AND_64)
|
INST_HANDLE(ISMULH_R)
|
||||||
INST_HANDLE(AND_32)
|
INST_HANDLE(ISMULH_M)
|
||||||
INST_HANDLE(OR_64)
|
INST_HANDLE(IDIV_C)
|
||||||
INST_HANDLE(OR_32)
|
INST_HANDLE(ISDIV_C)
|
||||||
INST_HANDLE(XOR_64)
|
INST_HANDLE(INEG_R)
|
||||||
INST_HANDLE(XOR_32)
|
INST_HANDLE(IXOR_R)
|
||||||
INST_HANDLE(SHL_64)
|
INST_HANDLE(IXOR_M)
|
||||||
INST_HANDLE(SHR_64)
|
INST_HANDLE(IROR_R)
|
||||||
INST_HANDLE(SAR_64)
|
INST_HANDLE(IROL_R)
|
||||||
INST_HANDLE(ROL_64)
|
|
||||||
INST_HANDLE(ROR_64)
|
//Common floating point
|
||||||
INST_HANDLE(FPADD)
|
INST_HANDLE(FPSWAP_R)
|
||||||
INST_HANDLE(FPSUB)
|
|
||||||
INST_HANDLE(FPMUL)
|
//Floating point group F
|
||||||
INST_HANDLE(FPDIV)
|
INST_HANDLE(FPADD_R)
|
||||||
INST_HANDLE(FPSQRT)
|
INST_HANDLE(FPADD_M)
|
||||||
INST_HANDLE(FPROUND)
|
INST_HANDLE(FPSUB_R)
|
||||||
INST_HANDLE(JUMP)
|
INST_HANDLE(FPSUB_M)
|
||||||
INST_HANDLE(CALL)
|
INST_HANDLE(FPNEG_R)
|
||||||
INST_HANDLE(RET)
|
|
||||||
|
//Floating point group E
|
||||||
|
INST_HANDLE(FPMUL_R)
|
||||||
|
INST_HANDLE(FPMUL_M)
|
||||||
|
INST_HANDLE(FPDIV_R)
|
||||||
|
INST_HANDLE(FPDIV_M)
|
||||||
|
INST_HANDLE(FPSQRT_R)
|
||||||
|
|
||||||
|
//Control
|
||||||
|
INST_HANDLE(COND_R)
|
||||||
|
INST_HANDLE(COND_M)
|
||||||
|
INST_HANDLE(CFROUND)
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -47,39 +47,43 @@ namespace RandomX {
|
|||||||
void genbf(Instruction&, const char*);
|
void genbf(Instruction&, const char*);
|
||||||
void gencr(Instruction&, bool);
|
void gencr(Instruction&, bool);
|
||||||
void gencf(Instruction&, bool);
|
void gencf(Instruction&, bool);
|
||||||
|
void genAddressReg(Instruction&, const char*);
|
||||||
|
int32_t genAddressImm(Instruction&);
|
||||||
|
|
||||||
void generateCode(Instruction&, int);
|
void generateCode(Instruction&, int);
|
||||||
|
|
||||||
void h_ADD_64(Instruction&, int);
|
void h_IADD_R(Instruction&, int);
|
||||||
void h_ADD_32(Instruction&, int);
|
void h_IADD_M(Instruction&, int);
|
||||||
void h_SUB_64(Instruction&, int);
|
void h_IADD_RC(Instruction&, int);
|
||||||
void h_SUB_32(Instruction&, int);
|
void h_ISUB_R(Instruction&, int);
|
||||||
void h_MUL_64(Instruction&, int);
|
void h_ISUB_M(Instruction&, int);
|
||||||
void h_MULH_64(Instruction&, int);
|
void h_IMUL_9C(Instruction&, int);
|
||||||
void h_MUL_32(Instruction&, int);
|
void h_IMUL_R(Instruction&, int);
|
||||||
void h_IMUL_32(Instruction&, int);
|
void h_IMUL_M(Instruction&, int);
|
||||||
void h_IMULH_64(Instruction&, int);
|
void h_IMULH_R(Instruction&, int);
|
||||||
void h_DIV_64(Instruction&, int);
|
void h_IMULH_M(Instruction&, int);
|
||||||
void h_IDIV_64(Instruction&, int);
|
void h_ISMULH_R(Instruction&, int);
|
||||||
void h_AND_64(Instruction&, int);
|
void h_ISMULH_M(Instruction&, int);
|
||||||
void h_AND_32(Instruction&, int);
|
void h_IDIV_C(Instruction&, int);
|
||||||
void h_OR_64(Instruction&, int);
|
void h_ISDIV_C(Instruction&, int);
|
||||||
void h_OR_32(Instruction&, int);
|
void h_INEG_R(Instruction&, int);
|
||||||
void h_XOR_64(Instruction&, int);
|
void h_IXOR_R(Instruction&, int);
|
||||||
void h_XOR_32(Instruction&, int);
|
void h_IXOR_M(Instruction&, int);
|
||||||
void h_SHL_64(Instruction&, int);
|
void h_IROR_R(Instruction&, int);
|
||||||
void h_SHR_64(Instruction&, int);
|
void h_IROL_R(Instruction&, int);
|
||||||
void h_SAR_64(Instruction&, int);
|
void h_FPSWAP_R(Instruction&, int);
|
||||||
void h_ROL_64(Instruction&, int);
|
void h_FPADD_R(Instruction&, int);
|
||||||
void h_ROR_64(Instruction&, int);
|
void h_FPADD_M(Instruction&, int);
|
||||||
void h_FPADD(Instruction&, int);
|
void h_FPSUB_R(Instruction&, int);
|
||||||
void h_FPSUB(Instruction&, int);
|
void h_FPSUB_M(Instruction&, int);
|
||||||
void h_FPMUL(Instruction&, int);
|
void h_FPNEG_R(Instruction&, int);
|
||||||
void h_FPDIV(Instruction&, int);
|
void h_FPMUL_R(Instruction&, int);
|
||||||
void h_FPSQRT(Instruction&, int);
|
void h_FPMUL_M(Instruction&, int);
|
||||||
void h_FPROUND(Instruction&, int);
|
void h_FPDIV_R(Instruction&, int);
|
||||||
void h_JUMP(Instruction&, int);
|
void h_FPDIV_M(Instruction&, int);
|
||||||
void h_CALL(Instruction&, int);
|
void h_FPSQRT_R(Instruction&, int);
|
||||||
void h_RET(Instruction&, int);
|
void h_COND_R(Instruction&, int);
|
||||||
|
void h_COND_M(Instruction&, int);
|
||||||
|
void h_CFROUND(Instruction&, int);
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -25,6 +25,12 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
|
constexpr int mantissaSize = 52;
|
||||||
|
constexpr int exponentSize = 11;
|
||||||
|
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
||||||
|
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
||||||
|
constexpr int exponentBias = 1023;
|
||||||
|
|
||||||
CompiledVirtualMachine::CompiledVirtualMachine() {
|
CompiledVirtualMachine::CompiledVirtualMachine() {
|
||||||
totalSize = 0;
|
totalSize = 0;
|
||||||
}
|
}
|
||||||
@ -37,25 +43,42 @@ namespace RandomX {
|
|||||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
||||||
|
auto exponent = entropy >> 60; //0..15
|
||||||
|
auto mantissa = entropy & mantissaMask;
|
||||||
|
exponent += exponentBias;
|
||||||
|
exponent &= exponentMask;
|
||||||
|
exponent <<= mantissaSize;
|
||||||
|
return exponent | mantissa;
|
||||||
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
||||||
Pcg32 gen(seed);
|
Pcg32 gen(seed);
|
||||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||||
*(((uint32_t*)&reg) + i) = gen();
|
*(((uint32_t*)&reg) + i) = gen();
|
||||||
}
|
}
|
||||||
FPINIT();
|
FPINIT();
|
||||||
for (int i = 0; i < RegistersCount; ++i) {
|
/*for (int i = 0; i < RegistersCount / 2; ++i) {
|
||||||
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
||||||
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < RegistersCount / 2; ++i) {
|
||||||
|
reg.g[i].lo.f64 = std::abs((double)reg.g[i].lo.i64);
|
||||||
|
reg.g[i].hi.f64 = std::abs((double)reg.g[i].hi.i64);
|
||||||
|
}*/
|
||||||
|
for (int i = 0; i < RegistersCount / 2; ++i) {
|
||||||
|
reg.a[i].lo.u64 = getSmallPositiveFloatBits(reg.f[i].lo.u64);
|
||||||
|
reg.a[i].hi.u64 = getSmallPositiveFloatBits(reg.f[i].hi.u64);
|
||||||
|
}
|
||||||
compiler.generateProgram(gen);
|
compiler.generateProgram(gen);
|
||||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
||||||
mem.mx = *(((uint32_t*)seed) + 5);
|
mem.mx = *(((uint32_t*)seed) + 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::execute() {
|
void CompiledVirtualMachine::execute() {
|
||||||
//executeProgram(reg, mem, scratchpad, readDataset);
|
executeProgram(reg, mem, scratchpad, InstructionCount);
|
||||||
totalSize += compiler.getCodeSize();
|
totalSize += compiler.getCodeSize();
|
||||||
compiler.getProgramFunc()(reg, mem, scratchpad);
|
//compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||||
#ifdef TRACEVM
|
#ifdef TRACEVM
|
||||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||||
|
@ -18,54 +18,391 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Instruction.hpp"
|
#include "Instruction.hpp"
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
void Instruction::print(std::ostream& os) const {
|
void Instruction::print(std::ostream& os) const {
|
||||||
os << " A: loc = " << std::dec << (loca & 7) << ", reg: " << (rega & 7) << std::endl;
|
os << names[opcode] << " ";
|
||||||
os << " B: loc = " << (locb & 7) << ", reg: " << (regb & 7) << std::endl;
|
auto handler = engine[opcode];
|
||||||
os << " C: loc = " << (locc & 7) << ", reg: " << (regc & 7) << std::endl;
|
(this->*handler)(os);
|
||||||
os << " addra = " << std::hex << addra << std::endl;
|
}
|
||||||
os << " addrc = " << addrc << std::endl;
|
|
||||||
os << " imm8 = " << std::dec << (int)imm8 << std::endl;
|
void Instruction::genAddressReg(std::ostream& os) const {
|
||||||
os << " imm32 = " << imm32 << std::endl;
|
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::genAddressImm(std::ostream& os) const {
|
||||||
|
os << ((alt % 4) ? "L1" : "L2") << "[" << (imm32 & ((alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IADD_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IADD_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IADD_RC(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
//1 uOP
|
||||||
|
void Instruction::h_ISUB_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_ISUB_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IMUL_9C(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IMUL_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IMUL_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IMULH_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IMULH_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_ISMULH_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_ISMULH_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_INEG_R(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IXOR_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IXOR_M(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", ";
|
||||||
|
genAddressImm(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IROR_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << (imm32 & 63) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IROL_R(std::ostream& os) const {
|
||||||
|
if (src != dst) {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
os << "r" << (int)dst << ", " << (imm32 & 63) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_IDIV_C(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << (uint32_t)imm32 << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_ISDIV_C(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPSWAP_R(std::ostream& os) const {
|
||||||
|
const char reg = (dst >= 4) ? 'e' : 'f';
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << reg << dstIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPADD_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
auto srcIndex = src % 4;
|
||||||
|
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPADD_M(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "f" << dstIndex << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPSUB_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
auto srcIndex = src % 4;
|
||||||
|
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPSUB_M(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "f" << dstIndex << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPNEG_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "f" << dstIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPMUL_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
auto srcIndex = src % 4;
|
||||||
|
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPMUL_M(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "e" << dstIndex << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPDIV_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
auto srcIndex = src % 4;
|
||||||
|
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPDIV_M(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "e" << dstIndex << ", ";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_FPSQRT_R(std::ostream& os) const {
|
||||||
|
auto dstIndex = dst % 4;
|
||||||
|
os << "e" << dstIndex << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_CFROUND(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << (alt & 63) << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const char* condition(int index) {
|
||||||
|
switch (index)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return "be";
|
||||||
|
case 1:
|
||||||
|
return "ab";
|
||||||
|
case 2:
|
||||||
|
return "sg";
|
||||||
|
case 3:
|
||||||
|
return "ns";
|
||||||
|
case 4:
|
||||||
|
return "of";
|
||||||
|
case 5:
|
||||||
|
return "no";
|
||||||
|
case 6:
|
||||||
|
return "lt";
|
||||||
|
case 7:
|
||||||
|
return "ge";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_COND_R(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << condition((alt >> 2) & 7) << "(r" << (int)src << ", " << imm32 << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Instruction::h_COND_M(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", " << condition((alt >> 2) & 7) << "(";
|
||||||
|
genAddressReg(os);
|
||||||
|
os << ", " << imm32 << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_NAME(x) REPN(#x, WT(x))
|
#define INST_NAME(x) REPN(#x, WT(x))
|
||||||
|
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
||||||
|
|
||||||
const char* Instruction::names[256] = {
|
const char* Instruction::names[256] = {
|
||||||
INST_NAME(ADD_64)
|
//Integer
|
||||||
INST_NAME(ADD_32)
|
INST_NAME(IADD_R)
|
||||||
INST_NAME(SUB_64)
|
INST_NAME(IADD_M)
|
||||||
INST_NAME(SUB_32)
|
INST_NAME(IADD_RC)
|
||||||
INST_NAME(MUL_64)
|
INST_NAME(ISUB_R)
|
||||||
INST_NAME(MULH_64)
|
INST_NAME(ISUB_M)
|
||||||
INST_NAME(MUL_32)
|
INST_NAME(IMUL_9C)
|
||||||
INST_NAME(IMUL_32)
|
INST_NAME(IMUL_R)
|
||||||
INST_NAME(IMULH_64)
|
INST_NAME(IMUL_M)
|
||||||
INST_NAME(DIV_64)
|
INST_NAME(IMULH_R)
|
||||||
INST_NAME(IDIV_64)
|
INST_NAME(IMULH_M)
|
||||||
INST_NAME(AND_64)
|
INST_NAME(ISMULH_R)
|
||||||
INST_NAME(AND_32)
|
INST_NAME(ISMULH_M)
|
||||||
INST_NAME(OR_64)
|
INST_NAME(IDIV_C)
|
||||||
INST_NAME(OR_32)
|
INST_NAME(ISDIV_C)
|
||||||
INST_NAME(XOR_64)
|
INST_NAME(INEG_R)
|
||||||
INST_NAME(XOR_32)
|
INST_NAME(IXOR_R)
|
||||||
INST_NAME(SHL_64)
|
INST_NAME(IXOR_M)
|
||||||
INST_NAME(SHR_64)
|
INST_NAME(IROR_R)
|
||||||
INST_NAME(SAR_64)
|
INST_NAME(IROL_R)
|
||||||
INST_NAME(ROL_64)
|
|
||||||
INST_NAME(ROR_64)
|
//Common floating point
|
||||||
INST_NAME(FPADD)
|
INST_NAME(FPSWAP_R)
|
||||||
INST_NAME(FPSUB)
|
|
||||||
INST_NAME(FPMUL)
|
//Floating point group F
|
||||||
INST_NAME(FPDIV)
|
INST_NAME(FPADD_R)
|
||||||
INST_NAME(FPSQRT)
|
INST_NAME(FPADD_M)
|
||||||
INST_NAME(FPROUND)
|
INST_NAME(FPSUB_R)
|
||||||
INST_NAME(JUMP)
|
INST_NAME(FPSUB_M)
|
||||||
INST_NAME(CALL)
|
INST_NAME(FPNEG_R)
|
||||||
INST_NAME(RET)
|
|
||||||
|
//Floating point group E
|
||||||
|
INST_NAME(FPMUL_R)
|
||||||
|
INST_NAME(FPMUL_M)
|
||||||
|
INST_NAME(FPDIV_R)
|
||||||
|
INST_NAME(FPDIV_M)
|
||||||
|
INST_NAME(FPSQRT_R)
|
||||||
|
|
||||||
|
//Control
|
||||||
|
INST_NAME(COND_R)
|
||||||
|
INST_NAME(COND_M)
|
||||||
|
INST_NAME(CFROUND)
|
||||||
|
};
|
||||||
|
|
||||||
|
InstructionVisualizer Instruction::engine[256] = {
|
||||||
|
//Integer
|
||||||
|
INST_HANDLE(IADD_R)
|
||||||
|
INST_HANDLE(IADD_M)
|
||||||
|
INST_HANDLE(IADD_RC)
|
||||||
|
INST_HANDLE(ISUB_R)
|
||||||
|
INST_HANDLE(ISUB_M)
|
||||||
|
INST_HANDLE(IMUL_9C)
|
||||||
|
INST_HANDLE(IMUL_R)
|
||||||
|
INST_HANDLE(IMUL_M)
|
||||||
|
INST_HANDLE(IMULH_R)
|
||||||
|
INST_HANDLE(IMULH_M)
|
||||||
|
INST_HANDLE(ISMULH_R)
|
||||||
|
INST_HANDLE(ISMULH_M)
|
||||||
|
INST_HANDLE(IDIV_C)
|
||||||
|
INST_HANDLE(ISDIV_C)
|
||||||
|
INST_HANDLE(INEG_R)
|
||||||
|
INST_HANDLE(IXOR_R)
|
||||||
|
INST_HANDLE(IXOR_M)
|
||||||
|
INST_HANDLE(IROR_R)
|
||||||
|
INST_HANDLE(IROL_R)
|
||||||
|
|
||||||
|
//Common floating point
|
||||||
|
INST_HANDLE(FPSWAP_R)
|
||||||
|
|
||||||
|
//Floating point group F
|
||||||
|
INST_HANDLE(FPADD_R)
|
||||||
|
INST_HANDLE(FPADD_M)
|
||||||
|
INST_HANDLE(FPSUB_R)
|
||||||
|
INST_HANDLE(FPSUB_M)
|
||||||
|
INST_HANDLE(FPNEG_R)
|
||||||
|
|
||||||
|
//Floating point group E
|
||||||
|
INST_HANDLE(FPMUL_R)
|
||||||
|
INST_HANDLE(FPMUL_M)
|
||||||
|
INST_HANDLE(FPDIV_R)
|
||||||
|
INST_HANDLE(FPDIV_M)
|
||||||
|
INST_HANDLE(FPSQRT_R)
|
||||||
|
|
||||||
|
//Control
|
||||||
|
INST_HANDLE(COND_R)
|
||||||
|
INST_HANDLE(COND_M)
|
||||||
|
INST_HANDLE(CFROUND)
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
@ -24,21 +24,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
|
class Instruction;
|
||||||
|
|
||||||
|
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
|
||||||
|
|
||||||
class Instruction {
|
class Instruction {
|
||||||
public:
|
public:
|
||||||
uint8_t opcode;
|
uint8_t opcode;
|
||||||
uint8_t loca;
|
uint8_t dst;
|
||||||
uint8_t rega;
|
uint8_t src;
|
||||||
uint8_t locb;
|
uint8_t alt;
|
||||||
uint8_t regb;
|
int32_t imm32;
|
||||||
uint8_t locc;
|
|
||||||
uint8_t regc;
|
|
||||||
uint8_t imm8;
|
|
||||||
int32_t addra;
|
|
||||||
union {
|
|
||||||
uint32_t addrc;
|
|
||||||
int32_t imm32;
|
|
||||||
};
|
|
||||||
const char* getName() const {
|
const char* getName() const {
|
||||||
return names[opcode];
|
return names[opcode];
|
||||||
}
|
}
|
||||||
@ -49,8 +45,46 @@ namespace RandomX {
|
|||||||
private:
|
private:
|
||||||
void print(std::ostream&) const;
|
void print(std::ostream&) const;
|
||||||
static const char* names[256];
|
static const char* names[256];
|
||||||
|
static InstructionVisualizer engine[256];
|
||||||
|
|
||||||
|
void genAddressReg(std::ostream& os) const;
|
||||||
|
void genAddressImm(std::ostream& os) const;
|
||||||
|
|
||||||
|
void h_IADD_R(std::ostream&) const;
|
||||||
|
void h_IADD_M(std::ostream&) const;
|
||||||
|
void h_IADD_RC(std::ostream&) const;
|
||||||
|
void h_ISUB_R(std::ostream&) const;
|
||||||
|
void h_ISUB_M(std::ostream&) const;
|
||||||
|
void h_IMUL_9C(std::ostream&) const;
|
||||||
|
void h_IMUL_R(std::ostream&) const;
|
||||||
|
void h_IMUL_M(std::ostream&) const;
|
||||||
|
void h_IMULH_R(std::ostream&) const;
|
||||||
|
void h_IMULH_M(std::ostream&) const;
|
||||||
|
void h_ISMULH_R(std::ostream&) const;
|
||||||
|
void h_ISMULH_M(std::ostream&) const;
|
||||||
|
void h_IDIV_C(std::ostream&) const;
|
||||||
|
void h_ISDIV_C(std::ostream&) const;
|
||||||
|
void h_INEG_R(std::ostream&) const;
|
||||||
|
void h_IXOR_R(std::ostream&) const;
|
||||||
|
void h_IXOR_M(std::ostream&) const;
|
||||||
|
void h_IROR_R(std::ostream&) const;
|
||||||
|
void h_IROL_R(std::ostream&) const;
|
||||||
|
void h_FPSWAP_R(std::ostream&) const;
|
||||||
|
void h_FPADD_R(std::ostream&) const;
|
||||||
|
void h_FPADD_M(std::ostream&) const;
|
||||||
|
void h_FPSUB_R(std::ostream&) const;
|
||||||
|
void h_FPSUB_M(std::ostream&) const;
|
||||||
|
void h_FPNEG_R(std::ostream&) const;
|
||||||
|
void h_FPMUL_R(std::ostream&) const;
|
||||||
|
void h_FPMUL_M(std::ostream&) const;
|
||||||
|
void h_FPDIV_R(std::ostream&) const;
|
||||||
|
void h_FPDIV_M(std::ostream&) const;
|
||||||
|
void h_FPSQRT_R(std::ostream&) const;
|
||||||
|
void h_COND_R(std::ostream&) const;
|
||||||
|
void h_COND_M(std::ostream&) const;
|
||||||
|
void h_CFROUND(std::ostream&) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(Instruction) == 16, "Invalid alignment of struct Instruction");
|
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");
|
||||||
|
|
||||||
}
|
}
|
@ -130,333 +130,10 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loada(Instruction& instr) {
|
|
||||||
convertible_t& rega = reg.r[instr.rega % RegistersCount];
|
|
||||||
rega.i64 ^= instr.addra; //sign-extend addra
|
|
||||||
addr_t addr = rega.u32;
|
|
||||||
|
|
||||||
if ((ic % 64) == 0) {
|
|
||||||
addr = currentTransform->apply(addr);
|
|
||||||
#ifdef STATS
|
|
||||||
datasetAccess[mem.ma / (DatasetBlockCount / 256) / CacheLineSize]++;
|
|
||||||
#endif
|
|
||||||
readDataset(addr, mem, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((instr.loca & 192) == 0) {
|
|
||||||
mem.mx ^= addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (instr.loca & 3) {
|
|
||||||
return scratchpad[addr % ScratchpadL1];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return scratchpad[addr % ScratchpadL2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loadbia(Instruction& instr) {
|
|
||||||
if (instr.locb & 3) {
|
|
||||||
return reg.r[instr.regb % RegistersCount];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
convertible_t temp;
|
|
||||||
temp.i64 = instr.imm32; //sign-extend imm32
|
|
||||||
return temp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loadbiashift(Instruction& instr) {
|
|
||||||
if (instr.locb & 1) {
|
|
||||||
return reg.r[instr.regb % RegistersCount];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
convertible_t temp;
|
|
||||||
temp.u64 = instr.imm8;
|
|
||||||
return temp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loadbiadiv(Instruction& instr) {
|
|
||||||
if (instr.locb & 3) {
|
|
||||||
convertible_t temp;
|
|
||||||
temp.u64 = instr.imm32;
|
|
||||||
return temp;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return reg.r[instr.regb % RegistersCount];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
convertible_t& InterpretedVirtualMachine::getcr(Instruction& inst) {
|
|
||||||
addr_t addr;
|
|
||||||
switch (inst.locc & 7)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
|
||||||
return scratchpad[addr % ScratchpadL2];
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
|
||||||
return scratchpad[addr % ScratchpadL1];
|
|
||||||
|
|
||||||
case 4:
|
|
||||||
case 5:
|
|
||||||
case 6:
|
|
||||||
case 7:
|
|
||||||
return reg.r[inst.regc % RegistersCount];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::writecf(Instruction& inst, fpu_reg_t& regc) {
|
|
||||||
addr_t addr;
|
|
||||||
switch (inst.locc & 7)
|
|
||||||
{
|
|
||||||
case 4:
|
|
||||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
|
||||||
scratchpad[addr % ScratchpadL2] = (inst.locc & 8) ? regc.hi : regc.lo;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 5:
|
|
||||||
case 6:
|
|
||||||
case 7:
|
|
||||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
|
||||||
scratchpad[addr % ScratchpadL1] = (inst.locc & 8) ? regc.hi : regc.lo;
|
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ALU_RETIRE(x) x(a, b, c); \
|
|
||||||
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
|
|
||||||
|
|
||||||
#define CHECK_NOP_FPDIV(b, c)
|
|
||||||
#ifndef STATS
|
|
||||||
#define CHECK_NOP_FPADD(b, c)
|
|
||||||
#define CHECK_NOP_FPSUB(b, c)
|
|
||||||
#define CHECK_NOP_FPMUL(b, c)
|
|
||||||
#else
|
|
||||||
#define CHECK_NOP_FPADD(b, c) bool loeq = (b.lo.u64 == c.lo.u64); bool hieq = (b.hi.u64 == c.hi.u64); count_FPADD_nop += loeq + hieq; if(loeq && hieq) count_FPADD_nop2++;
|
|
||||||
#define CHECK_NOP_FPSUB(b, c) bool loeq = ((b.lo.u64 & INT64_MAX) == (c.lo.u64 & INT64_MAX)); bool hieq = ((b.hi.u64 & INT64_MAX) == (c.hi.u64 & INT64_MAX)); count_FPSUB_nop += loeq + hieq; if(loeq && hieq) count_FPSUB_nop2++;
|
|
||||||
#define CHECK_NOP_FPMUL(b, c) bool loeq = (b.lo.u64 == c.lo.u64); bool hieq = (b.hi.u64 == c.hi.u64); count_FPMUL_nop += loeq + hieq; if(loeq && hieq) count_FPMUL_nop2++;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define FPU_RETIRE(x) x(a, b, c); \
|
|
||||||
writecf(inst, c); \
|
|
||||||
if(trace) { \
|
|
||||||
std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl; \
|
|
||||||
} \
|
|
||||||
if(fpuCheck) { \
|
|
||||||
if(c.hi.f64 != c.hi.f64 || c.lo.f64 != c.lo.f64) { \
|
|
||||||
std::stringstream ss; \
|
|
||||||
ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
|
|
||||||
throw std::runtime_error(ss.str()); \
|
|
||||||
} else if (std::fpclassify(c.hi.f64) == FP_SUBNORMAL || std::fpclassify(c.lo.f64) == FP_SUBNORMAL) {\
|
|
||||||
std::stringstream ss; \
|
|
||||||
ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
|
|
||||||
throw std::runtime_error(ss.str()); \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef STATS
|
|
||||||
#define INC_COUNT(x) count_##x++;
|
|
||||||
#else
|
|
||||||
#define INC_COUNT(x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define FPU_RETIRE_FPSQRT(x) FPSQRT(a, b, c); \
|
|
||||||
writecf(inst, c); \
|
|
||||||
if(trace) std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl;
|
|
||||||
|
|
||||||
#define FPU_RETIRE_FPROUND(x) FPROUND(a, b, c); \
|
|
||||||
writecflo(inst, c); \
|
|
||||||
if(trace) std::cout << std::hex << c.lo.u64 << std::endl;
|
|
||||||
|
|
||||||
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
|
||||||
INC_COUNT(x) \
|
|
||||||
convertible_t a = loada(inst); \
|
|
||||||
convertible_t b = loadbia(inst); \
|
|
||||||
convertible_t& c = getcr(inst); \
|
|
||||||
ALU_RETIRE(x) \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ALU_INST_SR(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
|
||||||
INC_COUNT(x) \
|
|
||||||
convertible_t a = loada(inst); \
|
|
||||||
convertible_t b = loadbiashift(inst); \
|
|
||||||
convertible_t& c = getcr(inst); \
|
|
||||||
ALU_RETIRE(x) \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ALU_INST_DIV(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
|
||||||
INC_COUNT(x) \
|
|
||||||
convertible_t a = loada(inst); \
|
|
||||||
convertible_t b = loadbiadiv(inst); \
|
|
||||||
convertible_t& c = getcr(inst); \
|
|
||||||
ALU_RETIRE(x) \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define FPU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
|
||||||
INC_COUNT(x) \
|
|
||||||
convertible_t a = loada(inst); \
|
|
||||||
fpu_reg_t& b = reg.f[inst.regb % RegistersCount]; \
|
|
||||||
fpu_reg_t btemp = b; \
|
|
||||||
fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
|
|
||||||
FPU_RETIRE(x) \
|
|
||||||
CHECK_NOP_##x(btemp, c) \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
|
||||||
INC_COUNT(x) \
|
|
||||||
convertible_t a = loada(inst); \
|
|
||||||
fpu_reg_t b; \
|
|
||||||
fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
|
|
||||||
FPU_RETIRE_##x(x) \
|
|
||||||
}
|
|
||||||
|
|
||||||
ALU_INST(ADD_64)
|
|
||||||
ALU_INST(ADD_32)
|
|
||||||
ALU_INST(SUB_64)
|
|
||||||
ALU_INST(SUB_32)
|
|
||||||
ALU_INST(MUL_64)
|
|
||||||
ALU_INST(MULH_64)
|
|
||||||
ALU_INST(MUL_32)
|
|
||||||
ALU_INST(IMUL_32)
|
|
||||||
ALU_INST(IMULH_64)
|
|
||||||
ALU_INST_DIV(DIV_64)
|
|
||||||
ALU_INST_DIV(IDIV_64)
|
|
||||||
ALU_INST(AND_64)
|
|
||||||
ALU_INST(AND_32)
|
|
||||||
ALU_INST(OR_64)
|
|
||||||
ALU_INST(OR_32)
|
|
||||||
ALU_INST(XOR_64)
|
|
||||||
ALU_INST(XOR_32)
|
|
||||||
|
|
||||||
ALU_INST_SR(SHL_64)
|
|
||||||
ALU_INST_SR(SHR_64)
|
|
||||||
ALU_INST_SR(SAR_64)
|
|
||||||
ALU_INST_SR(ROL_64)
|
|
||||||
ALU_INST_SR(ROR_64)
|
|
||||||
|
|
||||||
FPU_INST(FPADD)
|
|
||||||
FPU_INST(FPSUB)
|
|
||||||
FPU_INST(FPMUL)
|
|
||||||
FPU_INST(FPDIV)
|
|
||||||
FPU_INST_NB(FPSQRT)
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::h_FPROUND(Instruction& inst) {
|
|
||||||
convertible_t a = loada(inst);
|
|
||||||
convertible_t& c = getcr(inst);
|
|
||||||
c.u64 = a.u64;
|
|
||||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
|
||||||
FPROUND(a, inst.imm8);
|
|
||||||
}
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::h_JUMP(Instruction& inst) {
|
|
||||||
convertible_t a = loada(inst);
|
|
||||||
convertible_t& c = getcr(inst);
|
|
||||||
c.u64 = a.u64;
|
|
||||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
|
||||||
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
|
||||||
#ifdef STATS
|
|
||||||
count_JUMP_taken++;
|
|
||||||
count_jump_taken[inst.locb & 7]++;
|
|
||||||
#endif
|
|
||||||
pc += (inst.imm8 & 127) + 1;
|
|
||||||
pc = pc % ProgramLength;
|
|
||||||
}
|
|
||||||
#ifdef STATS
|
|
||||||
else {
|
|
||||||
count_JUMP_not_taken++;
|
|
||||||
count_jump_not_taken[inst.locb & 7]++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
|
|
||||||
convertible_t a = loada(inst);
|
|
||||||
convertible_t& c = getcr(inst);
|
|
||||||
c.u64 = a.u64;
|
|
||||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
|
||||||
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
|
||||||
#ifdef STATS
|
|
||||||
count_CALL_taken++;
|
|
||||||
count_jump_taken[inst.locb & 7]++;
|
|
||||||
count_retdepth = std::max(0, count_retdepth - 1);
|
|
||||||
#endif
|
|
||||||
stackPush(pc);
|
|
||||||
#ifdef STATS
|
|
||||||
count_max_stack = std::max(count_max_stack, (int)stack.size());
|
|
||||||
#endif
|
|
||||||
pc += (inst.imm8 & 127) + 1;
|
|
||||||
pc = pc % ProgramLength;
|
|
||||||
}
|
|
||||||
#ifdef STATS
|
|
||||||
else {
|
|
||||||
count_CALL_not_taken++;
|
|
||||||
count_jump_not_taken[inst.locb & 7]++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::h_RET(Instruction& inst) {
|
|
||||||
convertible_t a = loada(inst);
|
|
||||||
convertible_t& c = getcr(inst);
|
|
||||||
c.u64 = a.u64;
|
|
||||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
|
||||||
if (stack.size() > 0) {
|
|
||||||
#ifdef STATS
|
|
||||||
count_RET_taken++;
|
|
||||||
count_retdepth++;
|
|
||||||
count_retdepth_max = std::max(count_retdepth_max, count_retdepth);
|
|
||||||
#endif
|
|
||||||
auto raddr = stackPopAddress();
|
|
||||||
pc = raddr;
|
|
||||||
}
|
|
||||||
#ifdef STATS
|
|
||||||
else {
|
|
||||||
count_RET_stack_empty++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_HANDLE(x) REPN(&InterpretedVirtualMachine::h_##x, WT(x))
|
#define INST_HANDLE(x) REPN(&InterpretedVirtualMachine::h_##x, WT(x))
|
||||||
|
|
||||||
InstructionHandler InterpretedVirtualMachine::engine[256] = {
|
InstructionHandler InterpretedVirtualMachine::engine[256] = {
|
||||||
INST_HANDLE(ADD_64)
|
|
||||||
INST_HANDLE(ADD_32)
|
|
||||||
INST_HANDLE(SUB_64)
|
|
||||||
INST_HANDLE(SUB_32)
|
|
||||||
INST_HANDLE(MUL_64)
|
|
||||||
INST_HANDLE(MULH_64)
|
|
||||||
INST_HANDLE(MUL_32)
|
|
||||||
INST_HANDLE(IMUL_32)
|
|
||||||
INST_HANDLE(IMULH_64)
|
|
||||||
INST_HANDLE(DIV_64)
|
|
||||||
INST_HANDLE(IDIV_64)
|
|
||||||
INST_HANDLE(AND_64)
|
|
||||||
INST_HANDLE(AND_32)
|
|
||||||
INST_HANDLE(OR_64)
|
|
||||||
INST_HANDLE(OR_32)
|
|
||||||
INST_HANDLE(XOR_64)
|
|
||||||
INST_HANDLE(XOR_32)
|
|
||||||
INST_HANDLE(SHL_64)
|
|
||||||
INST_HANDLE(SHR_64)
|
|
||||||
INST_HANDLE(SAR_64)
|
|
||||||
INST_HANDLE(ROL_64)
|
|
||||||
INST_HANDLE(ROR_64)
|
|
||||||
INST_HANDLE(FPADD)
|
|
||||||
INST_HANDLE(FPSUB)
|
|
||||||
INST_HANDLE(FPMUL)
|
|
||||||
INST_HANDLE(FPDIV)
|
|
||||||
INST_HANDLE(FPSQRT)
|
|
||||||
INST_HANDLE(FPROUND)
|
|
||||||
INST_HANDLE(JUMP)
|
|
||||||
INST_HANDLE(CALL)
|
|
||||||
INST_HANDLE(RET)
|
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -38,9 +38,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
#if !defined(_M_X64) && !defined(__x86_64__)
|
#if true || !defined(_M_X64) && !defined(__x86_64__)
|
||||||
JitCompilerX86::JitCompilerX86() {
|
JitCompilerX86::JitCompilerX86() {
|
||||||
throw std::runtime_error("JIT compiler only supports x86-64 CPUs");
|
//throw std::runtime_error("JIT compiler only supports x86-64 CPUs");
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Pcg32& gen) {
|
void JitCompilerX86::generateProgram(Pcg32& gen) {
|
||||||
|
@ -30,7 +30,6 @@ namespace RandomX {
|
|||||||
void Program::print(std::ostream& os) const {
|
void Program::print(std::ostream& os) const {
|
||||||
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
||||||
auto instr = programBuffer[i];
|
auto instr = programBuffer[i];
|
||||||
os << std::dec << instr.getName() << " (" << i << "):" << std::endl;
|
|
||||||
os << instr;
|
os << instr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ namespace RandomX {
|
|||||||
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
||||||
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
||||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||||
constexpr int DatasetIterations = 10;
|
constexpr int DatasetIterations = 3;
|
||||||
|
|
||||||
|
|
||||||
#ifdef TRACE
|
#ifdef TRACE
|
||||||
@ -72,13 +72,15 @@ namespace RandomX {
|
|||||||
convertible_t hi;
|
convertible_t hi;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr int ProgramLength = 512;
|
constexpr int ProgramLength = 256;
|
||||||
constexpr uint32_t InstructionCount = 1024 * 1024;
|
constexpr uint32_t InstructionCount = 1024;
|
||||||
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
||||||
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 64 / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 64 / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / 4 / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL2 = ScratchpadSize / 4 / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
||||||
|
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
|
||||||
|
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
|
||||||
constexpr uint32_t TransformationCount = 90;
|
constexpr uint32_t TransformationCount = 90;
|
||||||
constexpr int RegistersCount = 8;
|
constexpr int RegistersCount = 8;
|
||||||
|
|
||||||
@ -118,17 +120,19 @@ namespace RandomX {
|
|||||||
|
|
||||||
struct RegisterFile {
|
struct RegisterFile {
|
||||||
convertible_t r[RegistersCount];
|
convertible_t r[RegistersCount];
|
||||||
fpu_reg_t f[RegistersCount];
|
fpu_reg_t f[RegistersCount / 2];
|
||||||
|
fpu_reg_t g[RegistersCount / 2];
|
||||||
|
fpu_reg_t a[RegistersCount / 2];
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(RegisterFile) == 3 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile");
|
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile");
|
||||||
|
|
||||||
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
|
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
|
||||||
|
|
||||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);
|
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, DatasetReadFunc);
|
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,16 +21,24 @@ _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
|||||||
|
|
||||||
PUBLIC executeProgram
|
PUBLIC executeProgram
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
minDbl:
|
||||||
|
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
||||||
|
absMask:
|
||||||
|
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||||
|
signMask:
|
||||||
|
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
||||||
|
|
||||||
executeProgram PROC
|
executeProgram PROC
|
||||||
; REGISTER ALLOCATION:
|
; REGISTER ALLOCATION:
|
||||||
; rax -> temporary
|
; rax -> temporary
|
||||||
; rbx -> "ic"
|
; rbx -> "ic"
|
||||||
; rcx -> temporary
|
; rcx -> temporary
|
||||||
; rdx -> temporary
|
; rdx -> temporary
|
||||||
; rsi -> convertible_t& scratchpad
|
; rsi -> scratchpad pointer
|
||||||
; rdi -> beginning of VM stack
|
; rdi -> dataset pointer
|
||||||
; rbp -> "ma", "mx"
|
; rbp -> "ma", "mx"
|
||||||
; rsp -> end of VM stack
|
; rsp -> stack pointer
|
||||||
; r8 -> "r0"
|
; r8 -> "r0"
|
||||||
; r9 -> "r1"
|
; r9 -> "r1"
|
||||||
; r10 -> "r2"
|
; r10 -> "r2"
|
||||||
@ -39,32 +47,22 @@ executeProgram PROC
|
|||||||
; r13 -> "r5"
|
; r13 -> "r5"
|
||||||
; r14 -> "r6"
|
; r14 -> "r6"
|
||||||
; r15 -> "r7"
|
; r15 -> "r7"
|
||||||
; xmm0 -> temporary
|
; xmm0 -> "f0"
|
||||||
; xmm1 -> temporary
|
; xmm1 -> "f1"
|
||||||
; xmm2 -> "f2"
|
; xmm2 -> "f2"
|
||||||
; xmm3 -> "f3"
|
; xmm3 -> "f3"
|
||||||
; xmm4 -> "f4"
|
; xmm4 -> "e0"
|
||||||
; xmm5 -> "f5"
|
; xmm5 -> "e1"
|
||||||
; xmm6 -> "f6"
|
; xmm6 -> "e2"
|
||||||
; xmm7 -> "f7"
|
; xmm7 -> "e3"
|
||||||
; xmm8 -> "f0"
|
; xmm8 -> "a0"
|
||||||
; xmm9 -> "f1"
|
; xmm9 -> "a1"
|
||||||
; xmm10 -> absolute value mask
|
; xmm10 -> "a2"
|
||||||
|
; xmm11 -> "a3"
|
||||||
; STACK STRUCTURE:
|
; xmm12 -> temporary
|
||||||
; |
|
; xmm13 -> DBL_MIN
|
||||||
; |
|
; xmm14 -> absolute value mask
|
||||||
; | saved registers
|
; xmm15 -> sign mask
|
||||||
; |
|
|
||||||
; v
|
|
||||||
; [rbx+8] RegisterFile& registerFile
|
|
||||||
; [rbx+0] uint8_t* dataset
|
|
||||||
; |
|
|
||||||
; |
|
|
||||||
; | VM stack
|
|
||||||
; |
|
|
||||||
; v
|
|
||||||
; [rsp] last element of VM stack
|
|
||||||
|
|
||||||
; store callee-saved registers
|
; store callee-saved registers
|
||||||
push rbx
|
push rbx
|
||||||
@ -81,100 +79,117 @@ executeProgram PROC
|
|||||||
movdqu xmmword ptr [rsp+32], xmm8
|
movdqu xmmword ptr [rsp+32], xmm8
|
||||||
movdqu xmmword ptr [rsp+16], xmm9
|
movdqu xmmword ptr [rsp+16], xmm9
|
||||||
movdqu xmmword ptr [rsp+0], xmm10
|
movdqu xmmword ptr [rsp+0], xmm10
|
||||||
|
sub rsp, 80
|
||||||
|
movdqu xmmword ptr [rsp+64], xmm11
|
||||||
|
movdqu xmmword ptr [rsp+48], xmm12
|
||||||
|
movdqu xmmword ptr [rsp+32], xmm13
|
||||||
|
movdqu xmmword ptr [rsp+16], xmm14
|
||||||
|
movdqu xmmword ptr [rsp+0], xmm15
|
||||||
|
|
||||||
; function arguments
|
; function arguments
|
||||||
push rcx ; RegisterFile& registerFile
|
push rcx ; RegisterFile& registerFile
|
||||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||||
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
mov eax, ebp ; "mx"
|
||||||
push rax
|
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
||||||
mov rsi, r8 ; convertible_t* scratchpad
|
mov rsi, r8 ; convertible_t* scratchpad
|
||||||
|
mov rbx, r9 ; loop counter
|
||||||
|
|
||||||
|
;# zero integer registers
|
||||||
|
xor r8, r8
|
||||||
|
xor r9, r9
|
||||||
|
xor r10, r10
|
||||||
|
xor r11, r11
|
||||||
|
xor r12, r12
|
||||||
|
xor r13, r13
|
||||||
|
xor r14, r14
|
||||||
|
xor r15, r15
|
||||||
|
|
||||||
|
;# load constant registers
|
||||||
|
lea rcx, [rcx+120]
|
||||||
|
movapd xmm8, xmmword ptr [rcx+72]
|
||||||
|
movapd xmm9, xmmword ptr [rcx+88]
|
||||||
|
movapd xmm10, xmmword ptr [rcx+104]
|
||||||
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
|
movapd xmm13, xmmword ptr [minDbl]
|
||||||
|
movapd xmm14, xmmword ptr [absMask]
|
||||||
|
movapd xmm15, xmmword ptr [signMask]
|
||||||
|
|
||||||
mov rdi, rsp ; beginning of VM stack
|
|
||||||
mov ebx, 1048577 ; number of VM instructions to execute + 1
|
|
||||||
|
|
||||||
xorps xmm10, xmm10
|
|
||||||
cmpeqpd xmm10, xmm10
|
|
||||||
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
|
||||||
|
|
||||||
; reset rounding mode
|
|
||||||
mov dword ptr [rsp-8], 40896
|
|
||||||
ldmxcsr dword ptr [rsp-8]
|
|
||||||
|
|
||||||
; load integer registers
|
|
||||||
mov r8, qword ptr [rcx+0]
|
|
||||||
mov r9, qword ptr [rcx+8]
|
|
||||||
mov r10, qword ptr [rcx+16]
|
|
||||||
mov r11, qword ptr [rcx+24]
|
|
||||||
mov r12, qword ptr [rcx+32]
|
|
||||||
mov r13, qword ptr [rcx+40]
|
|
||||||
mov r14, qword ptr [rcx+48]
|
|
||||||
mov r15, qword ptr [rcx+56]
|
|
||||||
|
|
||||||
; load register f0 hi, lo
|
|
||||||
xorps xmm8, xmm8
|
|
||||||
cvtsi2sd xmm8, qword ptr [rcx+72]
|
|
||||||
pslldq xmm8, 8
|
|
||||||
cvtsi2sd xmm8, qword ptr [rcx+64]
|
|
||||||
|
|
||||||
; load register f1 hi, lo
|
|
||||||
xorps xmm9, xmm9
|
|
||||||
cvtsi2sd xmm9, qword ptr [rcx+88]
|
|
||||||
pslldq xmm9, 8
|
|
||||||
cvtsi2sd xmm9, qword ptr [rcx+80]
|
|
||||||
|
|
||||||
; load register f2 hi, lo
|
|
||||||
xorps xmm2, xmm2
|
|
||||||
cvtsi2sd xmm2, qword ptr [rcx+104]
|
|
||||||
pslldq xmm2, 8
|
|
||||||
cvtsi2sd xmm2, qword ptr [rcx+96]
|
|
||||||
|
|
||||||
; load register f3 hi, lo
|
|
||||||
xorps xmm3, xmm3
|
|
||||||
cvtsi2sd xmm3, qword ptr [rcx+120]
|
|
||||||
pslldq xmm3, 8
|
|
||||||
cvtsi2sd xmm3, qword ptr [rcx+112]
|
|
||||||
|
|
||||||
lea rcx, [rcx+64]
|
|
||||||
|
|
||||||
; load register f4 hi, lo
|
|
||||||
xorps xmm4, xmm4
|
|
||||||
cvtsi2sd xmm4, qword ptr [rcx+72]
|
|
||||||
pslldq xmm4, 8
|
|
||||||
cvtsi2sd xmm4, qword ptr [rcx+64]
|
|
||||||
|
|
||||||
; load register f5 hi, lo
|
|
||||||
xorps xmm5, xmm5
|
|
||||||
cvtsi2sd xmm5, qword ptr [rcx+88]
|
|
||||||
pslldq xmm5, 8
|
|
||||||
cvtsi2sd xmm5, qword ptr [rcx+80]
|
|
||||||
|
|
||||||
; load register f6 hi, lo
|
|
||||||
xorps xmm6, xmm6
|
|
||||||
cvtsi2sd xmm6, qword ptr [rcx+104]
|
|
||||||
pslldq xmm6, 8
|
|
||||||
cvtsi2sd xmm6, qword ptr [rcx+96]
|
|
||||||
|
|
||||||
; load register f7 hi, lo
|
|
||||||
xorps xmm7, xmm7
|
|
||||||
cvtsi2sd xmm7, qword ptr [rcx+120]
|
|
||||||
pslldq xmm7, 8
|
|
||||||
cvtsi2sd xmm7, qword ptr [rcx+112]
|
|
||||||
|
|
||||||
jmp program_begin
|
|
||||||
|
|
||||||
; program body
|
|
||||||
ALIGN 64
|
|
||||||
program_begin:
|
program_begin:
|
||||||
|
xor eax, r8d ;# read address register 1
|
||||||
|
and eax, 262080
|
||||||
|
lea rcx, [rsi+rax]
|
||||||
|
xor r8, qword ptr [rcx+0]
|
||||||
|
xor r9, qword ptr [rcx+8]
|
||||||
|
xor r10, qword ptr [rcx+16]
|
||||||
|
xor r11, qword ptr [rcx+24]
|
||||||
|
xor r12, qword ptr [rcx+32]
|
||||||
|
xor r13, qword ptr [rcx+40]
|
||||||
|
xor r14, qword ptr [rcx+48]
|
||||||
|
xor r15, qword ptr [rcx+56]
|
||||||
|
xor eax, r9d ;# read address register 2
|
||||||
|
and eax, 262080
|
||||||
|
lea rcx, [rsi+rax]
|
||||||
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
|
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||||
|
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||||
|
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||||
|
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||||
|
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||||
|
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||||
|
andps xmm4, xmm14
|
||||||
|
andps xmm5, xmm14
|
||||||
|
andps xmm6, xmm14
|
||||||
|
andps xmm7, xmm14
|
||||||
|
|
||||||
|
;# 256 instructions
|
||||||
include program.inc
|
include program.inc
|
||||||
|
|
||||||
ALIGN 64
|
mov eax, r8d ;# read address register 1
|
||||||
|
xor eax, r9d ;# read address register 2
|
||||||
|
xor rbp, rax ;# modify "mx"
|
||||||
|
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||||
|
mov edx, ebp ;# edx = mx
|
||||||
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
|
mov edx, ebp ;# edx = ma
|
||||||
|
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||||
|
xor r8, qword ptr [rcx+0]
|
||||||
|
xor r9, qword ptr [rcx+8]
|
||||||
|
xor r10, qword ptr [rcx+16]
|
||||||
|
xor r11, qword ptr [rcx+24]
|
||||||
|
xor r12, qword ptr [rcx+32]
|
||||||
|
xor r13, qword ptr [rcx+40]
|
||||||
|
xor r14, qword ptr [rcx+48]
|
||||||
|
xor r15, qword ptr [rcx+56]
|
||||||
|
mov eax, r12d ;# write address register 1
|
||||||
|
and eax, 262080
|
||||||
|
lea rcx, [rsi+rax]
|
||||||
|
mov qword ptr [rcx+0], r8
|
||||||
|
mov qword ptr [rcx+8], r9
|
||||||
|
mov qword ptr [rcx+16], r10
|
||||||
|
mov qword ptr [rcx+24], r11
|
||||||
|
mov qword ptr [rcx+32], r12
|
||||||
|
mov qword ptr [rcx+40], r13
|
||||||
|
mov qword ptr [rcx+48], r14
|
||||||
|
mov qword ptr [rcx+56], r15
|
||||||
|
xor eax, r13d ;# write address register 2
|
||||||
|
and eax, 262080
|
||||||
|
lea rcx, [rsi+rax]
|
||||||
|
mulpd xmm0, xmm4
|
||||||
|
mulpd xmm1, xmm5
|
||||||
|
mulpd xmm2, xmm6
|
||||||
|
mulpd xmm3, xmm7
|
||||||
|
movapd xmmword ptr [rcx+0], xmm0
|
||||||
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
|
movapd xmmword ptr [rcx+48], xmm3
|
||||||
|
dec ebx
|
||||||
|
jnz program_begin
|
||||||
|
|
||||||
rx_finish:
|
rx_finish:
|
||||||
; unroll the stack
|
|
||||||
mov rsp, rdi
|
|
||||||
|
|
||||||
; save VM register values
|
; save VM register values
|
||||||
pop rcx
|
pop rcx
|
||||||
pop rcx
|
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
mov qword ptr [rcx+16], r10
|
mov qword ptr [rcx+16], r10
|
||||||
@ -183,8 +198,8 @@ rx_finish:
|
|||||||
mov qword ptr [rcx+40], r13
|
mov qword ptr [rcx+40], r13
|
||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
movdqa xmmword ptr [rcx+64], xmm8
|
movdqa xmmword ptr [rcx+64], xmm0
|
||||||
movdqa xmmword ptr [rcx+80], xmm9
|
movdqa xmmword ptr [rcx+80], xmm1
|
||||||
movdqa xmmword ptr [rcx+96], xmm2
|
movdqa xmmword ptr [rcx+96], xmm2
|
||||||
movdqa xmmword ptr [rcx+112], xmm3
|
movdqa xmmword ptr [rcx+112], xmm3
|
||||||
lea rcx, [rcx+64]
|
lea rcx, [rcx+64]
|
||||||
@ -194,6 +209,12 @@ rx_finish:
|
|||||||
movdqa xmmword ptr [rcx+112], xmm7
|
movdqa xmmword ptr [rcx+112], xmm7
|
||||||
|
|
||||||
; load callee-saved registers
|
; load callee-saved registers
|
||||||
|
movdqu xmm15, xmmword ptr [rsp]
|
||||||
|
movdqu xmm14, xmmword ptr [rsp+16]
|
||||||
|
movdqu xmm13, xmmword ptr [rsp+32]
|
||||||
|
movdqu xmm12, xmmword ptr [rsp+48]
|
||||||
|
movdqu xmm11, xmmword ptr [rsp+64]
|
||||||
|
add rsp, 80
|
||||||
movdqu xmm10, xmmword ptr [rsp]
|
movdqu xmm10, xmmword ptr [rsp]
|
||||||
movdqu xmm9, xmmword ptr [rsp+16]
|
movdqu xmm9, xmmword ptr [rsp+16]
|
||||||
movdqu xmm8, xmmword ptr [rsp+32]
|
movdqu xmm8, xmmword ptr [rsp+32]
|
||||||
|
@ -19,46 +19,58 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define WT_ADD_64 12
|
//Integer
|
||||||
#define WT_ADD_32 2
|
#define WT_IADD_R 10
|
||||||
#define WT_SUB_64 12
|
#define WT_IADD_M 3
|
||||||
#define WT_SUB_32 2
|
#define WT_IADD_RC 12
|
||||||
#define WT_MUL_64 23
|
#define WT_ISUB_R 10
|
||||||
#define WT_MULH_64 5
|
#define WT_ISUB_M 3
|
||||||
#define WT_MUL_32 15
|
#define WT_IMUL_9C 12
|
||||||
#define WT_IMUL_32 15
|
#define WT_IMUL_R 24
|
||||||
#define WT_IMULH_64 3
|
#define WT_IMUL_M 8
|
||||||
#define WT_DIV_64 8
|
#define WT_IMULH_R 6
|
||||||
#define WT_IDIV_64 8
|
#define WT_IMULH_M 2
|
||||||
#define WT_AND_64 4
|
#define WT_ISMULH_R 6
|
||||||
#define WT_AND_32 2
|
#define WT_ISMULH_M 2
|
||||||
#define WT_OR_64 4
|
#define WT_IDIV_C 4
|
||||||
#define WT_OR_32 2
|
#define WT_ISDIV_C 2
|
||||||
#define WT_XOR_64 4
|
#define WT_INEG_R 4
|
||||||
#define WT_XOR_32 2
|
#define WT_IXOR_R 15
|
||||||
#define WT_SHL_64 3
|
#define WT_IXOR_M 5
|
||||||
#define WT_SHR_64 3
|
#define WT_IROR_R 10
|
||||||
#define WT_SAR_64 3
|
#define WT_IROL_R 10
|
||||||
#define WT_ROL_64 6
|
|
||||||
#define WT_ROR_64 6
|
//Common floating point
|
||||||
#define WT_FPADD 20
|
#define WT_FPSWAP_R 6
|
||||||
#define WT_FPSUB 20
|
|
||||||
#define WT_FPMUL 22
|
//Floating point group F
|
||||||
#define WT_FPDIV 8
|
#define WT_FPADD_R 18
|
||||||
#define WT_FPSQRT 6
|
#define WT_FPADD_M 3
|
||||||
#define WT_FPROUND 2
|
#define WT_FPSUB_R 18
|
||||||
#define WT_JUMP 11
|
#define WT_FPSUB_M 3
|
||||||
#define WT_CALL 11
|
#define WT_FPNEG_R 5
|
||||||
#define WT_RET 12
|
|
||||||
|
//Floating point group E
|
||||||
|
#define WT_FPMUL_R 18
|
||||||
|
#define WT_FPMUL_M 3
|
||||||
|
#define WT_FPDIV_R 6
|
||||||
|
#define WT_FPDIV_M 1
|
||||||
|
#define WT_FPSQRT_R 6
|
||||||
|
|
||||||
|
//Control
|
||||||
|
#define WT_COND_R 15
|
||||||
|
#define WT_COND_M 5
|
||||||
|
#define WT_CFROUND 1
|
||||||
|
|
||||||
#define WT_NOP 0
|
#define WT_NOP 0
|
||||||
|
|
||||||
|
constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
|
||||||
constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
|
WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
||||||
WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \
|
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
||||||
WT_DIV_64 + WT_IDIV_64 + WT_AND_64 + WT_AND_32 + WT_OR_64 + \
|
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||||
WT_OR_32 + WT_XOR_64 + WT_XOR_32 + WT_SHL_64 + WT_SHR_64 + \
|
WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
|
||||||
WT_SAR_64 + WT_ROL_64 + WT_ROR_64 + WT_FPADD + WT_FPSUB + WT_FPMUL \
|
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
|
||||||
+ WT_FPDIV + WT_FPSQRT + WT_FPROUND + WT_JUMP + WT_CALL + WT_RET + WT_NOP;
|
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_NOP;
|
||||||
|
|
||||||
static_assert(wtSum == 256,
|
static_assert(wtSum == 256,
|
||||||
"Sum of instruction weights must be 256");
|
"Sum of instruction weights must be 256");
|
||||||
|
50
src/main.cpp
50
src/main.cpp
@ -34,6 +34,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include "dataset.hpp"
|
#include "dataset.hpp"
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
|
#include "Pcg32.hpp"
|
||||||
|
|
||||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||||
|
|
||||||
@ -130,6 +131,27 @@ void generateAsm(int nonce) {
|
|||||||
asmX86.printCode(std::cout);
|
asmX86.printCode(std::cout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void generateNative(int nonce) {
|
||||||
|
uint64_t hash[4];
|
||||||
|
unsigned char blockTemplate[] = {
|
||||||
|
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||||
|
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
||||||
|
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
|
||||||
|
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
||||||
|
};
|
||||||
|
int* noncePtr = (int*)(blockTemplate + 39);
|
||||||
|
*noncePtr = nonce;
|
||||||
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
|
RandomX::Program prog;
|
||||||
|
Pcg32 gen(hash);
|
||||||
|
prog.initialize(gen);
|
||||||
|
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
||||||
|
prog(i).dst %= 8;
|
||||||
|
prog(i).src %= 8;
|
||||||
|
}
|
||||||
|
std::cout << prog << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
||||||
uint64_t hash[4];
|
uint64_t hash[4];
|
||||||
unsigned char blockTemplate[] = {
|
unsigned char blockTemplate[] = {
|
||||||
@ -147,18 +169,16 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
||||||
vm->initializeScratchpad(scratchpad, spIndex);
|
vm->initializeScratchpad(scratchpad, spIndex);
|
||||||
vm->initializeProgram(hash);
|
//vm->initializeProgram(hash);
|
||||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||||
vm->setScratchpad(scratchpad + 3 * RandomX::ScratchpadSize / 4);
|
for (int chain = 0; chain < 16; ++chain) {
|
||||||
vm->execute();
|
vm->initializeProgram(hash);
|
||||||
vm->setScratchpad(scratchpad + 2 * RandomX::ScratchpadSize / 4);
|
int segment = hash[3] & 3;
|
||||||
vm->execute();
|
vm->setScratchpad(scratchpad);// +segment * RandomX::ScratchpadSize / 4);
|
||||||
vm->getResult(nullptr, 0, hash);
|
vm->execute();
|
||||||
vm->initializeProgram(hash);
|
vm->getResult(nullptr, 0, hash);
|
||||||
vm->setScratchpad(scratchpad + 1 * RandomX::ScratchpadSize / 4);
|
}
|
||||||
vm->execute();
|
//vm->initializeProgram(hash);
|
||||||
vm->setScratchpad(scratchpad + 0 * RandomX::ScratchpadSize / 4);
|
|
||||||
vm->execute();
|
|
||||||
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
|
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
|
||||||
result.xorWith(hash);
|
result.xorWith(hash);
|
||||||
if (RandomX::trace) {
|
if (RandomX::trace) {
|
||||||
@ -171,7 +191,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
bool softAes, lightClient, genAsm, compiled, help, largePages, async, aesBench;
|
bool softAes, lightClient, genAsm, compiled, help, largePages, async, aesBench, genNative;
|
||||||
int programCount, threadCount;
|
int programCount, threadCount;
|
||||||
readOption("--help", argc, argv, help);
|
readOption("--help", argc, argv, help);
|
||||||
|
|
||||||
@ -189,12 +209,18 @@ int main(int argc, char** argv) {
|
|||||||
readOption("--largePages", argc, argv, largePages);
|
readOption("--largePages", argc, argv, largePages);
|
||||||
readOption("--async", argc, argv, async);
|
readOption("--async", argc, argv, async);
|
||||||
readOption("--aesBench", argc, argv, aesBench);
|
readOption("--aesBench", argc, argv, aesBench);
|
||||||
|
readOption("--genNative", argc, argv, genNative);
|
||||||
|
|
||||||
if (genAsm) {
|
if (genAsm) {
|
||||||
generateAsm(programCount);
|
generateAsm(programCount);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (genNative) {
|
||||||
|
generateNative(programCount);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (softAes)
|
if (softAes)
|
||||||
std::cout << "Using software AES." << std::endl;
|
std::cout << "Using software AES." << std::endl;
|
||||||
|
|
||||||
|
9610
src/program.inc
9610
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user