mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Unique scratchpad addresses
This commit is contained in:
parent
2c87a058ec
commit
33a2fd021d
@ -257,12 +257,12 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
||||||
asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl;
|
asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||||
asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
|
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
|
||||||
asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl;
|
asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||||
asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
|
asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -273,7 +273,7 @@ namespace RandomX {
|
|||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
|
||||||
registerUsage[instr.dst] = i;
|
registerUsage[instr.dst] = i;
|
||||||
if(instr.dst == 5)
|
if(instr.dst == RegisterNeedsDisplacement)
|
||||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||||
else
|
else
|
||||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
|
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
|
||||||
|
@ -29,11 +29,11 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressReg(std::ostream& os) const {
|
void Instruction::genAddressReg(std::ostream& os) const {
|
||||||
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
|
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressRegDst(std::ostream& os) const {
|
void Instruction::genAddressRegDst(std::ostream& os) const {
|
||||||
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << "]";
|
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressImm(std::ostream& os) const {
|
void Instruction::genAddressImm(std::ostream& os) const {
|
||||||
@ -41,12 +41,11 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_IADD_RS(std::ostream& os) const {
|
void Instruction::h_IADD_RS(std::ostream& os) const {
|
||||||
if (src != dst) {
|
os << "r" << (int)dst << ", r" << (int)src;
|
||||||
os << "r" << (int)dst << ", r" << (int)src << ", LSH " << (int)(mod % 4) << std::endl;
|
if(dst == RegisterNeedsDisplacement) {
|
||||||
}
|
os << ", " << (int32_t)getImm32();
|
||||||
else {
|
|
||||||
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
|
|
||||||
}
|
}
|
||||||
|
os << ", LSH " << (int)(mod % 4) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_IADD_M(std::ostream& os) const {
|
void Instruction::h_IADD_M(std::ostream& os) const {
|
||||||
|
@ -201,6 +201,7 @@ namespace RandomX {
|
|||||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||||
static const uint8_t RET = 0xc3;
|
static const uint8_t RET = 0xc3;
|
||||||
|
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
|
||||||
|
|
||||||
static const uint8_t NOP1[] = { 0x90 };
|
static const uint8_t NOP1[] = { 0x90 };
|
||||||
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
||||||
@ -434,8 +435,12 @@ namespace RandomX {
|
|||||||
template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i);
|
template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i);
|
||||||
|
|
||||||
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||||
emit(REX_MOV_RR);
|
emit(LEA_32);
|
||||||
emitByte((rax ? 0xc0 : 0xc8) + instr.src);
|
emitByte(0x80 + instr.src + (rax ? 0 : 8));
|
||||||
|
if (instr.src == RegisterNeedsSib) {
|
||||||
|
emitByte(0x24);
|
||||||
|
}
|
||||||
|
emit32(instr.getImm32());
|
||||||
if (rax)
|
if (rax)
|
||||||
emitByte(AND_EAX_I);
|
emitByte(AND_EAX_I);
|
||||||
else
|
else
|
||||||
@ -444,8 +449,12 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
|
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
|
||||||
emit(REX_MOV_RR);
|
emit(LEA_32);
|
||||||
emitByte(0xc0 + instr.dst);
|
emitByte(0x80 + instr.dst);
|
||||||
|
if (instr.dst == RegisterNeedsSib) {
|
||||||
|
emitByte(0x24);
|
||||||
|
}
|
||||||
|
emit32(instr.getImm32());
|
||||||
emitByte(AND_EAX_I);
|
emitByte(AND_EAX_I);
|
||||||
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
|
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
|
||||||
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
|
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
|
||||||
@ -472,12 +481,12 @@ namespace RandomX {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
emit(REX_LEA);
|
emit(REX_LEA);
|
||||||
if (instr.dst == 5) //rbp,r13 cannot be the base register without offset
|
if (instr.dst == RegisterNeedsDisplacement)
|
||||||
emitByte(0xac);
|
emitByte(0xac);
|
||||||
else
|
else
|
||||||
emitByte(0x04 + 8 * instr.dst);
|
emitByte(0x04 + 8 * instr.dst);
|
||||||
genSIB(instr.mod % 4, instr.src, instr.dst);
|
genSIB(instr.mod % 4, instr.src, instr.dst);
|
||||||
if (instr.dst == 5)
|
if (instr.dst == RegisterNeedsDisplacement)
|
||||||
emit32(instr.getImm32());
|
emit32(instr.getImm32());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,7 +95,8 @@ namespace RandomX {
|
|||||||
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
|
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
|
||||||
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
|
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
|
||||||
constexpr int RegistersCount = 8;
|
constexpr int RegistersCount = 8;
|
||||||
constexpr int LimitedAddressRegister = 5; //x86 r13 register
|
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
|
||||||
|
constexpr int RegisterNeedsSib = 4; //x86 r12 register
|
||||||
|
|
||||||
struct Cache {
|
struct Cache {
|
||||||
uint8_t* memory;
|
uint8_t* memory;
|
||||||
|
526
src/program.inc
526
src/program.inc
File diff suppressed because it is too large
Load Diff
@ -495,7 +495,7 @@ namespace RandomX {
|
|||||||
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
|
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
|
||||||
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
|
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
|
||||||
for (unsigned i = 0; i < 8; ++i) {
|
for (unsigned i = 0; i < 8; ++i) {
|
||||||
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
|
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement))
|
||||||
availableRegisters.push_back(i);
|
availableRegisters.push_back(i);
|
||||||
}
|
}
|
||||||
return selectRegister(availableRegisters, gen, dst_);
|
return selectRegister(availableRegisters, gen, dst_);
|
||||||
@ -510,8 +510,8 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
|
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
|
||||||
if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) {
|
if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) {
|
||||||
if (availableRegisters[0] == LimitedAddressRegister || availableRegisters[1] == LimitedAddressRegister) {
|
if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) {
|
||||||
opGroupPar_ = src_ = LimitedAddressRegister;
|
opGroupPar_ = src_ = RegisterNeedsDisplacement;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user