Unique scratchpad addresses

This commit is contained in:
tevador 2019-04-14 17:21:26 +02:00
parent 2c87a058ec
commit 33a2fd021d
6 changed files with 287 additions and 288 deletions

View File

@ -257,12 +257,12 @@ namespace RandomX {
} }
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl; asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
} }
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl; asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl; asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
} }
@ -273,7 +273,7 @@ namespace RandomX {
//1 uOP //1 uOP
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
if(instr.dst == 5) if(instr.dst == RegisterNeedsDisplacement)
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
else else
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;

View File

@ -29,11 +29,11 @@ namespace RandomX {
} }
void Instruction::genAddressReg(std::ostream& os) const { void Instruction::genAddressReg(std::ostream& os) const {
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << "]"; os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
} }
void Instruction::genAddressRegDst(std::ostream& os) const { void Instruction::genAddressRegDst(std::ostream& os) const {
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << "]"; os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
} }
void Instruction::genAddressImm(std::ostream& os) const { void Instruction::genAddressImm(std::ostream& os) const {
@ -41,12 +41,11 @@ namespace RandomX {
} }
void Instruction::h_IADD_RS(std::ostream& os) const { void Instruction::h_IADD_RS(std::ostream& os) const {
if (src != dst) { os << "r" << (int)dst << ", r" << (int)src;
os << "r" << (int)dst << ", r" << (int)src << ", LSH " << (int)(mod % 4) << std::endl; if(dst == RegisterNeedsDisplacement) {
} os << ", " << (int32_t)getImm32();
else {
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
} }
os << ", LSH " << (int)(mod % 4) << std::endl;
} }
void Instruction::h_IADD_M(std::ostream& os) const { void Instruction::h_IADD_M(std::ostream& os) const {

View File

@ -201,6 +201,7 @@ namespace RandomX {
static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 }; static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t RET = 0xc3; static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
static const uint8_t NOP1[] = { 0x90 }; static const uint8_t NOP1[] = { 0x90 };
static const uint8_t NOP2[] = { 0x66, 0x90 }; static const uint8_t NOP2[] = { 0x66, 0x90 };
@ -434,8 +435,12 @@ namespace RandomX {
template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i); template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i);
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
emit(REX_MOV_RR); emit(LEA_32);
emitByte((rax ? 0xc0 : 0xc8) + instr.src); emitByte(0x80 + instr.src + (rax ? 0 : 8));
if (instr.src == RegisterNeedsSib) {
emitByte(0x24);
}
emit32(instr.getImm32());
if (rax) if (rax)
emitByte(AND_EAX_I); emitByte(AND_EAX_I);
else else
@ -444,8 +449,12 @@ namespace RandomX {
} }
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) { void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
emit(REX_MOV_RR); emit(LEA_32);
emitByte(0xc0 + instr.dst); emitByte(0x80 + instr.dst);
if (instr.dst == RegisterNeedsSib) {
emitByte(0x24);
}
emit32(instr.getImm32());
emitByte(AND_EAX_I); emitByte(AND_EAX_I);
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
@ -472,12 +481,12 @@ namespace RandomX {
return; return;
} }
emit(REX_LEA); emit(REX_LEA);
if (instr.dst == 5) //rbp,r13 cannot be the base register without offset if (instr.dst == RegisterNeedsDisplacement)
emitByte(0xac); emitByte(0xac);
else else
emitByte(0x04 + 8 * instr.dst); emitByte(0x04 + 8 * instr.dst);
genSIB(instr.mod % 4, instr.src, instr.dst); genSIB(instr.mod % 4, instr.src, instr.dst);
if (instr.dst == 5) if (instr.dst == RegisterNeedsDisplacement)
emit32(instr.getImm32()); emit32(instr.getImm32());
} }

View File

@ -95,7 +95,8 @@ namespace RandomX {
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
constexpr int RegistersCount = 8; constexpr int RegistersCount = 8;
constexpr int LimitedAddressRegister = 5; //x86 r13 register constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
constexpr int RegisterNeedsSib = 4; //x86 r12 register
struct Cache { struct Cache {
uint8_t* memory; uint8_t* memory;

File diff suppressed because it is too large Load Diff

View File

@ -495,7 +495,7 @@ namespace RandomX {
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
for (unsigned i = 0; i < 8; ++i) { for (unsigned i = 0; i < 8; ++i) {
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement))
availableRegisters.push_back(i); availableRegisters.push_back(i);
} }
return selectRegister(availableRegisters, gen, dst_); return selectRegister(availableRegisters, gen, dst_);
@ -510,8 +510,8 @@ namespace RandomX {
} }
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) { if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) {
if (availableRegisters[0] == LimitedAddressRegister || availableRegisters[1] == LimitedAddressRegister) { if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) {
opGroupPar_ = src_ = LimitedAddressRegister; opGroupPar_ = src_ = RegisterNeedsDisplacement;
return true; return true;
} }
} }