mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-05 06:38:53 +00:00
Renamed floating point instructions
Fixed negative source operand for FMUL_M and FDIV_M
This commit is contained in:
parent
b417fd08ea
commit
ac4462ad42
@ -356,19 +356,19 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//1 uOPs
|
//1 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPSWAP_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_FPADD_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//5 uOPs
|
//5 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPADD_M(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
@ -376,14 +376,14 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_FPSUB_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//5 uOPs
|
//5 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPSUB_M(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
@ -397,40 +397,42 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//1 uOPs
|
//1 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPMUL_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//6 uOPs
|
//7 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPMUL_M(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FMUL_M(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\tandps xmm12, xmm14" << std::endl;
|
||||||
asmCode << "\tmulpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
asmCode << "\tmulpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
||||||
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//2 uOPs
|
//2 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPDIV_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FDIV_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\tdivpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\tdivpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//6 uOPs
|
//7 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPDIV_M(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
|
asmCode << "\tandps xmm12, xmm14" << std::endl;
|
||||||
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
||||||
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_FPSQRT_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
|
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
|
||||||
}
|
}
|
||||||
@ -529,21 +531,21 @@ namespace RandomX {
|
|||||||
INST_HANDLE(ISWAP_R)
|
INST_HANDLE(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FSWAP_R)
|
||||||
|
|
||||||
//Floating point group F
|
//Floating point group F
|
||||||
INST_HANDLE(FPADD_R)
|
INST_HANDLE(FADD_R)
|
||||||
INST_HANDLE(FPADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FPSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FPSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(FPNEG_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FPMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
INST_HANDLE(FPMUL_M)
|
INST_HANDLE(FMUL_M)
|
||||||
INST_HANDLE(FPDIV_R)
|
INST_HANDLE(FDIV_R)
|
||||||
INST_HANDLE(FPDIV_M)
|
INST_HANDLE(FDIV_M)
|
||||||
INST_HANDLE(FPSQRT_R)
|
INST_HANDLE(FSQRT_R)
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
INST_HANDLE(COND_R)
|
INST_HANDLE(COND_R)
|
||||||
|
@ -64,17 +64,17 @@ namespace RandomX {
|
|||||||
void h_IROR_R(Instruction&, int);
|
void h_IROR_R(Instruction&, int);
|
||||||
void h_IROL_R(Instruction&, int);
|
void h_IROL_R(Instruction&, int);
|
||||||
void h_ISWAP_R(Instruction&, int);
|
void h_ISWAP_R(Instruction&, int);
|
||||||
void h_FPSWAP_R(Instruction&, int);
|
void h_FSWAP_R(Instruction&, int);
|
||||||
void h_FPADD_R(Instruction&, int);
|
void h_FADD_R(Instruction&, int);
|
||||||
void h_FPADD_M(Instruction&, int);
|
void h_FADD_M(Instruction&, int);
|
||||||
void h_FPSUB_R(Instruction&, int);
|
void h_FSUB_R(Instruction&, int);
|
||||||
void h_FPSUB_M(Instruction&, int);
|
void h_FSUB_M(Instruction&, int);
|
||||||
void h_FPNEG_R(Instruction&, int);
|
void h_FPNEG_R(Instruction&, int);
|
||||||
void h_FPMUL_R(Instruction&, int);
|
void h_FMUL_R(Instruction&, int);
|
||||||
void h_FPMUL_M(Instruction&, int);
|
void h_FMUL_M(Instruction&, int);
|
||||||
void h_FPDIV_R(Instruction&, int);
|
void h_FDIV_R(Instruction&, int);
|
||||||
void h_FPDIV_M(Instruction&, int);
|
void h_FDIV_M(Instruction&, int);
|
||||||
void h_FPSQRT_R(Instruction&, int);
|
void h_FSQRT_R(Instruction&, int);
|
||||||
void h_COND_R(Instruction&, int);
|
void h_COND_R(Instruction&, int);
|
||||||
void h_COND_M(Instruction&, int);
|
void h_COND_M(Instruction&, int);
|
||||||
void h_CFROUND(Instruction&, int);
|
void h_CFROUND(Instruction&, int);
|
||||||
|
@ -215,32 +215,32 @@ namespace RandomX {
|
|||||||
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPSWAP_R(std::ostream& os) const {
|
void Instruction::h_FSWAP_R(std::ostream& os) const {
|
||||||
const char reg = (dst >= 4) ? 'e' : 'f';
|
const char reg = (dst >= 4) ? 'e' : 'f';
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << reg << dstIndex << std::endl;
|
os << reg << dstIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPADD_R(std::ostream& os) const {
|
void Instruction::h_FADD_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
auto srcIndex = src % 4;
|
auto srcIndex = src % 4;
|
||||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPADD_M(std::ostream& os) const {
|
void Instruction::h_FADD_M(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "f" << dstIndex << ", ";
|
os << "f" << dstIndex << ", ";
|
||||||
genAddressReg(os);
|
genAddressReg(os);
|
||||||
os << std::endl;
|
os << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPSUB_R(std::ostream& os) const {
|
void Instruction::h_FSUB_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
auto srcIndex = src % 4;
|
auto srcIndex = src % 4;
|
||||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPSUB_M(std::ostream& os) const {
|
void Instruction::h_FSUB_M(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "f" << dstIndex << ", ";
|
os << "f" << dstIndex << ", ";
|
||||||
genAddressReg(os);
|
genAddressReg(os);
|
||||||
@ -252,33 +252,33 @@ namespace RandomX {
|
|||||||
os << "f" << dstIndex << std::endl;
|
os << "f" << dstIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPMUL_R(std::ostream& os) const {
|
void Instruction::h_FMUL_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
auto srcIndex = src % 4;
|
auto srcIndex = src % 4;
|
||||||
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPMUL_M(std::ostream& os) const {
|
void Instruction::h_FMUL_M(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "e" << dstIndex << ", ";
|
os << "e" << dstIndex << ", ";
|
||||||
genAddressReg(os);
|
genAddressReg(os);
|
||||||
os << std::endl;
|
os << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPDIV_R(std::ostream& os) const {
|
void Instruction::h_FDIV_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
auto srcIndex = src % 4;
|
auto srcIndex = src % 4;
|
||||||
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPDIV_M(std::ostream& os) const {
|
void Instruction::h_FDIV_M(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "e" << dstIndex << ", ";
|
os << "e" << dstIndex << ", ";
|
||||||
genAddressReg(os);
|
genAddressReg(os);
|
||||||
os << std::endl;
|
os << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPSQRT_R(std::ostream& os) const {
|
void Instruction::h_FSQRT_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "e" << dstIndex << std::endl;
|
os << "e" << dstIndex << std::endl;
|
||||||
}
|
}
|
||||||
@ -363,21 +363,21 @@ namespace RandomX {
|
|||||||
INST_NAME(ISWAP_R)
|
INST_NAME(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_NAME(FPSWAP_R)
|
INST_NAME(FSWAP_R)
|
||||||
|
|
||||||
//Floating point group F
|
//Floating point group F
|
||||||
INST_NAME(FPADD_R)
|
INST_NAME(FADD_R)
|
||||||
INST_NAME(FPADD_M)
|
INST_NAME(FADD_M)
|
||||||
INST_NAME(FPSUB_R)
|
INST_NAME(FSUB_R)
|
||||||
INST_NAME(FPSUB_M)
|
INST_NAME(FSUB_M)
|
||||||
INST_NAME(FPNEG_R)
|
INST_NAME(FPNEG_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_NAME(FPMUL_R)
|
INST_NAME(FMUL_R)
|
||||||
INST_NAME(FPMUL_M)
|
INST_NAME(FMUL_M)
|
||||||
INST_NAME(FPDIV_R)
|
INST_NAME(FDIV_R)
|
||||||
INST_NAME(FPDIV_M)
|
INST_NAME(FDIV_M)
|
||||||
INST_NAME(FPSQRT_R)
|
INST_NAME(FSQRT_R)
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
INST_NAME(COND_R)
|
INST_NAME(COND_R)
|
||||||
@ -414,21 +414,21 @@ namespace RandomX {
|
|||||||
INST_HANDLE(ISWAP_R)
|
INST_HANDLE(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FSWAP_R)
|
||||||
|
|
||||||
//Floating point group F
|
//Floating point group F
|
||||||
INST_HANDLE(FPADD_R)
|
INST_HANDLE(FADD_R)
|
||||||
INST_HANDLE(FPADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FPSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FPSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(FPNEG_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FPMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
INST_HANDLE(FPMUL_M)
|
INST_HANDLE(FMUL_M)
|
||||||
INST_HANDLE(FPDIV_R)
|
INST_HANDLE(FDIV_R)
|
||||||
INST_HANDLE(FPDIV_M)
|
INST_HANDLE(FDIV_M)
|
||||||
INST_HANDLE(FPSQRT_R)
|
INST_HANDLE(FSQRT_R)
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
INST_HANDLE(COND_R)
|
INST_HANDLE(COND_R)
|
||||||
|
@ -49,17 +49,17 @@ namespace RandomX {
|
|||||||
constexpr int IROR_R = 17;
|
constexpr int IROR_R = 17;
|
||||||
constexpr int IROL_R = 18;
|
constexpr int IROL_R = 18;
|
||||||
constexpr int ISWAP_R = 19;
|
constexpr int ISWAP_R = 19;
|
||||||
constexpr int FPSWAP_R = 20;
|
constexpr int FSWAP_R = 20;
|
||||||
constexpr int FPADD_R = 21;
|
constexpr int FADD_R = 21;
|
||||||
constexpr int FPADD_M = 22;
|
constexpr int FADD_M = 22;
|
||||||
constexpr int FPSUB_R = 23;
|
constexpr int FSUB_R = 23;
|
||||||
constexpr int FPSUB_M = 24;
|
constexpr int FSUB_M = 24;
|
||||||
constexpr int FPNEG_R = 25;
|
constexpr int FPNEG_R = 25;
|
||||||
constexpr int FPMUL_R = 26;
|
constexpr int FMUL_R = 26;
|
||||||
constexpr int FPMUL_M = 27;
|
constexpr int FMUL_M = 27;
|
||||||
constexpr int FPDIV_R = 28;
|
constexpr int FDIV_R = 28;
|
||||||
constexpr int FPDIV_M = 29;
|
constexpr int FDIV_M = 29;
|
||||||
constexpr int FPSQRT_R = 30;
|
constexpr int FSQRT_R = 30;
|
||||||
constexpr int COND_R = 31;
|
constexpr int COND_R = 31;
|
||||||
constexpr int COND_M = 32;
|
constexpr int COND_M = 32;
|
||||||
constexpr int CFROUND = 33;
|
constexpr int CFROUND = 33;
|
||||||
@ -111,17 +111,17 @@ namespace RandomX {
|
|||||||
void h_IROR_R(std::ostream&) const;
|
void h_IROR_R(std::ostream&) const;
|
||||||
void h_IROL_R(std::ostream&) const;
|
void h_IROL_R(std::ostream&) const;
|
||||||
void h_ISWAP_R(std::ostream&) const;
|
void h_ISWAP_R(std::ostream&) const;
|
||||||
void h_FPSWAP_R(std::ostream&) const;
|
void h_FSWAP_R(std::ostream&) const;
|
||||||
void h_FPADD_R(std::ostream&) const;
|
void h_FADD_R(std::ostream&) const;
|
||||||
void h_FPADD_M(std::ostream&) const;
|
void h_FADD_M(std::ostream&) const;
|
||||||
void h_FPSUB_R(std::ostream&) const;
|
void h_FSUB_R(std::ostream&) const;
|
||||||
void h_FPSUB_M(std::ostream&) const;
|
void h_FSUB_M(std::ostream&) const;
|
||||||
void h_FPNEG_R(std::ostream&) const;
|
void h_FPNEG_R(std::ostream&) const;
|
||||||
void h_FPMUL_R(std::ostream&) const;
|
void h_FMUL_R(std::ostream&) const;
|
||||||
void h_FPMUL_M(std::ostream&) const;
|
void h_FMUL_M(std::ostream&) const;
|
||||||
void h_FPDIV_R(std::ostream&) const;
|
void h_FDIV_R(std::ostream&) const;
|
||||||
void h_FPDIV_M(std::ostream&) const;
|
void h_FDIV_M(std::ostream&) const;
|
||||||
void h_FPSQRT_R(std::ostream&) const;
|
void h_FSQRT_R(std::ostream&) const;
|
||||||
void h_COND_R(std::ostream&) const;
|
void h_COND_R(std::ostream&) const;
|
||||||
void h_COND_M(std::ostream&) const;
|
void h_COND_M(std::ostream&) const;
|
||||||
void h_CFROUND(std::ostream&) const;
|
void h_CFROUND(std::ostream&) const;
|
||||||
|
@ -94,11 +94,11 @@ namespace RandomX {
|
|||||||
int count_SAR_64 = 0;
|
int count_SAR_64 = 0;
|
||||||
int count_ROL_64 = 0;
|
int count_ROL_64 = 0;
|
||||||
int count_ROR_64 = 0;
|
int count_ROR_64 = 0;
|
||||||
int count_FPADD = 0;
|
int count_FADD = 0;
|
||||||
int count_FPSUB = 0;
|
int count_FSUB = 0;
|
||||||
int count_FPMUL = 0;
|
int count_FMUL = 0;
|
||||||
int count_FPDIV = 0;
|
int count_FDIV = 0;
|
||||||
int count_FPSQRT = 0;
|
int count_FSQRT = 0;
|
||||||
int count_FPROUND = 0;
|
int count_FPROUND = 0;
|
||||||
int count_JUMP_taken = 0;
|
int count_JUMP_taken = 0;
|
||||||
int count_JUMP_not_taken = 0;
|
int count_JUMP_not_taken = 0;
|
||||||
@ -113,12 +113,12 @@ namespace RandomX {
|
|||||||
int count_retdepth_max = 0;
|
int count_retdepth_max = 0;
|
||||||
int count_endstack = 0;
|
int count_endstack = 0;
|
||||||
int count_instructions[ProgramLength] = { 0 };
|
int count_instructions[ProgramLength] = { 0 };
|
||||||
int count_FPADD_nop = 0;
|
int count_FADD_nop = 0;
|
||||||
int count_FPADD_nop2 = 0;
|
int count_FADD_nop2 = 0;
|
||||||
int count_FPSUB_nop = 0;
|
int count_FSUB_nop = 0;
|
||||||
int count_FPSUB_nop2 = 0;
|
int count_FSUB_nop2 = 0;
|
||||||
int count_FPMUL_nop = 0;
|
int count_FMUL_nop = 0;
|
||||||
int count_FPMUL_nop2 = 0;
|
int count_FMUL_nop2 = 0;
|
||||||
int datasetAccess[256] = { 0 };
|
int datasetAccess[256] = { 0 };
|
||||||
#endif
|
#endif
|
||||||
void executeInstruction(Instruction&);
|
void executeInstruction(Instruction&);
|
||||||
@ -173,11 +173,11 @@ namespace RandomX {
|
|||||||
void h_SAR_64(Instruction&);
|
void h_SAR_64(Instruction&);
|
||||||
void h_ROL_64(Instruction&);
|
void h_ROL_64(Instruction&);
|
||||||
void h_ROR_64(Instruction&);
|
void h_ROR_64(Instruction&);
|
||||||
void h_FPADD(Instruction&);
|
void h_FADD(Instruction&);
|
||||||
void h_FPSUB(Instruction&);
|
void h_FSUB(Instruction&);
|
||||||
void h_FPMUL(Instruction&);
|
void h_FMUL(Instruction&);
|
||||||
void h_FPDIV(Instruction&);
|
void h_FDIV(Instruction&);
|
||||||
void h_FPSQRT(Instruction&);
|
void h_FSQRT(Instruction&);
|
||||||
void h_FPROUND(Instruction&);
|
void h_FPROUND(Instruction&);
|
||||||
void h_JUMP(Instruction&);
|
void h_JUMP(Instruction&);
|
||||||
void h_CALL(Instruction&);
|
void h_CALL(Instruction&);
|
||||||
|
@ -177,6 +177,7 @@ namespace RandomX {
|
|||||||
static const uint8_t JMP = 0xe9;
|
static const uint8_t JMP = 0xe9;
|
||||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||||
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||||
|
static const uint8_t REX_ANDPS_XMM12[] = { 0x41, 0x0f, 0x54, 0xe6 };
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos - prologueSize;
|
return codePos - prologueSize;
|
||||||
@ -603,20 +604,20 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPSWAP_R(Instruction& instr) {
|
void JitCompilerX86::h_FSWAP_R(Instruction& instr) {
|
||||||
emit(SHUFPD);
|
emit(SHUFPD);
|
||||||
emitByte(0xc0 + 9 * instr.dst);
|
emitByte(0xc0 + 9 * instr.dst);
|
||||||
emitByte(1);
|
emitByte(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPADD_R(Instruction& instr) {
|
void JitCompilerX86::h_FADD_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_ADDPD);
|
emit(REX_ADDPD);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPADD_M(Instruction& instr) {
|
void JitCompilerX86::h_FADD_M(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
emit(REX_CVTDQ2PD_XMM12);
|
emit(REX_CVTDQ2PD_XMM12);
|
||||||
@ -624,14 +625,14 @@ namespace RandomX {
|
|||||||
emitByte(0xc4 + 8 * instr.dst);
|
emitByte(0xc4 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPSUB_R(Instruction& instr) {
|
void JitCompilerX86::h_FSUB_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_SUBPD);
|
emit(REX_SUBPD);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPSUB_M(Instruction& instr) {
|
void JitCompilerX86::h_FSUB_M(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
emit(REX_CVTDQ2PD_XMM12);
|
emit(REX_CVTDQ2PD_XMM12);
|
||||||
@ -645,24 +646,25 @@ namespace RandomX {
|
|||||||
emitByte(0xc7 + 8 * instr.dst);
|
emitByte(0xc7 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPMUL_R(Instruction& instr) {
|
void JitCompilerX86::h_FMUL_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_MULPD);
|
emit(REX_MULPD);
|
||||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPMUL_M(Instruction& instr) {
|
void JitCompilerX86::h_FMUL_M(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
emit(REX_CVTDQ2PD_XMM12);
|
emit(REX_CVTDQ2PD_XMM12);
|
||||||
|
emit(REX_ANDPS_XMM12);
|
||||||
emit(REX_MULPD);
|
emit(REX_MULPD);
|
||||||
emitByte(0xe4 + 8 * instr.dst);
|
emitByte(0xe4 + 8 * instr.dst);
|
||||||
emit(REX_MAXPD);
|
emit(REX_MAXPD);
|
||||||
emitByte(0xe5 + 8 * instr.dst);
|
emitByte(0xe5 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPDIV_R(Instruction& instr) {
|
void JitCompilerX86::h_FDIV_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_DIVPD);
|
emit(REX_DIVPD);
|
||||||
@ -671,17 +673,18 @@ namespace RandomX {
|
|||||||
emitByte(0xe5 + 8 * instr.dst);
|
emitByte(0xe5 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPDIV_M(Instruction& instr) {
|
void JitCompilerX86::h_FDIV_M(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
emit(REX_CVTDQ2PD_XMM12);
|
emit(REX_CVTDQ2PD_XMM12);
|
||||||
|
emit(REX_ANDPS_XMM12);
|
||||||
emit(REX_DIVPD);
|
emit(REX_DIVPD);
|
||||||
emitByte(0xe4 + 8 * instr.dst);
|
emitByte(0xe4 + 8 * instr.dst);
|
||||||
emit(REX_MAXPD);
|
emit(REX_MAXPD);
|
||||||
emitByte(0xe5 + 8 * instr.dst);
|
emitByte(0xe5 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPSQRT_R(Instruction& instr) {
|
void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
emit(SQRTPD);
|
emit(SQRTPD);
|
||||||
emitByte(0xe4 + 9 * instr.dst);
|
emitByte(0xe4 + 9 * instr.dst);
|
||||||
@ -786,17 +789,17 @@ namespace RandomX {
|
|||||||
INST_HANDLE(IROR_R)
|
INST_HANDLE(IROR_R)
|
||||||
INST_HANDLE(IROL_R)
|
INST_HANDLE(IROL_R)
|
||||||
INST_HANDLE(ISWAP_R)
|
INST_HANDLE(ISWAP_R)
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FSWAP_R)
|
||||||
INST_HANDLE(FPADD_R)
|
INST_HANDLE(FADD_R)
|
||||||
INST_HANDLE(FPADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FPSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FPSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(FPNEG_R)
|
||||||
INST_HANDLE(FPMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
INST_HANDLE(FPMUL_M)
|
INST_HANDLE(FMUL_M)
|
||||||
INST_HANDLE(FPDIV_R)
|
INST_HANDLE(FDIV_R)
|
||||||
INST_HANDLE(FPDIV_M)
|
INST_HANDLE(FDIV_M)
|
||||||
INST_HANDLE(FPSQRT_R)
|
INST_HANDLE(FSQRT_R)
|
||||||
INST_HANDLE(COND_R)
|
INST_HANDLE(COND_R)
|
||||||
INST_HANDLE(COND_M)
|
INST_HANDLE(COND_M)
|
||||||
INST_HANDLE(CFROUND)
|
INST_HANDLE(CFROUND)
|
||||||
|
@ -110,17 +110,17 @@ namespace RandomX {
|
|||||||
void h_IROR_R(Instruction&);
|
void h_IROR_R(Instruction&);
|
||||||
void h_IROL_R(Instruction&);
|
void h_IROL_R(Instruction&);
|
||||||
void h_ISWAP_R(Instruction&);
|
void h_ISWAP_R(Instruction&);
|
||||||
void h_FPSWAP_R(Instruction&);
|
void h_FSWAP_R(Instruction&);
|
||||||
void h_FPADD_R(Instruction&);
|
void h_FADD_R(Instruction&);
|
||||||
void h_FPADD_M(Instruction&);
|
void h_FADD_M(Instruction&);
|
||||||
void h_FPSUB_R(Instruction&);
|
void h_FSUB_R(Instruction&);
|
||||||
void h_FPSUB_M(Instruction&);
|
void h_FSUB_M(Instruction&);
|
||||||
void h_FPNEG_R(Instruction&);
|
void h_FPNEG_R(Instruction&);
|
||||||
void h_FPMUL_R(Instruction&);
|
void h_FMUL_R(Instruction&);
|
||||||
void h_FPMUL_M(Instruction&);
|
void h_FMUL_M(Instruction&);
|
||||||
void h_FPDIV_R(Instruction&);
|
void h_FDIV_R(Instruction&);
|
||||||
void h_FPDIV_M(Instruction&);
|
void h_FDIV_M(Instruction&);
|
||||||
void h_FPSQRT_R(Instruction&);
|
void h_FSQRT_R(Instruction&);
|
||||||
void h_COND_R(Instruction&);
|
void h_COND_R(Instruction&);
|
||||||
void h_COND_M(Instruction&);
|
void h_COND_M(Instruction&);
|
||||||
void h_CFROUND(Instruction&);
|
void h_CFROUND(Instruction&);
|
||||||
|
@ -42,21 +42,21 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#define WT_ISWAP_R 4
|
#define WT_ISWAP_R 4
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
#define WT_FPSWAP_R 8
|
#define WT_FSWAP_R 8
|
||||||
|
|
||||||
//Floating point group F
|
//Floating point group F
|
||||||
#define WT_FPADD_R 20
|
#define WT_FADD_R 20
|
||||||
#define WT_FPADD_M 5
|
#define WT_FADD_M 5
|
||||||
#define WT_FPSUB_R 20
|
#define WT_FSUB_R 20
|
||||||
#define WT_FPSUB_M 5
|
#define WT_FSUB_M 5
|
||||||
#define WT_FPNEG_R 6
|
#define WT_FPNEG_R 6
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
#define WT_FPMUL_R 16
|
#define WT_FMUL_R 16
|
||||||
#define WT_FPMUL_M 4
|
#define WT_FMUL_M 4
|
||||||
#define WT_FPDIV_R 7
|
#define WT_FDIV_R 7
|
||||||
#define WT_FPDIV_M 1
|
#define WT_FDIV_M 1
|
||||||
#define WT_FPSQRT_R 6
|
#define WT_FSQRT_R 6
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
#define WT_COND_R 7
|
#define WT_COND_R 7
|
||||||
@ -73,9 +73,9 @@ constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
|
|||||||
WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
||||||
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
||||||
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||||
WT_ISWAP_R + WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
|
WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \
|
||||||
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
|
WT_FPNEG_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \
|
||||||
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
||||||
|
|
||||||
static_assert(wtSum == 256,
|
static_assert(wtSum == 256,
|
||||||
"Sum of instruction weights must be 256");
|
"Sum of instruction weights must be 256");
|
||||||
|
@ -48,10 +48,10 @@ namespace RandomX {
|
|||||||
bool JMP_COND(uint8_t, convertible_t&, int32_t);
|
bool JMP_COND(uint8_t, convertible_t&, int32_t);
|
||||||
void FPINIT();
|
void FPINIT();
|
||||||
void FPROUND(convertible_t, uint8_t);
|
void FPROUND(convertible_t, uint8_t);
|
||||||
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
void FADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||||
void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
void FSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||||
void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
void FMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||||
void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
void FDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||||
void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
void FSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -225,7 +225,7 @@ namespace RandomX {
|
|||||||
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
__m128d ad = _mm_cvtepi32_pd(ai);
|
__m128d ad = _mm_cvtepi32_pd(ai);
|
||||||
@ -240,7 +240,7 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
__m128d ad = _mm_cvtepi32_pd(ai);
|
__m128d ad = _mm_cvtepi32_pd(ai);
|
||||||
@ -255,7 +255,7 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
__m128d ad = _mm_cvtepi32_pd(ai);
|
__m128d ad = _mm_cvtepi32_pd(ai);
|
||||||
@ -272,7 +272,7 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
__m128d ad = _mm_cvtepi32_pd(ai);
|
__m128d ad = _mm_cvtepi32_pd(ai);
|
||||||
@ -289,7 +289,7 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
__m128d ad = _mm_cvtepi32_pd(ai);
|
__m128d ad = _mm_cvtepi32_pd(ai);
|
||||||
|
Loading…
Reference in New Issue
Block a user