mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 05:38:54 +00:00
Removed FPNEG instruction
Optimized instruction frequencies
Increased the range for A registers from [1,65536) to [1, 4294967296)
This commit is contained in:
parent
ac4462ad42
commit
a586751f6b
@ -35,6 +35,8 @@ namespace RandomX {
|
|||||||
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||||
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||||
|
|
||||||
|
static const char* fsumInstr[4] = { "paddb", "paddw", "paddd", "paddq" };
|
||||||
|
|
||||||
static const char* regA4 = "xmm12";
|
static const char* regA4 = "xmm12";
|
||||||
static const char* dblMin = "xmm13";
|
static const char* dblMin = "xmm13";
|
||||||
static const char* absMask = "xmm14";
|
static const char* absMask = "xmm14";
|
||||||
@ -365,6 +367,7 @@ namespace RandomX {
|
|||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
|
//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//5 uOPs
|
//5 uOPs
|
||||||
@ -380,6 +383,7 @@ namespace RandomX {
|
|||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||||
|
//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//5 uOPs
|
//5 uOPs
|
||||||
@ -391,9 +395,9 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_FPNEG_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_CFSUM_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl;
|
asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
//1 uOPs
|
//1 uOPs
|
||||||
@ -538,7 +542,7 @@ namespace RandomX {
|
|||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(CFSUM_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
|
@ -69,7 +69,7 @@ namespace RandomX {
|
|||||||
void h_FADD_M(Instruction&, int);
|
void h_FADD_M(Instruction&, int);
|
||||||
void h_FSUB_R(Instruction&, int);
|
void h_FSUB_R(Instruction&, int);
|
||||||
void h_FSUB_M(Instruction&, int);
|
void h_FSUB_M(Instruction&, int);
|
||||||
void h_FPNEG_R(Instruction&, int);
|
void h_CFSUM_R(Instruction&, int);
|
||||||
void h_FMUL_R(Instruction&, int);
|
void h_FMUL_R(Instruction&, int);
|
||||||
void h_FMUL_M(Instruction&, int);
|
void h_FMUL_M(Instruction&, int);
|
||||||
void h_FDIV_R(Instruction&, int);
|
void h_FDIV_R(Instruction&, int);
|
||||||
|
@ -44,7 +44,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
static uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
||||||
auto exponent = entropy >> 60; //0..15
|
auto exponent = entropy >> 59; //0..31
|
||||||
auto mantissa = entropy & mantissaMask;
|
auto mantissa = entropy & mantissaMask;
|
||||||
exponent += exponentBias;
|
exponent += exponentBias;
|
||||||
exponent &= exponentMask;
|
exponent &= exponentMask;
|
||||||
|
@ -247,9 +247,9 @@ namespace RandomX {
|
|||||||
os << std::endl;
|
os << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FPNEG_R(std::ostream& os) const {
|
void Instruction::h_CFSUM_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "f" << dstIndex << std::endl;
|
os << "f" << dstIndex << ", " << (1 << ((mod % 4) + 3)) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FMUL_R(std::ostream& os) const {
|
void Instruction::h_FMUL_R(std::ostream& os) const {
|
||||||
@ -370,7 +370,7 @@ namespace RandomX {
|
|||||||
INST_NAME(FADD_M)
|
INST_NAME(FADD_M)
|
||||||
INST_NAME(FSUB_R)
|
INST_NAME(FSUB_R)
|
||||||
INST_NAME(FSUB_M)
|
INST_NAME(FSUB_M)
|
||||||
INST_NAME(FPNEG_R)
|
INST_NAME(CFSUM_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_NAME(FMUL_R)
|
INST_NAME(FMUL_R)
|
||||||
@ -421,7 +421,7 @@ namespace RandomX {
|
|||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(CFSUM_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
|
@ -54,7 +54,7 @@ namespace RandomX {
|
|||||||
constexpr int FADD_M = 22;
|
constexpr int FADD_M = 22;
|
||||||
constexpr int FSUB_R = 23;
|
constexpr int FSUB_R = 23;
|
||||||
constexpr int FSUB_M = 24;
|
constexpr int FSUB_M = 24;
|
||||||
constexpr int FPNEG_R = 25;
|
constexpr int CFSUM_R = 25;
|
||||||
constexpr int FMUL_R = 26;
|
constexpr int FMUL_R = 26;
|
||||||
constexpr int FMUL_M = 27;
|
constexpr int FMUL_M = 27;
|
||||||
constexpr int FDIV_R = 28;
|
constexpr int FDIV_R = 28;
|
||||||
@ -116,7 +116,7 @@ namespace RandomX {
|
|||||||
void h_FADD_M(std::ostream&) const;
|
void h_FADD_M(std::ostream&) const;
|
||||||
void h_FSUB_R(std::ostream&) const;
|
void h_FSUB_R(std::ostream&) const;
|
||||||
void h_FSUB_M(std::ostream&) const;
|
void h_FSUB_M(std::ostream&) const;
|
||||||
void h_FPNEG_R(std::ostream&) const;
|
void h_CFSUM_R(std::ostream&) const;
|
||||||
void h_FMUL_R(std::ostream&) const;
|
void h_FMUL_R(std::ostream&) const;
|
||||||
void h_FMUL_M(std::ostream&) const;
|
void h_FMUL_M(std::ostream&) const;
|
||||||
void h_FDIV_R(std::ostream&) const;
|
void h_FDIV_R(std::ostream&) const;
|
||||||
|
@ -87,7 +87,7 @@ namespace RandomX {
|
|||||||
; xmm12 -> temporary
|
; xmm12 -> temporary
|
||||||
; xmm13 -> DBL_MIN
|
; xmm13 -> DBL_MIN
|
||||||
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
|
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
|
||||||
; xmm15 -> sign mask 0x80000000000000008000000000000000
|
; xmm15 -> unused
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -178,6 +178,8 @@ namespace RandomX {
|
|||||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||||
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||||
static const uint8_t REX_ANDPS_XMM12[] = { 0x41, 0x0f, 0x54, 0xe6 };
|
static const uint8_t REX_ANDPS_XMM12[] = { 0x41, 0x0f, 0x54, 0xe6 };
|
||||||
|
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
||||||
|
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos - prologueSize;
|
return codePos - prologueSize;
|
||||||
@ -615,6 +617,9 @@ namespace RandomX {
|
|||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_ADDPD);
|
emit(REX_ADDPD);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||||
|
//emit(REX_PADD);
|
||||||
|
//emitByte(PADD_OPCODES[instr.mod % 4]);
|
||||||
|
//emitByte(0xf8 + instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FADD_M(Instruction& instr) {
|
void JitCompilerX86::h_FADD_M(Instruction& instr) {
|
||||||
@ -630,6 +635,9 @@ namespace RandomX {
|
|||||||
instr.src %= 4;
|
instr.src %= 4;
|
||||||
emit(REX_SUBPD);
|
emit(REX_SUBPD);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||||
|
//emit(REX_PADD);
|
||||||
|
//emitByte(PADD_OPCODES[instr.mod % 4]);
|
||||||
|
//emitByte(0xf8 + instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSUB_M(Instruction& instr) {
|
void JitCompilerX86::h_FSUB_M(Instruction& instr) {
|
||||||
@ -640,7 +648,7 @@ namespace RandomX {
|
|||||||
emitByte(0xc4 + 8 * instr.dst);
|
emitByte(0xc4 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPNEG_R(Instruction& instr) {
|
void JitCompilerX86::h_CFSUM_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
emit(REX_XORPS);
|
emit(REX_XORPS);
|
||||||
emitByte(0xc7 + 8 * instr.dst);
|
emitByte(0xc7 + 8 * instr.dst);
|
||||||
@ -794,7 +802,7 @@ namespace RandomX {
|
|||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FPNEG_R)
|
INST_HANDLE(CFSUM_R)
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
INST_HANDLE(FMUL_M)
|
INST_HANDLE(FMUL_M)
|
||||||
INST_HANDLE(FDIV_R)
|
INST_HANDLE(FDIV_R)
|
||||||
|
@ -115,7 +115,7 @@ namespace RandomX {
|
|||||||
void h_FADD_M(Instruction&);
|
void h_FADD_M(Instruction&);
|
||||||
void h_FSUB_R(Instruction&);
|
void h_FSUB_R(Instruction&);
|
||||||
void h_FSUB_M(Instruction&);
|
void h_FSUB_M(Instruction&);
|
||||||
void h_FPNEG_R(Instruction&);
|
void h_CFSUM_R(Instruction&);
|
||||||
void h_FMUL_R(Instruction&);
|
void h_FMUL_R(Instruction&);
|
||||||
void h_FMUL_M(Instruction&);
|
void h_FMUL_M(Instruction&);
|
||||||
void h_FDIV_R(Instruction&);
|
void h_FDIV_R(Instruction&);
|
||||||
|
@ -12,6 +12,10 @@
|
|||||||
mulpd xmm1, xmm5
|
mulpd xmm1, xmm5
|
||||||
mulpd xmm2, xmm6
|
mulpd xmm2, xmm6
|
||||||
mulpd xmm3, xmm7
|
mulpd xmm3, xmm7
|
||||||
|
;# xorpd xmm0, xmm15
|
||||||
|
;# xorpd xmm1, xmm15
|
||||||
|
;# xorpd xmm2, xmm15
|
||||||
|
;# xorpd xmm3, xmm15
|
||||||
movapd xmmword ptr [rcx+0], xmm0
|
movapd xmmword ptr [rcx+0], xmm0
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
|
@ -18,5 +18,5 @@
|
|||||||
movapd xmm11, xmmword ptr [rcx+120]
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
movapd xmm13, xmmword ptr [minDbl]
|
movapd xmm13, xmmword ptr [minDbl]
|
||||||
movapd xmm14, xmmword ptr [absMask]
|
movapd xmm14, xmmword ptr [absMask]
|
||||||
movapd xmm15, xmmword ptr [signMask]
|
;# xorpd xmm15, xmm15
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ executeProgram PROC
|
|||||||
; xmm12 -> temporary
|
; xmm12 -> temporary
|
||||||
; xmm13 -> DBL_MIN
|
; xmm13 -> DBL_MIN
|
||||||
; xmm14 -> absolute value mask
|
; xmm14 -> absolute value mask
|
||||||
; xmm15 -> sign mask
|
; xmm15 -> unused
|
||||||
|
|
||||||
; store callee-saved registers
|
; store callee-saved registers
|
||||||
push rbx
|
push rbx
|
||||||
@ -104,7 +104,7 @@ executeProgram PROC
|
|||||||
movapd xmm11, xmmword ptr [rcx+120]
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
movapd xmm13, xmmword ptr [minDbl]
|
movapd xmm13, xmmword ptr [minDbl]
|
||||||
movapd xmm14, xmmword ptr [absMask]
|
movapd xmm14, xmmword ptr [absMask]
|
||||||
movapd xmm15, xmmword ptr [signMask]
|
;# xorps xmm15, xmm15
|
||||||
|
|
||||||
jmp program_begin
|
jmp program_begin
|
||||||
|
|
||||||
|
@ -21,10 +21,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
//Integer
|
//Integer
|
||||||
#define WT_IADD_R 12
|
#define WT_IADD_R 12
|
||||||
#define WT_IADD_M 3
|
#define WT_IADD_M 7
|
||||||
#define WT_IADD_RC 12
|
#define WT_IADD_RC 16
|
||||||
#define WT_ISUB_R 12
|
#define WT_ISUB_R 12
|
||||||
#define WT_ISUB_M 3
|
#define WT_ISUB_M 7
|
||||||
#define WT_IMUL_9C 9
|
#define WT_IMUL_9C 9
|
||||||
#define WT_IMUL_R 16
|
#define WT_IMUL_R 16
|
||||||
#define WT_IMUL_M 4
|
#define WT_IMUL_M 4
|
||||||
@ -35,10 +35,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#define WT_IDIV_C 4
|
#define WT_IDIV_C 4
|
||||||
#define WT_ISDIV_C 4
|
#define WT_ISDIV_C 4
|
||||||
#define WT_INEG_R 2
|
#define WT_INEG_R 2
|
||||||
#define WT_IXOR_R 12
|
#define WT_IXOR_R 16
|
||||||
#define WT_IXOR_M 4
|
#define WT_IXOR_M 4
|
||||||
#define WT_IROR_R 10
|
#define WT_IROR_R 8
|
||||||
#define WT_IROL_R 10
|
#define WT_IROL_R 8
|
||||||
#define WT_ISWAP_R 4
|
#define WT_ISWAP_R 4
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
@ -49,22 +49,22 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#define WT_FADD_M 5
|
#define WT_FADD_M 5
|
||||||
#define WT_FSUB_R 20
|
#define WT_FSUB_R 20
|
||||||
#define WT_FSUB_M 5
|
#define WT_FSUB_M 5
|
||||||
#define WT_FPNEG_R 6
|
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
#define WT_FMUL_R 16
|
#define WT_FMUL_R 20
|
||||||
#define WT_FMUL_M 4
|
#define WT_FMUL_M 0
|
||||||
#define WT_FDIV_R 7
|
#define WT_FDIV_R 0
|
||||||
#define WT_FDIV_M 1
|
#define WT_FDIV_M 4
|
||||||
#define WT_FSQRT_R 6
|
#define WT_FSQRT_R 6
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
#define WT_COND_R 7
|
#define WT_COND_R 7
|
||||||
#define WT_COND_M 1
|
#define WT_COND_M 1
|
||||||
#define WT_CFROUND 1
|
#define WT_CFROUND 1
|
||||||
|
#define WT_CFSUM_R 0
|
||||||
|
|
||||||
//Store
|
//Store
|
||||||
#define WT_ISTORE 18
|
#define WT_ISTORE 16
|
||||||
#define WT_FSTORE 0
|
#define WT_FSTORE 0
|
||||||
|
|
||||||
#define WT_NOP 0
|
#define WT_NOP 0
|
||||||
@ -74,7 +74,7 @@ WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
|||||||
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
||||||
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||||
WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \
|
WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \
|
||||||
WT_FPNEG_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \
|
WT_CFSUM_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \
|
||||||
WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
||||||
|
|
||||||
static_assert(wtSum == 256,
|
static_assert(wtSum == 256,
|
||||||
|
1419
src/program.inc
1419
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user