mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
Updated JIT compiler and assembly generator for new int -> float conversion
This commit is contained in:
parent
790b382eda
commit
d9bc6cfeda
@ -40,6 +40,8 @@ For floating point instructions, the destination can be a group F or group E reg
|
|||||||
|
|
||||||
Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8-byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`.
|
Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8-byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`.
|
||||||
|
|
||||||
|
Memory operands for group E registers are loaded as described above, then their sign bit is cleared and their exponent field is set to `0x30F` (which corresponds to 2<sup>-240</sup>).
|
||||||
|
|
||||||
|frequency|instruction|dst|src|operation|
|
|frequency|instruction|dst|src|operation|
|
||||||
|-|-|-|-|-|
|
|-|-|-|-|-|
|
||||||
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
|
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
|
||||||
@ -58,8 +60,7 @@ This instruction negates the number and multiplies it by <code>2<sup>x</sup></co
|
|||||||
The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`.
|
The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`.
|
||||||
|
|
||||||
#### Denormal and NaN values
|
#### Denormal and NaN values
|
||||||
Due to restrictions on the values of the floating point registers, no operation results in `NaN`.
|
Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number.
|
||||||
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
|
|
||||||
|
|
||||||
#### Rounding
|
#### Rounding
|
||||||
All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register:
|
All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register:
|
||||||
|
@ -385,9 +385,9 @@ namespace RandomX {
|
|||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
|
||||||
asmCode << "\tandps xmm12, xmm14" << std::endl;
|
asmCode << "\tandps xmm12, xmm13" << std::endl;
|
||||||
|
asmCode << "\torps xmm12, xmm14" << std::endl;
|
||||||
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
|
||||||
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
|
|
||||||
traceflt(instr);
|
traceflt(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,9 +73,9 @@ namespace RandomX {
|
|||||||
; xmm10 -> "a2"
|
; xmm10 -> "a2"
|
||||||
; xmm11 -> "a3"
|
; xmm11 -> "a3"
|
||||||
; xmm12 -> temporary
|
; xmm12 -> temporary
|
||||||
; xmm13 -> DBL_MIN
|
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
|
||||||
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
|
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
|
||||||
; xmm15 -> sign mask 0x80000000000000008000000000000000
|
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -165,7 +165,7 @@ namespace RandomX {
|
|||||||
static const uint8_t JMP = 0xe9;
|
static const uint8_t JMP = 0xe9;
|
||||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||||
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||||
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0f, 0x54, 0xe6 };
|
// Despite the name, this holds two back-to-back instructions:
//   45 0F 54 E5  ->  andps xmm12, xmm13   (xmm13 = mantissa mask)
//   45 0F 56 E6  ->  orps  xmm12, xmm14   (xmm14 = exponent 2**-240)
// i.e. keep only the mantissa bits of xmm12, then OR in the fixed exponent.
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
|
||||||
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
||||||
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
||||||
|
|
||||||
@ -556,8 +556,6 @@ namespace RandomX {
|
|||||||
emit(REX_ANDPS_XMM12);
|
emit(REX_ANDPS_XMM12);
|
||||||
emit(REX_DIVPD);
|
emit(REX_DIVPD);
|
||||||
emitByte(0xe4 + 8 * instr.dst);
|
emitByte(0xe4 + 8 * instr.dst);
|
||||||
emit(REX_MAXPD);
|
|
||||||
emitByte(0xe5 + 8 * instr.dst);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
|
void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
|
||||||
|
@ -22,7 +22,11 @@
|
|||||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||||
andps xmm4, xmm14
|
andps xmm4, xmm13
|
||||||
andps xmm5, xmm14
|
andps xmm5, xmm13
|
||||||
andps xmm6, xmm14
|
andps xmm6, xmm13
|
||||||
andps xmm7, xmm14
|
andps xmm7, xmm13
|
||||||
|
orps xmm4, xmm14
|
||||||
|
orps xmm5, xmm14
|
||||||
|
orps xmm6, xmm14
|
||||||
|
orps xmm7, xmm14
|
||||||
|
@ -8,10 +8,10 @@
|
|||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
pop rcx
|
pop rcx
|
||||||
mulpd xmm0, xmm4
|
xorpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
xorpd xmm1, xmm5
|
||||||
mulpd xmm2, xmm6
|
xorpd xmm2, xmm6
|
||||||
mulpd xmm3, xmm7
|
xorpd xmm3, xmm7
|
||||||
movapd xmmword ptr [rcx+0], xmm0
|
movapd xmmword ptr [rcx+0], xmm0
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
|
@ -32,8 +32,8 @@
|
|||||||
movapd xmm9, xmmword ptr [rcx+88]
|
movapd xmm9, xmmword ptr [rcx+88]
|
||||||
movapd xmm10, xmmword ptr [rcx+104]
|
movapd xmm10, xmmword ptr [rcx+104]
|
||||||
movapd xmm11, xmmword ptr [rcx+120]
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
movapd xmm13, xmmword ptr minDbl[rip]
|
movapd xmm13, xmmword ptr mantissaMask[rip]
|
||||||
movapd xmm14, xmmword ptr absMask[rip]
|
movapd xmm14, xmmword ptr exp240[rip]
|
||||||
movapd xmm15, xmmword ptr signMask[rip]
|
movapd xmm15, xmmword ptr scaleMask[rip]
|
||||||
|
|
||||||
jmp DECL(randomx_program_loop_begin)
|
jmp DECL(randomx_program_loop_begin)
|
@ -45,8 +45,8 @@
|
|||||||
movapd xmm9, xmmword ptr [rcx+88]
|
movapd xmm9, xmmword ptr [rcx+88]
|
||||||
movapd xmm10, xmmword ptr [rcx+104]
|
movapd xmm10, xmmword ptr [rcx+104]
|
||||||
movapd xmm11, xmmword ptr [rcx+120]
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
movapd xmm13, xmmword ptr [minDbl]
|
movapd xmm13, xmmword ptr [mantissaMask]
|
||||||
movapd xmm14, xmmword ptr [absMask]
|
movapd xmm14, xmmword ptr [exp240]
|
||||||
movapd xmm15, xmmword ptr [signMask]
|
movapd xmm15, xmmword ptr [scaleMask]
|
||||||
|
|
||||||
jmp randomx_program_loop_begin
|
jmp randomx_program_loop_begin
|
@ -1,6 +1,6 @@
|
|||||||
minDbl:
|
mantissaMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
|
||||||
absMask:
|
exp240:
|
||||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
|
||||||
signMask:
|
scaleMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
@ -52,9 +52,9 @@ executeProgram PROC
|
|||||||
; xmm10 -> "a2"
|
; xmm10 -> "a2"
|
||||||
; xmm11 -> "a3"
|
; xmm11 -> "a3"
|
||||||
; xmm12 -> temporary
|
; xmm12 -> temporary
|
||||||
; xmm13 -> DBL_MIN
|
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
|
||||||
; xmm14 -> absolute value mask
|
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
|
||||||
; xmm15 -> sign mask
|
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||||
|
|
||||||
; store callee-saved registers
|
; store callee-saved registers
|
||||||
push rbx
|
push rbx
|
||||||
@ -103,18 +103,18 @@ executeProgram PROC
|
|||||||
movapd xmm9, xmmword ptr [rcx+88]
|
movapd xmm9, xmmword ptr [rcx+88]
|
||||||
movapd xmm10, xmmword ptr [rcx+104]
|
movapd xmm10, xmmword ptr [rcx+104]
|
||||||
movapd xmm11, xmmword ptr [rcx+120]
|
movapd xmm11, xmmword ptr [rcx+120]
|
||||||
movapd xmm13, xmmword ptr [minDbl]
|
movapd xmm13, xmmword ptr [mantissaMask]
|
||||||
movapd xmm14, xmmword ptr [absMask]
|
movapd xmm14, xmmword ptr [exp240]
|
||||||
movapd xmm15, xmmword ptr [signMask]
|
movapd xmm15, xmmword ptr [scaleMask]
|
||||||
|
|
||||||
jmp program_begin
|
jmp program_begin
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
minDbl:
|
mantissaMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
|
||||||
absMask:
|
exp240:
|
||||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
|
||||||
signMask:
|
scaleMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
@ -145,10 +145,14 @@ program_begin:
|
|||||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||||
andps xmm4, xmm14
|
andps xmm4, xmm13
|
||||||
andps xmm5, xmm14
|
andps xmm5, xmm13
|
||||||
andps xmm6, xmm14
|
andps xmm6, xmm13
|
||||||
andps xmm7, xmm14
|
andps xmm7, xmm13
|
||||||
|
orps xmm4, xmm14
|
||||||
|
orps xmm5, xmm14
|
||||||
|
orps xmm6, xmm14
|
||||||
|
orps xmm7, xmm14
|
||||||
|
|
||||||
;# 256 instructions
|
;# 256 instructions
|
||||||
include program.inc
|
include program.inc
|
||||||
@ -181,10 +185,10 @@ IF 1
|
|||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
pop rcx
|
pop rcx
|
||||||
mulpd xmm0, xmm4
|
xorpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
xorpd xmm1, xmm5
|
||||||
mulpd xmm2, xmm6
|
xorpd xmm2, xmm6
|
||||||
mulpd xmm3, xmm7
|
xorpd xmm3, xmm7
|
||||||
movapd xmmword ptr [rcx+0], xmm0
|
movapd xmmword ptr [rcx+0], xmm0
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
|
@ -341,7 +341,7 @@ int main(int argc, char** argv) {
|
|||||||
std::cout << "Calculated result: ";
|
std::cout << "Calculated result: ";
|
||||||
result.print(std::cout);
|
result.print(std::cout);
|
||||||
if(programCount == 1000)
|
if(programCount == 1000)
|
||||||
std::cout << "Reference result: d3ae5a9365196ed48bb98ebfc3316498e29443ea7f056ecbd272f749c6af7730" << std::endl;
|
std::cout << "Reference result: e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl;
|
||||||
if (!miningMode) {
|
if (!miningMode) {
|
||||||
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
|
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user