Updated JIT compiler and assembly generator for new int -> float conversion

This commit is contained in:
tevador 2019-02-24 17:24:06 +01:00
parent 790b382eda
commit d9bc6cfeda
10 changed files with 56 additions and 49 deletions

View File

@ -40,6 +40,8 @@ For floating point instructions, the destination can be a group F or group E reg
Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8-byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`. Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8-byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`.
Memory operands for group E registers are loaded as described above, then their sign bit is cleared and their exponent field is set to `0x30F` (a biased exponent that corresponds to a scale of 2<sup>-240</sup>), while the mantissa bits are left unchanged.
|frequency|instruction|dst|src|operation| |frequency|instruction|dst|src|operation|
|-|-|-|-|-| |-|-|-|-|-|
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| |8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
@ -58,8 +60,7 @@ This instruction negates the number and multiplies it by <code>2<sup>x</sup></co
The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`. The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`.
#### Denormal and NaN values #### Denormal and NaN values
Due to restrictions on the values of the floating point registers, no operation results in `NaN`. Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number.
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
#### Rounding #### Rounding
All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register: All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register:

View File

@ -385,9 +385,9 @@ namespace RandomX {
instr.dst %= 4; instr.dst %= 4;
genAddressReg(instr); genAddressReg(instr);
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
asmCode << "\tandps xmm12, xmm14" << std::endl; asmCode << "\tandps xmm12, xmm13" << std::endl;
asmCode << "\torps xmm12, xmm14" << std::endl;
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl; asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
traceflt(instr); traceflt(instr);
} }

View File

@ -73,9 +73,9 @@ namespace RandomX {
; xmm10 -> "a2" ; xmm10 -> "a2"
; xmm11 -> "a3" ; xmm11 -> "a3"
; xmm12 -> temporary ; xmm12 -> temporary
; xmm13 -> DBL_MIN ; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff ; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> sign mask 0x80000000000000008000000000000000 ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/ */
@ -165,7 +165,7 @@ namespace RandomX {
static const uint8_t JMP = 0xe9; static const uint8_t JMP = 0xe9;
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0f, 0x54, 0xe6 }; static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
@ -556,8 +556,6 @@ namespace RandomX {
emit(REX_ANDPS_XMM12); emit(REX_ANDPS_XMM12);
emit(REX_DIVPD); emit(REX_DIVPD);
emitByte(0xe4 + 8 * instr.dst); emitByte(0xe4 + 8 * instr.dst);
emit(REX_MAXPD);
emitByte(0xe5 + 8 * instr.dst);
} }
void JitCompilerX86::h_FSQRT_R(Instruction& instr) { void JitCompilerX86::h_FSQRT_R(Instruction& instr) {

View File

@ -22,7 +22,11 @@
cvtdq2pd xmm5, qword ptr [rcx+40] cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48] cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56] cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14 andps xmm4, xmm13
andps xmm5, xmm14 andps xmm5, xmm13
andps xmm6, xmm14 andps xmm6, xmm13
andps xmm7, xmm14 andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14

View File

@ -8,10 +8,10 @@
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
pop rcx pop rcx
mulpd xmm0, xmm4 xorpd xmm0, xmm4
mulpd xmm1, xmm5 xorpd xmm1, xmm5
mulpd xmm2, xmm6 xorpd xmm2, xmm6
mulpd xmm3, xmm7 xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0 movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1 movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2 movapd xmmword ptr [rcx+32], xmm2

View File

@ -32,8 +32,8 @@
movapd xmm9, xmmword ptr [rcx+88] movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104] movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120] movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr minDbl[rip] movapd xmm13, xmmword ptr mantissaMask[rip]
movapd xmm14, xmmword ptr absMask[rip] movapd xmm14, xmmword ptr exp240[rip]
movapd xmm15, xmmword ptr signMask[rip] movapd xmm15, xmmword ptr scaleMask[rip]
jmp DECL(randomx_program_loop_begin) jmp DECL(randomx_program_loop_begin)

View File

@ -45,8 +45,8 @@
movapd xmm9, xmmword ptr [rcx+88] movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104] movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120] movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl] movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [absMask] movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [signMask] movapd xmm15, xmmword ptr [scaleMask]
jmp randomx_program_loop_begin jmp randomx_program_loop_begin

View File

@ -1,6 +1,6 @@
minDbl: mantissaMask:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0 db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
absMask: exp240:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
signMask: scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129

View File

@ -52,9 +52,9 @@ executeProgram PROC
; xmm10 -> "a2" ; xmm10 -> "a2"
; xmm11 -> "a3" ; xmm11 -> "a3"
; xmm12 -> temporary ; xmm12 -> temporary
; xmm13 -> DBL_MIN ; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> absolute value mask ; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> sign mask ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
; store callee-saved registers ; store callee-saved registers
push rbx push rbx
@ -103,18 +103,18 @@ executeProgram PROC
movapd xmm9, xmmword ptr [rcx+88] movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104] movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120] movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl] movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [absMask] movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [signMask] movapd xmm15, xmmword ptr [scaleMask]
jmp program_begin jmp program_begin
ALIGN 64 ALIGN 64
minDbl: mantissaMask:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0 db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
absMask: exp240:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
signMask: scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
ALIGN 64 ALIGN 64
@ -145,10 +145,14 @@ program_begin:
cvtdq2pd xmm5, qword ptr [rcx+40] cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48] cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56] cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14 andps xmm4, xmm13
andps xmm5, xmm14 andps xmm5, xmm13
andps xmm6, xmm14 andps xmm6, xmm13
andps xmm7, xmm14 andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
;# 256 instructions ;# 256 instructions
include program.inc include program.inc
@ -181,10 +185,10 @@ IF 1
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
pop rcx pop rcx
mulpd xmm0, xmm4 xorpd xmm0, xmm4
mulpd xmm1, xmm5 xorpd xmm1, xmm5
mulpd xmm2, xmm6 xorpd xmm2, xmm6
mulpd xmm3, xmm7 xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0 movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1 movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2 movapd xmmword ptr [rcx+32], xmm2

View File

@ -341,7 +341,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if(programCount == 1000) if(programCount == 1000)
std::cout << "Reference result: d3ae5a9365196ed48bb98ebfc3316498e29443ea7f056ecbd272f749c6af7730" << std::endl; std::cout << "Reference result: e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
} }