mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-11 09:38:52 +00:00
Merged load/store of integer and FP registers
This commit is contained in:
parent
8f2abd6c05
commit
20eb549725
src
JitCompilerX86-static.SJitCompilerX86-static.asmJitCompilerX86-static.hppJitCompilerX86.cpp
asm
program_load_int.incprogram_loop_load.incprogram_loop_store.incprogram_prologue_linux.incprogram_prologue_load.incprogram_prologue_win64.incprogram_read_dataset.incprogram_store_flt.incprogram_store_int.inc
executeProgram-win64.asm@ -27,13 +27,11 @@
|
||||
#define DECL(x) x
|
||||
#endif
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_loop_begin)
|
||||
.global DECL(randomx_program_load_int)
|
||||
.global DECL(randomx_program_load_flt)
|
||||
.global DECL(randomx_program_loop_begin)
|
||||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_store_int)
|
||||
.global DECL(randomx_program_store_flt)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_program_end)
|
||||
@ -48,14 +46,11 @@ DECL(randomx_program_prologue):
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_loop_begin):
|
||||
DECL(randomx_program_loop_begin):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_load_int):
|
||||
#include "asm/program_load_int.inc"
|
||||
|
||||
DECL(randomx_program_load_flt):
|
||||
#include "asm/program_load_flt.inc"
|
||||
DECL(randomx_program_loop_load):
|
||||
#include "asm/program_loop_load.inc"
|
||||
|
||||
DECL(randomx_program_start):
|
||||
nop
|
||||
@ -63,11 +58,8 @@ DECL(randomx_program_start):
|
||||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_store_int):
|
||||
#include "asm/program_store_int.inc"
|
||||
|
||||
DECL(randomx_program_store_flt):
|
||||
#include "asm/program_store_flt.inc"
|
||||
DECL(randomx_program_loop_store):
|
||||
#include "asm/program_loop_store.inc"
|
||||
|
||||
DECL(randomx_program_loop_end):
|
||||
nop
|
||||
|
@ -20,13 +20,11 @@ IFDEF RAX
|
||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_loop_begin
|
||||
PUBLIC randomx_program_load_int
|
||||
PUBLIC randomx_program_load_flt
|
||||
PUBLIC randomx_program_loop_begin
|
||||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_store_int
|
||||
PUBLIC randomx_program_store_flt
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_program_end
|
||||
@ -40,17 +38,13 @@ ALIGN 64
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_loop_begin PROC
|
||||
randomx_program_loop_begin PROC
|
||||
nop
|
||||
randomx_loop_begin ENDP
|
||||
randomx_program_loop_begin ENDP
|
||||
|
||||
randomx_program_load_int PROC
|
||||
include asm/program_load_int.inc
|
||||
randomx_program_load_int ENDP
|
||||
|
||||
randomx_program_load_flt PROC
|
||||
include asm/program_load_flt.inc
|
||||
randomx_program_load_flt ENDP
|
||||
randomx_program_loop_load PROC
|
||||
include asm/program_loop_load.inc
|
||||
randomx_program_loop_load ENDP
|
||||
|
||||
randomx_program_start PROC
|
||||
nop
|
||||
@ -60,13 +54,9 @@ randomx_program_read_dataset PROC
|
||||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_store_int PROC
|
||||
include asm/program_store_int.inc
|
||||
randomx_program_store_int ENDP
|
||||
|
||||
randomx_program_store_flt PROC
|
||||
include asm/program_store_flt.inc
|
||||
randomx_program_store_flt ENDP
|
||||
randomx_program_loop_store PROC
|
||||
include asm/program_loop_store.inc
|
||||
randomx_program_loop_store ENDP
|
||||
|
||||
randomx_program_loop_end PROC
|
||||
nop
|
||||
|
@ -19,13 +19,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
extern "C" {
|
||||
void randomx_program_prologue();
|
||||
void randomx_loop_begin();
|
||||
void randomx_program_load_int();
|
||||
void randomx_program_load_flt();
|
||||
void randomx_program_loop_begin();
|
||||
void randomx_program_loop_load();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_store_int();
|
||||
void randomx_program_store_flt();
|
||||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_program_end();
|
||||
|
@ -94,13 +94,11 @@ namespace RandomX {
|
||||
#include "JitCompilerX86-static.hpp"
|
||||
|
||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_loop_begin;
|
||||
const uint8_t* codeLoadInt = (uint8_t*)&randomx_program_load_int;
|
||||
const uint8_t* codeLoadFlt = (uint8_t*)&randomx_program_load_flt;
|
||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeStoreInt = (uint8_t*)&randomx_program_store_int;
|
||||
const uint8_t* codeStoreFlt = (uint8_t*)&randomx_program_store_flt;
|
||||
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||
@ -108,11 +106,9 @@ namespace RandomX {
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
|
||||
|
||||
const int32_t loadIntSize = codeLoadFlt - codeLoadInt;
|
||||
const int32_t loadFltSize = codeProgamStart - codeLoadFlt;
|
||||
const int32_t readDatasetSize = codeStoreInt - codeReadDataset;
|
||||
const int32_t storeIntSize = codeStoreFlt - codeStoreInt;
|
||||
const int32_t storeFltSize = codeLoopEnd - codeStoreFlt;
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeLoopStore - codeReadDataset;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
|
||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||
|
||||
@ -179,6 +175,7 @@ namespace RandomX {
|
||||
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
|
||||
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
||||
static const uint8_t JMP = 0xe9;
|
||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos - prologueSize;
|
||||
@ -204,18 +201,16 @@ namespace RandomX {
|
||||
addressRegisters >>= 1;
|
||||
int readReg2 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
int writeReg1 = 4 + (addressRegisters & 1);
|
||||
int readReg3 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
int writeReg2 = 6 + (addressRegisters & 1);
|
||||
int readReg4 = 6 + (addressRegisters & 1);
|
||||
codePos = prologueSize;
|
||||
emit(REX_XOR_EAX);
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + readReg1);
|
||||
memcpy(code + codePos, codeLoadInt, loadIntSize);
|
||||
codePos += loadIntSize;
|
||||
emit(REX_XOR_EAX);
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + readReg2);
|
||||
memcpy(code + codePos, codeLoadFlt, loadFltSize);
|
||||
codePos += loadFltSize;
|
||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||
codePos += loopLoadSize;
|
||||
Instruction instr;
|
||||
for (unsigned i = 0; i < ProgramLength; ++i) {
|
||||
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
|
||||
@ -226,19 +221,13 @@ namespace RandomX {
|
||||
generateCode(instr);
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + readReg1);
|
||||
emitByte(0xc0 + readReg3);
|
||||
emit(REX_XOR_EAX);
|
||||
emitByte(0xc0 + readReg2);
|
||||
emitByte(0xc0 + readReg4);
|
||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||
codePos += readDatasetSize;
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + writeReg1);
|
||||
memcpy(code + codePos, codeStoreInt, storeIntSize);
|
||||
codePos += storeIntSize;
|
||||
emit(REX_XOR_EAX);
|
||||
emitByte(0xc0 + writeReg2);
|
||||
memcpy(code + codePos, codeStoreFlt, storeFltSize);
|
||||
codePos += storeFltSize;
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
emit(SUB_EBX);
|
||||
emit(JNZ);
|
||||
emit32(prologueSize - codePos - 4);
|
||||
|
@ -1,10 +0,0 @@
|
||||
and eax, 1048512
|
||||
lea rcx, [rsi+rax]
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
@ -1,5 +1,19 @@
|
||||
mov rdx, rax
|
||||
and eax, 1048512
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
ror rdx, 32
|
||||
and edx, 1048512
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
18
src/asm/program_loop_store.inc
Normal file
18
src/asm/program_loop_store.inc
Normal file
@ -0,0 +1,18 @@
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rcx
|
||||
mulpd xmm0, xmm4
|
||||
mulpd xmm1, xmm5
|
||||
mulpd xmm2, xmm6
|
||||
mulpd xmm3, xmm7
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
@ -11,10 +11,9 @@
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rcx, rdi
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov eax, ebp ;# "mx"
|
||||
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
mov rsi, rdx ;# convertible_t* scratchpad
|
||||
|
||||
#include "program_prologue_load.inc"
|
||||
|
||||
jmp DECL(randomx_loop_begin)
|
||||
jmp DECL(randomx_program_loop_begin)
|
@ -1,3 +1,5 @@
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
|
@ -23,11 +23,10 @@
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||
mov eax, ebp ; "mx"
|
||||
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
mov rbx, r9 ; loop counter
|
||||
|
||||
include program_prologue_load.inc
|
||||
|
||||
jmp randomx_loop_begin
|
||||
jmp randomx_program_loop_begin
|
@ -1,4 +1,5 @@
|
||||
xor rbp, rax ;# modify "mx"
|
||||
xor eax, eax
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
|
@ -1,11 +0,0 @@
|
||||
and eax, 1048512
|
||||
lea rcx, [rsi+rax]
|
||||
mulpd xmm0, xmm4
|
||||
mulpd xmm1, xmm5
|
||||
mulpd xmm2, xmm6
|
||||
mulpd xmm3, xmm7
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
|
@ -1,10 +0,0 @@
|
||||
and eax, 1048512
|
||||
lea rcx, [rsi+rax]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
@ -118,8 +118,11 @@ signMask:
|
||||
|
||||
ALIGN 64
|
||||
program_begin:
|
||||
xor eax, r8d ;# read address register 1
|
||||
xor rax, r8 ;# read address register 1
|
||||
xor rax, r9
|
||||
mov rdx, rax
|
||||
and eax, 1048512
|
||||
push rax
|
||||
lea rcx, [rsi+rax]
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
@ -129,9 +132,10 @@ program_begin:
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
xor eax, r9d ;# read address register 2
|
||||
and eax, 1048512
|
||||
lea rcx, [rsi+rax]
|
||||
ror rdx, 32
|
||||
and edx, 1048512
|
||||
push rdx
|
||||
lea rcx, [rsi+rdx]
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
@ -165,8 +169,7 @@ program_begin:
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
mov eax, r12d ;# write address register 1
|
||||
and eax, 1048512
|
||||
pop rax
|
||||
lea rcx, [rsi+rax]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
@ -176,8 +179,7 @@ program_begin:
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
xor eax, r13d ;# write address register 2
|
||||
and eax, 1048512
|
||||
pop rax
|
||||
lea rcx, [rsi+rax]
|
||||
mulpd xmm0, xmm4
|
||||
mulpd xmm1, xmm5
|
||||
@ -187,6 +189,7 @@ program_begin:
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
xor eax, eax
|
||||
dec ebx
|
||||
jnz program_begin
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user