mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 05:38:54 +00:00
Optimizations to reduce code size under 32K
This commit is contained in:
parent
b6d654291f
commit
b71e0eec65
@ -28,9 +28,10 @@ namespace RandomX {
|
||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
|
||||
static const char* regMx = "rdi";
|
||||
static const char* regIc = "ebp";
|
||||
static const char* regStackBeginAddr = "rbx";
|
||||
static const char* regMx = "rbp";
|
||||
static const char* regIc = "ebx";
|
||||
static const char* regIc8 = "bl";
|
||||
static const char* regStackBeginAddr = "rdi";
|
||||
static const char* regScratchpadAddr = "rsi";
|
||||
|
||||
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
||||
@ -62,7 +63,7 @@ namespace RandomX {
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest " << regIc << ", 63" << std::endl;
|
||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
@ -71,12 +72,14 @@ namespace RandomX {
|
||||
case 2:
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
@ -88,7 +91,7 @@ namespace RandomX {
|
||||
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest " << regIc << ", 63" << std::endl;
|
||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
@ -97,12 +100,14 @@ namespace RandomX {
|
||||
case 2:
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
@ -200,7 +205,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::gencf(Instruction& instr) {
|
||||
void AssemblyGeneratorX86::gencf(Instruction& instr, bool move = true) {
|
||||
if(move)
|
||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
const char* store = (instr.locc & 8) ? "movhpd" : "movlpd";
|
||||
switch (instr.locc & 7)
|
||||
@ -451,8 +457,8 @@ namespace RandomX {
|
||||
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
|
||||
genaf(instr, i);
|
||||
asmCode << "\tandps xmm0, xmm10" << std::endl;
|
||||
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
|
||||
gencf(instr);
|
||||
asmCode << "\tsqrtpd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
gencf(instr, false);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||
|
@ -45,7 +45,7 @@ namespace RandomX {
|
||||
void genbr132(Instruction&);
|
||||
void genbf(Instruction&, const char*);
|
||||
void gencr(Instruction&);
|
||||
void gencf(Instruction&);
|
||||
void gencf(Instruction&, bool);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
|
||||
|
@ -22,12 +22,12 @@ PUBLIC executeProgram
|
||||
executeProgram PROC
|
||||
; REGISTER ALLOCATION:
|
||||
; rax -> temporary
|
||||
; rbx -> beginning of VM stack
|
||||
; rbx -> "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> convertible_t* scratchpad
|
||||
; rdi -> "mx"
|
||||
; rbp -> "ic"
|
||||
; rdi -> beginning of VM stack
|
||||
; rbp -> "ma", "mx"
|
||||
; rsp -> end of VM stack
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
@ -82,13 +82,13 @@ executeProgram PROC
|
||||
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov rdi, qword ptr [rdx] ; "mx", "ma"
|
||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
|
||||
mov rbx, rsp ; beginning of VM stack
|
||||
mov ebp, 1048577 ; number of VM instructions to execute + 1
|
||||
mov rdi, rsp ; beginning of VM stack
|
||||
mov ebx, 1048577 ; number of VM instructions to execute + 1
|
||||
|
||||
xorps xmm10, xmm10
|
||||
cmpeqpd xmm10, xmm10
|
||||
@ -164,7 +164,7 @@ executeProgram PROC
|
||||
|
||||
rx_finish:
|
||||
; unwind the stack
|
||||
mov rsp, rbx
|
||||
mov rsp, rdi
|
||||
|
||||
; save VM register values
|
||||
pop rcx
|
||||
@ -211,30 +211,29 @@ TransformAddress MACRO reg32, reg64
|
||||
;# lies in a different cache line than the original address (mod 2^N).
|
||||
;# This is done to prevent a load-store dependency.
|
||||
;# There are 3 different transformations that can be used: x -> 9*x+C, x -> x+C, x -> x^C
|
||||
lea reg32, [reg64+reg64*8+127] ;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
|
||||
;lea reg32, [reg64-128] ;# C = all except -7 to +7
|
||||
;lea reg32, [reg64+reg64*8+127] ;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
|
||||
db 64
|
||||
add reg32, -39 ;# C = all except -7 to +7
|
||||
;xor reg32, -8 ;# C = all except 0 to 7
|
||||
ENDM
|
||||
|
||||
ReadMemoryRandom MACRO spmask
|
||||
;# IN ecx = random 32-bit address
|
||||
;# OUT rax = 64-bit integer return value
|
||||
;# OUT xmm0 = 128-bit floating point return value
|
||||
;# GLOBAL rbx = "ic" number of instructions until the end of the program
|
||||
;# GLOBAL rbx = address of the dataset address
|
||||
;# GLOBAL rdi = address of the dataset address
|
||||
;# GLOBAL rsi = address of the scratchpad
|
||||
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
|
||||
;# GLOBAL rbp = low 32 bits = "mx", high 32 bits = "ma"
|
||||
;# MODIFY rcx, rdx
|
||||
push rcx ;# preserve ecx
|
||||
TransformAddress ecx, rcx ;# TransformAddress function
|
||||
mov rax, qword ptr [rdi] ;# load the dataset address
|
||||
xor rbp, rcx ;# modify "mx"
|
||||
; prefetch cacheline "mx"
|
||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||
and rdi, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, edi ;# edx = mx
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rax+rdx]
|
||||
; read cacheline "ma"
|
||||
ror rdi, 32 ;# swap "ma" and "mx"
|
||||
mov edx, edi ;# edx = ma
|
||||
push rcx
|
||||
TransformAddress ecx, rcx ;# TransformAddress function
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||
lea rax, [rax+rdx] ;# dataset cache line
|
||||
@ -254,7 +253,7 @@ ReadMemoryRandom MACRO spmask
|
||||
xor qword ptr [rcx+48], rdx
|
||||
mov rdx, qword ptr [rax+56]
|
||||
xor qword ptr [rcx+56], rdx
|
||||
pop rcx
|
||||
pop rcx ;# restore ecx
|
||||
ret
|
||||
ENDM
|
||||
|
||||
|
2806
src/program.inc
2806
src/program.inc
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user