mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 05:38:54 +00:00
Combined prefetch + read into a single step
This commit is contained in:
parent
4189e4ebc6
commit
6519fed4d1
@ -19,13 +19,24 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
//#define TRACEVM
|
//#define TRACEVM
|
||||||
|
#include <new>
|
||||||
#include "VirtualMachine.hpp"
|
#include "VirtualMachine.hpp"
|
||||||
#include "JitCompilerX86.hpp"
|
#include "JitCompilerX86.hpp"
|
||||||
|
#include "intrinPortable.h"
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
class CompiledVirtualMachine : public VirtualMachine {
|
class CompiledVirtualMachine : public VirtualMachine {
|
||||||
public:
|
public:
|
||||||
|
void* operator new(size_t size) {
|
||||||
|
void* ptr = _mm_malloc(size, 64);
|
||||||
|
if (ptr == nullptr)
|
||||||
|
throw std::bad_alloc();
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void operator delete(void* ptr) {
|
||||||
|
_mm_free(ptr);
|
||||||
|
}
|
||||||
CompiledVirtualMachine(bool softAes);
|
CompiledVirtualMachine(bool softAes);
|
||||||
void setDataset(dataset_t ds, bool light = false) override;
|
void setDataset(dataset_t ds, bool light = false) override;
|
||||||
void initializeProgram(const void* seed) override;
|
void initializeProgram(const void* seed) override;
|
||||||
|
@ -223,17 +223,12 @@ ReadMemoryRandom MACRO spmask, float
|
|||||||
;# GLOBAL rbp = "ic" number of instructions until the end of the program
|
;# GLOBAL rbp = "ic" number of instructions until the end of the program
|
||||||
;# GLOBAL rbx = address of the dataset address
|
;# GLOBAL rbx = address of the dataset address
|
||||||
;# GLOBAL rsi = address of the scratchpad
|
;# GLOBAL rsi = address of the scratchpad
|
||||||
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
|
||||||
;# MODIFY rcx, rdx
|
;# MODIFY rcx, rdx
|
||||||
LOCAL L_prefetch, L_read, L_return
|
LOCAL L_prefetch_read, L_return
|
||||||
mov eax, ebp
|
test ebp, 63
|
||||||
and al, 63
|
jz short L_prefetch_read ;# "ic" divisible by 64 -> prefetch + read
|
||||||
jz short L_prefetch ;# "ic" divisible by 64 -> prefetch
|
xor rdi, rcx ;# randomize "mx"
|
||||||
xor edx, edx
|
|
||||||
cmp al, 14
|
|
||||||
je short L_read ;# "ic" = 14 (mod 64) -> random read
|
|
||||||
cmovb edx, ecx ;# "ic" < 14 (mod 64) -> modify random read address
|
|
||||||
xor edi, edx
|
|
||||||
L_return:
|
L_return:
|
||||||
and ecx, spmask ;# limit address to the specified scratchpad size
|
and ecx, spmask ;# limit address to the specified scratchpad size
|
||||||
IF float
|
IF float
|
||||||
@ -242,12 +237,15 @@ ELSE
|
|||||||
mov rax, qword ptr [rsi+rcx*8]
|
mov rax, qword ptr [rsi+rcx*8]
|
||||||
ENDIF
|
ENDIF
|
||||||
ret
|
ret
|
||||||
L_prefetch:
|
L_prefetch_read:
|
||||||
|
; prefetch cacheline "mx"
|
||||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||||
and edi, -64 ;# align "mx" to the start of a cache line
|
and rdi, -64 ;# align "mx" to the start of a cache line
|
||||||
prefetchnta byte ptr [rax+rdi]
|
mov edx, edi ;# edx = mx
|
||||||
jmp short L_return
|
prefetchnta byte ptr [rax+rdx]
|
||||||
L_read:
|
; read cacheline "ma"
|
||||||
|
ror rdi, 32 ;# swap "ma" and "mx"
|
||||||
|
mov edx, edi ;# edx = ma
|
||||||
push rcx
|
push rcx
|
||||||
TransformAddress ecx, rcx ;# TransformAddress function
|
TransformAddress ecx, rcx ;# TransformAddress function
|
||||||
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||||
@ -274,14 +272,13 @@ ReadMemoryRandom 32767, 1
|
|||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
rx_read_dataset:
|
rx_read_dataset:
|
||||||
;# IN rcx = scratchpad index - must be divisible by 8
|
;# IN rax = dataset address
|
||||||
;# GLOBAL rbx = address of the dataset address
|
;# IN ecx = scratchpad index - must be divisible by 8
|
||||||
|
;# IN edx = dataset index - must be divisible by 64
|
||||||
;# GLOBAL rsi = address of the scratchpad
|
;# GLOBAL rsi = address of the scratchpad
|
||||||
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
|
||||||
;# MODIFY rax, rcx, rdx
|
;# MODIFY rax, rcx, rdx
|
||||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
|
||||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||||
lea rax, [rax+rdi] ;# dataset cache line
|
lea rax, [rax+rdx] ;# dataset cache line
|
||||||
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||||
xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
|
xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
|
||||||
mov rdx, qword ptr [rax+8]
|
mov rdx, qword ptr [rax+8]
|
||||||
|
Loading…
Reference in New Issue
Block a user