mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Random dataset accesses - asm only
Initial support for large pages
This commit is contained in:
parent
bf8397b08d
commit
619bee5418
@ -28,6 +28,11 @@ namespace RandomX {
|
||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
|
||||
static const char* regMx = "edi";
|
||||
static const char* regIc = "ebp";
|
||||
static const char* regStackBeginAddr = "rbx";
|
||||
static const char* regScratchpadAddr = "rsi";
|
||||
|
||||
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
||||
asmCode.str(std::string()); //clear
|
||||
Pcg32 gen(seed);
|
||||
@ -48,7 +53,7 @@ namespace RandomX {
|
||||
|
||||
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
||||
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
|
||||
asmCode << "\tdec edi" << std::endl;
|
||||
asmCode << "\tdec " << regIc << std::endl;
|
||||
asmCode << "\tjz rx_finish" << std::endl;
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr, i);
|
||||
@ -56,54 +61,34 @@ namespace RandomX {
|
||||
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
switch (instr.loca & 7)
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tcall rx_read_dataset_r" << std::endl;
|
||||
return;
|
||||
|
||||
case 4:
|
||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
|
||||
return;
|
||||
|
||||
default:
|
||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
|
||||
return;
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_readint_l1" << std::endl;
|
||||
return;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_readint_l2" << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void AssemblyGeneratorX86::genaf(Instruction& instr) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
switch (instr.loca & 7)
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tcall rx_read_dataset_f" << std::endl;
|
||||
return;
|
||||
|
||||
case 4:
|
||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
|
||||
return;
|
||||
|
||||
default:
|
||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
|
||||
return;
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_readfloat_l1" << std::endl;
|
||||
return;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_readfloat_l2" << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,9 +154,9 @@ namespace RandomX {
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
||||
if (trace) {
|
||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
|
||||
}
|
||||
return;
|
||||
|
||||
@ -182,31 +167,31 @@ namespace RandomX {
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
||||
if (trace) {
|
||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
||||
if (trace) {
|
||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) {
|
||||
if(!alwaysLow)
|
||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
const char* store = (!alwaysLow && (instr.locc & 8)) ? "movhpd" : "movlpd";
|
||||
void AssemblyGeneratorX86::gencf(Instruction& instr) {
|
||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
const char* store = (instr.locc & 8) ? "movhpd" : "movlpd";
|
||||
switch (instr.locc & 7)
|
||||
{
|
||||
case 4:
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
@ -215,11 +200,11 @@ namespace RandomX {
|
||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
break;
|
||||
}
|
||||
if (trace) {
|
||||
asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -454,15 +439,14 @@ namespace RandomX {
|
||||
|
||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
//asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tshl eax, 13" << std::endl;
|
||||
asmCode << "\tand rcx, -2048" << std::endl;
|
||||
//asmCode << "\tand rcx, -2048" << std::endl;
|
||||
asmCode << "\tand eax, 24576" << std::endl;
|
||||
asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl;
|
||||
//asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||
asmCode << "\tor eax, 40896" << std::endl;
|
||||
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
||||
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
|
||||
gencf(instr, true);
|
||||
}
|
||||
|
||||
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
|
||||
@ -496,7 +480,7 @@ namespace RandomX {
|
||||
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
|
||||
asmCode << "taken_call_" << i << ":" << std::endl;
|
||||
if (trace) {
|
||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
|
||||
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
|
||||
}
|
||||
asmCode << "\tpush rax" << std::endl;
|
||||
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
|
||||
@ -504,7 +488,7 @@ namespace RandomX {
|
||||
|
||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
asmCode << "\tcmp rsp, rbp" << std::endl;
|
||||
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
||||
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
||||
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
||||
gencr(instr);
|
||||
|
@ -45,7 +45,7 @@ namespace RandomX {
|
||||
void genbr132(Instruction&);
|
||||
void genbf(Instruction&, const char*);
|
||||
void gencr(Instruction&);
|
||||
void gencf(Instruction&, bool);
|
||||
void gencf(Instruction&);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
|
||||
|
@ -47,8 +47,8 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::execute() {
|
||||
//executeProgram(reg, mem, scratchpad, readDataset);
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||
executeProgram(reg, mem, scratchpad, readDataset);
|
||||
//compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||
#ifdef TRACEVM
|
||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||
|
@ -40,6 +40,6 @@ namespace RandomX {
|
||||
DatasetReadFunc readDataset;
|
||||
alignas(16) RegisterFile reg;
|
||||
MemoryRegisters mem;
|
||||
alignas(16) convertible_t scratchpad[ScratchpadLength];
|
||||
alignas(64) convertible_t scratchpad[ScratchpadLength];
|
||||
};
|
||||
}
|
@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "dataset.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "virtualMemory.hpp"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <wmmintrin.h>
|
||||
@ -161,12 +162,17 @@ namespace RandomX {
|
||||
template
|
||||
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
||||
|
||||
void datasetAlloc(dataset_t& ds) {
|
||||
void datasetAlloc(dataset_t& ds, bool largePages) {
|
||||
if (sizeof(size_t) <= 4)
|
||||
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
||||
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64);
|
||||
if (ds.dataset == nullptr) {
|
||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
||||
if (largePages) {
|
||||
ds.dataset = (uint8_t*)allocLargePagesMemory(DatasetSize);
|
||||
}
|
||||
else {
|
||||
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, 64);
|
||||
if (ds.dataset == nullptr) {
|
||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ namespace RandomX {
|
||||
template<bool softAes>
|
||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
||||
|
||||
void datasetAlloc(dataset_t& ds);
|
||||
void datasetAlloc(dataset_t& ds, bool largePages);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
||||
|
@ -15,19 +15,19 @@
|
||||
;# You should have received a copy of the GNU General Public License
|
||||
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
PUBLIC executeProgram
|
||||
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
||||
|
||||
.code
|
||||
PUBLIC executeProgram
|
||||
|
||||
executeProgram PROC
|
||||
; REGISTER ALLOCATION:
|
||||
; rax -> temporary
|
||||
; rbx -> MemoryRegisters& memory
|
||||
; rbx -> beginning of VM stack
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> convertible_t& scratchpad
|
||||
; rdi -> "ic" (instruction counter)
|
||||
; rbp -> beginning of VM stack
|
||||
; rdi -> "mx"
|
||||
; rbp -> "ic"
|
||||
; rsp -> end of VM stack
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
@ -55,7 +55,8 @@ executeProgram PROC
|
||||
; | saved registers
|
||||
; |
|
||||
; v
|
||||
; [rbp] RegisterFile& registerFile
|
||||
; [rbx+8] RegisterFile& registerFile
|
||||
; [rbx+0] uint8_t* dataset
|
||||
; |
|
||||
; |
|
||||
; | VM stack
|
||||
@ -80,17 +81,18 @@ executeProgram PROC
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov rbx, rdx ; MemoryRegisters& memory
|
||||
mov rsi, r8 ; convertible_t& scratchpad
|
||||
push r9
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov edi, dword ptr [rdx] ; "mx"
|
||||
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
|
||||
mov rbp, rsp ; beginning of VM stack
|
||||
mov rdi, 1048577 ; number of VM instructions to execute + 1
|
||||
mov rbx, rsp ; beginning of VM stack
|
||||
mov ebp, 524289 ; number of VM instructions to execute + 1
|
||||
|
||||
xorps xmm10, xmm10
|
||||
cmpeqpd xmm10, xmm10
|
||||
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||
|
||||
; reset rounding mode
|
||||
mov dword ptr [rsp-8], 40896
|
||||
@ -162,7 +164,7 @@ executeProgram PROC
|
||||
|
||||
rx_finish:
|
||||
; unroll the stack
|
||||
mov rsp, rbp
|
||||
mov rsp, rbx
|
||||
|
||||
; save VM register values
|
||||
pop rcx
|
||||
@ -202,57 +204,103 @@ rx_finish:
|
||||
pop rbx
|
||||
|
||||
; return
|
||||
ret 0
|
||||
ret
|
||||
|
||||
TransformAddress MACRO reg32, reg64
|
||||
;# Transforms the address in the register so that the transformed address
|
||||
;# lies in a different cache line than the original address (mod 2^N).
|
||||
;# This is done to prevent a load-store dependency.
|
||||
;# There are 3 different transformations that can be used: x -> 9*x+C, x -> x+C, x -> x^C
|
||||
lea reg32, [reg64+reg64*8+127] ;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
|
||||
;lea reg32, [reg64-128] ;# C = all except -7 to +7
|
||||
;xor reg32, -8 ;# C = all except 0 to 7
|
||||
ENDM
|
||||
|
||||
ReadMemoryRandom MACRO spmask, float
|
||||
;# IN ecx = random 32-bit address
|
||||
;# OUT rax = 64-bit integer return value
|
||||
;# OUT xmm0 = 128-bit floating point return value
|
||||
;# GLOBAL rbp = "ic" number of instructions until the end of the program
|
||||
;# GLOBAL rbx = address of the dataset address
|
||||
;# GLOBAL rsi = address of the scratchpad
|
||||
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
||||
;# MODIFY rcx, rdx
|
||||
LOCAL L_prefetch, L_read, L_return
|
||||
mov eax, ebp
|
||||
and al, 63
|
||||
jz short L_prefetch ;# "ic" divisible by 64 -> prefetch
|
||||
xor edx, edx
|
||||
cmp al, 14
|
||||
je short L_read ;# "ic" = 14 (mod 64) -> random read
|
||||
cmovb edx, ecx ;# "ic" < 14 (mod 64) -> modify random read address
|
||||
xor edi, edx
|
||||
L_return:
|
||||
and ecx, spmask ;# limit address to the specified scratchpad size
|
||||
IF float
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
ELSE
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
ENDIF
|
||||
ret
|
||||
L_prefetch:
|
||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||
and edi, -64 ;# align "mx" to the start of a cache line
|
||||
prefetchnta byte ptr [rax+rdi]
|
||||
jmp short L_return
|
||||
L_read:
|
||||
push rcx
|
||||
TransformAddress ecx, rcx ;# TransformAddress function
|
||||
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||
call rx_read_dataset
|
||||
pop rcx
|
||||
jmp short L_return
|
||||
ENDM
|
||||
|
||||
ALIGN 64
|
||||
rx_readint_l1:
|
||||
ReadMemoryRandom 2047, 0
|
||||
|
||||
ALIGN 64
|
||||
rx_readint_l2:
|
||||
ReadMemoryRandom 32767, 0
|
||||
|
||||
ALIGN 64
|
||||
rx_readfloat_l1:
|
||||
ReadMemoryRandom 2047, 1
|
||||
|
||||
ALIGN 64
|
||||
rx_readfloat_l2:
|
||||
ReadMemoryRandom 32767, 1
|
||||
|
||||
ALIGN 64
|
||||
rx_read_dataset:
|
||||
push r8
|
||||
push r9
|
||||
push r10
|
||||
push r11
|
||||
mov rdx, rbx
|
||||
movd qword ptr [rsp - 8], xmm1
|
||||
movd qword ptr [rsp - 16], xmm2
|
||||
sub rsp, 48
|
||||
call qword ptr [rbp]
|
||||
add rsp, 48
|
||||
movd xmm2, qword ptr [rsp - 16]
|
||||
movd xmm1, qword ptr [rsp - 8]
|
||||
pop r11
|
||||
pop r10
|
||||
pop r9
|
||||
pop r8
|
||||
ret 0
|
||||
|
||||
rx_read_dataset_r:
|
||||
mov edx, dword ptr [rbx] ; ma
|
||||
mov rax, qword ptr [rbx+8] ; dataset
|
||||
mov rax, qword ptr [rax+rdx]
|
||||
add dword ptr [rbx], 8
|
||||
xor ecx, dword ptr [rbx+4] ; mx
|
||||
mov dword ptr [rbx+4], ecx
|
||||
test ecx, 0FFF8h
|
||||
jne short rx_read_dataset_r_ret
|
||||
and ecx, -8
|
||||
mov dword ptr [rbx], ecx
|
||||
mov rdx, qword ptr [rbx+8]
|
||||
prefetcht0 byte ptr [rdx+rcx]
|
||||
rx_read_dataset_r_ret:
|
||||
ret 0
|
||||
|
||||
rx_read_dataset_f:
|
||||
mov edx, dword ptr [rbx] ; ma
|
||||
mov rax, qword ptr [rbx+8] ; dataset
|
||||
cvtdq2pd xmm0, qword ptr [rax+rdx]
|
||||
add dword ptr [rbx], 8
|
||||
xor ecx, dword ptr [rbx+4] ; mx
|
||||
mov dword ptr [rbx+4], ecx
|
||||
test ecx, 0FFF8h
|
||||
jne short rx_read_dataset_f_ret
|
||||
and ecx, -8
|
||||
mov dword ptr [rbx], ecx
|
||||
prefetcht0 byte ptr [rax+rcx]
|
||||
rx_read_dataset_f_ret:
|
||||
ret 0
|
||||
;# IN rcx = scratchpad index - must be divisible by 8
|
||||
;# GLOBAL rbx = address of the dataset address
|
||||
;# GLOBAL rsi = address of the scratchpad
|
||||
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
||||
;# MODIFY rax, rcx, rdx
|
||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||
lea rax, [rax+rdi] ;# dataset cache line
|
||||
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||
xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
|
||||
mov rdx, qword ptr [rax+8]
|
||||
xor qword ptr [rcx+8], rdx
|
||||
mov rdx, qword ptr [rax+16]
|
||||
xor qword ptr [rcx+16], rdx
|
||||
mov rdx, qword ptr [rax+24]
|
||||
xor qword ptr [rcx+24], rdx
|
||||
mov rdx, qword ptr [rax+32]
|
||||
xor qword ptr [rcx+32], rdx
|
||||
mov rdx, qword ptr [rax+40]
|
||||
xor qword ptr [rcx+40], rdx
|
||||
mov rdx, qword ptr [rax+48]
|
||||
xor qword ptr [rcx+48], rdx
|
||||
mov rdx, qword ptr [rax+56]
|
||||
xor qword ptr [rcx+56], rdx
|
||||
ret
|
||||
executeProgram ENDP
|
||||
|
||||
_RANDOMX_EXECUTE_PROGRAM ENDS
|
||||
|
||||
END
|
||||
|
@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, lightClient, genAsm, compiled, help;
|
||||
bool softAes, lightClient, genAsm, compiled, help, largePages;
|
||||
int programCount, threadCount;
|
||||
readOption("--help", argc, argv, help);
|
||||
|
||||
@ -177,6 +177,7 @@ int main(int argc, char** argv) {
|
||||
readOption("--compiled", argc, argv, compiled);
|
||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||
readOption("--largePages", argc, argv, largePages);
|
||||
|
||||
if (genAsm) {
|
||||
generateAsm(programCount);
|
||||
@ -216,7 +217,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
else {
|
||||
RandomX::Cache* cache = dataset.cache;
|
||||
RandomX::datasetAlloc(dataset);
|
||||
RandomX::datasetAlloc(dataset, largePages);
|
||||
if (threadCount > 1) {
|
||||
auto perThread = RandomX::DatasetBlockCount / threadCount;
|
||||
auto remainder = RandomX::DatasetBlockCount % threadCount;
|
||||
|
2905
src/program.inc
2905
src/program.inc
File diff suppressed because it is too large
Load Diff
108
src/virtualMemory.cpp
Normal file
108
src/virtualMemory.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "virtualMemory.hpp"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
#include <mach/vm_statistics.h>
|
||||
#endif
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
std::string getErrorMessage(const char* function) {
|
||||
LPSTR messageBuffer = nullptr;
|
||||
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
|
||||
std::string message(messageBuffer, size);
|
||||
LocalFree(messageBuffer);
|
||||
return std::string(function) + std::string(": ") + message;
|
||||
}
|
||||
|
||||
void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
|
||||
HANDLE hToken;
|
||||
TOKEN_PRIVILEGES tp;
|
||||
BOOL status;
|
||||
DWORD error;
|
||||
|
||||
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
|
||||
throw std::runtime_error(getErrorMessage("OpenProcessToken"));
|
||||
|
||||
if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
|
||||
throw std::runtime_error(getErrorMessage("LookupPrivilegeValue"));
|
||||
|
||||
tp.PrivilegeCount = 1;
|
||||
|
||||
if (bEnable)
|
||||
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||
else
|
||||
tp.Privileges[0].Attributes = 0;
|
||||
|
||||
status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
|
||||
|
||||
error = GetLastError();
|
||||
if (!status || (error != ERROR_SUCCESS))
|
||||
throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges"));
|
||||
|
||||
if (!CloseHandle(hToken))
|
||||
throw std::runtime_error(getErrorMessage("CloseHandle"));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Allocates `bytes` bytes of anonymous memory that is readable, writable and
// executable.
//
// Windows: VirtualAlloc with PAGE_EXECUTE_READWRITE.
// Other platforms: private anonymous mmap with PROT_READ | PROT_WRITE | PROT_EXEC.
//
// Returns the base address of the new region.
// Throws std::runtime_error if the operating system refuses the allocation.
void* allocExecutableMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	// Reserve and commit in one step.
	mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
#else
	// Map exactly the requested size (previously an unrelated CodeSize constant was used).
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocExecutableMemory - mmap failed");
#endif
	return mem;
}
|
||||
|
||||
// Allocates at least `bytes` bytes of read/write memory backed by large (huge) pages.
//
// Windows: enables SeLockMemoryPrivilege for the process, then calls
//          VirtualAlloc with MEM_LARGE_PAGES. The size is rounded up to a
//          multiple of GetLargePageMinimum(), as that flag requires.
// macOS:   anonymous mmap with VM_FLAGS_SUPERPAGE_SIZE_2MB passed in the fd argument.
// Other:   anonymous mmap with MAP_HUGETLB | MAP_POPULATE.
//
// Returns the base address of the new region.
// Throws std::runtime_error if large pages are unavailable or the allocation fails.
void* allocLargePagesMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	// GetLargePageMinimum returns 0 when the processor does not support large pages.
	const SIZE_T pageMinimum = GetLargePageMinimum();
	if (pageMinimum == 0)
		throw std::runtime_error("allocLargePagesMemory - large pages are not supported");
	setPrivilege("SeLockMemoryPrivilege", 1);
	// MEM_LARGE_PAGES requires the size to be a multiple of the large-page minimum.
	const SIZE_T roundedBytes = (bytes + pageMinimum - 1) / pageMinimum * pageMinimum;
	mem = VirtualAlloc(NULL, roundedBytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else
#ifdef __APPLE__
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#else
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
#endif
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
	return mem;
}
|
23
src/virtualMemory.hpp
Normal file
23
src/virtualMemory.hpp
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>

// Allocates the given number of bytes of readable, writable and executable
// memory; throws std::runtime_error on failure.
void* allocExecutableMemory(std::size_t);

// Allocates the given number of bytes of read/write memory backed by large
// pages; throws std::runtime_error on failure.
void* allocLargePagesMemory(std::size_t);
|
Loading…
Reference in New Issue
Block a user