mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Random dataset accesses - asm only
Initial support for large pages
This commit is contained in:
parent
bf8397b08d
commit
619bee5418
@ -28,6 +28,11 @@ namespace RandomX {
|
|||||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||||
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||||
|
|
||||||
|
static const char* regMx = "edi";
|
||||||
|
static const char* regIc = "ebp";
|
||||||
|
static const char* regStackBeginAddr = "rbx";
|
||||||
|
static const char* regScratchpadAddr = "rsi";
|
||||||
|
|
||||||
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
void AssemblyGeneratorX86::generateProgram(const void* seed) {
|
||||||
asmCode.str(std::string()); //clear
|
asmCode.str(std::string()); //clear
|
||||||
Pcg32 gen(seed);
|
Pcg32 gen(seed);
|
||||||
@ -48,7 +53,7 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
|
||||||
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
|
asmCode << "rx_i_" << i << ": ;" << instr.getName() << std::endl;
|
||||||
asmCode << "\tdec edi" << std::endl;
|
asmCode << "\tdec " << regIc << std::endl;
|
||||||
asmCode << "\tjz rx_finish" << std::endl;
|
asmCode << "\tjz rx_finish" << std::endl;
|
||||||
auto generator = engine[instr.opcode];
|
auto generator = engine[instr.opcode];
|
||||||
(this->*generator)(instr, i);
|
(this->*generator)(instr, i);
|
||||||
@ -56,54 +61,34 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::genar(Instruction& instr) {
|
void AssemblyGeneratorX86::genar(Instruction& instr) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||||
switch (instr.loca & 7)
|
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||||
|
switch (instr.loca & 3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
case 3:
|
asmCode << "\tcall rx_readint_l1" << std::endl;
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
return;
|
||||||
asmCode << "\tcall rx_read_dataset_r" << std::endl;
|
default: //3
|
||||||
return;
|
asmCode << "\tcall rx_readint_l2" << std::endl;
|
||||||
|
return;
|
||||||
case 4:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
|
|
||||||
return;
|
|
||||||
|
|
||||||
default:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
asmCode << "\tmov rax, qword ptr [rsi + rax * 8]" << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genaf(Instruction& instr) {
|
void AssemblyGeneratorX86::genaf(Instruction& instr) {
|
||||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||||
switch (instr.loca & 7)
|
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||||
|
switch (instr.loca & 3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
case 3:
|
asmCode << "\tcall rx_readfloat_l1" << std::endl;
|
||||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
return;
|
||||||
asmCode << "\tcall rx_read_dataset_f" << std::endl;
|
default: //3
|
||||||
return;
|
asmCode << "\tcall rx_readfloat_l2" << std::endl;
|
||||||
|
return;
|
||||||
case 4:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
|
||||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
|
|
||||||
return;
|
|
||||||
|
|
||||||
default:
|
|
||||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
|
||||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,9 +154,9 @@ namespace RandomX {
|
|||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -182,31 +167,31 @@ namespace RandomX {
|
|||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rax * 8], rcx" << std::endl;
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rcx" << std::endl;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) {
|
void AssemblyGeneratorX86::gencf(Instruction& instr) {
|
||||||
if(!alwaysLow)
|
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||||
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
const char* store = (instr.locc & 8) ? "movhpd" : "movlpd";
|
||||||
const char* store = (!alwaysLow && (instr.locc & 8)) ? "movhpd" : "movlpd";
|
|
||||||
switch (instr.locc & 7)
|
switch (instr.locc & 7)
|
||||||
{
|
{
|
||||||
case 4:
|
case 4:
|
||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 5:
|
case 5:
|
||||||
@ -215,11 +200,11 @@ namespace RandomX {
|
|||||||
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
|
||||||
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
asmCode << "\t" << store << " qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -454,15 +439,14 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
//asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
asmCode << "\tshl eax, 13" << std::endl;
|
asmCode << "\tshl eax, 13" << std::endl;
|
||||||
asmCode << "\tand rcx, -2048" << std::endl;
|
//asmCode << "\tand rcx, -2048" << std::endl;
|
||||||
asmCode << "\tand eax, 24576" << std::endl;
|
asmCode << "\tand eax, 24576" << std::endl;
|
||||||
asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl;
|
//asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||||
asmCode << "\tor eax, 40896" << std::endl;
|
asmCode << "\tor eax, 40896" << std::endl;
|
||||||
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
||||||
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
|
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
|
||||||
gencf(instr, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
|
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
|
||||||
@ -496,7 +480,7 @@ namespace RandomX {
|
|||||||
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
|
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
|
||||||
asmCode << "taken_call_" << i << ":" << std::endl;
|
asmCode << "taken_call_" << i << ":" << std::endl;
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
|
asmCode << "\tmov qword ptr [" << regScratchpadAddr << " + rdi * 8 + 262136], rax" << std::endl;
|
||||||
}
|
}
|
||||||
asmCode << "\tpush rax" << std::endl;
|
asmCode << "\tpush rax" << std::endl;
|
||||||
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
|
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
|
||||||
@ -504,7 +488,7 @@ namespace RandomX {
|
|||||||
|
|
||||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
asmCode << "\tcmp rsp, rbp" << std::endl;
|
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
||||||
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
||||||
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
|
@ -45,7 +45,7 @@ namespace RandomX {
|
|||||||
void genbr132(Instruction&);
|
void genbr132(Instruction&);
|
||||||
void genbf(Instruction&, const char*);
|
void genbf(Instruction&, const char*);
|
||||||
void gencr(Instruction&);
|
void gencr(Instruction&);
|
||||||
void gencf(Instruction&, bool);
|
void gencf(Instruction&);
|
||||||
|
|
||||||
void generateCode(Instruction&, int);
|
void generateCode(Instruction&, int);
|
||||||
|
|
||||||
|
@ -47,8 +47,8 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::execute() {
|
void CompiledVirtualMachine::execute() {
|
||||||
//executeProgram(reg, mem, scratchpad, readDataset);
|
executeProgram(reg, mem, scratchpad, readDataset);
|
||||||
compiler.getProgramFunc()(reg, mem, scratchpad);
|
//compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||||
#ifdef TRACEVM
|
#ifdef TRACEVM
|
||||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||||
|
@ -40,6 +40,6 @@ namespace RandomX {
|
|||||||
DatasetReadFunc readDataset;
|
DatasetReadFunc readDataset;
|
||||||
alignas(16) RegisterFile reg;
|
alignas(16) RegisterFile reg;
|
||||||
MemoryRegisters mem;
|
MemoryRegisters mem;
|
||||||
alignas(16) convertible_t scratchpad[ScratchpadLength];
|
alignas(64) convertible_t scratchpad[ScratchpadLength];
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include "dataset.hpp"
|
#include "dataset.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
|
#include "virtualMemory.hpp"
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
@ -161,12 +162,17 @@ namespace RandomX {
|
|||||||
template
|
template
|
||||||
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
||||||
|
|
||||||
void datasetAlloc(dataset_t& ds) {
|
void datasetAlloc(dataset_t& ds, bool largePages) {
|
||||||
if (sizeof(size_t) <= 4)
|
if (sizeof(size_t) <= 4)
|
||||||
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
||||||
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64);
|
if (largePages) {
|
||||||
if (ds.dataset == nullptr) {
|
ds.dataset = (uint8_t*)allocLargePagesMemory(DatasetSize);
|
||||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
}
|
||||||
|
else {
|
||||||
|
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, 64);
|
||||||
|
if (ds.dataset == nullptr) {
|
||||||
|
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ namespace RandomX {
|
|||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
||||||
|
|
||||||
void datasetAlloc(dataset_t& ds);
|
void datasetAlloc(dataset_t& ds, bool largePages);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
||||||
|
@ -15,19 +15,19 @@
|
|||||||
;# You should have received a copy of the GNU General Public License
|
;# You should have received a copy of the GNU General Public License
|
||||||
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
PUBLIC executeProgram
|
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
||||||
|
|
||||||
.code
|
PUBLIC executeProgram
|
||||||
|
|
||||||
executeProgram PROC
|
executeProgram PROC
|
||||||
; REGISTER ALLOCATION:
|
; REGISTER ALLOCATION:
|
||||||
; rax -> temporary
|
; rax -> temporary
|
||||||
; rbx -> MemoryRegisters& memory
|
; rbx -> beginning of VM stack
|
||||||
; rcx -> temporary
|
; rcx -> temporary
|
||||||
; rdx -> temporary
|
; rdx -> temporary
|
||||||
; rsi -> convertible_t& scratchpad
|
; rsi -> convertible_t& scratchpad
|
||||||
; rdi -> "ic" (instruction counter)
|
; rdi -> "mx"
|
||||||
; rbp -> beginning of VM stack
|
; rbp -> "ic"
|
||||||
; rsp -> end of VM stack
|
; rsp -> end of VM stack
|
||||||
; r8 -> "r0"
|
; r8 -> "r0"
|
||||||
; r9 -> "r1"
|
; r9 -> "r1"
|
||||||
@ -55,7 +55,8 @@ executeProgram PROC
|
|||||||
; | saved registers
|
; | saved registers
|
||||||
; |
|
; |
|
||||||
; v
|
; v
|
||||||
; [rbp] RegisterFile& registerFile
|
; [rbx+8] RegisterFile& registerFile
|
||||||
|
; [rbx+0] uint8_t* dataset
|
||||||
; |
|
; |
|
||||||
; |
|
; |
|
||||||
; | VM stack
|
; | VM stack
|
||||||
@ -80,17 +81,18 @@ executeProgram PROC
|
|||||||
movdqu xmmword ptr [rsp+0], xmm10
|
movdqu xmmword ptr [rsp+0], xmm10
|
||||||
|
|
||||||
; function arguments
|
; function arguments
|
||||||
push rcx ; RegisterFile& registerFile
|
push rcx ; RegisterFile& registerFile
|
||||||
mov rbx, rdx ; MemoryRegisters& memory
|
mov edi, dword ptr [rdx] ; "mx"
|
||||||
mov rsi, r8 ; convertible_t& scratchpad
|
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
||||||
push r9
|
push rax
|
||||||
|
mov rsi, r8 ; convertible_t* scratchpad
|
||||||
|
|
||||||
mov rbp, rsp ; beginning of VM stack
|
mov rbx, rsp ; beginning of VM stack
|
||||||
mov rdi, 1048577 ; number of VM instructions to execute + 1
|
mov ebp, 524289 ; number of VM instructions to execute + 1
|
||||||
|
|
||||||
xorps xmm10, xmm10
|
xorps xmm10, xmm10
|
||||||
cmpeqpd xmm10, xmm10
|
cmpeqpd xmm10, xmm10
|
||||||
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||||
|
|
||||||
; reset rounding mode
|
; reset rounding mode
|
||||||
mov dword ptr [rsp-8], 40896
|
mov dword ptr [rsp-8], 40896
|
||||||
@ -162,7 +164,7 @@ executeProgram PROC
|
|||||||
|
|
||||||
rx_finish:
|
rx_finish:
|
||||||
; unroll the stack
|
; unroll the stack
|
||||||
mov rsp, rbp
|
mov rsp, rbx
|
||||||
|
|
||||||
; save VM register values
|
; save VM register values
|
||||||
pop rcx
|
pop rcx
|
||||||
@ -202,57 +204,103 @@ rx_finish:
|
|||||||
pop rbx
|
pop rbx
|
||||||
|
|
||||||
; return
|
; return
|
||||||
ret 0
|
ret
|
||||||
|
|
||||||
|
TransformAddress MACRO reg32, reg64
|
||||||
|
;# Transforms the address in the register so that the transformed address
|
||||||
|
;# lies in a different cache line than the original address (mod 2^N).
|
||||||
|
;# This is done to prevent a load-store dependency.
|
||||||
|
;# There are 3 different transformations that can be used: x -> 9*x+C, x -> x+C, x -> x^C
|
||||||
|
lea reg32, [reg64+reg64*8+127] ;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
|
||||||
|
;lea reg32, [reg64-128] ;# C = all except -7 to +7
|
||||||
|
;xor reg32, -8 ;# C = all except 0 to 7
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
ReadMemoryRandom MACRO spmask, float
|
||||||
|
;# IN ecx = random 32-bit address
|
||||||
|
;# OUT rax = 64-bit integer return value
|
||||||
|
;# OUT xmm0 = 128-bit floating point return value
|
||||||
|
;# GLOBAL rbp = "ic" number of instructions until the end of the program
|
||||||
|
;# GLOBAL rbx = beginning of the VM stack; [rbx] holds the dataset pointer
|
||||||
|
;# GLOBAL rsi = address of the scratchpad
|
||||||
|
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
||||||
|
;# MODIFY rax (scratch; only an output in the integer variant), rcx, rdx, rdi ("mx" is updated)
|
||||||
|
LOCAL L_prefetch, L_read, L_return
|
||||||
|
mov eax, ebp
|
||||||
|
and al, 63
|
||||||
|
jz short L_prefetch ;# "ic" divisible by 64 -> prefetch
|
||||||
|
xor edx, edx
|
||||||
|
cmp al, 14
|
||||||
|
je short L_read ;# "ic" = 14 (mod 64) -> random read
|
||||||
|
cmovb edx, ecx ;# "ic" < 14 (mod 64) -> modify random read address
|
||||||
|
xor edi, edx
|
||||||
|
L_return:
|
||||||
|
and ecx, spmask ;# limit address to the specified scratchpad size
|
||||||
|
IF float
|
||||||
|
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||||
|
ELSE
|
||||||
|
mov rax, qword ptr [rsi+rcx*8]
|
||||||
|
ENDIF
|
||||||
|
ret
|
||||||
|
L_prefetch:
|
||||||
|
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||||
|
and edi, -64 ;# align "mx" to the start of a cache line
|
||||||
|
prefetchnta byte ptr [rax+rdi]
|
||||||
|
jmp short L_return
|
||||||
|
L_read:
|
||||||
|
push rcx
|
||||||
|
TransformAddress ecx, rcx ;# TransformAddress function
|
||||||
|
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||||
|
call rx_read_dataset
|
||||||
|
pop rcx
|
||||||
|
jmp short L_return
|
||||||
|
ENDM
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_readint_l1:
|
||||||
|
ReadMemoryRandom 2047, 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_readint_l2:
|
||||||
|
ReadMemoryRandom 32767, 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_readfloat_l1:
|
||||||
|
ReadMemoryRandom 2047, 1
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
rx_readfloat_l2:
|
||||||
|
ReadMemoryRandom 32767, 1
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
rx_read_dataset:
|
rx_read_dataset:
|
||||||
push r8
|
;# IN rcx = scratchpad index - must be divisible by 8
|
||||||
push r9
|
;# GLOBAL rbx = beginning of the VM stack; [rbx] holds the dataset pointer
|
||||||
push r10
|
;# GLOBAL rsi = address of the scratchpad
|
||||||
push r11
|
;# GLOBAL rdi = "mx" random 32-bit dataset address
|
||||||
mov rdx, rbx
|
;# MODIFY rax, rcx, rdx
|
||||||
movd qword ptr [rsp - 8], xmm1
|
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||||
movd qword ptr [rsp - 16], xmm2
|
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||||
sub rsp, 48
|
lea rax, [rax+rdi] ;# dataset cache line
|
||||||
call qword ptr [rbp]
|
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||||
add rsp, 48
|
xor qword ptr [rcx+0], rdx ;# XOR the dataset item with a scratchpad item, repeat for the rest of the cacheline
|
||||||
movd xmm2, qword ptr [rsp - 16]
|
mov rdx, qword ptr [rax+8]
|
||||||
movd xmm1, qword ptr [rsp - 8]
|
xor qword ptr [rcx+8], rdx
|
||||||
pop r11
|
mov rdx, qword ptr [rax+16]
|
||||||
pop r10
|
xor qword ptr [rcx+16], rdx
|
||||||
pop r9
|
mov rdx, qword ptr [rax+24]
|
||||||
pop r8
|
xor qword ptr [rcx+24], rdx
|
||||||
ret 0
|
mov rdx, qword ptr [rax+32]
|
||||||
|
xor qword ptr [rcx+32], rdx
|
||||||
rx_read_dataset_r:
|
mov rdx, qword ptr [rax+40]
|
||||||
mov edx, dword ptr [rbx] ; ma
|
xor qword ptr [rcx+40], rdx
|
||||||
mov rax, qword ptr [rbx+8] ; dataset
|
mov rdx, qword ptr [rax+48]
|
||||||
mov rax, qword ptr [rax+rdx]
|
xor qword ptr [rcx+48], rdx
|
||||||
add dword ptr [rbx], 8
|
mov rdx, qword ptr [rax+56]
|
||||||
xor ecx, dword ptr [rbx+4] ; mx
|
xor qword ptr [rcx+56], rdx
|
||||||
mov dword ptr [rbx+4], ecx
|
ret
|
||||||
test ecx, 0FFF8h
|
|
||||||
jne short rx_read_dataset_r_ret
|
|
||||||
and ecx, -8
|
|
||||||
mov dword ptr [rbx], ecx
|
|
||||||
mov rdx, qword ptr [rbx+8]
|
|
||||||
prefetcht0 byte ptr [rdx+rcx]
|
|
||||||
rx_read_dataset_r_ret:
|
|
||||||
ret 0
|
|
||||||
|
|
||||||
rx_read_dataset_f:
|
|
||||||
mov edx, dword ptr [rbx] ; ma
|
|
||||||
mov rax, qword ptr [rbx+8] ; dataset
|
|
||||||
cvtdq2pd xmm0, qword ptr [rax+rdx]
|
|
||||||
add dword ptr [rbx], 8
|
|
||||||
xor ecx, dword ptr [rbx+4] ; mx
|
|
||||||
mov dword ptr [rbx+4], ecx
|
|
||||||
test ecx, 0FFF8h
|
|
||||||
jne short rx_read_dataset_f_ret
|
|
||||||
and ecx, -8
|
|
||||||
mov dword ptr [rbx], ecx
|
|
||||||
prefetcht0 byte ptr [rax+rcx]
|
|
||||||
rx_read_dataset_f_ret:
|
|
||||||
ret 0
|
|
||||||
executeProgram ENDP
|
executeProgram ENDP
|
||||||
|
|
||||||
|
_RANDOMX_EXECUTE_PROGRAM ENDS
|
||||||
|
|
||||||
END
|
END
|
||||||
|
@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
bool softAes, lightClient, genAsm, compiled, help;
|
bool softAes, lightClient, genAsm, compiled, help, largePages;
|
||||||
int programCount, threadCount;
|
int programCount, threadCount;
|
||||||
readOption("--help", argc, argv, help);
|
readOption("--help", argc, argv, help);
|
||||||
|
|
||||||
@ -177,6 +177,7 @@ int main(int argc, char** argv) {
|
|||||||
readOption("--compiled", argc, argv, compiled);
|
readOption("--compiled", argc, argv, compiled);
|
||||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||||
readIntOption("--nonces", argc, argv, programCount, 1000);
|
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||||
|
readOption("--largePages", argc, argv, largePages);
|
||||||
|
|
||||||
if (genAsm) {
|
if (genAsm) {
|
||||||
generateAsm(programCount);
|
generateAsm(programCount);
|
||||||
@ -216,7 +217,7 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
RandomX::Cache* cache = dataset.cache;
|
RandomX::Cache* cache = dataset.cache;
|
||||||
RandomX::datasetAlloc(dataset);
|
RandomX::datasetAlloc(dataset, largePages);
|
||||||
if (threadCount > 1) {
|
if (threadCount > 1) {
|
||||||
auto perThread = RandomX::DatasetBlockCount / threadCount;
|
auto perThread = RandomX::DatasetBlockCount / threadCount;
|
||||||
auto remainder = RandomX::DatasetBlockCount % threadCount;
|
auto remainder = RandomX::DatasetBlockCount % threadCount;
|
||||||
|
2905
src/program.inc
2905
src/program.inc
File diff suppressed because it is too large
Load Diff
108
src/virtualMemory.cpp
Normal file
108
src/virtualMemory.cpp
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "virtualMemory.hpp"
|
||||||
|
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#else
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <mach/vm_statistics.h>
|
||||||
|
#endif
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::string getErrorMessage(const char* function) {
|
||||||
|
LPSTR messageBuffer = nullptr;
|
||||||
|
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
|
||||||
|
std::string message(messageBuffer, size);
|
||||||
|
LocalFree(messageBuffer);
|
||||||
|
return std::string(function) + std::string(": ") + message;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
|
||||||
|
HANDLE hToken;
|
||||||
|
TOKEN_PRIVILEGES tp;
|
||||||
|
BOOL status;
|
||||||
|
DWORD error;
|
||||||
|
|
||||||
|
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
|
||||||
|
throw std::runtime_error(getErrorMessage("OpenProcessToken"));
|
||||||
|
|
||||||
|
if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
|
||||||
|
throw std::runtime_error(getErrorMessage("LookupPrivilegeValue"));
|
||||||
|
|
||||||
|
tp.PrivilegeCount = 1;
|
||||||
|
|
||||||
|
if (bEnable)
|
||||||
|
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||||
|
else
|
||||||
|
tp.Privileges[0].Attributes = 0;
|
||||||
|
|
||||||
|
status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
|
||||||
|
|
||||||
|
error = GetLastError();
|
||||||
|
if (!status || (error != ERROR_SUCCESS))
|
||||||
|
throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges"));
|
||||||
|
|
||||||
|
if (!CloseHandle(hToken))
|
||||||
|
throw std::runtime_error(getErrorMessage("CloseHandle"));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Allocates a block of memory that is readable, writable and executable.
//
//   bytes - requested size of the block in bytes.
//
// Returns a pointer to the new mapping. Throws std::runtime_error if the
// operating system refuses the allocation.
void* allocExecutableMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	// Reserve and commit in a single step.
	mem = VirtualAlloc(nullptr, bytes, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
#else
	// Map exactly the size the caller asked for (the previous code referenced
	// an identifier that is not declared in this file instead of `bytes`).
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocExecutableMemory - mmap failed");
#endif
	return mem;
}
|
||||||
|
|
||||||
|
// Allocates a readable and writable block of memory backed by large pages.
//
//   bytes - requested size of the block in bytes. On Windows the size is
//           rounded up to a multiple of the large-page minimum.
//
// Returns a pointer to the new block. Throws std::runtime_error if the
// allocation fails, or (on Windows) if the required privilege cannot be
// enabled or large pages are not supported.
void* allocLargePagesMemory(size_t bytes) {
	void* mem;
#ifdef _WIN32
	setPrivilege("SeLockMemoryPrivilege", 1);
	// GetLargePageMinimum returns 0 when large pages are not supported.
	const size_t pageMinimum = GetLargePageMinimum();
	if (pageMinimum == 0)
		throw std::runtime_error("allocLargePagesMemory - large pages are not supported");
	// MEM_LARGE_PAGES requires the size to be a multiple of the large-page minimum.
	const size_t remainder = bytes % pageMinimum;
	if (remainder != 0)
		bytes += pageMinimum - remainder;
	mem = VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
	if (mem == nullptr)
		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else
#ifdef __APPLE__
	// For anonymous mappings the superpage flag is passed in the fd argument.
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#else
	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
#endif
	if (mem == MAP_FAILED)
		throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
	return mem;
}
|
23
src/virtualMemory.hpp
Normal file
23
src/virtualMemory.hpp
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once

#include <cstddef> // size_t

// Allocates a readable, writable and executable block of the requested
// number of bytes. Throws std::runtime_error if the allocation fails.
void* allocExecutableMemory(size_t);

// Allocates a readable and writable block of the requested number of bytes
// backed by large pages. Throws std::runtime_error if the allocation fails.
void* allocLargePagesMemory(size_t);
|
Loading…
Reference in New Issue
Block a user