mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 13:48:54 +00:00
4 scratchpad segments
This commit is contained in:
parent
16db607025
commit
bd0dba88a8
@ -33,7 +33,7 @@ namespace RandomX {
|
|||||||
mem.ds = ds;
|
mem.ds = ds;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) {
|
void CompiledVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
|
||||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,6 +42,11 @@ namespace RandomX {
|
|||||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||||
*(((uint32_t*)&reg) + i) = gen();
|
*(((uint32_t*)&reg) + i) = gen();
|
||||||
}
|
}
|
||||||
|
FPINIT();
|
||||||
|
for (int i = 0; i < RegistersCount; ++i) {
|
||||||
|
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
||||||
|
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
||||||
|
}
|
||||||
compiler.generateProgram(gen);
|
compiler.generateProgram(gen);
|
||||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
||||||
mem.mx = *(((uint32_t*)seed) + 5);
|
mem.mx = *(((uint32_t*)seed) + 5);
|
||||||
|
@ -39,7 +39,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
CompiledVirtualMachine();
|
CompiledVirtualMachine();
|
||||||
void setDataset(dataset_t ds) override;
|
void setDataset(dataset_t ds) override;
|
||||||
void initializeScratchpad(uint32_t index) override;
|
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
|
||||||
void initializeProgram(const void* seed) override;
|
void initializeProgram(const void* seed) override;
|
||||||
virtual void execute() override;
|
virtual void execute() override;
|
||||||
void* getProgram() {
|
void* getProgram() {
|
||||||
|
@ -69,7 +69,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) {
|
void InterpretedVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
|
||||||
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
|
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
|
||||||
if (asyncWorker) {
|
if (asyncWorker) {
|
||||||
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;
|
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;
|
||||||
|
@ -42,7 +42,7 @@ namespace RandomX {
|
|||||||
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
||||||
~InterpretedVirtualMachine();
|
~InterpretedVirtualMachine();
|
||||||
void setDataset(dataset_t ds) override;
|
void setDataset(dataset_t ds) override;
|
||||||
void initializeScratchpad(uint32_t index) override;
|
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
|
||||||
void initializeProgram(const void* seed) override;
|
void initializeProgram(const void* seed) override;
|
||||||
void execute() override;
|
void execute() override;
|
||||||
const Program& getProgam() {
|
const Program& getProgam() {
|
||||||
|
@ -182,17 +182,17 @@ namespace RandomX {
|
|||||||
emitByte(0xe8); //xor rbp, rax
|
emitByte(0xe8); //xor rbp, rax
|
||||||
}
|
}
|
||||||
emitByte(0x25); //and eax,
|
emitByte(0x25); //and eax,
|
||||||
if (instr.loca & 15) {
|
//if (instr.loca & 15) {
|
||||||
if (instr.loca & 3) {
|
if (instr.loca & 3) {
|
||||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
|
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
|
||||||
}
|
}
|
||||||
}
|
/*}
|
||||||
else {
|
else {
|
||||||
emit(ScratchpadL3 - 1); //whole scratchpad
|
emit(ScratchpadL3 - 1); //whole scratchpad
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genar(Instruction& instr) {
|
void JitCompilerX86::genar(Instruction& instr) {
|
||||||
@ -271,7 +271,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
||||||
if (instr.locc & 16) { //write to register
|
if (instr.locc & 8) { //write to register
|
||||||
emit(uint16_t(0x8b4c)); //mov
|
emit(uint16_t(0x8b4c)); //mov
|
||||||
if (rax) {
|
if (rax) {
|
||||||
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
||||||
@ -281,17 +281,17 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (instr.locc & 15) {
|
//if (instr.locc & 7) {
|
||||||
if (instr.locc & 3) {
|
if (instr.locc & 1) {
|
||||||
scratchpadStoreR(instr, ScratchpadL1, rax);
|
scratchpadStoreR(instr, ScratchpadL1, rax);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
scratchpadStoreR(instr, ScratchpadL2, rax);
|
scratchpadStoreR(instr, ScratchpadL2, rax);
|
||||||
}
|
}
|
||||||
}
|
/*}
|
||||||
else {
|
else {
|
||||||
scratchpadStoreR(instr, ScratchpadL3, rax);
|
scratchpadStoreR(instr, ScratchpadL3, rax);
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,18 +319,18 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
emit(uint16_t(0x280f)); //movaps
|
emit(uint16_t(0x280f)); //movaps
|
||||||
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
||||||
if (instr.locc & 16) { //write to scratchpad
|
if (instr.locc & 8) { //write to scratchpad
|
||||||
if (instr.locc & 15) {
|
//if (instr.locc & 7) {
|
||||||
if (instr.locc & 3) { //C.LOC.W
|
if (instr.locc & 1) { //C.LOC.W
|
||||||
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
|
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
|
||||||
}
|
}
|
||||||
}
|
//}
|
||||||
else {
|
/*else {
|
||||||
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
|
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,11 +39,16 @@ namespace RandomX {
|
|||||||
mem.ds.dataset = nullptr;
|
mem.ds.dataset = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VirtualMachine::getResult(void* out) {
|
void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* out) {
|
||||||
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
|
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
|
||||||
alignas(16) uint64_t smallState[smallStateLength];
|
alignas(16) uint64_t smallState[smallStateLength];
|
||||||
memcpy(smallState, &reg, sizeof(RegisterFile));
|
memcpy(smallState, &reg, sizeof(RegisterFile));
|
||||||
hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24);
|
if (scratchpadSize > 0) {
|
||||||
|
hashAes1Rx4<false>(scratchpad, scratchpadSize, smallState + 24);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memset(smallState + 24, 0, 64);
|
||||||
|
}
|
||||||
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
|
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -28,10 +28,13 @@ namespace RandomX {
|
|||||||
VirtualMachine();
|
VirtualMachine();
|
||||||
virtual ~VirtualMachine() {}
|
virtual ~VirtualMachine() {}
|
||||||
virtual void setDataset(dataset_t ds) = 0;
|
virtual void setDataset(dataset_t ds) = 0;
|
||||||
virtual void initializeScratchpad(uint32_t index) = 0;
|
virtual void initializeScratchpad(uint8_t* scratchpad, int32_t index) = 0;
|
||||||
|
void setScratchpad(void* ptr) {
|
||||||
|
scratchpad = (convertible_t*)ptr;
|
||||||
|
}
|
||||||
virtual void initializeProgram(const void* seed) = 0;
|
virtual void initializeProgram(const void* seed) = 0;
|
||||||
virtual void execute() = 0;
|
virtual void execute() = 0;
|
||||||
void getResult(void*);
|
void getResult(void*, size_t, void*);
|
||||||
const RegisterFile& getRegisterFile() {
|
const RegisterFile& getRegisterFile() {
|
||||||
return reg;
|
return reg;
|
||||||
}
|
}
|
||||||
@ -39,6 +42,6 @@ namespace RandomX {
|
|||||||
DatasetReadFunc readDataset;
|
DatasetReadFunc readDataset;
|
||||||
alignas(16) RegisterFile reg;
|
alignas(16) RegisterFile reg;
|
||||||
MemoryRegisters mem;
|
MemoryRegisters mem;
|
||||||
alignas(64) convertible_t scratchpad[ScratchpadLength];
|
convertible_t* scratchpad;
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -12,12 +12,12 @@
|
|||||||
mov qword ptr [rcx+40], r13
|
mov qword ptr [rcx+40], r13
|
||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
movdqa xmmword ptr [rcx+64], xmm8
|
movapd xmmword ptr [rcx+64], xmm8
|
||||||
movdqa xmmword ptr [rcx+80], xmm9
|
movapd xmmword ptr [rcx+80], xmm9
|
||||||
movdqa xmmword ptr [rcx+96], xmm2
|
movapd xmmword ptr [rcx+96], xmm2
|
||||||
movdqa xmmword ptr [rcx+112], xmm3
|
movapd xmmword ptr [rcx+112], xmm3
|
||||||
lea rcx, [rcx+64]
|
lea rcx, [rcx+64]
|
||||||
movdqa xmmword ptr [rcx+64], xmm4
|
movapd xmmword ptr [rcx+64], xmm4
|
||||||
movdqa xmmword ptr [rcx+80], xmm5
|
movapd xmmword ptr [rcx+80], xmm5
|
||||||
movdqa xmmword ptr [rcx+96], xmm6
|
movapd xmmword ptr [rcx+96], xmm6
|
||||||
movdqa xmmword ptr [rcx+112], xmm7
|
movapd xmmword ptr [rcx+112], xmm7
|
@ -1,14 +1,10 @@
|
|||||||
mov rdi, rsp ;# beginning of VM stack
|
mov rdi, rsp ;# beginning of VM stack
|
||||||
mov ebx, 1048577 ;# number of VM instructions to execute + 1
|
mov ebx, 262145 ;# number of VM instructions to execute + 1
|
||||||
|
|
||||||
xorps xmm10, xmm10
|
xorps xmm10, xmm10
|
||||||
cmpeqpd xmm10, xmm10
|
cmpeqpd xmm10, xmm10
|
||||||
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||||
|
|
||||||
;# reset rounding mode
|
|
||||||
mov dword ptr [rsp-8], 40896
|
|
||||||
ldmxcsr dword ptr [rsp-8]
|
|
||||||
|
|
||||||
;# load integer registers
|
;# load integer registers
|
||||||
mov r8, qword ptr [rcx+0]
|
mov r8, qword ptr [rcx+0]
|
||||||
mov r9, qword ptr [rcx+8]
|
mov r9, qword ptr [rcx+8]
|
||||||
@ -19,45 +15,13 @@
|
|||||||
mov r14, qword ptr [rcx+48]
|
mov r14, qword ptr [rcx+48]
|
||||||
mov r15, qword ptr [rcx+56]
|
mov r15, qword ptr [rcx+56]
|
||||||
|
|
||||||
;# initialize floating point registers
|
;# load floating point registers
|
||||||
xorps xmm8, xmm8
|
movapd xmm8, xmmword ptr [rcx+64]
|
||||||
cvtsi2sd xmm8, qword ptr [rcx+72]
|
movapd xmm9, xmmword ptr [rcx+80]
|
||||||
pslldq xmm8, 8
|
movapd xmm2, xmmword ptr [rcx+96]
|
||||||
cvtsi2sd xmm8, qword ptr [rcx+64]
|
movapd xmm3, xmmword ptr [rcx+112]
|
||||||
|
|
||||||
xorps xmm9, xmm9
|
|
||||||
cvtsi2sd xmm9, qword ptr [rcx+88]
|
|
||||||
pslldq xmm9, 8
|
|
||||||
cvtsi2sd xmm9, qword ptr [rcx+80]
|
|
||||||
|
|
||||||
xorps xmm2, xmm2
|
|
||||||
cvtsi2sd xmm2, qword ptr [rcx+104]
|
|
||||||
pslldq xmm2, 8
|
|
||||||
cvtsi2sd xmm2, qword ptr [rcx+96]
|
|
||||||
|
|
||||||
xorps xmm3, xmm3
|
|
||||||
cvtsi2sd xmm3, qword ptr [rcx+120]
|
|
||||||
pslldq xmm3, 8
|
|
||||||
cvtsi2sd xmm3, qword ptr [rcx+112]
|
|
||||||
|
|
||||||
lea rcx, [rcx+64]
|
lea rcx, [rcx+64]
|
||||||
|
movapd xmm4, xmmword ptr [rcx+64]
|
||||||
xorps xmm4, xmm4
|
movapd xmm5, xmmword ptr [rcx+80]
|
||||||
cvtsi2sd xmm4, qword ptr [rcx+72]
|
movapd xmm6, xmmword ptr [rcx+96]
|
||||||
pslldq xmm4, 8
|
movapd xmm7, xmmword ptr [rcx+112]
|
||||||
cvtsi2sd xmm4, qword ptr [rcx+64]
|
|
||||||
|
|
||||||
xorps xmm5, xmm5
|
|
||||||
cvtsi2sd xmm5, qword ptr [rcx+88]
|
|
||||||
pslldq xmm5, 8
|
|
||||||
cvtsi2sd xmm5, qword ptr [rcx+80]
|
|
||||||
|
|
||||||
xorps xmm6, xmm6
|
|
||||||
cvtsi2sd xmm6, qword ptr [rcx+104]
|
|
||||||
pslldq xmm6, 8
|
|
||||||
cvtsi2sd xmm6, qword ptr [rcx+96]
|
|
||||||
|
|
||||||
xorps xmm7, xmm7
|
|
||||||
cvtsi2sd xmm7, qword ptr [rcx+120]
|
|
||||||
pslldq xmm7, 8
|
|
||||||
cvtsi2sd xmm7, qword ptr [rcx+112]
|
|
||||||
|
26
src/main.cpp
26
src/main.cpp
@ -130,7 +130,7 @@ void generateAsm(int nonce) {
|
|||||||
asmX86.printCode(std::cout);
|
asmX86.printCode(std::cout);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
|
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
||||||
uint64_t hash[4];
|
uint64_t hash[4];
|
||||||
unsigned char blockTemplate[] = {
|
unsigned char blockTemplate[] = {
|
||||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||||
@ -146,11 +146,20 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||||||
*noncePtr = nonce;
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
||||||
vm->initializeScratchpad(spIndex);
|
vm->initializeScratchpad(scratchpad, spIndex);
|
||||||
vm->initializeProgram(hash);
|
vm->initializeProgram(hash);
|
||||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||||
|
vm->setScratchpad(scratchpad + 3 * RandomX::ScratchpadSize / 4);
|
||||||
vm->execute();
|
vm->execute();
|
||||||
vm->getResult(hash);
|
vm->setScratchpad(scratchpad + 2 * RandomX::ScratchpadSize / 4);
|
||||||
|
vm->execute();
|
||||||
|
vm->getResult(nullptr, 0, hash);
|
||||||
|
vm->initializeProgram(hash);
|
||||||
|
vm->setScratchpad(scratchpad + 1 * RandomX::ScratchpadSize / 4);
|
||||||
|
vm->execute();
|
||||||
|
vm->setScratchpad(scratchpad + 0 * RandomX::ScratchpadSize / 4);
|
||||||
|
vm->execute();
|
||||||
|
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
|
||||||
result.xorWith(hash);
|
result.xorWith(hash);
|
||||||
if (RandomX::trace) {
|
if (RandomX::trace) {
|
||||||
std::cout << "Nonce: " << nonce << " ";
|
std::cout << "Nonce: " << nonce << " ";
|
||||||
@ -274,18 +283,25 @@ int main(int argc, char** argv) {
|
|||||||
vm->setDataset(dataset);
|
vm->setDataset(dataset);
|
||||||
vms.push_back(vm);
|
vms.push_back(vm);
|
||||||
}
|
}
|
||||||
|
uint8_t* scratchpadMem;
|
||||||
|
if (largePages) {
|
||||||
|
scratchpadMem = (uint8_t*)allocLargePagesMemory(RandomX::ScratchpadSize * (threadCount + 1) / 2);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
|
||||||
|
}
|
||||||
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
||||||
sw.restart();
|
sw.restart();
|
||||||
if (threadCount > 1) {
|
if (threadCount > 1) {
|
||||||
for (int i = 0; i < vms.size(); ++i) {
|
for (int i = 0; i < vms.size(); ++i) {
|
||||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < threads.size(); ++i) {
|
for (int i = 0; i < threads.size(); ++i) {
|
||||||
threads[i].join();
|
threads[i].join();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0);
|
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
|
||||||
if (compiled)
|
if (compiled)
|
||||||
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
|
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user