mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
4 scratchpad segments
This commit is contained in:
parent
16db607025
commit
bd0dba88a8
@ -33,7 +33,7 @@ namespace RandomX {
|
||||
mem.ds = ds;
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
void CompiledVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
|
||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||
}
|
||||
|
||||
@ -42,6 +42,11 @@ namespace RandomX {
|
||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||
*(((uint32_t*)&reg) + i) = gen();
|
||||
}
|
||||
FPINIT();
|
||||
for (int i = 0; i < RegistersCount; ++i) {
|
||||
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
||||
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
||||
}
|
||||
compiler.generateProgram(gen);
|
||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
||||
mem.mx = *(((uint32_t*)seed) + 5);
|
||||
|
@ -39,7 +39,7 @@ namespace RandomX {
|
||||
}
|
||||
CompiledVirtualMachine();
|
||||
void setDataset(dataset_t ds) override;
|
||||
void initializeScratchpad(uint32_t index) override;
|
||||
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
|
||||
void initializeProgram(const void* seed) override;
|
||||
virtual void execute() override;
|
||||
void* getProgram() {
|
||||
|
@ -69,7 +69,7 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
void InterpretedVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
|
||||
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
|
||||
if (asyncWorker) {
|
||||
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;
|
||||
|
@ -42,7 +42,7 @@ namespace RandomX {
|
||||
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
||||
~InterpretedVirtualMachine();
|
||||
void setDataset(dataset_t ds) override;
|
||||
void initializeScratchpad(uint32_t index) override;
|
||||
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
|
||||
void initializeProgram(const void* seed) override;
|
||||
void execute() override;
|
||||
const Program& getProgam() {
|
||||
|
@ -182,17 +182,17 @@ namespace RandomX {
|
||||
emitByte(0xe8); //xor rbp, rax
|
||||
}
|
||||
emitByte(0x25); //and eax,
|
||||
if (instr.loca & 15) {
|
||||
//if (instr.loca & 15) {
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
|
||||
}
|
||||
}
|
||||
/*}
|
||||
else {
|
||||
emit(ScratchpadL3 - 1); //whole scratchpad
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
void JitCompilerX86::genar(Instruction& instr) {
|
||||
@ -271,7 +271,7 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
|
||||
if (instr.locc & 16) { //write to register
|
||||
if (instr.locc & 8) { //write to register
|
||||
emit(uint16_t(0x8b4c)); //mov
|
||||
if (rax) {
|
||||
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
|
||||
@ -281,17 +281,17 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (instr.locc & 15) {
|
||||
if (instr.locc & 3) {
|
||||
//if (instr.locc & 7) {
|
||||
if (instr.locc & 1) {
|
||||
scratchpadStoreR(instr, ScratchpadL1, rax);
|
||||
}
|
||||
else {
|
||||
scratchpadStoreR(instr, ScratchpadL2, rax);
|
||||
}
|
||||
}
|
||||
/*}
|
||||
else {
|
||||
scratchpadStoreR(instr, ScratchpadL3, rax);
|
||||
}
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
@ -319,18 +319,18 @@ namespace RandomX {
|
||||
}
|
||||
emit(uint16_t(0x280f)); //movaps
|
||||
emitByte(0xc0 + 8 * regc); // regc, xmm0
|
||||
if (instr.locc & 16) { //write to scratchpad
|
||||
if (instr.locc & 15) {
|
||||
if (instr.locc & 3) { //C.LOC.W
|
||||
if (instr.locc & 8) { //write to scratchpad
|
||||
//if (instr.locc & 7) {
|
||||
if (instr.locc & 1) { //C.LOC.W
|
||||
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
|
||||
}
|
||||
}
|
||||
else {
|
||||
//}
|
||||
/*else {
|
||||
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
|
||||
}
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -39,11 +39,16 @@ namespace RandomX {
|
||||
mem.ds.dataset = nullptr;
|
||||
}
|
||||
|
||||
void VirtualMachine::getResult(void* out) {
|
||||
void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* out) {
|
||||
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
|
||||
alignas(16) uint64_t smallState[smallStateLength];
|
||||
memcpy(smallState, &reg, sizeof(RegisterFile));
|
||||
hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24);
|
||||
if (scratchpadSize > 0) {
|
||||
hashAes1Rx4<false>(scratchpad, scratchpadSize, smallState + 24);
|
||||
}
|
||||
else {
|
||||
memset(smallState + 24, 0, 64);
|
||||
}
|
||||
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
|
||||
}
|
||||
}
|
@ -28,10 +28,13 @@ namespace RandomX {
|
||||
VirtualMachine();
|
||||
virtual ~VirtualMachine() {}
|
||||
virtual void setDataset(dataset_t ds) = 0;
|
||||
virtual void initializeScratchpad(uint32_t index) = 0;
|
||||
virtual void initializeScratchpad(uint8_t* scratchpad, int32_t index) = 0;
|
||||
void setScratchpad(void* ptr) {
|
||||
scratchpad = (convertible_t*)ptr;
|
||||
}
|
||||
virtual void initializeProgram(const void* seed) = 0;
|
||||
virtual void execute() = 0;
|
||||
void getResult(void*);
|
||||
void getResult(void*, size_t, void*);
|
||||
const RegisterFile& getRegisterFile() {
|
||||
return reg;
|
||||
}
|
||||
@ -39,6 +42,6 @@ namespace RandomX {
|
||||
DatasetReadFunc readDataset;
|
||||
alignas(16) RegisterFile reg;
|
||||
MemoryRegisters mem;
|
||||
alignas(64) convertible_t scratchpad[ScratchpadLength];
|
||||
convertible_t* scratchpad;
|
||||
};
|
||||
}
|
@ -12,12 +12,12 @@
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
movdqa xmmword ptr [rcx+64], xmm8
|
||||
movdqa xmmword ptr [rcx+80], xmm9
|
||||
movdqa xmmword ptr [rcx+96], xmm2
|
||||
movdqa xmmword ptr [rcx+112], xmm3
|
||||
movapd xmmword ptr [rcx+64], xmm8
|
||||
movapd xmmword ptr [rcx+80], xmm9
|
||||
movapd xmmword ptr [rcx+96], xmm2
|
||||
movapd xmmword ptr [rcx+112], xmm3
|
||||
lea rcx, [rcx+64]
|
||||
movdqa xmmword ptr [rcx+64], xmm4
|
||||
movdqa xmmword ptr [rcx+80], xmm5
|
||||
movdqa xmmword ptr [rcx+96], xmm6
|
||||
movdqa xmmword ptr [rcx+112], xmm7
|
||||
movapd xmmword ptr [rcx+64], xmm4
|
||||
movapd xmmword ptr [rcx+80], xmm5
|
||||
movapd xmmword ptr [rcx+96], xmm6
|
||||
movapd xmmword ptr [rcx+112], xmm7
|
@ -1,14 +1,10 @@
|
||||
mov rdi, rsp ;# beginning of VM stack
|
||||
mov ebx, 1048577 ;# number of VM instructions to execute + 1
|
||||
mov ebx, 262145 ;# number of VM instructions to execute + 1
|
||||
|
||||
xorps xmm10, xmm10
|
||||
cmpeqpd xmm10, xmm10
|
||||
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||
|
||||
;# reset rounding mode
|
||||
mov dword ptr [rsp-8], 40896
|
||||
ldmxcsr dword ptr [rsp-8]
|
||||
|
||||
;# load integer registers
|
||||
mov r8, qword ptr [rcx+0]
|
||||
mov r9, qword ptr [rcx+8]
|
||||
@ -19,45 +15,13 @@
|
||||
mov r14, qword ptr [rcx+48]
|
||||
mov r15, qword ptr [rcx+56]
|
||||
|
||||
;# initialize floating point registers
|
||||
xorps xmm8, xmm8
|
||||
cvtsi2sd xmm8, qword ptr [rcx+72]
|
||||
pslldq xmm8, 8
|
||||
cvtsi2sd xmm8, qword ptr [rcx+64]
|
||||
|
||||
xorps xmm9, xmm9
|
||||
cvtsi2sd xmm9, qword ptr [rcx+88]
|
||||
pslldq xmm9, 8
|
||||
cvtsi2sd xmm9, qword ptr [rcx+80]
|
||||
|
||||
xorps xmm2, xmm2
|
||||
cvtsi2sd xmm2, qword ptr [rcx+104]
|
||||
pslldq xmm2, 8
|
||||
cvtsi2sd xmm2, qword ptr [rcx+96]
|
||||
|
||||
xorps xmm3, xmm3
|
||||
cvtsi2sd xmm3, qword ptr [rcx+120]
|
||||
pslldq xmm3, 8
|
||||
cvtsi2sd xmm3, qword ptr [rcx+112]
|
||||
|
||||
;# load floating point registers
|
||||
movapd xmm8, xmmword ptr [rcx+64]
|
||||
movapd xmm9, xmmword ptr [rcx+80]
|
||||
movapd xmm2, xmmword ptr [rcx+96]
|
||||
movapd xmm3, xmmword ptr [rcx+112]
|
||||
lea rcx, [rcx+64]
|
||||
|
||||
xorps xmm4, xmm4
|
||||
cvtsi2sd xmm4, qword ptr [rcx+72]
|
||||
pslldq xmm4, 8
|
||||
cvtsi2sd xmm4, qword ptr [rcx+64]
|
||||
|
||||
xorps xmm5, xmm5
|
||||
cvtsi2sd xmm5, qword ptr [rcx+88]
|
||||
pslldq xmm5, 8
|
||||
cvtsi2sd xmm5, qword ptr [rcx+80]
|
||||
|
||||
xorps xmm6, xmm6
|
||||
cvtsi2sd xmm6, qword ptr [rcx+104]
|
||||
pslldq xmm6, 8
|
||||
cvtsi2sd xmm6, qword ptr [rcx+96]
|
||||
|
||||
xorps xmm7, xmm7
|
||||
cvtsi2sd xmm7, qword ptr [rcx+120]
|
||||
pslldq xmm7, 8
|
||||
cvtsi2sd xmm7, qword ptr [rcx+112]
|
||||
movapd xmm4, xmmword ptr [rcx+64]
|
||||
movapd xmm5, xmmword ptr [rcx+80]
|
||||
movapd xmm6, xmmword ptr [rcx+96]
|
||||
movapd xmm7, xmmword ptr [rcx+112]
|
||||
|
26
src/main.cpp
26
src/main.cpp
@ -130,7 +130,7 @@ void generateAsm(int nonce) {
|
||||
asmX86.printCode(std::cout);
|
||||
}
|
||||
|
||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
|
||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
||||
uint64_t hash[4];
|
||||
unsigned char blockTemplate[] = {
|
||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||
@ -146,11 +146,20 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||
*noncePtr = nonce;
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
||||
vm->initializeScratchpad(spIndex);
|
||||
vm->initializeScratchpad(scratchpad, spIndex);
|
||||
vm->initializeProgram(hash);
|
||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||
vm->setScratchpad(scratchpad + 3 * RandomX::ScratchpadSize / 4);
|
||||
vm->execute();
|
||||
vm->getResult(hash);
|
||||
vm->setScratchpad(scratchpad + 2 * RandomX::ScratchpadSize / 4);
|
||||
vm->execute();
|
||||
vm->getResult(nullptr, 0, hash);
|
||||
vm->initializeProgram(hash);
|
||||
vm->setScratchpad(scratchpad + 1 * RandomX::ScratchpadSize / 4);
|
||||
vm->execute();
|
||||
vm->setScratchpad(scratchpad + 0 * RandomX::ScratchpadSize / 4);
|
||||
vm->execute();
|
||||
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
|
||||
result.xorWith(hash);
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Nonce: " << nonce << " ";
|
||||
@ -274,18 +283,25 @@ int main(int argc, char** argv) {
|
||||
vm->setDataset(dataset);
|
||||
vms.push_back(vm);
|
||||
}
|
||||
uint8_t* scratchpadMem;
|
||||
if (largePages) {
|
||||
scratchpadMem = (uint8_t*)allocLargePagesMemory(RandomX::ScratchpadSize * (threadCount + 1) / 2);
|
||||
}
|
||||
else {
|
||||
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
|
||||
}
|
||||
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
||||
sw.restart();
|
||||
if (threadCount > 1) {
|
||||
for (int i = 0; i < vms.size(); ++i) {
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i));
|
||||
}
|
||||
for (int i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
else {
|
||||
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0);
|
||||
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
|
||||
if (compiled)
|
||||
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user