4 scratchpad segments

This commit is contained in:
tevador 2019-01-20 00:44:01 +01:00
parent 16db607025
commit bd0dba88a8
10 changed files with 75 additions and 82 deletions

View File

@ -33,7 +33,7 @@ namespace RandomX {
mem.ds = ds; mem.ds = ds;
} }
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) { void CompiledVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize); memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
} }
@ -42,6 +42,11 @@ namespace RandomX {
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) { for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
*(((uint32_t*)&reg) + i) = gen(); *(((uint32_t*)&reg) + i) = gen();
} }
FPINIT();
for (int i = 0; i < RegistersCount; ++i) {
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
}
compiler.generateProgram(gen); compiler.generateProgram(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7; mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.mx = *(((uint32_t*)seed) + 5); mem.mx = *(((uint32_t*)seed) + 5);

View File

@ -39,7 +39,7 @@ namespace RandomX {
} }
CompiledVirtualMachine(); CompiledVirtualMachine();
void setDataset(dataset_t ds) override; void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override; void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
void initializeProgram(const void* seed) override; void initializeProgram(const void* seed) override;
virtual void execute() override; virtual void execute() override;
void* getProgram() { void* getProgram() {

View File

@ -69,7 +69,7 @@ namespace RandomX {
} }
} }
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) { void InterpretedVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index; uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
if (asyncWorker) { if (asyncWorker) {
ILightClientAsyncWorker* worker = mem.ds.asyncWorker; ILightClientAsyncWorker* worker = mem.ds.asyncWorker;

View File

@ -42,7 +42,7 @@ namespace RandomX {
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {} InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
~InterpretedVirtualMachine(); ~InterpretedVirtualMachine();
void setDataset(dataset_t ds) override; void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override; void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
void initializeProgram(const void* seed) override; void initializeProgram(const void* seed) override;
void execute() override; void execute() override;
const Program& getProgam() { const Program& getProgam() {

View File

@ -182,17 +182,17 @@ namespace RandomX {
emitByte(0xe8); //xor rbp, rax emitByte(0xe8); //xor rbp, rax
} }
emitByte(0x25); //and eax, emitByte(0x25); //and eax,
if (instr.loca & 15) { //if (instr.loca & 15) {
if (instr.loca & 3) { if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
} }
else { else {
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
} }
} /*}
else { else {
emit(ScratchpadL3 - 1); //whole scratchpad emit(ScratchpadL3 - 1); //whole scratchpad
} }*/
} }
void JitCompilerX86::genar(Instruction& instr) { void JitCompilerX86::genar(Instruction& instr) {
@ -271,7 +271,7 @@ namespace RandomX {
} }
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) { void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
if (instr.locc & 16) { //write to register if (instr.locc & 8) { //write to register
emit(uint16_t(0x8b4c)); //mov emit(uint16_t(0x8b4c)); //mov
if (rax) { if (rax) {
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
@ -281,17 +281,17 @@ namespace RandomX {
} }
} }
else { else {
if (instr.locc & 15) { //if (instr.locc & 7) {
if (instr.locc & 3) { if (instr.locc & 1) {
scratchpadStoreR(instr, ScratchpadL1, rax); scratchpadStoreR(instr, ScratchpadL1, rax);
} }
else { else {
scratchpadStoreR(instr, ScratchpadL2, rax); scratchpadStoreR(instr, ScratchpadL2, rax);
} }
} /*}
else { else {
scratchpadStoreR(instr, ScratchpadL3, rax); scratchpadStoreR(instr, ScratchpadL3, rax);
} }*/
} }
} }
@ -319,18 +319,18 @@ namespace RandomX {
} }
emit(uint16_t(0x280f)); //movaps emit(uint16_t(0x280f)); //movaps
emitByte(0xc0 + 8 * regc); // regc, xmm0 emitByte(0xc0 + 8 * regc); // regc, xmm0
if (instr.locc & 16) { //write to scratchpad if (instr.locc & 8) { //write to scratchpad
if (instr.locc & 15) { //if (instr.locc & 7) {
if (instr.locc & 3) { //C.LOC.W if (instr.locc & 1) { //C.LOC.W
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
} }
else { else {
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
} }
} //}
else { /*else {
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
} }*/
} }
} }

View File

@ -39,11 +39,16 @@ namespace RandomX {
mem.ds.dataset = nullptr; mem.ds.dataset = nullptr;
} }
void VirtualMachine::getResult(void* out) { void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* out) {
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8; constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
alignas(16) uint64_t smallState[smallStateLength]; alignas(16) uint64_t smallState[smallStateLength];
memcpy(smallState, &reg, sizeof(RegisterFile)); memcpy(smallState, &reg, sizeof(RegisterFile));
hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24); if (scratchpadSize > 0) {
hashAes1Rx4<false>(scratchpad, scratchpadSize, smallState + 24);
}
else {
memset(smallState + 24, 0, 64);
}
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0); blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
} }
} }

View File

@ -28,10 +28,13 @@ namespace RandomX {
VirtualMachine(); VirtualMachine();
virtual ~VirtualMachine() {} virtual ~VirtualMachine() {}
virtual void setDataset(dataset_t ds) = 0; virtual void setDataset(dataset_t ds) = 0;
virtual void initializeScratchpad(uint32_t index) = 0; virtual void initializeScratchpad(uint8_t* scratchpad, int32_t index) = 0;
void setScratchpad(void* ptr) {
scratchpad = (convertible_t*)ptr;
}
virtual void initializeProgram(const void* seed) = 0; virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0; virtual void execute() = 0;
void getResult(void*); void getResult(void*, size_t, void*);
const RegisterFile& getRegisterFile() { const RegisterFile& getRegisterFile() {
return reg; return reg;
} }
@ -39,6 +42,6 @@ namespace RandomX {
DatasetReadFunc readDataset; DatasetReadFunc readDataset;
alignas(16) RegisterFile reg; alignas(16) RegisterFile reg;
MemoryRegisters mem; MemoryRegisters mem;
alignas(64) convertible_t scratchpad[ScratchpadLength]; convertible_t* scratchpad;
}; };
} }

View File

@ -12,12 +12,12 @@
mov qword ptr [rcx+40], r13 mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
movdqa xmmword ptr [rcx+64], xmm8 movapd xmmword ptr [rcx+64], xmm8
movdqa xmmword ptr [rcx+80], xmm9 movapd xmmword ptr [rcx+80], xmm9
movdqa xmmword ptr [rcx+96], xmm2 movapd xmmword ptr [rcx+96], xmm2
movdqa xmmword ptr [rcx+112], xmm3 movapd xmmword ptr [rcx+112], xmm3
lea rcx, [rcx+64] lea rcx, [rcx+64]
movdqa xmmword ptr [rcx+64], xmm4 movapd xmmword ptr [rcx+64], xmm4
movdqa xmmword ptr [rcx+80], xmm5 movapd xmmword ptr [rcx+80], xmm5
movdqa xmmword ptr [rcx+96], xmm6 movapd xmmword ptr [rcx+96], xmm6
movdqa xmmword ptr [rcx+112], xmm7 movapd xmmword ptr [rcx+112], xmm7

View File

@ -1,14 +1,10 @@
mov rdi, rsp ;# beginning of VM stack mov rdi, rsp ;# beginning of VM stack
mov ebx, 1048577 ;# number of VM instructions to execute + 1 mov ebx, 262145 ;# number of VM instructions to execute + 1
xorps xmm10, xmm10 xorps xmm10, xmm10
cmpeqpd xmm10, xmm10 cmpeqpd xmm10, xmm10
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
;# reset rounding mode
mov dword ptr [rsp-8], 40896
ldmxcsr dword ptr [rsp-8]
;# load integer registers ;# load integer registers
mov r8, qword ptr [rcx+0] mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8] mov r9, qword ptr [rcx+8]
@ -19,45 +15,13 @@
mov r14, qword ptr [rcx+48] mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56] mov r15, qword ptr [rcx+56]
;# initialize floating point registers ;# load floating point registers
xorps xmm8, xmm8 movapd xmm8, xmmword ptr [rcx+64]
cvtsi2sd xmm8, qword ptr [rcx+72] movapd xmm9, xmmword ptr [rcx+80]
pslldq xmm8, 8 movapd xmm2, xmmword ptr [rcx+96]
cvtsi2sd xmm8, qword ptr [rcx+64] movapd xmm3, xmmword ptr [rcx+112]
xorps xmm9, xmm9
cvtsi2sd xmm9, qword ptr [rcx+88]
pslldq xmm9, 8
cvtsi2sd xmm9, qword ptr [rcx+80]
xorps xmm2, xmm2
cvtsi2sd xmm2, qword ptr [rcx+104]
pslldq xmm2, 8
cvtsi2sd xmm2, qword ptr [rcx+96]
xorps xmm3, xmm3
cvtsi2sd xmm3, qword ptr [rcx+120]
pslldq xmm3, 8
cvtsi2sd xmm3, qword ptr [rcx+112]
lea rcx, [rcx+64] lea rcx, [rcx+64]
movapd xmm4, xmmword ptr [rcx+64]
xorps xmm4, xmm4 movapd xmm5, xmmword ptr [rcx+80]
cvtsi2sd xmm4, qword ptr [rcx+72] movapd xmm6, xmmword ptr [rcx+96]
pslldq xmm4, 8 movapd xmm7, xmmword ptr [rcx+112]
cvtsi2sd xmm4, qword ptr [rcx+64]
xorps xmm5, xmm5
cvtsi2sd xmm5, qword ptr [rcx+88]
pslldq xmm5, 8
cvtsi2sd xmm5, qword ptr [rcx+80]
xorps xmm6, xmm6
cvtsi2sd xmm6, qword ptr [rcx+104]
pslldq xmm6, 8
cvtsi2sd xmm6, qword ptr [rcx+96]
xorps xmm7, xmm7
cvtsi2sd xmm7, qword ptr [rcx+120]
pslldq xmm7, 8
cvtsi2sd xmm7, qword ptr [rcx+112]

View File

@ -130,7 +130,7 @@ void generateAsm(int nonce) {
asmX86.printCode(std::cout); asmX86.printCode(std::cout);
} }
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) { void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
uint64_t hash[4]; uint64_t hash[4];
unsigned char blockTemplate[] = { unsigned char blockTemplate[] = {
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
@ -146,11 +146,20 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
*noncePtr = nonce; *noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8); int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
vm->initializeScratchpad(spIndex); vm->initializeScratchpad(scratchpad, spIndex);
vm->initializeProgram(hash); vm->initializeProgram(hash);
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt"); //dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
vm->setScratchpad(scratchpad + 3 * RandomX::ScratchpadSize / 4);
vm->execute(); vm->execute();
vm->getResult(hash); vm->setScratchpad(scratchpad + 2 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(nullptr, 0, hash);
vm->initializeProgram(hash);
vm->setScratchpad(scratchpad + 1 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->setScratchpad(scratchpad + 0 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
result.xorWith(hash); result.xorWith(hash);
if (RandomX::trace) { if (RandomX::trace) {
std::cout << "Nonce: " << nonce << " "; std::cout << "Nonce: " << nonce << " ";
@ -274,18 +283,25 @@ int main(int argc, char** argv) {
vm->setDataset(dataset); vm->setDataset(dataset);
vms.push_back(vm); vms.push_back(vm);
} }
uint8_t* scratchpadMem;
if (largePages) {
scratchpadMem = (uint8_t*)allocLargePagesMemory(RandomX::ScratchpadSize * (threadCount + 1) / 2);
}
else {
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
}
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl; std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
sw.restart(); sw.restart();
if (threadCount > 1) { if (threadCount > 1) {
for (int i = 0; i < vms.size(); ++i) { for (int i = 0; i < vms.size(); ++i) {
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i)); threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i));
} }
for (int i = 0; i < threads.size(); ++i) { for (int i = 0; i < threads.size(); ++i) {
threads[i].join(); threads[i].join();
} }
} }
else { else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0); mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
if (compiled) if (compiled)
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl; std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
} }