diff --git a/src/CompiledLightVirtualMachine.cpp b/src/CompiledLightVirtualMachine.cpp
new file mode 100644
index 0000000..49e593c
--- /dev/null
+++ b/src/CompiledLightVirtualMachine.cpp
@@ -0,0 +1,43 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "CompiledLightVirtualMachine.hpp"
+#include "common.hpp"
+#include <stdexcept> //NOTE(review): the header name was lost from this line; <stdexcept> is a guess - confirm
+
+namespace RandomX {
+
+	//Intentionally empty: this class adds no members of its own.
+	CompiledLightVirtualMachine::CompiledLightVirtualMachine() {
+	}
+
+	//Stores the dataset handle in mem.ds and derives datasetRange from size, counted in CacheLineSize units.
+	void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
+		mem.ds = ds;
+		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
+		//datasetBasePtr = ds.dataset.memory;
+	}
+
+	//Explicitly runs VirtualMachine::initialize(), then compiles the program with generateProgramLight.
+	void CompiledLightVirtualMachine::initialize() {
+		VirtualMachine::initialize();
+		compiler.generateProgramLight(program);
+		//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
+	}
+}
\ No newline at end of file
diff --git a/src/CompiledLightVirtualMachine.hpp b/src/CompiledLightVirtualMachine.hpp
new file mode 100644
index 0000000..9ac52be
--- /dev/null
+++ b/src/CompiledLightVirtualMachine.hpp
@@ -0,0 +1,47 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+//#define TRACEVM
+#include <new>
+#include "CompiledVirtualMachine.hpp"
+#include "JitCompilerX86.hpp"
+#include "intrinPortable.h"
+
+namespace RandomX {
+
+	//CompiledVirtualMachine variant that overrides setDataset() and initialize().
+	class CompiledLightVirtualMachine : public CompiledVirtualMachine {
+	public:
+		//Allocates the object with _mm_malloc using an alignment of 64; throws std::bad_alloc on failure.
+		void* operator new(size_t size) {
+			void* ptr = _mm_malloc(size, 64);
+			if (ptr == nullptr)
+				throw std::bad_alloc();
+			return ptr;
+		}
+		//Frees memory with _mm_free, the counterpart of the _mm_malloc call in operator new.
+		void operator delete(void* ptr) {
+			_mm_free(ptr);
+		}
+		CompiledLightVirtualMachine();
+		void setDataset(dataset_t ds, uint64_t size) override;
+		void initialize() override;
+	};
+}
\ No newline at end of file
diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp
index 17d79df..9deb621 100644
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@@ -48,10 +48,7 @@ namespace RandomX {
 		void* getProgram() {
 			return compiler.getCode();
 		}
-	private:
-#ifdef TRACEVM
-		convertible_t tracepad[InstructionCount];
-#endif
+	protected: //CompiledLightVirtualMachine::initialize() uses the compiler member
 		JitCompilerX86 compiler;
 		uint8_t* datasetBasePtr;
 	};
diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm
index 5b2d387..ffac80c 100644
--- a/src/JitCompilerX86-static.asm
+++ b/src/JitCompilerX86-static.asm
@@ -24,6 +24,8 @@ PUBLIC randomx_program_loop_begin
 PUBLIC randomx_program_loop_load
 PUBLIC randomx_program_start
 PUBLIC randomx_program_read_dataset
+PUBLIC randomx_program_read_dataset_light
+PUBLIC randomx_program_read_dataset_light_sub
 PUBLIC randomx_program_loop_store
 PUBLIC randomx_program_loop_end
 PUBLIC randomx_program_epilogue
@@ -54,6 +56,10 @@ randomx_program_read_dataset PROC
 	include asm/program_read_dataset.inc
 randomx_program_read_dataset ENDP
 
+randomx_program_read_dataset_light PROC
+	include asm/program_read_dataset_light.inc
+randomx_program_read_dataset_light ENDP
+
 randomx_program_loop_store PROC
 	include asm/program_loop_store.inc
 randomx_program_loop_store ENDP
@@ -62,6 +68,13 @@ randomx_program_loop_end PROC
 	nop
 randomx_program_loop_end ENDP
 
+ALIGN 64
+randomx_program_read_dataset_light_sub PROC
+	include asm/program_read_dataset_light_sub.inc
+	squareHashSub: ;# target of the "call squareHashSub" instructions in the include above
+	include asm/squareHash.inc
+randomx_program_read_dataset_light_sub ENDP
+
 ALIGN 64
 randomx_program_epilogue PROC
 	include asm/program_epilogue_win64.inc
diff --git a/src/JitCompilerX86-static.hpp b/src/JitCompilerX86-static.hpp
index 64abfa3..3d835b6 100644
--- a/src/JitCompilerX86-static.hpp
+++ b/src/JitCompilerX86-static.hpp
@@ -23,8 +23,10 @@ extern "C" {
 	void randomx_program_loop_load();
 	void randomx_program_start();
 	void randomx_program_read_dataset();
+	void randomx_program_read_dataset_light();
 	void randomx_program_loop_store();
 	void randomx_program_loop_end();
+	void randomx_program_read_dataset_light_sub();
 	void randomx_program_epilogue();
 	void randomx_program_end();
 }
\ No newline at end of file
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index 6d9ed69..bb3e578 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -86,8 +86,10 @@ namespace RandomX {
 	const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
 	const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
 	const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
+	const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light; //start of the light dataset-read snippet
 	const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
 	const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
+	const uint8_t* codeReadDatasetLightSub = (uint8_t*)&randomx_program_read_dataset_light_sub; //start of the light dataset-read subroutine
 	const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
 	const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
 
@@ -95,10 +97,13 @@ namespace RandomX {
 	const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
 
 	const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
-	const int32_t readDatasetSize = codeLoopStore - codeReadDataset;
+	const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset; //the regular snippet now ends where the light one begins
+	const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight;
 	const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
+	const int32_t readDatasetLightSubSize = codeEpilogue - codeReadDatasetLightSub; //measured up to the epilogue label, so it includes the squareHash code
 
 	const int32_t epilogueOffset = CodeSize - epilogueSize;
+	const int32_t readDatasetLightSubOffset = epilogueOffset - readDatasetLightSubSize; //the subroutine is placed directly before the epilogue
 
 	static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
 	static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
@@ -168,6 +173,7 @@ namespace RandomX {
 	static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
 	static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
 	static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
+	static const uint8_t CALL = 0xe8; //x86 near CALL opcode; a 32-bit relative displacement follows it
 
 	size_t JitCompilerX86::getCodeSize() {
 		return codePos - prologueSize;
@@ -176,10 +182,27 @@ namespace RandomX {
 	JitCompilerX86::JitCompilerX86() {
 		code = (uint8_t*)allocExecutableMemory(CodeSize);
 		memcpy(code, codePrologue, prologueSize);
-		memcpy(code + CodeSize - epilogueSize, codeEpilogue, epilogueSize);
+		memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
+		memcpy(code + readDatasetLightSubOffset, codeReadDatasetLightSub, readDatasetLightSubSize); //copied once here; generateProgramLight emits a CALL to it
 	}
 
 	void JitCompilerX86::generateProgram(Program& prog) {
+		generateProgramPrologue(prog);
+		memcpy(code + codePos, codeReadDataset, readDatasetSize); //regular dataset-read snippet
+		codePos += readDatasetSize;
+		generateProgramEpilogue(prog);
+	}
+
+	void JitCompilerX86::generateProgramLight(Program& prog) { //same as generateProgram, but with the light snippet plus a CALL to the subroutine
+		generateProgramPrologue(prog);
+		memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
+		codePos += readDatasetLightSize;
+		emitByte(CALL);
+		emit32(readDatasetLightSubOffset - (codePos + 4)); //displacement counted from the end of the 4-byte operand; assumes emitByte advanced codePos past the opcode
+		generateProgramEpilogue(prog);
+	}
+
+	void JitCompilerX86::generateProgramPrologue(Program& prog) { //code emitted before the dataset read, shared by both generators
 		auto addressRegisters = prog.getEntropy(12);
 		uint32_t readReg0 = 0 + (addressRegisters & 1);
 		addressRegisters >>= 1;
@@ -205,8 +228,9 @@ namespace RandomX {
 		emitByte(0xc0 + readReg2);
 		emit(REX_XOR_EAX);
 		emitByte(0xc0 + readReg3);
-		memcpy(code + codePos, codeReadDataset, readDatasetSize);
-		codePos += readDatasetSize;
+	}
+
+	void JitCompilerX86::generateProgramEpilogue(Program& prog) { //code emitted after the dataset read, shared by both generators
 		memcpy(code + codePos, codeLoopStore, loopStoreSize);
 		codePos += loopStoreSize;
 		emit(SUB_EBX);
diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp
index fed3a8a..3c868c0 100644
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@@ -37,6 +37,7 @@ namespace RandomX {
 	public:
 		JitCompilerX86();
 		void generateProgram(Program&);
+		void generateProgramLight(Program&); //variant called by CompiledLightVirtualMachine::initialize()
 		ProgramFunc getProgramFunc() {
 			return (ProgramFunc)code;
 		}
@@ -49,6 +50,8 @@ namespace RandomX {
 		uint8_t* code;
 		int32_t codePos;
 
+		void generateProgramPrologue(Program&); //shared by generateProgram and generateProgramLight
+		void generateProgramEpilogue(Program&); //shared by generateProgram and generateProgramLight
 		void genAddressReg(Instruction&, bool);
 		void genAddressRegDst(Instruction&, bool);
 		void genAddressImm(Instruction&);
diff --git a/src/asm/program_loop_store.inc b/src/asm/program_loop_store.inc
index 1ba1635..53164cb 100644
--- a/src/asm/program_loop_store.inc
+++ b/src/asm/program_loop_store.inc
@@ -1,3 +1,4 @@
+	xor eax, eax ;# moved here from program_read_dataset.inc
 	pop rcx
 	mov qword ptr [rcx+0], r8
 	mov qword ptr [rcx+8], r9
diff --git a/src/asm/program_read_dataset.inc b/src/asm/program_read_dataset.inc
index 061d32c..bae4817 100644
--- a/src/asm/program_read_dataset.inc
+++ b/src/asm/program_read_dataset.inc
@@ -1,5 +1,4 @@
 	xor rbp, rax ;# modify "mx"
-	xor eax, eax
 	and rbp, -64 ;# align "mx" to the start of a cache line
 	mov edx, ebp ;# edx = mx
 	prefetchnta byte ptr [rdi+rdx]
diff --git a/src/asm/program_read_dataset_light.inc b/src/asm/program_read_dataset_light.inc
new file mode 100644
index 0000000..14395d8
--- /dev/null
+++ b/src/asm/program_read_dataset_light.inc
@@ -0,0 +1,4 @@
+	xor rbp, rax ;# modify "mx"
+	ror rbp, 32 ;# swap "ma" and "mx"
+	mov ecx, ebp ;# ecx = ma
+	shr ecx, 6 ;# ecx = Dataset block number
diff --git a/src/asm/program_read_dataset_light_sub.inc b/src/asm/program_read_dataset_light_sub.inc
new file mode 100644
index 0000000..9c26fb2
--- /dev/null
+++ b/src/asm/program_read_dataset_light_sub.inc
@@ -0,0 +1,308 @@
+	;# rdi -> Cache pointer
+	;# rcx -> Dataset block number
+	;# rax, rbx, rcx, rdx -> scratch registers
+	sub rsp, 72 ;# reserve nine 8-byte stack slots
+	mov qword ptr [rsp+64], rbx ;# rbx is saved here and restored before ret
+	mov qword ptr [rsp+56], r8 ;# r8-r15 are saved; the final block XORs these values back in
+	mov qword ptr [rsp+48], r9
+	mov qword ptr [rsp+40], r10
+	mov qword ptr [rsp+32], r11
+	mov qword ptr [rsp+24], r12
+	mov qword ptr [rsp+16], r13
+	mov qword ptr [rsp+8], r14
+	mov qword ptr [rsp+0], r15
+	mov r8, rcx ;# r8 = block number; r9-r15 are zeroed below
+	xor r9, r9
+	xor r10, r10
+	xor r11, r11
+	xor r12, r12
+	xor r13, r13
+	xor r14, r14
+	xor r15, r15
+	;# iteration 0
+	;# c0
+	mov rbx, r8
+	and rbx, 4194303 ;# keep the low 22 bits (4194303 = 2^22 - 1)
+	shl rbx, 6 ;# multiply by 64 to get a byte offset
+	add rbx, rdi ;# rbx = Cache pointer + offset
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r8+r9] ;# argument passed to squareHashSub in rcx
+	call squareHashSub
+	mov r9, rax ;# result of squareHashSub replaces the next register
+	xor r8, qword ptr [rbx+0] ;# XOR the 64 bytes at [rbx] into r8-r15
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c1
+	mov rbx, r9
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r9+r10]
+	call squareHashSub
+	mov r10, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c2
+	mov rbx, r10
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r10+r11]
+	call squareHashSub
+	mov r11, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c3
+	mov rbx, r11
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r11+r12]
+	call squareHashSub
+	mov r12, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c4
+	mov rbx, r12
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r12+r13]
+	call squareHashSub
+	mov r13, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c5
+	mov rbx, r13
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r13+r14]
+	call squareHashSub
+	mov r14, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c6
+	mov rbx, r14
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r14+r15]
+	call squareHashSub
+	mov r15, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c7
+	mov rbx, r15
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r15+r8]
+	call squareHashSub
+	mov r8, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# iteration 1
+	;# c0
+	mov rbx, r8
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r8+r9]
+	call squareHashSub
+	mov r9, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c1
+	mov rbx, r9
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r9+r10]
+	call squareHashSub
+	mov r10, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c2
+	mov rbx, r10
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r10+r11]
+	call squareHashSub
+	mov r11, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c3
+	mov rbx, r11
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r11+r12]
+	call squareHashSub
+	mov r12, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c4
+	mov rbx, r12
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r12+r13]
+	call squareHashSub
+	mov r13, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c5
+	mov rbx, r13
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r13+r14]
+	call squareHashSub
+	mov r14, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c6
+	mov rbx, r14
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r14+r15]
+	call squareHashSub
+	mov r15, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# c7
+	mov rbx, r15
+	and rbx, 4194303
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
+	lea rcx, [r15+r8]
+	call squareHashSub
+	mov r8, rax
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
+	;# --------------------------
+	mov rbx, qword ptr [rsp+64] ;# restore rbx
+	xor r8, qword ptr [rsp+56] ;# XOR the computed values with the r8-r15 contents saved on entry
+	xor r9, qword ptr [rsp+48]
+	xor r10, qword ptr [rsp+40]
+	xor r11, qword ptr [rsp+32]
+	xor r12, qword ptr [rsp+24]
+	xor r13, qword ptr [rsp+16]
+	xor r14, qword ptr [rsp+8]
+	xor r15, qword ptr [rsp+0]
+	add rsp, 72 ;# release the nine stack slots
+	;# xor eax, eax
+	ret
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 7de622b..fc37f57 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -19,6 +19,7 @@ along with RandomX. If not, see.
//#define TRACE #include "InterpretedVirtualMachine.hpp" #include "CompiledVirtualMachine.hpp" +#include "CompiledLightVirtualMachine.hpp" #include "AssemblyGeneratorX86.hpp" #include "Stopwatch.hpp" #include "blake2/blake2.h" @@ -202,7 +203,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi } int main(int argc, char** argv) { - bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative; + bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit; int programCount, threadCount, initThreadCount, epoch; readOption("--softAes", argc, argv, softAes); @@ -214,7 +215,7 @@ int main(int argc, char** argv) { readIntOption("--init", argc, argv, initThreadCount, 1); readIntOption("--epoch", argc, argv, epoch, 0); readOption("--largePages", argc, argv, largePages); - readOption("--async", argc, argv, async); + readOption("--jit", argc, argv, jit); readOption("--genNative", argc, argv, genNative); readOption("--help", argc, argv, help); @@ -299,7 +300,10 @@ int main(int argc, char** argv) { vm = new RandomX::CompiledVirtualMachine(); } else { - vm = new RandomX::InterpretedVirtualMachine(softAes, async); + if (jit) + vm = new RandomX::CompiledLightVirtualMachine(); + else + vm = new RandomX::InterpretedVirtualMachine(softAes, async); } vm->setDataset(dataset, datasetSize); vms.push_back(vm);