Large page support for cache

Bug fixes
This commit is contained in:
tevador 2019-01-18 17:57:47 +01:00
parent 8b1102ee05
commit 4fb168e249
13 changed files with 135 additions and 33 deletions

View File

@ -11,7 +11,7 @@ SRCDIR=src
OBJDIR=obj OBJDIR=obj
LDFLAGS=-lpthread LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o) ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o)
ifeq ($(PLATFORM),x86_64) ifeq ($(PLATFORM),x86_64)
ROBJS += $(OBJDIR)/JitCompilerX86-static.o ROBJS += $(OBJDIR)/JitCompilerX86-static.o
endif endif
@ -27,6 +27,11 @@ debug: CCFLAGS += -g
debug: LDFLAGS += -g debug: LDFLAGS += -g
debug: $(BINDIR)/randomx debug: $(BINDIR)/randomx
# profile: builds $(BINDIR)/randomx with -pg appended to the C++ compiler, C compiler and linker flags.
# NOTE(review): -pg is the gprof instrumentation switch on GCC-compatible toolchains - confirm for the compiler in use.
profile: CXXFLAGS += -pg
profile: CCFLAGS += -pg
profile: LDFLAGS += -pg
profile: $(BINDIR)/randomx
test: CXXFLAGS += -O0 test: CXXFLAGS += -O0
test: $(BINDIR)/AluFpuTest test: $(BINDIR)/AluFpuTest
@ -38,6 +43,9 @@ $(BINDIR)/AluFpuTest: $(TOBJS) | $(BINDIR)
$(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | $(OBJDIR) $(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/TestAluFpu.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/TestAluFpu.cpp -o $@
# Compiles AddressTransform.cpp into its object file; the listed headers are prerequisites, so editing them triggers a rebuild.
# $(OBJDIR) follows the `|` and is therefore an order-only prerequisite.
$(OBJDIR)/AddressTransform.o: $(addprefix $(SRCDIR)/,AddressTransform.cpp InterpretedVirtualMachine.hpp common.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AddressTransform.cpp -o $@
$(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR) $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR)
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_core.c -o $@ $(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_core.c -o $@
@ -74,6 +82,9 @@ $(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR) $(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
# Compiles LightClientAsyncWorker.cpp into its object file; the listed headers are prerequisites, so editing them triggers a rebuild.
# $(OBJDIR) follows the `|` and is therefore an order-only prerequisite.
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR) $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@

View File

@ -23,12 +23,32 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <new> #include <new>
#include "common.hpp" #include "common.hpp"
#include "dataset.hpp" #include "dataset.hpp"
#include "virtualMemory.hpp"
namespace RandomX { namespace RandomX {
class Cache { class Cache {
public: public:
void* operator new(size_t size) { static void* alloc(bool largePages) {
if (largePages) {
return allocLargePagesMemory(sizeof(Cache));
}
else {
void* ptr = _mm_malloc(sizeof(Cache), sizeof(__m128i));
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
}
//Releases storage that was obtained from alloc().
//  cache      - pointer previously returned by alloc()
//  largePages - the same flag that was passed to alloc() for this pointer
//Only memory from the _mm_malloc path is freed. When largePages is true this
//function does nothing, so that memory is not released here. No destructor is
//invoked on *cache in either case.
static void dealloc(Cache* cache, bool largePages) {
	if (!largePages) {
		_mm_free(cache);
	}
}
/*void* operator new(size_t size) {
void* ptr = _mm_malloc(size, sizeof(__m128i)); void* ptr = _mm_malloc(size, sizeof(__m128i));
if (ptr == nullptr) if (ptr == nullptr)
throw std::bad_alloc(); throw std::bad_alloc();
@ -37,7 +57,7 @@ namespace RandomX {
void operator delete(void* ptr) { void operator delete(void* ptr) {
_mm_free(ptr); _mm_free(ptr);
} }*/
template<bool softAes> template<bool softAes>
void initialize(const void* seed, size_t seedSize); void initialize(const void* seed, size_t seedSize);

View File

@ -15,6 +15,8 @@
;# You should have received a copy of the GNU General Public License ;# You should have received a copy of the GNU General Public License
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>. ;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue
@ -57,4 +59,6 @@ randomx_program_transform ENDP
_RANDOMX_JITX86_STATIC ENDS _RANDOMX_JITX86_STATIC ENDS
ENDIF
END END

View File

@ -46,6 +46,10 @@ namespace RandomX {
void JitCompilerX86::generateProgram(Pcg32& gen) { void JitCompilerX86::generateProgram(Pcg32& gen) {
} }
//Stub variant of getCodeSize (the branch that precedes #else): always reports
//a generated code size of zero bytes.
size_t JitCompilerX86::getCodeSize() {
	constexpr size_t generatedBytes = 0;
	return generatedBytes;
}
#else #else
/* /*

View File

@ -24,12 +24,19 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX { namespace RandomX {
template<bool softAes> template<bool softAes>
LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), workerThread(&LightClientAsyncWorker::runWorker, this) { LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),
#ifdef TRACE
sw(true),
#endif
workerThread(&LightClientAsyncWorker::runWorker, this) {
} }
template<bool softAes> template<bool softAes>
void LightClientAsyncWorker<softAes>::prepareBlock(addr_t addr) { void LightClientAsyncWorker<softAes>::prepareBlock(addr_t addr) {
#ifdef TRACE
std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr << std::endl;
#endif
{ {
std::lock_guard<std::mutex> lk(mutex); std::lock_guard<std::mutex> lk(mutex);
startBlock = addr / CacheLineSize; startBlock = addr / CacheLineSize;
@ -37,6 +44,9 @@ namespace RandomX {
output = currentLine.data(); output = currentLine.data();
hasWork = true; hasWork = true;
} }
#ifdef TRACE
std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
#endif
notifier.notify_all(); notifier.notify_all();
} }
@ -54,10 +64,13 @@ namespace RandomX {
template<bool softAes> template<bool softAes>
void LightClientAsyncWorker<softAes>::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { void LightClientAsyncWorker<softAes>::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
#ifdef TRACE
std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
#endif
{ {
std::lock_guard<std::mutex> lk(mutex); std::lock_guard<std::mutex> lk(mutex);
startBlock = startBlock; this->startBlock = startBlock;
blockCount = blockCount; this->blockCount = blockCount;
output = out; output = out;
hasWork = true; hasWork = true;
} }
@ -79,6 +92,9 @@ namespace RandomX {
template<bool softAes> template<bool softAes>
void LightClientAsyncWorker<softAes>::runWorker() { void LightClientAsyncWorker<softAes>::runWorker() {
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
#endif
for (;;) { for (;;) {
std::unique_lock<std::mutex> lk(mutex); std::unique_lock<std::mutex> lk(mutex);
notifier.wait(lk, [this] { return hasWork; }); notifier.wait(lk, [this] { return hasWork; });

View File

@ -17,12 +17,17 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>. along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/ */
//#define TRACE
#include "common.hpp" #include "common.hpp"
#include <thread> #include <thread>
#include <mutex> #include <mutex>
#include <condition_variable> #include <condition_variable>
#include <array> #include <array>
#ifdef TRACE
#include "Stopwatch.hpp"
#include <iostream>
#endif
namespace RandomX { namespace RandomX {
@ -43,10 +48,13 @@ namespace RandomX {
void runWorker(); void runWorker();
std::condition_variable notifier; std::condition_variable notifier;
std::mutex mutex; std::mutex mutex;
DatasetLine currentLine; alignas(16) DatasetLine currentLine;
void* output; void* output;
uint32_t startBlock, blockCount; uint32_t startBlock, blockCount;
bool hasWork; bool hasWork;
#ifdef TRACE
Stopwatch sw;
#endif
std::thread workerThread; std::thread workerThread;
}; };
} }

View File

@ -53,7 +53,7 @@ public:
isRunning = false; isRunning = false;
} }
} }
double getElapsed() { double getElapsed() const {
return getElapsedNanosec() / 1e+9; return getElapsedNanosec() / 1e+9;
} }
private: private:
@ -63,7 +63,7 @@ private:
uint64_t elapsed; uint64_t elapsed;
bool isRunning; bool isRunning;
uint64_t getElapsedNanosec() { uint64_t getElapsedNanosec() const {
uint64_t elns = elapsed; uint64_t elns = elapsed;
if (isRunning) { if (isRunning) {
chrono_t endMark = std::chrono::high_resolution_clock::now(); chrono_t endMark = std::chrono::high_resolution_clock::now();

View File

@ -34,20 +34,21 @@ namespace RandomX {
constexpr int SeedSize = 32; constexpr int SeedSize = 32;
constexpr int ResultSize = 32; constexpr int ResultSize = 32;
constexpr int CacheBlockCount = 1024 * 1024; constexpr int ArgonIterations = 6;
constexpr int CacheLineSize = 64; constexpr uint32_t ArgonMemorySize = 131072; //KiB
constexpr int BlockExpansionRatio = 64;
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
constexpr int DatasetIterations = 16;
constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
constexpr int ArgonIterations = 12;
constexpr uint32_t ArgonMemorySize = 65536; //KiB
constexpr int ArgonLanes = 1; constexpr int ArgonLanes = 1;
const char ArgonSalt[] = "Monero\x1A$"; const char ArgonSalt[] = "Monero\x1A$";
constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1; constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1;
constexpr int CacheLineSize = 64;
constexpr uint64_t DatasetSize = 4ULL * 1024 * 1024 * 1024; //4 GiB
constexpr uint32_t CacheSize = ArgonMemorySize * 1024;
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
constexpr int DatasetIterations = 32;
#ifdef TRACE #ifdef TRACE
constexpr bool trace = true; constexpr bool trace = true;
#else #else

View File

@ -67,7 +67,7 @@ namespace RandomX {
//block number 0..67108863 //block number 0..67108863
//Initialization vector = block number extended to 128 bits //Initialization vector = block number extended to 128 bits
iv = _mm_cvtsi32_si128(blockNumber); iv = _mm_cvtsi32_si128(blockNumber);
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..1048575 uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..2097151
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize); __m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
__m128i* datasetCacheLine = (__m128i*)out; __m128i* datasetCacheLine = (__m128i*)out;
@ -173,14 +173,26 @@ namespace RandomX {
void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t); void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t);
template<bool softAes> template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& ds) { void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
ds.cache = new Cache(); ds.cache = new(Cache::alloc(largePages)) Cache();
ds.cache->initialize<softAes>(seed, SeedSize); ds.cache->initialize<softAes>(seed, SeedSize);
} }
template template
void datasetInitCache<false>(const void*, dataset_t&); void datasetInitCache<false>(const void*, dataset_t&, bool);
template template
void datasetInitCache<true>(const void*, dataset_t&); void datasetInitCache<true>(const void*, dataset_t&, bool);
//Benchmark helper: invokes initBlock<softAes> blockCount times on one
//cache-line-sized scratch buffer.
//  softAes    - forwarded unchanged as the template argument of initBlock
//  blockCount - number of initBlock invocations to perform
//Nothing is returned; the scratch data is discarded when the function exits.
template<bool softAes>
void aesBench(uint32_t blockCount) {
	//Value-initialize both objects so that initBlock never reads indeterminate memory.
	alignas(16) KeysContainer keys{};
	alignas(16) uint8_t buffer[CacheLineSize] = {};
	for (uint32_t block = 0; block < blockCount; ++block) {
		//The same buffer serves as input and output, and the block number is fixed at 0
		//on every iteration; the loop counter only controls the repetition count.
		initBlock<softAes>(buffer, buffer, 0, keys);
	}
}

//Explicit instantiations for both values of softAes.
template void aesBench<false>(uint32_t blockCount);
template void aesBench<true>(uint32_t blockCount);
} }

View File

@ -43,11 +43,14 @@ namespace RandomX {
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&); void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
template<bool softAes> template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& dataset); void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
template<bool softAes> template<bool softAes>
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&); void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&);
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg); void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
template<bool softAes>
void aesBench(uint32_t blockCount);
} }

View File

@ -15,6 +15,8 @@
;# You should have received a copy of the GNU General Public License ;# You should have received a copy of the GNU General Public License
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>. ;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
IFDEF RAX
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
PUBLIC executeProgram PUBLIC executeProgram
@ -252,4 +254,6 @@ executeProgram ENDP
_RANDOMX_EXECUTE_PROGRAM ENDS _RANDOMX_EXECUTE_PROGRAM ENDS
ENDIF
END END

View File

@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
bool softAes, lightClient, genAsm, compiled, help, largePages, async; bool softAes, lightClient, genAsm, compiled, help, largePages, async, aesBench;
int programCount, threadCount; int programCount, threadCount;
readOption("--help", argc, argv, help); readOption("--help", argc, argv, help);
@ -179,29 +179,44 @@ int main(int argc, char** argv) {
readIntOption("--nonces", argc, argv, programCount, 1000); readIntOption("--nonces", argc, argv, programCount, 1000);
readOption("--largePages", argc, argv, largePages); readOption("--largePages", argc, argv, largePages);
readOption("--async", argc, argv, async); readOption("--async", argc, argv, async);
readOption("--aesBench", argc, argv, aesBench);
if (genAsm) { if (genAsm) {
generateAsm(programCount); generateAsm(programCount);
return 0; return 0;
} }
if (softAes)
std::cout << "Using software AES." << std::endl;
if(aesBench) {
programCount *= 10;
Stopwatch sw(true);
if (softAes) {
RandomX::aesBench<true>(programCount);
}
else {
RandomX::aesBench<false>(programCount);
}
sw.stop();
std::cout << "AES performance: " << programCount / sw.getElapsed() << " blocks/s" << std::endl;
return 0;
}
std::atomic<int> atomicNonce(0); std::atomic<int> atomicNonce(0);
AtomicHash result; AtomicHash result;
std::vector<RandomX::VirtualMachine*> vms; std::vector<RandomX::VirtualMachine*> vms;
std::vector<std::thread> threads; std::vector<std::thread> threads;
RandomX::dataset_t dataset; RandomX::dataset_t dataset;
if (softAes)
std::cout << "Using software AES." << std::endl;
std::cout << "Initializing..." << std::endl; std::cout << "Initializing..." << std::endl;
try { try {
Stopwatch sw(true); Stopwatch sw(true);
if (softAes) { if (softAes) {
RandomX::datasetInitCache<true>(seed, dataset); RandomX::datasetInitCache<true>(seed, dataset, largePages);
} }
else { else {
RandomX::datasetInitCache<false>(seed, dataset); RandomX::datasetInitCache<false>(seed, dataset, largePages);
} }
if (RandomX::trace) { if (RandomX::trace) {
std::cout << "Keys: " << std::endl; std::cout << "Keys: " << std::endl;
@ -243,7 +258,7 @@ int main(int argc, char** argv) {
RandomX::datasetInit<false>(cache, dataset, 0, RandomX::DatasetBlockCount); RandomX::datasetInit<false>(cache, dataset, 0, RandomX::DatasetBlockCount);
} }
} }
delete cache; RandomX::Cache::dealloc(cache, largePages);
threads.clear(); threads.clear();
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl; std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
} }

View File

@ -88,11 +88,15 @@ void* allocExecutableMemory(std::size_t bytes) {
return mem; return mem;
} }
//Rounds pos up to the nearest multiple of align.
//  pos   - value to round up
//  align - rounding granularity; must be non-zero because it is used as a divisor
//Returns the smallest multiple of align that is >= pos; a pos of 0 yields 0.
//The zero case is handled explicitly: evaluating pos - 1 on an unsigned zero wraps
//around and only gives the right answer when align happens to be a power of two.
constexpr std::size_t align(std::size_t pos, uint32_t align) {
	return pos == 0 ? 0 : ((pos - 1) / align + 1) * align;
}
void* allocLargePagesMemory(std::size_t bytes) { void* allocLargePagesMemory(std::size_t bytes) {
void* mem; void* mem;
#ifdef _WIN32 #ifdef _WIN32
setPrivilege("SeLockMemoryPrivilege", 1); setPrivilege("SeLockMemoryPrivilege", 1);
mem = VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); mem = VirtualAlloc(NULL, align(bytes, 2 * 1024 * 1024), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
if (mem == nullptr) if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc")); throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else #else