mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 05:38:54 +00:00
Large page support for cache
Bug fixes
This commit is contained in:
parent
8b1102ee05
commit
4fb168e249
13
makefile
13
makefile
@ -11,7 +11,7 @@ SRCDIR=src
|
||||
OBJDIR=obj
|
||||
LDFLAGS=-lpthread
|
||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o)
|
||||
ifeq ($(PLATFORM),x86_64)
|
||||
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
|
||||
endif
|
||||
@ -27,6 +27,11 @@ debug: CCFLAGS += -g
|
||||
debug: LDFLAGS += -g
|
||||
debug: $(BINDIR)/randomx
|
||||
|
||||
profile: CXXFLAGS += -pg
|
||||
profile: CCFLAGS += -pg
|
||||
profile: LDFLAGS += -pg
|
||||
profile: $(BINDIR)/randomx
|
||||
|
||||
test: CXXFLAGS += -O0
|
||||
test: $(BINDIR)/AluFpuTest
|
||||
|
||||
@ -38,6 +43,9 @@ $(BINDIR)/AluFpuTest: $(TOBJS) | $(BINDIR)
|
||||
|
||||
$(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/TestAluFpu.cpp -o $@
|
||||
|
||||
$(OBJDIR)/AddressTransform.o: $(addprefix $(SRCDIR)/,AddressTransform.cpp InterpretedVirtualMachine.hpp common.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AddressTransform.cpp -o $@
|
||||
|
||||
$(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR)
|
||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_core.c -o $@
|
||||
@ -74,6 +82,9 @@ $(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp
|
||||
|
||||
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
||||
|
||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
||||
|
@ -23,12 +23,32 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <new>
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "virtualMemory.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
class Cache {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
// Returns raw storage of sizeof(Cache) bytes; no Cache object is constructed here.
// largePages == true : the storage comes from allocLargePagesMemory().
// largePages == false: the storage comes from _mm_malloc(), aligned to sizeof(__m128i).
// Throws std::bad_alloc when _mm_malloc() returns nullptr.
static void* alloc(bool largePages) {
|
||||
if (largePages) {
|
||||
return allocLargePagesMemory(sizeof(Cache));
|
||||
}
|
||||
else {
|
||||
void* ptr = _mm_malloc(sizeof(Cache), sizeof(__m128i));
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
// Releases the storage behind `cache`; largePages selects the release path.
// Only the non-large-pages path frees anything (via _mm_free()).
// No destructor call is made on `cache` in this function.
// NOTE(review): the largePages branch is empty apart from a commented-out
// line, so large-page storage is not released here — confirm whether it is
// freed elsewhere or intentionally left to process exit.
static void dealloc(Cache* cache, bool largePages) {
|
||||
if (largePages) {
|
||||
//allocLargePagesMemory(sizeof(Cache));
|
||||
}
|
||||
else {
|
||||
_mm_free(cache);
|
||||
}
|
||||
}
|
||||
/*void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
@ -37,7 +57,7 @@ namespace RandomX {
|
||||
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
}*/
|
||||
|
||||
template<bool softAes>
|
||||
void initialize(const void* seed, size_t seedSize);
|
||||
|
@ -15,6 +15,8 @@
|
||||
;# You should have received a copy of the GNU General Public License
|
||||
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
IFDEF RAX
|
||||
|
||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_program_prologue
|
||||
@ -57,4 +59,6 @@ randomx_program_transform ENDP
|
||||
|
||||
_RANDOMX_JITX86_STATIC ENDS
|
||||
|
||||
ENDIF
|
||||
|
||||
END
|
@ -46,6 +46,10 @@ namespace RandomX {
|
||||
// Stub: the body is empty and `gen` is not used.
// NOTE(review): this definition sits before the `#else` that follows, so it is
// presumably the fallback for builds without the x86 JIT — confirm against the
// enclosing preprocessor condition, which is not visible here.
void JitCompilerX86::generateProgram(Pcg32& gen) {
|
||||
|
||||
}
|
||||
|
||||
// Stub: always returns 0.
// NOTE(review): presumably paired with the empty generateProgram() in the same
// preprocessor branch (no code is generated, so the size is zero) — confirm.
size_t JitCompilerX86::getCodeSize() {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
|
||||
/*
|
||||
|
@ -24,12 +24,19 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
namespace RandomX {
|
||||
|
||||
template<bool softAes>
|
||||
LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), workerThread(&LightClientAsyncWorker::runWorker, this) {
|
||||
LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),
|
||||
#ifdef TRACE
|
||||
sw(true),
|
||||
#endif
|
||||
workerThread(&LightClientAsyncWorker::runWorker, this) {
|
||||
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::prepareBlock(addr_t addr) {
|
||||
#ifdef TRACE
|
||||
std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr << std::endl;
|
||||
#endif
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
startBlock = addr / CacheLineSize;
|
||||
@ -37,6 +44,9 @@ namespace RandomX {
|
||||
output = currentLine.data();
|
||||
hasWork = true;
|
||||
}
|
||||
#ifdef TRACE
|
||||
std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
|
||||
#endif
|
||||
notifier.notify_all();
|
||||
}
|
||||
|
||||
@ -54,10 +64,13 @@ namespace RandomX {
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
||||
#ifdef TRACE
|
||||
std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
|
||||
#endif
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
startBlock = startBlock;
|
||||
blockCount = blockCount;
|
||||
this->startBlock = startBlock;
|
||||
this->blockCount = blockCount;
|
||||
output = out;
|
||||
hasWork = true;
|
||||
}
|
||||
@ -79,6 +92,9 @@ namespace RandomX {
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::runWorker() {
|
||||
#ifdef TRACE
|
||||
std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
|
||||
#endif
|
||||
for (;;) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
notifier.wait(lk, [this] { return hasWork; });
|
||||
|
@ -17,12 +17,17 @@ You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//#define TRACE
|
||||
#include "common.hpp"
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <array>
|
||||
#ifdef TRACE
|
||||
#include "Stopwatch.hpp"
|
||||
#include <iostream>
|
||||
#endif
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
@ -43,10 +48,13 @@ namespace RandomX {
|
||||
void runWorker();
|
||||
std::condition_variable notifier;
|
||||
std::mutex mutex;
|
||||
DatasetLine currentLine;
|
||||
alignas(16) DatasetLine currentLine;
|
||||
void* output;
|
||||
uint32_t startBlock, blockCount;
|
||||
bool hasWork;
|
||||
#ifdef TRACE
|
||||
Stopwatch sw;
|
||||
#endif
|
||||
std::thread workerThread;
|
||||
};
|
||||
}
|
@ -53,7 +53,7 @@ public:
|
||||
isRunning = false;
|
||||
}
|
||||
}
|
||||
double getElapsed() {
|
||||
double getElapsed() const {
|
||||
return getElapsedNanosec() / 1e+9;
|
||||
}
|
||||
private:
|
||||
@ -63,7 +63,7 @@ private:
|
||||
uint64_t elapsed;
|
||||
bool isRunning;
|
||||
|
||||
uint64_t getElapsedNanosec() {
|
||||
uint64_t getElapsedNanosec() const {
|
||||
uint64_t elns = elapsed;
|
||||
if (isRunning) {
|
||||
chrono_t endMark = std::chrono::high_resolution_clock::now();
|
||||
|
@ -34,20 +34,21 @@ namespace RandomX {
|
||||
constexpr int SeedSize = 32;
|
||||
constexpr int ResultSize = 32;
|
||||
|
||||
constexpr int CacheBlockCount = 1024 * 1024;
|
||||
constexpr int CacheLineSize = 64;
|
||||
constexpr int BlockExpansionRatio = 64;
|
||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||
constexpr int DatasetIterations = 16;
|
||||
constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
|
||||
constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
|
||||
|
||||
constexpr int ArgonIterations = 12;
|
||||
constexpr uint32_t ArgonMemorySize = 65536; //KiB
|
||||
constexpr int ArgonIterations = 6;
|
||||
constexpr uint32_t ArgonMemorySize = 131072; //KiB
|
||||
constexpr int ArgonLanes = 1;
|
||||
const char ArgonSalt[] = "Monero\x1A$";
|
||||
constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1;
|
||||
|
||||
constexpr int CacheLineSize = 64;
|
||||
constexpr uint64_t DatasetSize = 4ULL * 1024 * 1024 * 1024; //4 GiB
|
||||
constexpr uint32_t CacheSize = ArgonMemorySize * 1024;
|
||||
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
||||
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||
constexpr int DatasetIterations = 32;
|
||||
|
||||
|
||||
#ifdef TRACE
|
||||
constexpr bool trace = true;
|
||||
#else
|
||||
|
@ -67,7 +67,7 @@ namespace RandomX {
|
||||
//block number 0..67108863
|
||||
//Initialization vector = block number extended to 128 bits
|
||||
iv = _mm_cvtsi32_si128(blockNumber);
|
||||
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..1048575
|
||||
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..2097151
|
||||
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
|
||||
__m128i* datasetCacheLine = (__m128i*)out;
|
||||
|
||||
@ -173,14 +173,26 @@ namespace RandomX {
|
||||
void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& ds) {
|
||||
ds.cache = new Cache();
|
||||
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
||||
ds.cache = new(Cache::alloc(largePages)) Cache();
|
||||
ds.cache->initialize<softAes>(seed, SeedSize);
|
||||
}
|
||||
|
||||
template
|
||||
void datasetInitCache<false>(const void*, dataset_t&);
|
||||
void datasetInitCache<false>(const void*, dataset_t&, bool);
|
||||
|
||||
template
|
||||
void datasetInitCache<true>(const void*, dataset_t&);
|
||||
void datasetInitCache<true>(const void*, dataset_t&, bool);
|
||||
|
||||
// Calls initBlock<softAes>() blockCount times in a row; nothing is returned.
// Every call receives the same 16-byte-aligned buffer of CacheLineSize bytes as
// both its first and second argument, 0 as its third argument, and `keys`.
// NOTE(review): neither `keys` nor `buffer` is explicitly initialized in this
// function; presumably only the elapsed time matters to the caller — confirm.
template<bool softAes>
|
||||
void aesBench(uint32_t blockCount) {
|
||||
alignas(16) KeysContainer keys;
|
||||
alignas(16) uint8_t buffer[CacheLineSize];
|
||||
for (uint32_t block = 0; block < blockCount; ++block) {
|
||||
initBlock<softAes>(buffer, buffer, 0, keys);
|
||||
}
|
||||
}
|
||||
|
||||
template void aesBench<false>(uint32_t blockCount);
|
||||
template void aesBench<true>(uint32_t blockCount);
|
||||
}
|
||||
|
@ -43,11 +43,14 @@ namespace RandomX {
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset);
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
|
||||
|
||||
template<bool softAes>
|
||||
void aesBench(uint32_t blockCount);
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
;# You should have received a copy of the GNU General Public License
|
||||
;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
IFDEF RAX
|
||||
|
||||
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC executeProgram
|
||||
@ -252,4 +254,6 @@ executeProgram ENDP
|
||||
|
||||
_RANDOMX_EXECUTE_PROGRAM ENDS
|
||||
|
||||
ENDIF
|
||||
|
||||
END
|
||||
|
29
src/main.cpp
29
src/main.cpp
@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, lightClient, genAsm, compiled, help, largePages, async;
|
||||
bool softAes, lightClient, genAsm, compiled, help, largePages, async, aesBench;
|
||||
int programCount, threadCount;
|
||||
readOption("--help", argc, argv, help);
|
||||
|
||||
@ -179,29 +179,44 @@ int main(int argc, char** argv) {
|
||||
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||
readOption("--largePages", argc, argv, largePages);
|
||||
readOption("--async", argc, argv, async);
|
||||
readOption("--aesBench", argc, argv, aesBench);
|
||||
|
||||
if (genAsm) {
|
||||
generateAsm(programCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (softAes)
|
||||
std::cout << "Using software AES." << std::endl;
|
||||
|
||||
if(aesBench) {
|
||||
programCount *= 10;
|
||||
Stopwatch sw(true);
|
||||
if (softAes) {
|
||||
RandomX::aesBench<true>(programCount);
|
||||
}
|
||||
else {
|
||||
RandomX::aesBench<false>(programCount);
|
||||
}
|
||||
sw.stop();
|
||||
std::cout << "AES performance: " << programCount / sw.getElapsed() << " blocks/s" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::atomic<int> atomicNonce(0);
|
||||
AtomicHash result;
|
||||
std::vector<RandomX::VirtualMachine*> vms;
|
||||
std::vector<std::thread> threads;
|
||||
RandomX::dataset_t dataset;
|
||||
|
||||
if (softAes)
|
||||
std::cout << "Using software AES." << std::endl;
|
||||
std::cout << "Initializing..." << std::endl;
|
||||
|
||||
try {
|
||||
Stopwatch sw(true);
|
||||
if (softAes) {
|
||||
RandomX::datasetInitCache<true>(seed, dataset);
|
||||
RandomX::datasetInitCache<true>(seed, dataset, largePages);
|
||||
}
|
||||
else {
|
||||
RandomX::datasetInitCache<false>(seed, dataset);
|
||||
RandomX::datasetInitCache<false>(seed, dataset, largePages);
|
||||
}
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Keys: " << std::endl;
|
||||
@ -243,7 +258,7 @@ int main(int argc, char** argv) {
|
||||
RandomX::datasetInit<false>(cache, dataset, 0, RandomX::DatasetBlockCount);
|
||||
}
|
||||
}
|
||||
delete cache;
|
||||
RandomX::Cache::dealloc(cache, largePages);
|
||||
threads.clear();
|
||||
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
|
@ -88,11 +88,15 @@ void* allocExecutableMemory(std::size_t bytes) {
|
||||
return mem;
|
||||
}
|
||||
|
||||
// Rounds `pos` up to the nearest multiple of `align`.
//   pos   - size or offset to round up
//   align - alignment in bytes; must be non-zero (any value, not only powers of two)
// Returns the smallest multiple of `align` that is >= pos; 0 stays 0.
// The zero case is handled explicitly: `pos - 1` would otherwise wrap around
// and only produce 0 by accident when `align` is a power of two.
// Kept as a single return statement so it remains a valid C++11 constexpr function.
constexpr std::size_t align(std::size_t pos, uint32_t align) {
	return pos == 0 ? 0 : ((pos - 1) / align + 1) * align;
}
|
||||
|
||||
void* allocLargePagesMemory(std::size_t bytes) {
|
||||
void* mem;
|
||||
#ifdef _WIN32
|
||||
setPrivilege("SeLockMemoryPrivilege", 1);
|
||||
mem = VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
mem = VirtualAlloc(NULL, align(bytes, 2 * 1024 * 1024), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (mem == nullptr)
|
||||
throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user