diff --git a/makefile b/makefile
index d0a969c..0dcd7de 100644
--- a/makefile
+++ b/makefile
@@ -11,7 +11,7 @@ SRCDIR=src
OBJDIR=obj
LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
-ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o)
+ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o)
ifeq ($(PLATFORM),x86_64)
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
endif
@@ -27,6 +27,11 @@ debug: CCFLAGS += -g
debug: LDFLAGS += -g
debug: $(BINDIR)/randomx
+profile: CXXFLAGS += -pg
+profile: CCFLAGS += -pg
+profile: LDFLAGS += -pg
+profile: $(BINDIR)/randomx # same binary as the debug target, built with -pg added to the C++, C and link flags above
+
test: CXXFLAGS += -O0
test: $(BINDIR)/AluFpuTest
@@ -38,6 +43,9 @@ $(BINDIR)/AluFpuTest: $(TOBJS) | $(BINDIR)
$(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/TestAluFpu.cpp -o $@
+
+$(OBJDIR)/AddressTransform.o: $(addprefix $(SRCDIR)/,AddressTransform.cpp InterpretedVirtualMachine.hpp common.hpp) | $(OBJDIR) # "|" makes $(OBJDIR) an order-only prerequisite
+ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/AddressTransform.cpp -o $@
$(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR)
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_core.c -o $@
@@ -74,6 +82,9 @@ $(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
+
+$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR) # "|" makes $(OBJDIR) an order-only prerequisite
+ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
diff --git a/src/Cache.hpp b/src/Cache.hpp
index 8a2b93a..bc3d6ed 100644
--- a/src/Cache.hpp
+++ b/src/Cache.hpp
@@ -23,12 +23,32 @@ along with RandomX. If not, see.
#include
#include "common.hpp"
#include "dataset.hpp"
+#include "virtualMemory.hpp"
namespace RandomX {
class Cache {
public:
- void* operator new(size_t size) {
+ static void* alloc(bool largePages) { //hands out raw storage of sizeof(Cache) bytes; no Cache constructor is run here
+ if (largePages) {
+ return allocLargePagesMemory(sizeof(Cache)); //failure handling is left to allocLargePagesMemory (its _WIN32 path throws std::runtime_error)
+ }
+ else {
+ void* ptr = _mm_malloc(sizeof(Cache), sizeof(__m128i)); //alignment request is sizeof(__m128i)
+ if (ptr == nullptr) //_mm_malloc reports failure by returning nullptr
+ throw std::bad_alloc();
+ return ptr;
+ }
+ }
+ static void dealloc(Cache* cache, bool largePages) { //counterpart of alloc(); largePages has to match the value given to alloc() because _mm_free only fits the _mm_malloc path. Does not run the Cache destructor.
+ if (largePages) {
+ //NOTE(review): nothing is released in this branch (the placeholder here was a commented-out allocLargePagesMemory(sizeof(Cache)) call), so large-page storage from alloc() stays allocated - TODO add a matching release
+ }
+ else {
+ _mm_free(cache); //pairs with the _mm_malloc call in alloc()
+ }
+ }
+ /*void* operator new(size_t size) {
void* ptr = _mm_malloc(size, sizeof(__m128i));
if (ptr == nullptr)
throw std::bad_alloc();
@@ -37,7 +57,7 @@ namespace RandomX {
void operator delete(void* ptr) {
_mm_free(ptr);
- }
+ }*/
template
void initialize(const void* seed, size_t seedSize);
diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm
index cbbf658..031c2e4 100644
--- a/src/JitCompilerX86-static.asm
+++ b/src/JitCompilerX86-static.asm
@@ -15,6 +15,8 @@
;# You should have received a copy of the GNU General Public License
;# along with RandomX. If not, see.
+IFDEF RAX
+
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_program_prologue
@@ -57,4 +59,6 @@ randomx_program_transform ENDP
_RANDOMX_JITX86_STATIC ENDS
+ENDIF
+
END
\ No newline at end of file
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index b41f7b5..f828d0a 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -46,6 +46,10 @@ namespace RandomX {
void JitCompilerX86::generateProgram(Pcg32& gen) {
}
+
+ size_t JitCompilerX86::getCodeSize() { //stub in the branch before the #else, next to the empty generateProgram above
+ return 0; //this branch generates no code, so the reported size is always zero
+ }
#else
/*
diff --git a/src/LightClientAsyncWorker.cpp b/src/LightClientAsyncWorker.cpp
index c069f3f..32aa508 100644
--- a/src/LightClientAsyncWorker.cpp
+++ b/src/LightClientAsyncWorker.cpp
@@ -24,12 +24,19 @@ along with RandomX. If not, see.
namespace RandomX {
template
- LightClientAsyncWorker::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), workerThread(&LightClientAsyncWorker::runWorker, this) {
+ LightClientAsyncWorker::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),
+#ifdef TRACE
+ sw(true),
+#endif
+ workerThread(&LightClientAsyncWorker::runWorker, this) { //workerThread is the last member declared in the class, so runWorker starts only after hasWork (and sw under TRACE) are initialized
}
template
void LightClientAsyncWorker::prepareBlock(addr_t addr) {
+#ifdef TRACE
+ std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr << std::endl;
+#endif
{
std::lock_guard lk(mutex);
startBlock = addr / CacheLineSize;
@@ -37,6 +44,9 @@ namespace RandomX {
output = currentLine.data();
hasWork = true;
}
+#ifdef TRACE
+ std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
+#endif
notifier.notify_all();
}
@@ -54,10 +64,13 @@ namespace RandomX {
template
void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
+#ifdef TRACE
+ std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
+#endif
{
std::lock_guard lk(mutex);
- startBlock = startBlock;
- blockCount = blockCount;
+ this->startBlock = startBlock;
+ this->blockCount = blockCount;
output = out;
hasWork = true;
}
@@ -79,6 +92,9 @@ namespace RandomX {
template
void LightClientAsyncWorker::runWorker() {
+#ifdef TRACE
+ std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
+#endif
for (;;) {
std::unique_lock lk(mutex);
notifier.wait(lk, [this] { return hasWork; });
diff --git a/src/LightClientAsyncWorker.hpp b/src/LightClientAsyncWorker.hpp
index 7596fd5..29571e5 100644
--- a/src/LightClientAsyncWorker.hpp
+++ b/src/LightClientAsyncWorker.hpp
@@ -17,12 +17,17 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
+//#define TRACE
#include "common.hpp"
#include
#include
#include
#include
+#ifdef TRACE
+#include "Stopwatch.hpp"
+#include
+#endif
namespace RandomX {
@@ -43,10 +48,13 @@ namespace RandomX {
void runWorker();
std::condition_variable notifier;
std::mutex mutex;
- DatasetLine currentLine;
+ alignas(16) DatasetLine currentLine;
void* output;
uint32_t startBlock, blockCount;
bool hasWork;
+#ifdef TRACE
+ Stopwatch sw;
+#endif
std::thread workerThread;
};
}
\ No newline at end of file
diff --git a/src/Stopwatch.hpp b/src/Stopwatch.hpp
index 4f3a5a1..931bc02 100644
--- a/src/Stopwatch.hpp
+++ b/src/Stopwatch.hpp
@@ -53,7 +53,7 @@ public:
isRunning = false;
}
}
- double getElapsed() {
+ double getElapsed() const { //elapsed time in seconds (nanosecond count divided by 1e+9); const so it can be called through a const Stopwatch
return getElapsedNanosec() / 1e+9;
}
private:
@@ -63,7 +63,7 @@ private:
uint64_t elapsed;
bool isRunning;
- uint64_t getElapsedNanosec() {
+ uint64_t getElapsedNanosec() const {
uint64_t elns = elapsed;
if (isRunning) {
chrono_t endMark = std::chrono::high_resolution_clock::now();
diff --git a/src/common.hpp b/src/common.hpp
index 62fae70..fea337f 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -34,20 +34,21 @@ namespace RandomX {
constexpr int SeedSize = 32;
constexpr int ResultSize = 32;
- constexpr int CacheBlockCount = 1024 * 1024;
- constexpr int CacheLineSize = 64;
- constexpr int BlockExpansionRatio = 64;
- constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
- constexpr int DatasetIterations = 16;
- constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
- constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
-
- constexpr int ArgonIterations = 12;
- constexpr uint32_t ArgonMemorySize = 65536; //KiB
+ constexpr int ArgonIterations = 6;
+ constexpr uint32_t ArgonMemorySize = 131072; //KiB (= 128 MiB)
constexpr int ArgonLanes = 1;
const char ArgonSalt[] = "Monero\x1A$";
constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1;
+ constexpr int CacheLineSize = 64; //bytes
+ constexpr uint64_t DatasetSize = 4ULL * 1024 * 1024 * 1024; //4 GiB
+ constexpr uint32_t CacheSize = ArgonMemorySize * 1024; //bytes: 131072 KiB * 1024 = 134217728 (128 MiB), still fits in uint32_t
+ constexpr int CacheBlockCount = CacheSize / CacheLineSize; //= 2097152 cache lines
+ constexpr int BlockExpansionRatio = DatasetSize / CacheSize; //= 32; the uint64_t quotient is converted to int
+ constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount; //= 67108864
+ constexpr int DatasetIterations = 32;
+
+
#ifdef TRACE
constexpr bool trace = true;
#else
diff --git a/src/dataset.cpp b/src/dataset.cpp
index ae31963..2ef6e7f 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -67,7 +67,7 @@ namespace RandomX {
//block number 0..67108863
//Initialization vector = block number extended to 128 bits
iv = _mm_cvtsi32_si128(blockNumber);
- uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..1048575
+ uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..2097151
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
__m128i* datasetCacheLine = (__m128i*)out;
@@ -173,14 +173,26 @@ namespace RandomX {
void datasetInit(Cache*, dataset_t, uint32_t, uint32_t);
template
- void datasetInitCache(const void* seed, dataset_t& ds) {
- ds.cache = new Cache();
+ void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) { //largePages is forwarded to Cache::alloc to choose how the Cache storage is obtained
+ ds.cache = new(Cache::alloc(largePages)) Cache(); //placement new: constructs the Cache inside the storage returned by Cache::alloc, so plain delete no longer matches this allocation
ds.cache->initialize(seed, SeedSize);
}
template
- void datasetInitCache(const void*, dataset_t&);
+ void datasetInitCache(const void*, dataset_t&, bool);
template
- void datasetInitCache(const void*, dataset_t&);
+ void datasetInitCache(const void*, dataset_t&, bool);
+
+ template
+ void aesBench(uint32_t blockCount) { //benchmark helper: calls initBlock blockCount times and hands nothing back to the caller
+ alignas(16) KeysContainer keys; //NOTE(review): keys is not filled in before use here - confirm that is acceptable for a throughput-only measurement
+ alignas(16) uint8_t buffer[CacheLineSize]; //one cache line of scratch space, also not initialized
+ for (uint32_t block = 0; block < blockCount; ++block) {
+ initBlock(buffer, buffer, 0, keys); //buffer is passed for both pointer arguments; third argument is the constant 0 (presumably the block number - confirm against initBlock)
+ }
+ }
+
+ template void aesBench(uint32_t blockCount);
+ template void aesBench(uint32_t blockCount);
}
diff --git a/src/dataset.hpp b/src/dataset.hpp
index 0103271..bdd34d3 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -43,11 +43,14 @@ namespace RandomX {
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
template
- void datasetInitCache(const void* seed, dataset_t& dataset);
+ void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
template
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&);
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
+
+ template
+ void aesBench(uint32_t blockCount);
}
diff --git a/src/executeProgram-win64.asm b/src/executeProgram-win64.asm
index 2cc98fb..2da88b5 100644
--- a/src/executeProgram-win64.asm
+++ b/src/executeProgram-win64.asm
@@ -15,6 +15,8 @@
;# You should have received a copy of the GNU General Public License
;# along with RandomX. If not, see.
+IFDEF RAX
+
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
PUBLIC executeProgram
@@ -252,4 +254,6 @@ executeProgram ENDP
_RANDOMX_EXECUTE_PROGRAM ENDS
+ENDIF
+
END
diff --git a/src/main.cpp b/src/main.cpp
index 3295500..db3850e 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash
}
int main(int argc, char** argv) {
- bool softAes, lightClient, genAsm, compiled, help, largePages, async;
+ bool softAes, lightClient, genAsm, compiled, help, largePages, async, aesBench;
int programCount, threadCount;
readOption("--help", argc, argv, help);
@@ -179,29 +179,44 @@ int main(int argc, char** argv) {
readIntOption("--nonces", argc, argv, programCount, 1000);
readOption("--largePages", argc, argv, largePages);
readOption("--async", argc, argv, async);
+ readOption("--aesBench", argc, argv, aesBench);
if (genAsm) {
generateAsm(programCount);
return 0;
}
+ if (softAes)
+ std::cout << "Using software AES." << std::endl;
+
+ if(aesBench) {
+ programCount *= 10;
+ Stopwatch sw(true);
+ if (softAes) {
+ RandomX::aesBench(programCount);
+ }
+ else {
+ RandomX::aesBench(programCount);
+ }
+ sw.stop();
+ std::cout << "AES performance: " << programCount / sw.getElapsed() << " blocks/s" << std::endl;
+ return 0;
+ }
+
std::atomic atomicNonce(0);
AtomicHash result;
std::vector vms;
std::vector threads;
RandomX::dataset_t dataset;
- if (softAes)
- std::cout << "Using software AES." << std::endl;
std::cout << "Initializing..." << std::endl;
-
try {
Stopwatch sw(true);
if (softAes) {
- RandomX::datasetInitCache(seed, dataset);
+ RandomX::datasetInitCache(seed, dataset, largePages);
}
else {
- RandomX::datasetInitCache(seed, dataset);
+ RandomX::datasetInitCache(seed, dataset, largePages);
}
if (RandomX::trace) {
std::cout << "Keys: " << std::endl;
@@ -243,7 +258,7 @@ int main(int argc, char** argv) {
RandomX::datasetInit(cache, dataset, 0, RandomX::DatasetBlockCount);
}
}
- delete cache;
+ RandomX::Cache::dealloc(cache, largePages);
threads.clear();
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
}
diff --git a/src/virtualMemory.cpp b/src/virtualMemory.cpp
index e6e44fc..f324e95 100644
--- a/src/virtualMemory.cpp
+++ b/src/virtualMemory.cpp
@@ -88,11 +88,15 @@ void* allocExecutableMemory(std::size_t bytes) {
return mem;
}
+constexpr std::size_t align(std::size_t pos, uint32_t align) { //rounds pos (>= 1) up to a multiple of align; NOTE(review): the second parameter shares the function's name
+ return ((pos - 1) / align + 1) * align; //NOTE(review): pos == 0 makes (pos - 1) wrap around and align == 0 divides by zero - confirm callers pass neither
+}
+
void* allocLargePagesMemory(std::size_t bytes) {
void* mem;
#ifdef _WIN32
setPrivilege("SeLockMemoryPrivilege", 1);
- mem = VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
+ mem = VirtualAlloc(NULL, align(bytes, 2 * 1024 * 1024), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else