diff --git a/src/Cache.cpp b/src/Cache.cpp
index 85d481e..60b7755 100644
--- a/src/Cache.cpp
+++ b/src/Cache.cpp
@@ -17,11 +17,8 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
-// Parts of this file are originally copyright (c) xmr-stak
-
#include
#include "Cache.hpp"
-#include "softAes.h"
#include "argon2.h"
#include "argon2_core.h"
@@ -29,52 +26,6 @@ namespace RandomX {
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
- // This will shift and xor tmp1 into itself as 4 32-bit vals such as
- // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
- static inline __m128i sl_xor(__m128i tmp1) {
- __m128i tmp4;
- tmp4 = _mm_slli_si128(tmp1, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- tmp4 = _mm_slli_si128(tmp4, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- tmp4 = _mm_slli_si128(tmp4, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- return tmp1;
- }
-
- template<uint8_t rcon, bool soft>
- static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
- __m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
- xout1 = _mm_shuffle_epi32(xout1, 0xFF);
- *xout0 = sl_xor(*xout0);
- *xout0 = _mm_xor_si128(*xout0, xout1);
- xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
- xout1 = _mm_shuffle_epi32(xout1, 0xAA);
- *xout2 = sl_xor(*xout2);
- *xout2 = _mm_xor_si128(*xout2, xout1);
- }
-
- template<bool soft>
- static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
- __m128i xout0, xout2;
- xout0 = _mm_load_si128(seed);
- xout2 = _mm_load_si128(seed + 1);
- *keys++ = xout0;
- *keys++ = xout2;
- aesGenKeys<0x01, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aesGenKeys<0x02, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aesGenKeys<0x04, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aesGenKeys<0x08, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- }
-
void Cache::argonFill(const void* seed, size_t seedSize) {
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
@@ -128,16 +79,8 @@ namespace RandomX {
fill_memory_blocks(&instance);
}
- template<bool softAes>
void Cache::initialize(const void* seed, size_t seedSize) {
//Argon2d memory fill
argonFill(seed, seedSize);
-
- //AES keys
- expandAesKeys<softAes>((__m128i*)seed, keys.data());
}
-
- template void Cache::initialize<true>(const void*, size_t);
-
- template void Cache::initialize<false>(const void*, size_t);
}
\ No newline at end of file
diff --git a/src/Cache.hpp b/src/Cache.hpp
index bc3d6ed..927c5e4 100644
--- a/src/Cache.hpp
+++ b/src/Cache.hpp
@@ -42,7 +42,7 @@ namespace RandomX {
}
static void dealloc(Cache* cache, bool largePages) {
if (largePages) {
- //allocLargePagesMemory(sizeof(Cache));
+ freePagedMemory(cache, sizeof(Cache));
}
else {
_mm_free(cache);
@@ -59,18 +59,12 @@ namespace RandomX {
_mm_free(ptr);
}*/
- template<bool softAes>
void initialize(const void* seed, size_t seedSize);
- const KeysContainer& getKeys() const {
- return keys;
- }
-
const uint8_t* getCache() const {
return memory;
}
private:
- alignas(16) KeysContainer keys;
uint8_t memory[CacheSize];
void argonFill(const void* seed, size_t seedSize);
};
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index f44a391..08d4536 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -350,7 +350,7 @@ namespace RandomX {
mem.mx &= CacheLineAlignMask;
Cache* cache = mem.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
- initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize, cache->getKeys());
+ initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize);
for (int i = 0; i < RegistersCount; ++i)
r[i] ^= datasetLine[i];
std::swap(mem.mx, mem.ma);
diff --git a/src/LightClientAsyncWorker.cpp b/src/LightClientAsyncWorker.cpp
index f79d03d..d9f62a2 100644
--- a/src/LightClientAsyncWorker.cpp
+++ b/src/LightClientAsyncWorker.cpp
@@ -57,7 +57,7 @@ namespace RandomX {
#endif
uint32_t currentBlock = addr / CacheLineSize;
if (currentBlock != startBlock || output != currentLine.data()) {
- initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock, cache->getKeys());
+ initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock);
}
else {
sync();
@@ -86,7 +86,7 @@ namespace RandomX {
template
void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
for (uint32_t i = 0; i < blockCount; ++i) {
- initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i, cache->getKeys());
+ initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i);
}
}
@@ -108,7 +108,7 @@ namespace RandomX {
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
#endif
//getBlocks(output, startBlock, blockCount);
- initBlock(cache->getCache(), (uint8_t*)output, startBlock, cache->getKeys());
+ initBlock(cache->getCache(), (uint8_t*)output, startBlock);
hasWork = false;
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
diff --git a/src/dataset.cpp b/src/dataset.cpp
index 4a6a5e6..a5132fd 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -39,36 +39,36 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
- void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
- uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+ void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) {
+ uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
- r0 = 4ULL * blockNumber;
- r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
+ c0 = 4ULL * blockNumber;
+ c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
for (auto i = 0; i < DatasetIterations; ++i) {
- const uint8_t* mixBlock = cache + (r0 & mask);
+ const uint8_t* mixBlock = cache + (c0 & mask);
PREFETCHNTA(mixBlock);
- r0 = squareHash(r0);
- r0 ^= load64(mixBlock + 0);
- r1 ^= load64(mixBlock + 8);
- r2 ^= load64(mixBlock + 16);
- r3 ^= load64(mixBlock + 24);
- r4 ^= load64(mixBlock + 32);
- r5 ^= load64(mixBlock + 40);
- r6 ^= load64(mixBlock + 48);
- r7 ^= load64(mixBlock + 56);
+ c0 = squareHash(c0);
+ c0 ^= load64(mixBlock + 0);
+ c1 ^= load64(mixBlock + 8);
+ c2 ^= load64(mixBlock + 16);
+ c3 ^= load64(mixBlock + 24);
+ c4 ^= load64(mixBlock + 32);
+ c5 ^= load64(mixBlock + 40);
+ c6 ^= load64(mixBlock + 48);
+ c7 ^= load64(mixBlock + 56);
}
- store64(out + 0, r0);
- store64(out + 8, r1);
- store64(out + 16, r2);
- store64(out + 24, r3);
- store64(out + 32, r4);
- store64(out + 40, r5);
- store64(out + 48, r6);
- store64(out + 56, r7);
+ store64(out + 0, c0);
+ store64(out + 8, c1);
+ store64(out + 16, c2);
+ store64(out + 24, c3);
+ store64(out + 32, c4);
+ store64(out + 40, c5);
+ store64(out + 48, c6);
+ store64(out + 56, c7);
}
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
@@ -86,7 +86,7 @@ namespace RandomX {
memory.mx &= CacheLineAlignMask; //align to cache line
Cache* cache = memory.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
- initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
+ initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize);
for (int i = 0; i < RegistersCount; ++i)
reg[i] ^= datasetLine[i];
std::swap(memory.mx, memory.ma);
@@ -119,31 +119,12 @@ namespace RandomX {
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
- initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
+ initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i);
}
}
- template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
ds.cache = new(Cache::alloc(largePages)) Cache();
- ds.cache->initialize<softAes>(seed, SeedSize);
+ ds.cache->initialize(seed, SeedSize);
}
-
- template
- void datasetInitCache<true>(const void*, dataset_t&, bool);
-
- template
- void datasetInitCache<false>(const void*, dataset_t&, bool);
-
- template<bool softAes>
- void aesBench(uint32_t blockCount) {
- alignas(16) KeysContainer keys;
- alignas(16) uint8_t buffer[CacheLineSize];
- for (uint32_t block = 0; block < blockCount; ++block) {
- initBlock(buffer, buffer, 0, keys);
- }
- }
-
- template void aesBench<true>(uint32_t blockCount);
- template void aesBench<false>(uint32_t blockCount);
}
diff --git a/src/dataset.hpp b/src/dataset.hpp
index 9438173..c01835a 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -20,18 +20,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once
#include
-#include <array>
#include "intrinPortable.h"
#include "common.hpp"
namespace RandomX {
- using KeysContainer = std::array<__m128i, 10>;
-
template
- void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
+ void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber);
- void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
+ void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber);
void datasetAlloc(dataset_t& ds, bool largePages);
@@ -39,14 +36,10 @@ namespace RandomX {
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
- template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
-
- template<bool softAes>
- void aesBench(uint32_t blockCount);
}
diff --git a/src/main.cpp b/src/main.cpp
index b6efceb..ad6e856 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -256,18 +256,8 @@ int main(int argc, char** argv) {
try {
Stopwatch sw(true);
- if (softAes) {
- RandomX::datasetInitCache<true>(seed, dataset, largePages);
- }
- else {
- RandomX::datasetInitCache<false>(seed, dataset, largePages);
- }
+ RandomX::datasetInitCache(seed, dataset, largePages);
if (RandomX::trace) {
- std::cout << "Keys: " << std::endl;
- for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) {
- outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
- }
- std::cout << std::endl;
std::cout << "Cache: " << std::endl;
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
std::cout << std::endl;
diff --git a/src/virtualMemory.cpp b/src/virtualMemory.cpp
index f324e95..17b91da 100644
--- a/src/virtualMemory.cpp
+++ b/src/virtualMemory.cpp
@@ -109,4 +109,12 @@ void* allocLargePagesMemory(std::size_t bytes) {
throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
return mem;
-}
\ No newline at end of file
+}
+
+void freePagedMemory(void* ptr, std::size_t bytes) {
+#ifdef _WIN32
+ VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+ munmap(ptr, bytes);
+#endif
+}
diff --git a/src/virtualMemory.hpp b/src/virtualMemory.hpp
index c80d33e..239f24c 100644
--- a/src/virtualMemory.hpp
+++ b/src/virtualMemory.hpp
@@ -22,4 +22,5 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include
void* allocExecutableMemory(std::size_t);
-void* allocLargePagesMemory(std::size_t);
\ No newline at end of file
+void* allocLargePagesMemory(std::size_t);
+void freePagedMemory(void*, std::size_t);