mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-05 06:38:53 +00:00
Implemented virtual memory free
Removed legacy AES code
This commit is contained in:
parent
6e8c83fdb6
commit
096a7c0d7b
@ -17,11 +17,8 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Parts of this file are originally copyright (c) xmr-stak
|
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
#include "softAes.h"
|
|
||||||
#include "argon2.h"
|
#include "argon2.h"
|
||||||
#include "argon2_core.h"
|
#include "argon2_core.h"
|
||||||
|
|
||||||
@ -29,52 +26,6 @@ namespace RandomX {
|
|||||||
|
|
||||||
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
||||||
|
|
||||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
|
||||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
|
||||||
static inline __m128i sl_xor(__m128i tmp1) {
|
|
||||||
__m128i tmp4;
|
|
||||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
return tmp1;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<uint8_t rcon, bool soft>
|
|
||||||
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
|
|
||||||
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
|
||||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
|
||||||
*xout0 = sl_xor(*xout0);
|
|
||||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
|
||||||
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
|
||||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
|
||||||
*xout2 = sl_xor(*xout2);
|
|
||||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
|
||||||
__m128i xout0, xout2;
|
|
||||||
xout0 = _mm_load_si128(seed);
|
|
||||||
xout2 = _mm_load_si128(seed + 1);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aesGenKeys<0x01, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aesGenKeys<0x02, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aesGenKeys<0x04, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aesGenKeys<0x08, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cache::argonFill(const void* seed, size_t seedSize) {
|
void Cache::argonFill(const void* seed, size_t seedSize) {
|
||||||
uint32_t memory_blocks, segment_length;
|
uint32_t memory_blocks, segment_length;
|
||||||
argon2_instance_t instance;
|
argon2_instance_t instance;
|
||||||
@ -128,16 +79,8 @@ namespace RandomX {
|
|||||||
fill_memory_blocks(&instance);
|
fill_memory_blocks(&instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void Cache::initialize(const void* seed, size_t seedSize) {
|
void Cache::initialize(const void* seed, size_t seedSize) {
|
||||||
//Argon2d memory fill
|
//Argon2d memory fill
|
||||||
argonFill(seed, seedSize);
|
argonFill(seed, seedSize);
|
||||||
|
|
||||||
//AES keys
|
|
||||||
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template void Cache::initialize<true>(const void*, size_t);
|
|
||||||
|
|
||||||
template void Cache::initialize<false>(const void*, size_t);
|
|
||||||
}
|
}
|
@ -42,7 +42,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
static void dealloc(Cache* cache, bool largePages) {
|
static void dealloc(Cache* cache, bool largePages) {
|
||||||
if (largePages) {
|
if (largePages) {
|
||||||
//allocLargePagesMemory(sizeof(Cache));
|
freePagedMemory(cache, sizeof(Cache));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
_mm_free(cache);
|
_mm_free(cache);
|
||||||
@ -59,18 +59,12 @@ namespace RandomX {
|
|||||||
_mm_free(ptr);
|
_mm_free(ptr);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void initialize(const void* seed, size_t seedSize);
|
void initialize(const void* seed, size_t seedSize);
|
||||||
|
|
||||||
const KeysContainer& getKeys() const {
|
|
||||||
return keys;
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint8_t* getCache() const {
|
const uint8_t* getCache() const {
|
||||||
return memory;
|
return memory;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
alignas(16) KeysContainer keys;
|
|
||||||
uint8_t memory[CacheSize];
|
uint8_t memory[CacheSize];
|
||||||
void argonFill(const void* seed, size_t seedSize);
|
void argonFill(const void* seed, size_t seedSize);
|
||||||
};
|
};
|
||||||
|
@ -350,7 +350,7 @@ namespace RandomX {
|
|||||||
mem.mx &= CacheLineAlignMask;
|
mem.mx &= CacheLineAlignMask;
|
||||||
Cache* cache = mem.ds.cache;
|
Cache* cache = mem.ds.cache;
|
||||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize, cache->getKeys());
|
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize);
|
||||||
for (int i = 0; i < RegistersCount; ++i)
|
for (int i = 0; i < RegistersCount; ++i)
|
||||||
r[i] ^= datasetLine[i];
|
r[i] ^= datasetLine[i];
|
||||||
std::swap(mem.mx, mem.ma);
|
std::swap(mem.mx, mem.ma);
|
||||||
|
@ -57,7 +57,7 @@ namespace RandomX {
|
|||||||
#endif
|
#endif
|
||||||
uint32_t currentBlock = addr / CacheLineSize;
|
uint32_t currentBlock = addr / CacheLineSize;
|
||||||
if (currentBlock != startBlock || output != currentLine.data()) {
|
if (currentBlock != startBlock || output != currentLine.data()) {
|
||||||
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock, cache->getKeys());
|
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
sync();
|
sync();
|
||||||
@ -86,7 +86,7 @@ namespace RandomX {
|
|||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
||||||
for (uint32_t i = 0; i < blockCount; ++i) {
|
for (uint32_t i = 0; i < blockCount; ++i) {
|
||||||
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i, cache->getKeys());
|
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,7 +108,7 @@ namespace RandomX {
|
|||||||
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
|
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
|
||||||
#endif
|
#endif
|
||||||
//getBlocks(output, startBlock, blockCount);
|
//getBlocks(output, startBlock, blockCount);
|
||||||
initBlock(cache->getCache(), (uint8_t*)output, startBlock, cache->getKeys());
|
initBlock(cache->getCache(), (uint8_t*)output, startBlock);
|
||||||
hasWork = false;
|
hasWork = false;
|
||||||
#ifdef TRACE
|
#ifdef TRACE
|
||||||
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
|
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
|
||||||
|
@ -39,36 +39,36 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) {
|
||||||
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
|
uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
|
||||||
|
|
||||||
r0 = 4ULL * blockNumber;
|
c0 = 4ULL * blockNumber;
|
||||||
r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
|
c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
|
||||||
|
|
||||||
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
|
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
|
||||||
|
|
||||||
for (auto i = 0; i < DatasetIterations; ++i) {
|
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||||
const uint8_t* mixBlock = cache + (r0 & mask);
|
const uint8_t* mixBlock = cache + (c0 & mask);
|
||||||
PREFETCHNTA(mixBlock);
|
PREFETCHNTA(mixBlock);
|
||||||
r0 = squareHash(r0);
|
c0 = squareHash(c0);
|
||||||
r0 ^= load64(mixBlock + 0);
|
c0 ^= load64(mixBlock + 0);
|
||||||
r1 ^= load64(mixBlock + 8);
|
c1 ^= load64(mixBlock + 8);
|
||||||
r2 ^= load64(mixBlock + 16);
|
c2 ^= load64(mixBlock + 16);
|
||||||
r3 ^= load64(mixBlock + 24);
|
c3 ^= load64(mixBlock + 24);
|
||||||
r4 ^= load64(mixBlock + 32);
|
c4 ^= load64(mixBlock + 32);
|
||||||
r5 ^= load64(mixBlock + 40);
|
c5 ^= load64(mixBlock + 40);
|
||||||
r6 ^= load64(mixBlock + 48);
|
c6 ^= load64(mixBlock + 48);
|
||||||
r7 ^= load64(mixBlock + 56);
|
c7 ^= load64(mixBlock + 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
store64(out + 0, r0);
|
store64(out + 0, c0);
|
||||||
store64(out + 8, r1);
|
store64(out + 8, c1);
|
||||||
store64(out + 16, r2);
|
store64(out + 16, c2);
|
||||||
store64(out + 24, r3);
|
store64(out + 24, c3);
|
||||||
store64(out + 32, r4);
|
store64(out + 32, c4);
|
||||||
store64(out + 40, r5);
|
store64(out + 40, c5);
|
||||||
store64(out + 48, r6);
|
store64(out + 48, c6);
|
||||||
store64(out + 56, r7);
|
store64(out + 56, c7);
|
||||||
}
|
}
|
||||||
|
|
||||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||||
@ -86,7 +86,7 @@ namespace RandomX {
|
|||||||
memory.mx &= CacheLineAlignMask; //align to cache line
|
memory.mx &= CacheLineAlignMask; //align to cache line
|
||||||
Cache* cache = memory.ds.cache;
|
Cache* cache = memory.ds.cache;
|
||||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
|
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize);
|
||||||
for (int i = 0; i < RegistersCount; ++i)
|
for (int i = 0; i < RegistersCount; ++i)
|
||||||
reg[i] ^= datasetLine[i];
|
reg[i] ^= datasetLine[i];
|
||||||
std::swap(memory.mx, memory.ma);
|
std::swap(memory.mx, memory.ma);
|
||||||
@ -119,31 +119,12 @@ namespace RandomX {
|
|||||||
|
|
||||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
||||||
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||||
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
|
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
||||||
ds.cache = new(Cache::alloc(largePages)) Cache();
|
ds.cache = new(Cache::alloc(largePages)) Cache();
|
||||||
ds.cache->initialize<softAes>(seed, SeedSize);
|
ds.cache->initialize(seed, SeedSize);
|
||||||
}
|
|
||||||
|
|
||||||
template
|
|
||||||
void datasetInitCache<false>(const void*, dataset_t&, bool);
|
|
||||||
|
|
||||||
template
|
|
||||||
void datasetInitCache<true>(const void*, dataset_t&, bool);
|
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void aesBench(uint32_t blockCount) {
|
|
||||||
alignas(16) KeysContainer keys;
|
|
||||||
alignas(16) uint8_t buffer[CacheLineSize];
|
|
||||||
for (uint32_t block = 0; block < blockCount; ++block) {
|
|
||||||
initBlock(buffer, buffer, 0, keys);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void aesBench<false>(uint32_t blockCount);
|
|
||||||
template void aesBench<true>(uint32_t blockCount);
|
|
||||||
}
|
|
||||||
|
@ -20,18 +20,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <array>
|
|
||||||
#include "intrinPortable.h"
|
#include "intrinPortable.h"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
using KeysContainer = std::array<__m128i, 10>;
|
|
||||||
|
|
||||||
template<bool soft, bool enc>
|
template<bool soft, bool enc>
|
||||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
|
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber);
|
||||||
|
|
||||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber);
|
||||||
|
|
||||||
void datasetAlloc(dataset_t& ds, bool largePages);
|
void datasetAlloc(dataset_t& ds, bool largePages);
|
||||||
|
|
||||||
@ -39,14 +36,10 @@ namespace RandomX {
|
|||||||
|
|
||||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
|
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
|
||||||
|
|
||||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
|
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
|
||||||
|
|
||||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
|
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void aesBench(uint32_t blockCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
12
src/main.cpp
12
src/main.cpp
@ -256,18 +256,8 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
Stopwatch sw(true);
|
Stopwatch sw(true);
|
||||||
if (softAes) {
|
RandomX::datasetInitCache(seed, dataset, largePages);
|
||||||
RandomX::datasetInitCache<true>(seed, dataset, largePages);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
RandomX::datasetInitCache<false>(seed, dataset, largePages);
|
|
||||||
}
|
|
||||||
if (RandomX::trace) {
|
if (RandomX::trace) {
|
||||||
std::cout << "Keys: " << std::endl;
|
|
||||||
for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) {
|
|
||||||
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
std::cout << "Cache: " << std::endl;
|
std::cout << "Cache: " << std::endl;
|
||||||
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
|
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
@ -110,3 +110,11 @@ void* allocLargePagesMemory(std::size_t bytes) {
|
|||||||
#endif
|
#endif
|
||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void freePagedMemory(void* ptr, std::size_t bytes) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
VirtualFree(ptr, 0, MEM_RELEASE);
|
||||||
|
#else
|
||||||
|
munmap(ptr, bytes);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
@ -23,3 +23,4 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
void* allocExecutableMemory(std::size_t);
|
void* allocExecutableMemory(std::size_t);
|
||||||
void* allocLargePagesMemory(std::size_t);
|
void* allocLargePagesMemory(std::size_t);
|
||||||
|
void freePagedMemory(void*, std::size_t);
|
||||||
|
Loading…
Reference in New Issue
Block a user