mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-18 04:58:35 +00:00
Implemented virtual memory free
Removed legacy AES code
This commit is contained in:
parent
6e8c83fdb6
commit
096a7c0d7b
@ -17,11 +17,8 @@ You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Parts of this file are originally copyright (c) xmr-stak
|
||||
|
||||
#include <cstring>
|
||||
#include "Cache.hpp"
|
||||
#include "softAes.h"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
|
||||
@ -29,52 +26,6 @@ namespace RandomX {
|
||||
|
||||
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
||||
|
||||
// Prefix-XOR across the four 32-bit lanes of a 128-bit value, i.e.
//   sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// The result is the input XOR-ed with copies of itself shifted left by
// 4, 8 and 12 bytes (one, two and three 32-bit lanes).
static inline __m128i sl_xor(__m128i tmp1) {
	const __m128i byOneLane = _mm_slli_si128(tmp1, 0x04);
	const __m128i byTwoLanes = _mm_slli_si128(byOneLane, 0x04);
	const __m128i byThreeLanes = _mm_slli_si128(byTwoLanes, 0x04);
	return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, byOneLane), byTwoLanes), byThreeLanes);
}
|
||||
|
||||
template<uint8_t rcon, bool soft>
|
||||
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
|
||||
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
||||
__m128i xout0, xout2;
|
||||
xout0 = _mm_load_si128(seed);
|
||||
xout2 = _mm_load_si128(seed + 1);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x01, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x02, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x04, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x08, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
}
|
||||
|
||||
void Cache::argonFill(const void* seed, size_t seedSize) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
@ -128,16 +79,8 @@ namespace RandomX {
|
||||
fill_memory_blocks(&instance);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void Cache::initialize(const void* seed, size_t seedSize) {
|
||||
//Argon2d memory fill
|
||||
argonFill(seed, seedSize);
|
||||
|
||||
//AES keys
|
||||
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
||||
}
|
||||
|
||||
template void Cache::initialize<true>(const void*, size_t);
|
||||
|
||||
template void Cache::initialize<false>(const void*, size_t);
|
||||
}
|
@ -42,7 +42,7 @@ namespace RandomX {
|
||||
}
|
||||
static void dealloc(Cache* cache, bool largePages) {
|
||||
if (largePages) {
|
||||
//allocLargePagesMemory(sizeof(Cache));
|
||||
freePagedMemory(cache, sizeof(Cache));
|
||||
}
|
||||
else {
|
||||
_mm_free(cache);
|
||||
@ -59,18 +59,12 @@ namespace RandomX {
|
||||
_mm_free(ptr);
|
||||
}*/
|
||||
|
||||
template<bool softAes>
|
||||
void initialize(const void* seed, size_t seedSize);
|
||||
|
||||
// Read-only access to the keys member; returns a reference to it, not a copy.
const KeysContainer& getKeys() const {
|
||||
return keys;
|
||||
}
|
||||
|
||||
// Returns a read-only pointer to the first byte of the memory member array (CacheSize bytes).
const uint8_t* getCache() const {
|
||||
return memory;
|
||||
}
|
||||
private:
|
||||
alignas(16) KeysContainer keys;
|
||||
uint8_t memory[CacheSize];
|
||||
void argonFill(const void* seed, size_t seedSize);
|
||||
};
|
||||
|
@ -350,7 +350,7 @@ namespace RandomX {
|
||||
mem.mx &= CacheLineAlignMask;
|
||||
Cache* cache = mem.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize, cache->getKeys());
|
||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
@ -57,7 +57,7 @@ namespace RandomX {
|
||||
#endif
|
||||
uint32_t currentBlock = addr / CacheLineSize;
|
||||
if (currentBlock != startBlock || output != currentLine.data()) {
|
||||
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock, cache->getKeys());
|
||||
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock);
|
||||
}
|
||||
else {
|
||||
sync();
|
||||
@ -86,7 +86,7 @@ namespace RandomX {
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
||||
for (uint32_t i = 0; i < blockCount; ++i) {
|
||||
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i, cache->getKeys());
|
||||
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -108,7 +108,7 @@ namespace RandomX {
|
||||
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
|
||||
#endif
|
||||
//getBlocks(output, startBlock, blockCount);
|
||||
initBlock(cache->getCache(), (uint8_t*)output, startBlock, cache->getKeys());
|
||||
initBlock(cache->getCache(), (uint8_t*)output, startBlock);
|
||||
hasWork = false;
|
||||
#ifdef TRACE
|
||||
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
|
||||
|
@ -39,36 +39,36 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
|
||||
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) {
|
||||
uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
r0 = 4ULL * blockNumber;
|
||||
r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
|
||||
c0 = 4ULL * blockNumber;
|
||||
c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
|
||||
|
||||
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
|
||||
|
||||
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||
const uint8_t* mixBlock = cache + (r0 & mask);
|
||||
const uint8_t* mixBlock = cache + (c0 & mask);
|
||||
PREFETCHNTA(mixBlock);
|
||||
r0 = squareHash(r0);
|
||||
r0 ^= load64(mixBlock + 0);
|
||||
r1 ^= load64(mixBlock + 8);
|
||||
r2 ^= load64(mixBlock + 16);
|
||||
r3 ^= load64(mixBlock + 24);
|
||||
r4 ^= load64(mixBlock + 32);
|
||||
r5 ^= load64(mixBlock + 40);
|
||||
r6 ^= load64(mixBlock + 48);
|
||||
r7 ^= load64(mixBlock + 56);
|
||||
c0 = squareHash(c0);
|
||||
c0 ^= load64(mixBlock + 0);
|
||||
c1 ^= load64(mixBlock + 8);
|
||||
c2 ^= load64(mixBlock + 16);
|
||||
c3 ^= load64(mixBlock + 24);
|
||||
c4 ^= load64(mixBlock + 32);
|
||||
c5 ^= load64(mixBlock + 40);
|
||||
c6 ^= load64(mixBlock + 48);
|
||||
c7 ^= load64(mixBlock + 56);
|
||||
}
|
||||
|
||||
store64(out + 0, r0);
|
||||
store64(out + 8, r1);
|
||||
store64(out + 16, r2);
|
||||
store64(out + 24, r3);
|
||||
store64(out + 32, r4);
|
||||
store64(out + 40, r5);
|
||||
store64(out + 48, r6);
|
||||
store64(out + 56, r7);
|
||||
store64(out + 0, c0);
|
||||
store64(out + 8, c1);
|
||||
store64(out + 16, c2);
|
||||
store64(out + 24, c3);
|
||||
store64(out + 32, c4);
|
||||
store64(out + 40, c5);
|
||||
store64(out + 48, c6);
|
||||
store64(out + 56, c7);
|
||||
}
|
||||
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||
@ -86,7 +86,7 @@ namespace RandomX {
|
||||
memory.mx &= CacheLineAlignMask; //align to cache line
|
||||
Cache* cache = memory.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
|
||||
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg[i] ^= datasetLine[i];
|
||||
std::swap(memory.mx, memory.ma);
|
||||
@ -119,31 +119,12 @@ namespace RandomX {
|
||||
|
||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
||||
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
|
||||
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i);
|
||||
}
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
||||
ds.cache = new(Cache::alloc(largePages)) Cache();
|
||||
ds.cache->initialize<softAes>(seed, SeedSize);
|
||||
ds.cache->initialize(seed, SeedSize);
|
||||
}
|
||||
|
||||
template
|
||||
void datasetInitCache<false>(const void*, dataset_t&, bool);
|
||||
|
||||
template
|
||||
void datasetInitCache<true>(const void*, dataset_t&, bool);
|
||||
|
||||
template<bool softAes>
|
||||
void aesBench(uint32_t blockCount) {
|
||||
alignas(16) KeysContainer keys;
|
||||
alignas(16) uint8_t buffer[CacheLineSize];
|
||||
for (uint32_t block = 0; block < blockCount; ++block) {
|
||||
initBlock(buffer, buffer, 0, keys);
|
||||
}
|
||||
}
|
||||
|
||||
template void aesBench<false>(uint32_t blockCount);
|
||||
template void aesBench<true>(uint32_t blockCount);
|
||||
}
|
||||
|
@ -20,18 +20,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
#include "intrinPortable.h"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
using KeysContainer = std::array<__m128i, 10>;
|
||||
|
||||
template<bool soft, bool enc>
|
||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
|
||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber);
|
||||
|
||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber);
|
||||
|
||||
void datasetAlloc(dataset_t& ds, bool largePages);
|
||||
|
||||
@ -39,14 +36,10 @@ namespace RandomX {
|
||||
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
|
||||
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
|
||||
|
||||
template<bool softAes>
|
||||
void aesBench(uint32_t blockCount);
|
||||
}
|
||||
|
||||
|
12
src/main.cpp
12
src/main.cpp
@ -256,18 +256,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
try {
|
||||
Stopwatch sw(true);
|
||||
if (softAes) {
|
||||
RandomX::datasetInitCache<true>(seed, dataset, largePages);
|
||||
}
|
||||
else {
|
||||
RandomX::datasetInitCache<false>(seed, dataset, largePages);
|
||||
}
|
||||
RandomX::datasetInitCache(seed, dataset, largePages);
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Keys: " << std::endl;
|
||||
for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) {
|
||||
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "Cache: " << std::endl;
|
||||
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
|
||||
std::cout << std::endl;
|
||||
|
@ -109,4 +109,12 @@ void* allocLargePagesMemory(std::size_t bytes) {
|
||||
throw std::runtime_error("allocLargePagesMemory - mmap failed");
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases a region of virtual memory.
//   ptr   - start address of the region
//   bytes - size of the region in bytes; used by munmap only, not used on Windows
// Windows: VirtualFree(ptr, 0, MEM_RELEASE). Elsewhere: munmap(ptr, bytes).
// The result of the underlying system call is not checked.
// NOTE(review): presumably ptr must come from allocLargePagesMemory or another
// mapping-based allocator in this file - confirm against the callers.
void freePagedMemory(void* ptr, std::size_t bytes) {
#ifndef _WIN32
	munmap(ptr, bytes);
#else
	// With MEM_RELEASE the size argument has to be 0; the whole reservation is released
	VirtualFree(ptr, 0, MEM_RELEASE);
#endif
}
|
||||
|
@ -22,4 +22,5 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <cstddef>
|
||||
|
||||
void* allocExecutableMemory(std::size_t);
|
||||
void* allocLargePagesMemory(std::size_t);
|
||||
void* allocLargePagesMemory(std::size_t);
|
||||
void freePagedMemory(void*, std::size_t);
|
||||
|
Loading…
x
Reference in New Issue
Block a user