Memory-bound dataset initialization

This commit is contained in:
tevador 2019-01-18 18:44:06 +01:00
parent 4fb168e249
commit 89bc68d093
2 changed files with 34 additions and 33 deletions

View File

@ -34,8 +34,8 @@ namespace RandomX {
constexpr int SeedSize = 32; constexpr int SeedSize = 32;
constexpr int ResultSize = 32; constexpr int ResultSize = 32;
constexpr int ArgonIterations = 6; constexpr int ArgonIterations = 3;
constexpr uint32_t ArgonMemorySize = 131072; //KiB constexpr uint32_t ArgonMemorySize = 262144; //KiB
constexpr int ArgonLanes = 1; constexpr int ArgonLanes = 1;
const char ArgonSalt[] = "Monero\x1A$"; const char ArgonSalt[] = "Monero\x1A$";
constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1; constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1;
@ -46,7 +46,7 @@ namespace RandomX {
constexpr int CacheBlockCount = CacheSize / CacheLineSize; constexpr int CacheBlockCount = CacheSize / CacheLineSize;
constexpr int BlockExpansionRatio = DatasetSize / CacheSize; constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount; constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
constexpr int DatasetIterations = 32; constexpr int DatasetIterations = 10;
#ifdef TRACE #ifdef TRACE

View File

@ -62,42 +62,43 @@ namespace RandomX {
x3 = aesenc<soft>(x3, keys[i]) x3 = aesenc<soft>(x3, keys[i])
template<bool soft> template<bool soft>
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) { void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
__m128i x0, x1, x2, x3, iv; __m128i x0, x1, x2, x3;
//block number 0..67108863
//Initialization vector = block number extended to 128 bits
iv = _mm_cvtsi32_si128(blockNumber);
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..2097151
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
__m128i* datasetCacheLine = (__m128i*)out;
x0 = _mm_load_si128(cacheCacheLine + 0); __m128i* xit = (__m128i*)intermediate;
x1 = _mm_load_si128(cacheCacheLine + 1); __m128i* xout = (__m128i*)out;
x2 = _mm_load_si128(cacheCacheLine + 2);
x3 = _mm_load_si128(cacheCacheLine + 3);
x0 = _mm_xor_si128(x0, iv); x0 = _mm_cvtsi32_si128(blockNumber);
x1 = _mm_xor_si128(x1, iv); constexpr int mask = (CacheSize / CacheLineSize) - 1;
x2 = _mm_xor_si128(x2, iv);
x3 = _mm_xor_si128(x3, iv);
for (auto i = 0; i < DatasetIterations; ++i) { for (auto i = 0; i < DatasetIterations; ++i) {
AES_ROUND(0); x0 = aesenc<soft>(x0, keys[0]);
AES_ROUND(1); x0 = aesenc<soft>(x0, keys[1]);
AES_ROUND(2); x1 = aesenc<soft>(x0, keys[2]);
AES_ROUND(3); x1 = aesenc<soft>(x1, keys[3]);
AES_ROUND(4); x2 = aesenc<soft>(x1, keys[4]);
AES_ROUND(5); x2 = aesenc<soft>(x2, keys[5]);
AES_ROUND(6); x3 = aesenc<soft>(x2, keys[6]);
AES_ROUND(7); x3 = aesenc<soft>(x3, keys[7]);
AES_ROUND(8);
AES_ROUND(9); int index = _mm_cvtsi128_si32(x3);
index &= mask;
__m128i t0 = _mm_load_si128(xit + 4 * index + 0);
__m128i t1 = _mm_load_si128(xit + 4 * index + 1);
__m128i t2 = _mm_load_si128(xit + 4 * index + 2);
__m128i t3 = _mm_load_si128(xit + 4 * index + 3);
x0 = _mm_xor_si128(x0, t0);
x1 = _mm_xor_si128(x1, t1);
x2 = _mm_xor_si128(x2, t2);
x3 = _mm_xor_si128(x3, t3);
} }
_mm_store_si128(datasetCacheLine + 0, x0); _mm_store_si128(xout + 0, x0);
_mm_store_si128(datasetCacheLine + 1, x1); _mm_store_si128(xout + 1, x1);
_mm_store_si128(datasetCacheLine + 2, x2); _mm_store_si128(xout + 2, x2);
_mm_store_si128(datasetCacheLine + 3, x3); _mm_store_si128(xout + 3, x3);
} }
template template