Dataset initialization algorithm (AES)

This commit is contained in:
tevador 2019-01-13 13:47:25 +01:00
parent 67e741ff22
commit 48d85643de
7 changed files with 58 additions and 93 deletions

View File

@ -134,11 +134,6 @@ namespace RandomX {
//Argon2d memory fill //Argon2d memory fill
argonFill(seed, seedSize); argonFill(seed, seedSize);
//Circular shift of the cache buffer by 512 bytes
//realized by copying the first 512 bytes to the back
//of the buffer and shifting the start by 512 bytes
memcpy(memory + CacheSize, memory, CacheShift);
//AES keys //AES keys
expandAesKeys<softAes>((__m128i*)seed, keys.data()); expandAesKeys<softAes>((__m128i*)seed, keys.data());
} }

View File

@ -47,11 +47,11 @@ namespace RandomX {
} }
const uint8_t* getCache() { const uint8_t* getCache() {
return memory + CacheShift; return memory;
} }
private: private:
alignas(16) KeysContainer keys; alignas(16) KeysContainer keys;
uint8_t memory[CacheSize + CacheShift]; uint8_t memory[CacheSize];
void argonFill(const void* seed, size_t seedSize); void argonFill(const void* seed, size_t seedSize);
}; };
} }

View File

@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>. along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/ */
//#define MAGIC_DIVISION #define MAGIC_DIVISION
#include "JitCompilerX86.hpp" #include "JitCompilerX86.hpp"
#include "Pcg32.hpp" #include "Pcg32.hpp"
#include <cstring> #include <cstring>

View File

@ -33,7 +33,6 @@ namespace RandomX {
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
constexpr uint32_t CodeSize = 64 * 1024; constexpr uint32_t CodeSize = 64 * 1024;
constexpr uint32_t CacheLineSize = 64;
struct CallOffset { struct CallOffset {
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {} CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}

View File

@ -56,7 +56,7 @@ namespace RandomX {
if (light) { if (light) {
auto lds = mem.ds.lightDataset = new LightClientDataset(); auto lds = mem.ds.lightDataset = new LightClientDataset();
lds->cache = ds.cache; lds->cache = ds.cache;
lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i)); //lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
lds->blockNumber = -1; lds->blockNumber = -1;
if (lds->block == nullptr) { if (lds->block == nullptr) {
throw std::bad_alloc(); throw std::bad_alloc();
@ -78,13 +78,13 @@ namespace RandomX {
if (lightClient) { if (lightClient) {
auto cache = mem.ds.lightDataset->cache; auto cache = mem.ds.lightDataset->cache;
if (softAes) { if (softAes) {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys()); initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
} }
} }
else { else {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys()); initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
} }
} }
} }

View File

@ -34,13 +34,13 @@ namespace RandomX {
constexpr int SeedSize = 32; constexpr int SeedSize = 32;
constexpr int ResultSize = 32; constexpr int ResultSize = 32;
constexpr int CacheBlockSize = 1024; constexpr int CacheBlockCount = 1024 * 1024;
constexpr int CacheShift = CacheBlockSize / 2; constexpr int CacheLineSize = 64;
constexpr int BlockExpansionRatio = 64; constexpr int BlockExpansionRatio = 64;
constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize; constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
constexpr uint32_t DatasetBlockCount = 65536; constexpr int DatasetIterations = 64;
constexpr uint32_t CacheSize = DatasetBlockCount * CacheBlockSize; constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
constexpr uint64_t DatasetSize = (uint64_t)DatasetBlockCount * DatasetBlockSize; constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
constexpr int ArgonIterations = 12; constexpr int ArgonIterations = 12;
constexpr uint32_t ArgonMemorySize = 65536; //KiB constexpr uint32_t ArgonMemorySize = 65536; //KiB

View File

@ -56,59 +56,55 @@ namespace RandomX {
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key); return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
} }
template<bool soft, bool enc> #define AES_ROUND(i) x0 = aesdec<soft>(x0, keys[i]); \
x1 = aesenc<soft>(x1, keys[i]); \
x2 = aesdec<soft>(x2, keys[i]); \
x3 = aesenc<soft>(x3, keys[i])
template<bool soft>
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) { void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
__m128i xin, xout; __m128i x0, x1, x2, x3, iv;
//block number 0..67108863
//Initialization vector = block number extended to 128 bits //Initialization vector = block number extended to 128 bits
xout = _mm_cvtsi32_si128(blockNumber); iv = _mm_cvtsi32_si128(blockNumber);
//Expand + AES uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..1048575
for (uint32_t i = 0; i < DatasetBlockSize / sizeof(__m128i); ++i) { __m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
if ((i % 32) == 0) { __m128i* datasetCacheLine = (__m128i*)out;
xin = _mm_set_epi64x(*(uint64_t*)(in + i / 4), 0);
xout = _mm_xor_si128(xin, xout); x0 = _mm_load_si128(cacheCacheLine + 0);
x1 = _mm_load_si128(cacheCacheLine + 1);
x2 = _mm_load_si128(cacheCacheLine + 2);
x3 = _mm_load_si128(cacheCacheLine + 3);
x0 = _mm_xor_si128(x0, iv);
x1 = _mm_xor_si128(x1, iv);
x2 = _mm_xor_si128(x2, iv);
x3 = _mm_xor_si128(x3, iv);
for (auto i = 0; i < DatasetIterations; ++i) {
AES_ROUND(0);
AES_ROUND(1);
AES_ROUND(2);
AES_ROUND(3);
AES_ROUND(4);
AES_ROUND(5);
AES_ROUND(6);
AES_ROUND(7);
AES_ROUND(8);
AES_ROUND(9);
} }
if (enc) {
xout = aesenc<soft>(xout, keys[0]); _mm_store_si128(datasetCacheLine + 0, x0);
xout = aesenc<soft>(xout, keys[1]); _mm_store_si128(datasetCacheLine + 1, x1);
xout = aesenc<soft>(xout, keys[2]); _mm_store_si128(datasetCacheLine + 2, x2);
xout = aesenc<soft>(xout, keys[3]); _mm_store_si128(datasetCacheLine + 3, x3);
xout = aesenc<soft>(xout, keys[4]);
xout = aesenc<soft>(xout, keys[5]);
xout = aesenc<soft>(xout, keys[6]);
xout = aesenc<soft>(xout, keys[7]);
xout = aesenc<soft>(xout, keys[8]);
xout = aesenc<soft>(xout, keys[9]);
}
else {
xout = aesdec<soft>(xout, keys[0]);
xout = aesdec<soft>(xout, keys[1]);
xout = aesdec<soft>(xout, keys[2]);
xout = aesdec<soft>(xout, keys[3]);
xout = aesdec<soft>(xout, keys[4]);
xout = aesdec<soft>(xout, keys[5]);
xout = aesdec<soft>(xout, keys[6]);
xout = aesdec<soft>(xout, keys[7]);
xout = aesdec<soft>(xout, keys[8]);
xout = aesdec<soft>(xout, keys[9]);
}
_mm_store_si128((__m128i*)(out + i * sizeof(__m128i)), xout);
}
//Shuffle
Pcg32 gen(&xout);
shuffle<uint32_t>((uint32_t*)out, DatasetBlockSize, gen);
} }
template template
void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); void initBlock<true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template template
void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) { convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
convertible_t data; convertible_t data;
@ -122,37 +118,12 @@ namespace RandomX {
return data; return data;
} }
template<bool softAes>
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) {
if (blockNumber % 2 == 1) {
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
}
else {
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
}
}
template
void initBlock<true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template<bool softAes> template<bool softAes>
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) { convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
convertible_t data; convertible_t data;
LightClientDataset* lds = memory.ds.lightDataset; LightClientDataset* lds = memory.ds.lightDataset;
auto blockNumber = memory.ma / DatasetBlockSize; auto blockNumber = memory.ma / CacheLineSize;
if (lds->blockNumber != blockNumber) {
initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
lds->blockNumber = blockNumber;
}
data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
memory.ma += 8;
memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) {
memory.ma = memory.mx & ~7;
}
return data; return data;
} }
@ -179,7 +150,7 @@ namespace RandomX {
template<bool softAes> template<bool softAes>
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) { void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) { for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys()); initBlock<softAes>(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
} }
} }