mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-05 06:38:53 +00:00
Dataset initialization algorithm (AES)
This commit is contained in:
parent
67e741ff22
commit
48d85643de
@ -134,11 +134,6 @@ namespace RandomX {
|
|||||||
//Argon2d memory fill
|
//Argon2d memory fill
|
||||||
argonFill(seed, seedSize);
|
argonFill(seed, seedSize);
|
||||||
|
|
||||||
//Circular shift of the cache buffer by 512 bytes
|
|
||||||
//realized by copying the first 512 bytes to the back
|
|
||||||
//of the buffer and shifting the start by 512 bytes
|
|
||||||
memcpy(memory + CacheSize, memory, CacheShift);
|
|
||||||
|
|
||||||
//AES keys
|
//AES keys
|
||||||
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
||||||
}
|
}
|
||||||
|
@ -47,11 +47,11 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const uint8_t* getCache() {
|
const uint8_t* getCache() {
|
||||||
return memory + CacheShift;
|
return memory;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
alignas(16) KeysContainer keys;
|
alignas(16) KeysContainer keys;
|
||||||
uint8_t memory[CacheSize + CacheShift];
|
uint8_t memory[CacheSize];
|
||||||
void argonFill(const void* seed, size_t seedSize);
|
void argonFill(const void* seed, size_t seedSize);
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//#define MAGIC_DIVISION
|
#define MAGIC_DIVISION
|
||||||
#include "JitCompilerX86.hpp"
|
#include "JitCompilerX86.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -33,7 +33,6 @@ namespace RandomX {
|
|||||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||||
|
|
||||||
constexpr uint32_t CodeSize = 64 * 1024;
|
constexpr uint32_t CodeSize = 64 * 1024;
|
||||||
constexpr uint32_t CacheLineSize = 64;
|
|
||||||
|
|
||||||
struct CallOffset {
|
struct CallOffset {
|
||||||
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}
|
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}
|
||||||
|
@ -56,7 +56,7 @@ namespace RandomX {
|
|||||||
if (light) {
|
if (light) {
|
||||||
auto lds = mem.ds.lightDataset = new LightClientDataset();
|
auto lds = mem.ds.lightDataset = new LightClientDataset();
|
||||||
lds->cache = ds.cache;
|
lds->cache = ds.cache;
|
||||||
lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
//lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||||
lds->blockNumber = -1;
|
lds->blockNumber = -1;
|
||||||
if (lds->block == nullptr) {
|
if (lds->block == nullptr) {
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
@ -78,13 +78,13 @@ namespace RandomX {
|
|||||||
if (lightClient) {
|
if (lightClient) {
|
||||||
auto cache = mem.ds.lightDataset->cache;
|
auto cache = mem.ds.lightDataset->cache;
|
||||||
if (softAes) {
|
if (softAes) {
|
||||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||||
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||||
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,13 +34,13 @@ namespace RandomX {
|
|||||||
constexpr int SeedSize = 32;
|
constexpr int SeedSize = 32;
|
||||||
constexpr int ResultSize = 32;
|
constexpr int ResultSize = 32;
|
||||||
|
|
||||||
constexpr int CacheBlockSize = 1024;
|
constexpr int CacheBlockCount = 1024 * 1024;
|
||||||
constexpr int CacheShift = CacheBlockSize / 2;
|
constexpr int CacheLineSize = 64;
|
||||||
constexpr int BlockExpansionRatio = 64;
|
constexpr int BlockExpansionRatio = 64;
|
||||||
constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize;
|
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||||
constexpr uint32_t DatasetBlockCount = 65536;
|
constexpr int DatasetIterations = 64;
|
||||||
constexpr uint32_t CacheSize = DatasetBlockCount * CacheBlockSize;
|
constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
|
||||||
constexpr uint64_t DatasetSize = (uint64_t)DatasetBlockCount * DatasetBlockSize;
|
constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
|
||||||
|
|
||||||
constexpr int ArgonIterations = 12;
|
constexpr int ArgonIterations = 12;
|
||||||
constexpr uint32_t ArgonMemorySize = 65536; //KiB
|
constexpr uint32_t ArgonMemorySize = 65536; //KiB
|
||||||
|
117
src/dataset.cpp
117
src/dataset.cpp
@ -56,59 +56,55 @@ namespace RandomX {
|
|||||||
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool soft, bool enc>
|
#define AES_ROUND(i) x0 = aesdec<soft>(x0, keys[i]); \
|
||||||
|
x1 = aesenc<soft>(x1, keys[i]); \
|
||||||
|
x2 = aesdec<soft>(x2, keys[i]); \
|
||||||
|
x3 = aesenc<soft>(x3, keys[i])
|
||||||
|
|
||||||
|
template<bool soft>
|
||||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||||
__m128i xin, xout;
|
__m128i x0, x1, x2, x3, iv;
|
||||||
|
//block number 0..67108863
|
||||||
//Initialization vector = block number extended to 128 bits
|
//Initialization vector = block number extended to 128 bits
|
||||||
xout = _mm_cvtsi32_si128(blockNumber);
|
iv = _mm_cvtsi32_si128(blockNumber);
|
||||||
//Expand + AES
|
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..1048575
|
||||||
for (uint32_t i = 0; i < DatasetBlockSize / sizeof(__m128i); ++i) {
|
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
|
||||||
if ((i % 32) == 0) {
|
__m128i* datasetCacheLine = (__m128i*)out;
|
||||||
xin = _mm_set_epi64x(*(uint64_t*)(in + i / 4), 0);
|
|
||||||
xout = _mm_xor_si128(xin, xout);
|
x0 = _mm_load_si128(cacheCacheLine + 0);
|
||||||
|
x1 = _mm_load_si128(cacheCacheLine + 1);
|
||||||
|
x2 = _mm_load_si128(cacheCacheLine + 2);
|
||||||
|
x3 = _mm_load_si128(cacheCacheLine + 3);
|
||||||
|
|
||||||
|
x0 = _mm_xor_si128(x0, iv);
|
||||||
|
x1 = _mm_xor_si128(x1, iv);
|
||||||
|
x2 = _mm_xor_si128(x2, iv);
|
||||||
|
x3 = _mm_xor_si128(x3, iv);
|
||||||
|
|
||||||
|
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||||
|
AES_ROUND(0);
|
||||||
|
AES_ROUND(1);
|
||||||
|
AES_ROUND(2);
|
||||||
|
AES_ROUND(3);
|
||||||
|
AES_ROUND(4);
|
||||||
|
AES_ROUND(5);
|
||||||
|
AES_ROUND(6);
|
||||||
|
AES_ROUND(7);
|
||||||
|
AES_ROUND(8);
|
||||||
|
AES_ROUND(9);
|
||||||
}
|
}
|
||||||
if (enc) {
|
|
||||||
xout = aesenc<soft>(xout, keys[0]);
|
_mm_store_si128(datasetCacheLine + 0, x0);
|
||||||
xout = aesenc<soft>(xout, keys[1]);
|
_mm_store_si128(datasetCacheLine + 1, x1);
|
||||||
xout = aesenc<soft>(xout, keys[2]);
|
_mm_store_si128(datasetCacheLine + 2, x2);
|
||||||
xout = aesenc<soft>(xout, keys[3]);
|
_mm_store_si128(datasetCacheLine + 3, x3);
|
||||||
xout = aesenc<soft>(xout, keys[4]);
|
|
||||||
xout = aesenc<soft>(xout, keys[5]);
|
|
||||||
xout = aesenc<soft>(xout, keys[6]);
|
|
||||||
xout = aesenc<soft>(xout, keys[7]);
|
|
||||||
xout = aesenc<soft>(xout, keys[8]);
|
|
||||||
xout = aesenc<soft>(xout, keys[9]);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
xout = aesdec<soft>(xout, keys[0]);
|
|
||||||
xout = aesdec<soft>(xout, keys[1]);
|
|
||||||
xout = aesdec<soft>(xout, keys[2]);
|
|
||||||
xout = aesdec<soft>(xout, keys[3]);
|
|
||||||
xout = aesdec<soft>(xout, keys[4]);
|
|
||||||
xout = aesdec<soft>(xout, keys[5]);
|
|
||||||
xout = aesdec<soft>(xout, keys[6]);
|
|
||||||
xout = aesdec<soft>(xout, keys[7]);
|
|
||||||
xout = aesdec<soft>(xout, keys[8]);
|
|
||||||
xout = aesdec<soft>(xout, keys[9]);
|
|
||||||
}
|
|
||||||
_mm_store_si128((__m128i*)(out + i * sizeof(__m128i)), xout);
|
|
||||||
}
|
|
||||||
//Shuffle
|
|
||||||
Pcg32 gen(&xout);
|
|
||||||
shuffle<uint32_t>((uint32_t*)out, DatasetBlockSize, gen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
void initBlock<true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
template
|
|
||||||
void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
|
||||||
|
|
||||||
template
|
|
||||||
void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
|
||||||
|
|
||||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
||||||
convertible_t data;
|
convertible_t data;
|
||||||
@ -122,37 +118,12 @@ namespace RandomX {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
|
||||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) {
|
|
||||||
if (blockNumber % 2 == 1) {
|
|
||||||
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template
|
|
||||||
void initBlock<true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
|
||||||
|
|
||||||
template
|
|
||||||
void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
||||||
convertible_t data;
|
convertible_t data;
|
||||||
LightClientDataset* lds = memory.ds.lightDataset;
|
LightClientDataset* lds = memory.ds.lightDataset;
|
||||||
auto blockNumber = memory.ma / DatasetBlockSize;
|
auto blockNumber = memory.ma / CacheLineSize;
|
||||||
if (lds->blockNumber != blockNumber) {
|
|
||||||
initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
|
|
||||||
lds->blockNumber = blockNumber;
|
|
||||||
}
|
|
||||||
data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
|
|
||||||
memory.ma += 8;
|
|
||||||
memory.mx ^= addr;
|
|
||||||
if ((memory.mx & 0xFFF8) == 0) {
|
|
||||||
memory.ma = memory.mx & ~7;
|
|
||||||
}
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,7 +150,7 @@ namespace RandomX {
|
|||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
||||||
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||||
initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys());
|
initBlock<softAes>(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user