Full-width mantissa for group E registers and FDIV_M

This commit is contained in:
tevador 2019-04-17 16:18:02 +02:00
parent d43c7db416
commit 67046a9f38
11 changed files with 60 additions and 39 deletions

View File

@ -38,7 +38,7 @@ namespace RandomX {
// Light-mode JIT virtual machine setup: runs the base VirtualMachine
// initialization and then asks the JIT compiler to generate the light program.
// NOTE(review): this listing is a rendered commit diff, so the two
// generateProgramLight calls below appear to be the pre-change and post-change
// forms of a single statement (the latter additionally passes `config`) --
// confirm against the actual file before editing.
template<bool superscalar>
void CompiledLightVirtualMachine<superscalar>::initialize() {
VirtualMachine::initialize();
compiler.generateProgramLight<superscalar>(program);
compiler.generateProgramLight<superscalar>(program, config);
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
}

View File

@ -37,7 +37,7 @@ namespace RandomX {
// JIT virtual machine setup: runs the base VirtualMachine initialization,
// has the JIT compiler generate the program, and points the dataset memory at
// the cache line selected by datasetBase.
// NOTE(review): this listing is a rendered commit diff, so the two
// generateProgram calls below appear to be the pre-change and post-change
// forms of a single statement (the latter additionally passes `config`) --
// confirm against the actual file before editing.
void CompiledVirtualMachine::initialize() {
VirtualMachine::initialize();
compiler.generateProgram(program);
compiler.generateProgram(program, config);
// datasetBase counts cache lines, so it is scaled by CacheLineSize to get a byte offset.
mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
}

View File

@ -116,6 +116,16 @@ namespace RandomX {
return scratchpad + addr;
}
// Conditions a packed pair of doubles for use as a group E register value or
// as the FDIV_M divisor: bits 52..63 (sign and exponent) of each lane are
// cleared, then the per-program mask stored in config.eMask is OR-ed in.
//   x       two doubles, one per 64-bit lane
//   returns (x & fraction bits) | config.eMask, lane by lane
template<bool superscalar>
FORCE_INLINE __m128d InterpretedVirtualMachine<superscalar>::maskRegisterExponentMantissa(__m128d x) {
	// The low 52 bits of an IEEE 754 double hold its fraction field.
	constexpr uint64_t fractionBits = (1ULL << 52) - 1;
	const __m128d keepFraction = _mm_castsi128_pd(_mm_set_epi64x(fractionBits, fractionBits));
	// Aligned load: config.eMask is read as two packed 64-bit lanes.
	const __m128d programMask = _mm_load_pd((const double*)&config.eMask);
	return _mm_or_pd(_mm_and_pd(x, keepFraction), programMask);
}
template<bool superscalar>
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
@ -229,7 +239,7 @@ namespace RandomX {
} break;
case InstructionType::FDIV_M: {
__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc)));
__m128d fsrc = maskRegisterExponentMantissa(load_cvt_i32x2(getScratchpadAddress(ibc)));
*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc);
} break;
@ -326,7 +336,7 @@ namespace RandomX {
uint32_t spAddr1 = mem.ma;
if (trace) {
std::cout << "execute (reg: r" << readReg0 << ", r" << readReg1 << ", r" << readReg2 << ", r" << readReg3 << ")" << std::endl;
std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
printState(r, f, e, a);
@ -334,7 +344,7 @@ namespace RandomX {
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
//std::cout << "Iteration " << iter << std::endl;
uint64_t spMix = r[readReg0] ^ r[readReg1];
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
spAddr0 ^= spMix;
spAddr0 &= ScratchpadL3Mask64;
spAddr1 ^= spMix >> 32;
@ -353,10 +363,10 @@ namespace RandomX {
f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
e[0] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 32));
e[1] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 40));
e[2] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 48));
e[3] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 56));
e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
if (trace) {
std::cout << "iteration " << std::dec << ic << std::endl;
@ -368,7 +378,7 @@ namespace RandomX {
executeBytecode(r, f, e, a);
mem.mx ^= r[readReg2] ^ r[readReg3];
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
mem.mx &= CacheLineAlignMask;
if (superscalar) {
executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);

View File

@ -133,5 +133,6 @@ namespace RandomX {
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
void* getScratchpadAddress(InstructionByteCode& ibc);
__m128d maskRegisterExponentMantissa(__m128d);
};
}

View File

@ -24,6 +24,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "Program.hpp"
#include "reciprocal.h"
#include "virtualMemory.hpp"
#include "intrinPortable.h"
#define RANDOMX_JUMP
@ -230,20 +231,20 @@ namespace RandomX {
freePagedMemory(code, CodeSize);
}
// Generates the full program: emits the prologue, appends the
// codeReadDataset stub (readDatasetSize bytes) at the current code position,
// then emits the epilogue.
// NOTE(review): this listing is a rendered commit diff, so the two signature
// lines and the two generateProgramPrologue calls appear to be the pre-change
// and post-change forms of the same line (the latter carries the
// ProgramConfiguration) -- confirm against the actual file before editing.
void JitCompilerX86::generateProgram(Program& prog) {
generateProgramPrologue(prog);
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
generateProgramPrologue(prog, pcfg);
// Copy the pre-assembled dataset read stub verbatim and advance the write cursor past it.
memcpy(code + codePos, codeReadDataset, readDatasetSize);
codePos += readDatasetSize;
generateProgramEpilogue(prog);
}
template<bool superscalar>
void JitCompilerX86::generateProgramLight(Program& prog) {
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
if (RANDOMX_CACHE_ACCESSES != 8)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
if (RANDOMX_ARGON_GROWTH != 0)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
generateProgramPrologue(prog);
generateProgramPrologue(prog, pcfg);
if (superscalar) {
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emitByte(CALL);
@ -259,8 +260,8 @@ namespace RandomX {
generateProgramEpilogue(prog);
}
// Explicit instantiations of generateProgramLight for both values of the
// `superscalar` template parameter.
// NOTE(review): this listing is a rendered commit diff; the first pair appears
// to be the pre-change signature and the second pair the post-change signature
// taking a ProgramConfiguration -- confirm against the actual file.
template void JitCompilerX86::generateProgramLight<true>(Program& prog);
template void JitCompilerX86::generateProgramLight<false>(Program& prog);
template void JitCompilerX86::generateProgramLight<true>(Program& prog, ProgramConfiguration& pcfg);
template void JitCompilerX86::generateProgramLight<false>(Program& prog, ProgramConfiguration& pcfg);
template<size_t N>
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
@ -298,33 +299,26 @@ namespace RandomX {
memcpy(code, codeDatasetInit, datasetInitSize);
}
// Emits the start of a generated program: resets per-program JIT state,
// writes the register-selection bytes for the scratchpad address mix,
// appends the loop-load stub, generates the program body, and finally emits
// the register-selection bytes that use readReg2/readReg3.
// NOTE(review): this listing is a rendered commit diff. Lines that differ only
// by `readRegN` vs `pcfg.readRegN`, the two signature lines, and the local
// readReg0..readReg3 computation from prog.getEntropy(12) appear to be
// pre-change/post-change pairs -- confirm against the actual file before editing.
void JitCompilerX86::generateProgramPrologue(Program& prog) {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
#ifdef RANDOMX_JUMP
// Forget the instruction offsets and register usage recorded for the previous program.
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
}
#endif
// Each readRegN is chosen from a pair of registers (0/1, 2/3, 4/5, 6/7) by one entropy bit.
auto addressRegisters = prog.getEntropy(12);
uint32_t readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg1 = 2 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg3 = 6 + (addressRegisters & 1);
// Code generation starts right after the fixed prologue.
codePos = prologueSize;
// Write the 16-byte eMask into the code buffer 48 bytes before the end of the prologue.
// NOTE(review): presumably the prologue reserves a data slot there that the generated
// code reads -- the offset is tied to the prologue layout, which is not visible here.
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
// The byte 0xc0 + reg following each opcode selects the source register.
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + readReg0);
emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + readReg1);
emitByte(0xc0 + pcfg.readReg1);
// Append the pre-assembled loop-load stub, then the translated program instructions.
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
generateCode(prog);
emit(REX_MOV_RR);
emitByte(0xc0 + readReg2);
emitByte(0xc0 + pcfg.readReg2);
emit(REX_XOR_EAX);
emitByte(0xc0 + readReg3);
emitByte(0xc0 + pcfg.readReg3);
}
void JitCompilerX86::generateProgramEpilogue(Program& prog) {

View File

@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
// Forward declarations of types that the JIT compiler interface only refers
// to by reference.
class Program;
// Declared with `struct` to match the class-key of its definition; a
// mismatched key triggers MSVC C4099 / clang -Wmismatched-tags.
struct ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
@ -38,9 +39,9 @@ namespace RandomX {
public:
JitCompilerX86();
~JitCompilerX86();
void generateProgram(Program&);
void generateProgram(Program&, ProgramConfiguration&);
template<bool superscalar>
void generateProgramLight(Program&);
void generateProgramLight(Program&, ProgramConfiguration&);
template<size_t N>
void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
ProgramFunc getProgramFunc() {
@ -73,7 +74,7 @@ namespace RandomX {
void generateDatasetInitCode();
void generateProgramPrologue(Program&);
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&);
int getConditionRegister();
void genAddressReg(Instruction&, bool);

View File

@ -27,6 +27,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
// Per-program settings shared by the virtual machine and the JIT compiler.
struct ProgramConfiguration {
	// Two 64-bit masks, one per lane, OR-ed into group E register values and
	// FDIV_M operands. The interpreter reads them with an aligned 128-bit
	// load, so instances need 16-byte alignment.
	uint64_t eMask[2];
	// Indices of the integer registers mixed into the scratchpad addresses
	// (readReg0/readReg1) and into mem.mx (readReg2/readReg3).
	uint32_t readReg0;
	uint32_t readReg1;
	uint32_t readReg2;
	uint32_t readReg3;
};
class Program {
public:
Instruction& operator()(int pc) {

View File

@ -77,14 +77,18 @@ namespace RandomX {
mem.ma = program.getEntropy(8) & CacheLineAlignMask;
mem.mx = program.getEntropy(10);
auto addressRegisters = program.getEntropy(12);
readReg0 = 0 + (addressRegisters & 1);
config.readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1;
readReg1 = 2 + (addressRegisters & 1);
config.readReg1 = 2 + (addressRegisters & 1);
addressRegisters >>= 1;
readReg2 = 4 + (addressRegisters & 1);
config.readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1;
readReg3 = 6 + (addressRegisters & 1);
datasetBase = program.getEntropy(14) % datasetRange;
config.readReg3 = 6 + (addressRegisters & 1);
datasetBase = program.getEntropy(13) % datasetRange;
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
}
template<bool softAes>

View File

@ -46,9 +46,9 @@ namespace RandomX {
protected:
alignas(64) Program program;
alignas(64) RegisterFile reg;
alignas(16) ProgramConfiguration config;
MemoryRegisters mem;
uint8_t* scratchpad;
uint32_t readReg0, readReg1, readReg2, readReg3;
uint32_t datasetRange;
uint32_t datasetBase;
};

View File

@ -311,6 +311,12 @@ inline __m128d load_cvt_i32x2(const void* addr) {
return _mm_cvtepi32_pd(ix);
}
// Returns the bit pattern of an IEEE 754 double-precision exponent field for
// the unbiased exponent E: the biased value E + 1023 placed at bits 52..62,
// with the sign and fraction bits zero.
// E is restricted to the normal range [-1022, 1023]; values outside it would
// encode a subnormal/zero exponent, the Inf/NaN exponent, or overflow the
// 11-bit field into the sign bit.
template<int E>
constexpr uint64_t ieee_get_exponent_mask() {
	static_assert(E > -1023, "Invalid exponent value");
	static_assert(E < 1024, "Invalid exponent value");
	// With E bounded, the signed sum lies in [1, 2046] and cannot wrap.
	return static_cast<uint64_t>(E + 1023) << 52;
}
template<int E>
__m128d ieee_set_exponent(__m128d x) {
static_assert(E > -1023, "Invalid exponent value");

View File

@ -396,7 +396,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(!legacy && programCount == 1000)
std::cout << "Reference result: af72d8069bd95ef04b414d3a83772c7bd2df454940bad15ae0b48543aeef8ab2" << std::endl;
std::cout << "Reference result: 630ad3bc7f44fe8386462d7b671fa2a1167d3e062bfb9a2967f64832760cfedb" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
}