mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
Full-width mantissa for group E registers and FDIV_M
This commit is contained in:
parent
d43c7db416
commit
67046a9f38
@ -38,7 +38,7 @@ namespace RandomX {
|
||||
template<bool superscalar>
|
||||
void CompiledLightVirtualMachine<superscalar>::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
compiler.generateProgramLight<superscalar>(program);
|
||||
compiler.generateProgramLight<superscalar>(program, config);
|
||||
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
|
@ -37,7 +37,7 @@ namespace RandomX {
|
||||
|
||||
void CompiledVirtualMachine::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
compiler.generateProgram(program);
|
||||
compiler.generateProgram(program, config);
|
||||
mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
|
@ -116,6 +116,16 @@ namespace RandomX {
|
||||
return scratchpad + addr;
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
FORCE_INLINE __m128d InterpretedVirtualMachine<superscalar>::maskRegisterExponentMantissa(__m128d x) {
|
||||
constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
|
||||
const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
|
||||
const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask);
|
||||
x = _mm_and_pd(x, mantissaMask);
|
||||
x = _mm_or_pd(x, exponentMask);
|
||||
return x;
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[ic];
|
||||
@ -229,7 +239,7 @@ namespace RandomX {
|
||||
} break;
|
||||
|
||||
case InstructionType::FDIV_M: {
|
||||
__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc)));
|
||||
__m128d fsrc = maskRegisterExponentMantissa(load_cvt_i32x2(getScratchpadAddress(ibc)));
|
||||
*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc);
|
||||
} break;
|
||||
|
||||
@ -326,7 +336,7 @@ namespace RandomX {
|
||||
uint32_t spAddr1 = mem.ma;
|
||||
|
||||
if (trace) {
|
||||
std::cout << "execute (reg: r" << readReg0 << ", r" << readReg1 << ", r" << readReg2 << ", r" << readReg3 << ")" << std::endl;
|
||||
std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
|
||||
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
|
||||
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
|
||||
printState(r, f, e, a);
|
||||
@ -334,7 +344,7 @@ namespace RandomX {
|
||||
|
||||
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
|
||||
//std::cout << "Iteration " << iter << std::endl;
|
||||
uint64_t spMix = r[readReg0] ^ r[readReg1];
|
||||
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
|
||||
spAddr0 ^= spMix;
|
||||
spAddr0 &= ScratchpadL3Mask64;
|
||||
spAddr1 ^= spMix >> 32;
|
||||
@ -353,10 +363,10 @@ namespace RandomX {
|
||||
f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
|
||||
f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
|
||||
f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
|
||||
e[0] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 32));
|
||||
e[1] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 40));
|
||||
e[2] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 48));
|
||||
e[3] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 56));
|
||||
e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
|
||||
e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
|
||||
e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
|
||||
e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
|
||||
|
||||
if (trace) {
|
||||
std::cout << "iteration " << std::dec << ic << std::endl;
|
||||
@ -368,7 +378,7 @@ namespace RandomX {
|
||||
|
||||
executeBytecode(r, f, e, a);
|
||||
|
||||
mem.mx ^= r[readReg2] ^ r[readReg3];
|
||||
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
|
||||
mem.mx &= CacheLineAlignMask;
|
||||
if (superscalar) {
|
||||
executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
|
||||
|
@ -133,5 +133,6 @@ namespace RandomX {
|
||||
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
|
||||
void* getScratchpadAddress(InstructionByteCode& ibc);
|
||||
__m128d maskRegisterExponentMantissa(__m128d);
|
||||
};
|
||||
}
|
@ -24,6 +24,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "Program.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "virtualMemory.hpp"
|
||||
#include "intrinPortable.h"
|
||||
|
||||
#define RANDOMX_JUMP
|
||||
|
||||
@ -230,20 +231,20 @@ namespace RandomX {
|
||||
freePagedMemory(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog) {
|
||||
generateProgramPrologue(prog);
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||
codePos += readDatasetSize;
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void JitCompilerX86::generateProgramLight(Program& prog) {
|
||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
|
||||
if (RANDOMX_CACHE_ACCESSES != 8)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
|
||||
if (RANDOMX_ARGON_GROWTH != 0)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
|
||||
generateProgramPrologue(prog);
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
if (superscalar) {
|
||||
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
|
||||
emitByte(CALL);
|
||||
@ -259,8 +260,8 @@ namespace RandomX {
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateProgramLight<true>(Program& prog);
|
||||
template void JitCompilerX86::generateProgramLight<false>(Program& prog);
|
||||
template void JitCompilerX86::generateProgramLight<true>(Program& prog, ProgramConfiguration& pcfg);
|
||||
template void JitCompilerX86::generateProgramLight<false>(Program& prog, ProgramConfiguration& pcfg);
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
|
||||
@ -298,33 +299,26 @@ namespace RandomX {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog) {
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
#endif
|
||||
auto addressRegisters = prog.getEntropy(12);
|
||||
uint32_t readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
uint32_t readReg1 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
uint32_t readReg2 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
uint32_t readReg3 = 6 + (addressRegisters & 1);
|
||||
codePos = prologueSize;
|
||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + readReg0);
|
||||
emitByte(0xc0 + pcfg.readReg0);
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + readReg1);
|
||||
emitByte(0xc0 + pcfg.readReg1);
|
||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||
codePos += loopLoadSize;
|
||||
generateCode(prog);
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + readReg2);
|
||||
emitByte(0xc0 + pcfg.readReg2);
|
||||
emit(REX_XOR_EAX);
|
||||
emitByte(0xc0 + readReg3);
|
||||
emitByte(0xc0 + pcfg.readReg3);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramEpilogue(Program& prog) {
|
||||
|
@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
namespace RandomX {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class JitCompilerX86;
|
||||
|
||||
@ -38,9 +39,9 @@ namespace RandomX {
|
||||
public:
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&);
|
||||
void generateProgram(Program&, ProgramConfiguration&);
|
||||
template<bool superscalar>
|
||||
void generateProgramLight(Program&);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&);
|
||||
template<size_t N>
|
||||
void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
|
||||
ProgramFunc getProgramFunc() {
|
||||
@ -73,7 +74,7 @@ namespace RandomX {
|
||||
|
||||
void generateDatasetInitCode();
|
||||
|
||||
void generateProgramPrologue(Program&);
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&);
|
||||
int getConditionRegister();
|
||||
void genAddressReg(Instruction&, bool);
|
||||
|
@ -27,6 +27,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
// Per-program settings derived from the program's entropy and consumed by
// both the interpreter and the JIT compiler.
struct ProgramConfiguration {
	// One 64-bit mask per vector lane; ORed into group E register values
	// and into the FDIV_M source operand. Must stay the first member: it is
	// read with an aligned 128-bit load and copied as a 16-byte blob.
	uint64_t eMask[2];
	// Indices of the integer registers whose XOR feeds the scratchpad
	// address mix (readReg0 ^ readReg1) ...
	uint32_t readReg0;
	uint32_t readReg1;
	// ... and the mem.mx update (readReg2 ^ readReg3).
	uint32_t readReg2;
	uint32_t readReg3;
};
|
||||
|
||||
class Program {
|
||||
public:
|
||||
Instruction& operator()(int pc) {
|
||||
|
@ -77,14 +77,18 @@ namespace RandomX {
|
||||
mem.ma = program.getEntropy(8) & CacheLineAlignMask;
|
||||
mem.mx = program.getEntropy(10);
|
||||
auto addressRegisters = program.getEntropy(12);
|
||||
readReg0 = 0 + (addressRegisters & 1);
|
||||
config.readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
readReg1 = 2 + (addressRegisters & 1);
|
||||
config.readReg1 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
readReg2 = 4 + (addressRegisters & 1);
|
||||
config.readReg2 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
readReg3 = 6 + (addressRegisters & 1);
|
||||
datasetBase = program.getEntropy(14) % datasetRange;
|
||||
config.readReg3 = 6 + (addressRegisters & 1);
|
||||
datasetBase = program.getEntropy(13) % datasetRange;
|
||||
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
||||
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
|
||||
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
|
||||
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
|
@ -46,9 +46,9 @@ namespace RandomX {
|
||||
protected:
|
||||
alignas(64) Program program;
|
||||
alignas(64) RegisterFile reg;
|
||||
alignas(16) ProgramConfiguration config;
|
||||
MemoryRegisters mem;
|
||||
uint8_t* scratchpad;
|
||||
uint32_t readReg0, readReg1, readReg2, readReg3;
|
||||
uint32_t datasetRange;
|
||||
uint32_t datasetBase;
|
||||
};
|
||||
|
@ -311,6 +311,12 @@ inline __m128d load_cvt_i32x2(const void* addr) {
|
||||
return _mm_cvtepi32_pd(ix);
|
||||
}
|
||||
|
||||
// Returns the 64-bit pattern whose IEEE-754 double exponent field encodes the
// unbiased exponent E; the sign and fraction bits are zero.
// E must be a finite, normal exponent: -1022 <= E <= 1023. Values outside that
// range would encode zero/subnormal (biased 0), Inf/NaN (biased 2047) or
// overflow into the sign bit, so they are rejected at compile time.
template<int E>
constexpr uint64_t ieee_get_exponent_mask() {
	static_assert(E > -1023 && E < 1024, "Invalid exponent value");
	// Bias is 1023; the exponent field starts at bit 52.
	return static_cast<uint64_t>(E + 1023) << 52;
}
|
||||
|
||||
template<int E>
|
||||
__m128d ieee_set_exponent(__m128d x) {
|
||||
static_assert(E > -1023, "Invalid exponent value");
|
||||
|
@ -396,7 +396,7 @@ int main(int argc, char** argv) {
|
||||
std::cout << "Calculated result: ";
|
||||
result.print(std::cout);
|
||||
if(!legacy && programCount == 1000)
|
||||
std::cout << "Reference result: af72d8069bd95ef04b414d3a83772c7bd2df454940bad15ae0b48543aeef8ab2" << std::endl;
|
||||
std::cout << "Reference result: 630ad3bc7f44fe8386462d7b671fa2a1167d3e062bfb9a2967f64832760cfedb" << std::endl;
|
||||
if (!miningMode) {
|
||||
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user