diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 7ee00ba..423cefc 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -453,7 +453,7 @@ namespace RandomX { } constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 9298410992540426048ULL; + constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; @@ -474,6 +474,55 @@ namespace RandomX { return mixBlock; } + template + void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals) { + for (unsigned j = 0; j < prog.getSize(); ++j) { + Instruction& instr = prog(j); + switch (instr.opcode) + { + case RandomX::LightInstructionType::ISUB_R: + r[instr.dst] -= r[instr.src]; + break; + case RandomX::LightInstructionType::IXOR_R: + r[instr.dst] ^= r[instr.src]; + break; + case RandomX::LightInstructionType::IADD_RS: + r[instr.dst] += r[instr.src] << (instr.mod % 4); + break; + case RandomX::LightInstructionType::IMUL_R: + r[instr.dst] *= r[instr.src]; + break; + case RandomX::LightInstructionType::IROR_C: + r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); + break; + case RandomX::LightInstructionType::IADD_C7: + case RandomX::LightInstructionType::IADD_C8: + case RandomX::LightInstructionType::IADD_C9: + r[instr.dst] += signExtend2sCompl(instr.getImm32()); + break; + case RandomX::LightInstructionType::IXOR_C7: + case RandomX::LightInstructionType::IXOR_C8: + case RandomX::LightInstructionType::IXOR_C9: + r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); + break; + case RandomX::LightInstructionType::IMULH_R: + r[instr.dst] = mulh(r[instr.dst], r[instr.src]); + break; + case RandomX::LightInstructionType::ISMULH_R: + r[instr.dst] = smulh(r[instr.dst], r[instr.src]); + break; + case RandomX::LightInstructionType::IMUL_RCP: + if(superscalar) + r[instr.dst] *= reciprocals[instr.getImm32()]; + else + r[instr.dst] *= reciprocal(instr.getImm32()); + break; + default: + UNREACHABLE; + } + } + } + template void InterpretedVirtualMachine::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) { int_reg_t rl[8]; @@ -491,49 +540,9 @@ namespace RandomX { for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { mixBlock = getMixBlock(registerValue, cache); LightProgram& prog = superScalarPrograms[i]; - for (unsigned j = 0; j < prog.getSize(); ++j) { - Instruction& instr = prog(j); - switch (instr.opcode) - { - case RandomX::LightInstructionType::ISUB_R: - rl[instr.dst] -= rl[instr.src]; - break; - case RandomX::LightInstructionType::IXOR_R: - rl[instr.dst] ^= rl[instr.src]; - break; - case RandomX::LightInstructionType::IADD_RS: - rl[instr.dst] += rl[instr.src] << (instr.mod % 4); - break; - case RandomX::LightInstructionType::IMUL_R: - rl[instr.dst] *= rl[instr.src]; - break; - case RandomX::LightInstructionType::IROR_C: - rl[instr.dst] = rotr(rl[instr.dst], instr.getImm32()); - break; - case RandomX::LightInstructionType::IADD_C7: - case RandomX::LightInstructionType::IADD_C8: - case RandomX::LightInstructionType::IADD_C9: - rl[instr.dst] += signExtend2sCompl(instr.getImm32()); - break; - case RandomX::LightInstructionType::IXOR_C7: - case RandomX::LightInstructionType::IXOR_C8: - case RandomX::LightInstructionType::IXOR_C9: - rl[instr.dst] ^= signExtend2sCompl(instr.getImm32()); - break; - case RandomX::LightInstructionType::IMULH_R: - rl[instr.dst] = mulh(rl[instr.dst], rl[instr.src]); - break; - case RandomX::LightInstructionType::ISMULH_R: - rl[instr.dst] = smulh(rl[instr.dst], rl[instr.src]); - break; - case RandomX::LightInstructionType::IMUL_RCP: - rl[instr.dst] *= reciprocals[instr.getImm32()]; - break; - default: - UNREACHABLE; - } - } + executeSuperscalar(rl, prog, reciprocals); + for(unsigned q = 0; q < 8; ++q) rl[q] ^= load64(mixBlock + 8 * q); diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index 24bb9c6..ddefa67 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -73,6 +73,7 @@ namespace RandomX { void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; void execute() override; + static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals); private: static InstructionHandler engine[256]; DatasetReadFunc readDataset; diff --git a/src/asm/program_sshash_constants.inc b/src/asm/program_sshash_constants.inc index 77b4ecd..2044a0e 100644 --- a/src/asm/program_sshash_constants.inc +++ b/src/asm/program_sshash_constants.inc @@ -2,8 +2,8 @@ r0_mul: ;#/ 6364136223846793005 db 45, 127, 149, 76, 45, 244, 81, 88 r1_add: - ;#/ 9298410992540426048 - db 64, 159, 245, 89, 136, 151, 10, 129 + ;#/ 9298410992540426748 + db 252, 161, 245, 89, 136, 151, 10, 129 r2_add: ;#/ 12065312585734608966 db 70, 216, 194, 56, 223, 153, 112, 167 diff --git a/src/main.cpp b/src/main.cpp index 36cd800..4866804 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -177,7 +177,6 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); vm->resetRoundingMode(); vm->setScratchpad(scratchpad); - //dump((char*)scratchpad, RandomX::ScratchpadSize, "spad-before.txt"); for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); vm->initialize(); @@ -194,6 +193,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi } }*/ vm->getResult(scratchpad, RANDOMX_SCRATCHPAD_L3, hash); + //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad.txt"); result.xorWith(hash); if (RandomX::trace) { std::cout << "Nonce: " << nonce << " "; @@ -204,8 +204,10 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi } } + + int main(int argc, char** argv) { - bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genSuperscalar, useSuperscalar; + bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genSuperscalar, legacy; int programCount, threadCount, initThreadCount, epoch; readOption("--softAes", argc, argv, softAes); @@ -221,7 +223,7 @@ int main(int argc, char** argv) { readOption("--genNative", argc, argv, genNative); readOption("--help", argc, argv, help); readOption("--genSuperscalar", argc, argv, genSuperscalar); - readOption("--useSuperscalar", argc, argv, useSuperscalar); + readOption("--legacy", argc, argv, legacy); if (genSuperscalar) { RandomX::LightProgram p; @@ -283,7 +285,7 @@ int main(int argc, char** argv) { outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i)); std::cout << std::endl; } - if (useSuperscalar) { + if (!legacy) { RandomX::Blake2Generator gen(seed, programCount); for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { RandomX::generateLightProg2(programs[i], gen); @@ -297,7 +299,7 @@ int main(int argc, char** argv) { dataset.dataset.size = datasetSize; RandomX::datasetAlloc(dataset, largePages); const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize; - if (useSuperscalar) { + if (!legacy) { RandomX::JitCompilerX86 jit86; jit86.generateSuperScalarHash(programs); jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount); @@ -330,11 +332,11 @@ int main(int argc, char** argv) { vm = new RandomX::CompiledVirtualMachine(); } else { - if (jit && useSuperscalar) + if (jit && !legacy) vm = new RandomX::CompiledLightVirtualMachine(); else if (jit) vm = new RandomX::CompiledLightVirtualMachine(); - else if (useSuperscalar) + else if (!legacy) vm = new RandomX::InterpretedVirtualMachine(softAes); else vm = new RandomX::InterpretedVirtualMachine(softAes); @@ -373,8 +375,8 @@ int main(int argc, char** argv) { double elapsed = sw.getElapsed(); std::cout << "Calculated result: "; result.print(std::cout); - if(programCount == 1000) - std::cout << "Reference result: 83875c55fb9ff4a75205a744b82926ebbe23219c6291889c9ee91603c845c597" << std::endl; + if(!legacy && programCount == 1000) + std::cout << "Reference result: 4a74a376d490c8b41d42887e86d4addb5a95572e0c663d1e81aec928e4e094e1" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; }