diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp
index c4e009c..b3511c1 100644
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@@ -23,7 +23,7 @@ along with RandomX. If not, see.
#include "common.hpp"
#include "reciprocal.h"
#include "Program.hpp"
-#include "./LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
namespace RandomX {
@@ -62,7 +62,7 @@ namespace RandomX {
}
}
- void AssemblyGeneratorX86::generateAsm(LightProgram& prog) {
+ void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
asmCode << "ALIGN 16" << std::endl;
for (unsigned i = 0; i < prog.getSize(); ++i) {
@@ -126,7 +126,7 @@ namespace RandomX {
}
}
- void AssemblyGeneratorX86::generateC(LightProgram& prog) {
+ void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
asmCode << "#include " << std::endl;
asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp
index 8688cd4..4b777e6 100644
--- a/src/AssemblyGeneratorX86.hpp
+++ b/src/AssemblyGeneratorX86.hpp
@@ -27,7 +27,7 @@ along with RandomX. If not, see.
namespace RandomX {
class Program;
- class LightProgram;
+ class SuperscalarProgram;
class AssemblyGeneratorX86;
typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
@@ -35,8 +35,8 @@ namespace RandomX {
class AssemblyGeneratorX86 {
public:
void generateProgram(Program& prog);
- void generateAsm(LightProgram& prog);
- void generateC(LightProgram& prog);
+ void generateAsm(SuperscalarProgram& prog);
+ void generateC(SuperscalarProgram& prog);
void printCode(std::ostream& os) {
os << asmCode.rdbuf();
}
diff --git a/src/Blake2Generator.cpp b/src/Blake2Generator.cpp
new file mode 100644
index 0000000..2879088
--- /dev/null
+++ b/src/Blake2Generator.cpp
@@ -0,0 +1,51 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "blake2/blake2.h"
+#include "blake2/endian.h"
+#include "Blake2Generator.hpp"
+#include "common.hpp"
+
+namespace RandomX {
+
+ Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { // Builds the initial state from a seed and a nonce; dataIndex starts at the end of the buffer, so the first getByte()/getInt32() makes checkData() rehash before anything is returned.
+ memset(data, 0, sizeof(data)); // zero the whole buffer so bytes not overwritten below have a fixed value
+ memcpy(data, seed, SeedSize); // seed goes to the start of the buffer; SeedSize is defined outside this file, and seed must point to at least that many bytes
+ store32(&data[60], nonce); // nonce is stored as a 32-bit value at byte offset 60; byte order is whatever store32 implements (presumably little-endian -- TODO confirm in blake2/endian.h)
+ }
+
+ uint8_t Blake2Generator::getByte() { // Returns the next unread byte of the generator buffer.
+ checkData(1); // regenerates the buffer first if no unread byte is left
+ return data[dataIndex++]; // hand out one byte and advance the read position
+ }
+
+ uint32_t Blake2Generator::getInt32() { // Returns the next 4 unread buffer bytes decoded by load32.
+ checkData(4); // regenerates the buffer if fewer than 4 unread bytes remain; checkData() then restarts at index 0, so any 1-3 leftover bytes are skipped
+ auto ret = load32(&data[dataIndex]); // byte order is whatever load32 implements -- TODO confirm in blake2/endian.h
+ dataIndex += 4; // consume the four bytes just read
+ return ret;
+ }
+
+ void Blake2Generator::checkData(const size_t bytesNeeded) { // Makes sure bytesNeeded unread bytes are available in data (for bytesNeeded up to sizeof(data)).
+ if (dataIndex + bytesNeeded > sizeof(data)) { // the request would run past the end of the buffer
+ blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); // data is passed as both buffers: presumably (out, outlen, in, inlen, key, keylen), i.e. an unkeyed hash of the buffer written back in place -- confirm against blake2/blake2.h
+ dataIndex = 0; // restart reading at the beginning; unread tail bytes of the old buffer are discarded
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Blake2Generator.hpp b/src/Blake2Generator.hpp
new file mode 100644
index 0000000..24f2fca
--- /dev/null
+++ b/src/Blake2Generator.hpp
@@ -0,0 +1,36 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#include
+
+namespace RandomX {
+
+ class Blake2Generator { // Hands out bytes and 32-bit values from a 64-byte buffer initialised from a seed and a nonce; member definitions live in Blake2Generator.cpp.
+ public:
+ Blake2Generator(const void* seed, int nonce); // seed is an untyped pointer; the number of bytes read from it is decided by the definition, not by this declaration
+ uint8_t getByte(); // returns one 8-bit value and advances the generator
+ uint32_t getInt32(); // returns one 32-bit value and advances the generator
+ private:
+ uint8_t data[64]; // internal 64-byte buffer the values are read from
+ size_t dataIndex; // index of the next unread byte in data
+
+ void checkData(const size_t); // called by the getters with the byte count they are about to read
+ };
+}
\ No newline at end of file
diff --git a/src/CompiledLightVirtualMachine.cpp b/src/CompiledLightVirtualMachine.cpp
index 760842a..11bedf8 100644
--- a/src/CompiledLightVirtualMachine.cpp
+++ b/src/CompiledLightVirtualMachine.cpp
@@ -24,7 +24,7 @@ along with RandomX. If not, see.
namespace RandomX {
template
- void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+ void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
if(superscalar)
@@ -32,8 +32,8 @@ namespace RandomX {
//datasetBasePtr = ds.dataset.memory;
}
- template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
- template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template
void CompiledLightVirtualMachine::initialize() {
diff --git a/src/CompiledLightVirtualMachine.hpp b/src/CompiledLightVirtualMachine.hpp
index 9493c58..1d4b78e 100644
--- a/src/CompiledLightVirtualMachine.hpp
+++ b/src/CompiledLightVirtualMachine.hpp
@@ -39,7 +39,7 @@ namespace RandomX {
_mm_free(ptr);
}
CompiledLightVirtualMachine() {}
- void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+ void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
};
}
\ No newline at end of file
diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp
index 4984938..3e44476 100644
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@@ -29,7 +29,7 @@ namespace RandomX {
CompiledVirtualMachine::CompiledVirtualMachine() {
}
- void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+ void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
datasetBasePtr = ds.dataset.memory;
diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp
index 65b1885..a2866ca 100644
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@@ -42,7 +42,7 @@ namespace RandomX {
_mm_free(ptr);
}
CompiledVirtualMachine();
- void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+ void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
virtual void execute() override;
void* getProgram() {
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index 673fecf..132a2c9 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -22,7 +22,6 @@ along with RandomX. If not, see.
#include "InterpretedVirtualMachine.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
-#include "LightClientAsyncWorker.hpp"
#include
#include
#include
@@ -36,7 +35,7 @@ along with RandomX. If not, see.
#ifdef STATS
#include
#endif
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
#ifdef FPUCHECK
constexpr bool fpuCheck = true;
@@ -47,7 +46,7 @@ constexpr bool fpuCheck = false;
namespace RandomX {
template
- void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+ void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
readDataset = &datasetReadLight;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
@@ -55,8 +54,8 @@ namespace RandomX {
precompileSuperscalar(programs);
}
- template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
- template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template
void InterpretedVirtualMachine::initialize() {
@@ -475,7 +474,7 @@ namespace RandomX {
}
template
- void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals) {
+ void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals) {
for (unsigned j = 0; j < prog.getSize(); ++j) {
Instruction& instr = prog(j);
switch (instr.opcode)
@@ -539,7 +538,7 @@ namespace RandomX {
Cache& cache = mem.ds.cache;
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
mixBlock = getMixBlock(registerValue, cache);
- LightProgram& prog = superScalarPrograms[i];
+ SuperscalarProgram& prog = superScalarPrograms[i];
executeSuperscalar(rl, prog, reciprocals);
@@ -554,7 +553,7 @@ namespace RandomX {
}
template
- void InterpretedVirtualMachine::precompileSuperscalar(LightProgram* programs) {
+ void InterpretedVirtualMachine::precompileSuperscalar(SuperscalarProgram* programs) {
memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
reciprocals.clear();
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp
index ddefa67..3632112 100644
--- a/src/InterpretedVirtualMachine.hpp
+++ b/src/InterpretedVirtualMachine.hpp
@@ -70,17 +70,17 @@ namespace RandomX {
}
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
~InterpretedVirtualMachine() {}
- void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+ void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
void execute() override;
- static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals);
+ static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals);
private:
static InstructionHandler engine[256];
DatasetReadFunc readDataset;
bool softAes;
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
std::vector reciprocals;
- alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
+ alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
#ifdef STATS
int count_ADD_64 = 0;
int count_ADD_32 = 0;
@@ -128,7 +128,7 @@ namespace RandomX {
int datasetAccess[256] = { 0 };
#endif
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
- void precompileSuperscalar(LightProgram*);
+ void precompileSuperscalar(SuperscalarProgram*);
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index 8e15e15..ad7c85a 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -87,7 +87,7 @@ namespace RandomX {
*/
#include "JitCompilerX86-static.hpp"
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
#define NOP_TEST true
@@ -261,16 +261,16 @@ namespace RandomX {
template void JitCompilerX86::generateProgramLight(Program& prog);
template
- void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) {
+ void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
codePos = superScalarHashOffset + codeSshInitSize;
for (unsigned j = 0; j < N; ++j) {
- LightProgram& prog = programs[j];
+ SuperscalarProgram& prog = programs[j];
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
- generateCode(instr, i);
+ generateCode(instr, i);
}
emit(codeShhLoad, codeSshLoadSize);
if (j < N - 1) {
@@ -290,7 +290,7 @@ namespace RandomX {
}
template
- void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
void JitCompilerX86::generateDatasetInitCode() {
memcpy(code, codeDatasetInit, datasetInitSize);
@@ -345,7 +345,7 @@ namespace RandomX {
}
template<>
- void JitCompilerX86::generateCode(Instruction& instr, int i) {
+ void JitCompilerX86::generateCode(Instruction& instr, int i) {
switch (instr.opcode)
{
case RandomX::SuperscalarInstructionType::ISUB_R:
diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp
index 9240cfe..2908b04 100644
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@@ -27,7 +27,7 @@ along with RandomX. If not, see.
namespace RandomX {
class Program;
- class LightProgram;
+ class SuperscalarProgram;
class JitCompilerX86;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
@@ -42,7 +42,7 @@ namespace RandomX {
template
void generateProgramLight(Program&);
template
- void generateSuperScalarHash(LightProgram (&programs)[N]);
+ void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
ProgramFunc getProgramFunc() {
return (ProgramFunc)code;
}
diff --git a/src/LightClientAsyncWorker.cpp b/src/LightClientAsyncWorker.cpp
deleted file mode 100644
index fbba713..0000000
--- a/src/LightClientAsyncWorker.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
-Copyright (c) 2019 tevador
-
-This file is part of RandomX.
-
-RandomX is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-RandomX is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with RandomX. If not, see.
-*/
-
-#include "LightClientAsyncWorker.hpp"
-#include "dataset.hpp"
-#include "Cache.hpp"
-
-namespace RandomX {
-
- LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),
-#ifdef TRACE
- sw(true),
-#endif
- workerThread(&LightClientAsyncWorker::runWorker, this) {
-
- }
-
- void LightClientAsyncWorker::prepareBlock(addr_t addr) {
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr / CacheLineSize << std::endl;
-#endif
- {
- std::lock_guard lk(mutex);
- startBlock = addr / CacheLineSize;
- blockCount = 1;
- output = currentLine.data();
- hasWork = true;
- }
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
-#endif
- notifier.notify_one();
- }
-
- const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) {
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl;
-#endif
- uint32_t currentBlock = addr / CacheLineSize;
- if (currentBlock != startBlock || output != currentLine.data()) {
- initBlock(cache, (uint8_t*)currentLine.data(), currentBlock, RANDOMX_CACHE_ACCESSES / 8);
- }
- else {
- sync();
- }
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": getBlock-return " << addr / CacheLineSize << std::endl;
-#endif
- return currentLine.data();
- }
-
- void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
-#endif
- {
- std::lock_guard lk(mutex);
- this->startBlock = startBlock;
- this->blockCount = blockCount;
- output = out;
- hasWork = true;
- notifier.notify_one();
- }
- }
-
- void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
- for (uint32_t i = 0; i < blockCount; ++i) {
- initBlock(cache, (uint8_t*)out + CacheLineSize * i, startBlock + i, RANDOMX_CACHE_ACCESSES / 8);
- }
- }
-
- void LightClientAsyncWorker::sync() {
- std::unique_lock lk(mutex);
- notifier.wait(lk, [this] { return !hasWork; });
- }
-
- void LightClientAsyncWorker::runWorker() {
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
-#endif
- for (;;) {
- std::unique_lock lk(mutex);
- notifier.wait(lk, [this] { return hasWork; });
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
-#endif
- //getBlocks(output, startBlock, blockCount);
- initBlock(cache, (uint8_t*)output, startBlock, RANDOMX_CACHE_ACCESSES / 8);
- hasWork = false;
-#ifdef TRACE
- std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
-#endif
- lk.unlock();
- notifier.notify_one();
- }
- }
-}
\ No newline at end of file
diff --git a/src/LightClientAsyncWorker.hpp b/src/LightClientAsyncWorker.hpp
deleted file mode 100644
index 7c45e53..0000000
--- a/src/LightClientAsyncWorker.hpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-Copyright (c) 2019 tevador
-
-This file is part of RandomX.
-
-RandomX is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-RandomX is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with RandomX. If not, see.
-*/
-
-//#define TRACE
-#include "common.hpp"
-
-#include
-#include
-#include
-#include
-#ifdef TRACE
-#include "Stopwatch.hpp"
-#include
-#endif
-
-namespace RandomX {
-
- using DatasetLine = std::array;
-
- class LightClientAsyncWorker : public ILightClientAsyncWorker {
- public:
- LightClientAsyncWorker(const Cache&);
- void prepareBlock(addr_t) final;
- void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
- const uint64_t* getBlock(addr_t) final;
- void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
- void sync() final;
- private:
- void runWorker();
- std::condition_variable notifier;
- std::mutex mutex;
- alignas(16) DatasetLine currentLine;
- void* output;
- uint32_t startBlock, blockCount;
- bool hasWork;
-#ifdef TRACE
- Stopwatch sw;
-#endif
- std::thread workerThread;
- };
-}
\ No newline at end of file
diff --git a/src/LightProgramGenerator.hpp b/src/LightProgramGenerator.hpp
deleted file mode 100644
index beb7974..0000000
--- a/src/LightProgramGenerator.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-Copyright (c) 2019 tevador
-
-This file is part of RandomX.
-
-RandomX is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-RandomX is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with RandomX. If not, see.
-*/
-
-#include "Program.hpp"
-
-namespace RandomX {
-
- // Intel Ivy Bridge reference
- namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
- constexpr int ISUB_R = 0; //1 p015 1 3
- constexpr int IXOR_R = 1; //1 p015 1 3
- constexpr int IADD_RS = 2; //1 p01 1 4
- constexpr int IMUL_R = 3; //1 p1 3 4
- constexpr int IROR_C = 4; //1 p05 1 4
- constexpr int IADD_C7 = 5; //1 p015 1 7
- constexpr int IXOR_C7 = 6; //1 p015 1 7
- constexpr int IADD_C8 = 7; //1+0 p015 1 8
- constexpr int IXOR_C8 = 8; //1+0 p015 1 8
- constexpr int IADD_C9 = 9; //1+0 p015 1 9
- constexpr int IXOR_C9 = 10; //1+0 p015 1 9
- constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3
- constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3
- constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4
-
- constexpr int COUNT = 14;
- constexpr int INVALID = -1;
- }
-
- class Blake2Generator {
- public:
- Blake2Generator(const void* seed, int nonce);
- uint8_t getByte();
- uint32_t getInt32();
- private:
- uint8_t data[64];
- size_t dataIndex;
-
- void checkData(const size_t);
- };
-
- double generateSuperscalar(LightProgram& prog, Blake2Generator& gen);
-}
\ No newline at end of file
diff --git a/src/Program.hpp b/src/Program.hpp
index 37c8303..2f2a402 100644
--- a/src/Program.hpp
+++ b/src/Program.hpp
@@ -53,12 +53,14 @@ namespace RandomX {
Instruction programBuffer[RANDOMX_PROGRAM_SIZE];
};
- class LightProgram {
+ static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
+
+ class SuperscalarProgram {
public:
Instruction& operator()(int pc) {
return programBuffer[pc];
}
- friend std::ostream& operator<<(std::ostream& os, const LightProgram& p) {
+ friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
p.print(os);
return os;
}
@@ -74,6 +76,15 @@ namespace RandomX {
void setAddressRegister(uint32_t val) {
addrReg = val;
}
+ double ipc;
+ int codeSize;
+ int macroOps;
+ int decodeCycles;
+ int cpuLatency;
+ int asicLatency;
+ int mulCount;
+ int cpuLatencies[8];
+ int asicLatencies[8];
private:
void print(std::ostream& os) const {
for (unsigned i = 0; i < size; ++i) {
@@ -85,6 +96,4 @@ namespace RandomX {
uint32_t size;
int addrReg;
};
-
- static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
}
diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp
index 1edacdb..7352933 100644
--- a/src/VirtualMachine.hpp
+++ b/src/VirtualMachine.hpp
@@ -28,7 +28,7 @@ namespace RandomX {
public:
VirtualMachine();
virtual ~VirtualMachine() {}
- virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
+ virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
void setScratchpad(void* ptr) {
scratchpad = (uint8_t*)ptr;
}
diff --git a/src/main.cpp b/src/main.cpp
index a120cf9..42dc15f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -36,7 +36,7 @@ along with RandomX. If not, see.
#include "dataset.hpp"
#include "Cache.hpp"
#include "hashAes1Rx4.hpp"
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
#include "JitCompilerX86.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@@ -226,13 +226,13 @@ int main(int argc, char** argv) {
readOption("--legacy", argc, argv, legacy);
if (genSuperscalar) {
- RandomX::LightProgram p;
+ RandomX::SuperscalarProgram p;
RandomX::Blake2Generator gen(seed, programCount);
RandomX::generateSuperscalar(p, gen);
RandomX::AssemblyGeneratorX86 asmX86;
asmX86.generateAsm(p);
//std::ofstream file("lightProg2.asm");
- //asmX86.printCode(std::cout);
+ asmX86.printCode(std::cout);
return 0;
}
@@ -268,7 +268,7 @@ int main(int argc, char** argv) {
const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize;
const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch);
dataset.cache.size = cacheSize;
- RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
+ RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl;
diff --git a/src/LightProgramGenerator.cpp b/src/superscalarGenerator.cpp
similarity index 76%
rename from src/LightProgramGenerator.cpp
rename to src/superscalarGenerator.cpp
index 40a767b..d4fd32a 100644
--- a/src/LightProgramGenerator.cpp
+++ b/src/superscalarGenerator.cpp
@@ -18,7 +18,6 @@ along with RandomX. If not, see.
*/
#include
-#include "blake2/blake2.h"
#include "configuration.h"
#include "Program.hpp"
#include "blake2/endian.h"
@@ -27,7 +26,7 @@ along with RandomX. If not, see.
#include
#include
#include
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
namespace RandomX {
@@ -35,6 +34,7 @@ namespace RandomX {
return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP;
}
+ //uOPs (micro-ops) are represented only by the execution port they can go to
namespace ExecutionPort {
using type = int;
constexpr type Null = 0;
@@ -46,40 +46,9 @@ namespace RandomX {
constexpr type P015 = P0 | P1 | P5;
}
- Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) {
- memset(data, 0, sizeof(data));
- memcpy(data, seed, SeedSize);
- store32(&data[60], nonce);
- }
-
- uint8_t Blake2Generator::getByte() {
- checkData(1);
- return data[dataIndex++];
- }
-
- uint32_t Blake2Generator::getInt32() {
- checkData(4);
- auto ret = load32(&data[dataIndex]);
- dataIndex += 4;
- return ret;
- }
-
- void Blake2Generator::checkData(const size_t bytesNeeded) {
- if (dataIndex + bytesNeeded > sizeof(data)) {
- blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
- dataIndex = 0;
- }
- }
-
- class RegisterInfo {
- public:
- RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {}
- int latency;
- int lastOpGroup;
- int lastOpPar;
- int value;
- };
-
+ //Macro-operation as output of the x86 decoder
+ //Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op
+ //A macro-op can consist of 1 or 2 uOPs.
class MacroOp {
public:
MacroOp(const char* name, int size)
@@ -137,10 +106,7 @@ namespace RandomX {
int latency_;
ExecutionPort::type uop1_;
ExecutionPort::type uop2_;
- int cycle_;
bool dependent_ = false;
- MacroOp* depDst_ = nullptr;
- MacroOp* depSrc_ = nullptr;
};
//Size: 3 bytes
@@ -174,7 +140,7 @@ namespace RandomX {
const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
- class LightInstructionInfo {
+ class SuperscalarInstructionInfo {
public:
const char* getName() const {
return name_;
@@ -203,21 +169,21 @@ namespace RandomX {
int getSrcOp() const {
return srcOp_;
}
- static const LightInstructionInfo ISUB_R;
- static const LightInstructionInfo IXOR_R;
- static const LightInstructionInfo IADD_RS;
- static const LightInstructionInfo IMUL_R;
- static const LightInstructionInfo IROR_C;
- static const LightInstructionInfo IADD_C7;
- static const LightInstructionInfo IXOR_C7;
- static const LightInstructionInfo IADD_C8;
- static const LightInstructionInfo IXOR_C8;
- static const LightInstructionInfo IADD_C9;
- static const LightInstructionInfo IXOR_C9;
- static const LightInstructionInfo IMULH_R;
- static const LightInstructionInfo ISMULH_R;
- static const LightInstructionInfo IMUL_RCP;
- static const LightInstructionInfo NOP;
+ static const SuperscalarInstructionInfo ISUB_R;
+ static const SuperscalarInstructionInfo IXOR_R;
+ static const SuperscalarInstructionInfo IADD_RS;
+ static const SuperscalarInstructionInfo IMUL_R;
+ static const SuperscalarInstructionInfo IROR_C;
+ static const SuperscalarInstructionInfo IADD_C7;
+ static const SuperscalarInstructionInfo IXOR_C7;
+ static const SuperscalarInstructionInfo IADD_C8;
+ static const SuperscalarInstructionInfo IXOR_C8;
+ static const SuperscalarInstructionInfo IADD_C9;
+ static const SuperscalarInstructionInfo IXOR_C9;
+ static const SuperscalarInstructionInfo IMULH_R;
+ static const SuperscalarInstructionInfo ISMULH_R;
+ static const SuperscalarInstructionInfo IMUL_RCP;
+ static const SuperscalarInstructionInfo NOP;
private:
const char* name_;
int type_;
@@ -227,14 +193,14 @@ namespace RandomX {
int dstOp_ = 0;
int srcOp_;
- LightInstructionInfo(const char* name)
+ SuperscalarInstructionInfo(const char* name)
: name_(name), type_(-1), latency_(0) {}
- LightInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
+ SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) {
ops_.push_back(MacroOp(op));
}
template
- LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
+ SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) {
for (unsigned i = 0; i < N; ++i) {
ops_.push_back(MacroOp(arr[i]));
@@ -244,24 +210,34 @@ namespace RandomX {
}
};
- const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
- const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
- const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
- const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
- const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
- const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
- const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
- const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
- const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
+ const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
+
+ //these are some of the ways to split a 16-byte window into 3 or 4 x86 instructions.
+ //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
+ //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
+ const int buffer0[] = { 4, 8, 4 };
+ const int buffer1[] = { 7, 3, 3, 3 };
+ const int buffer2[] = { 3, 7, 3, 3 };
+ const int buffer3[] = { 4, 9, 3 };
+ const int buffer4[] = { 4, 4, 4, 4 };
+ const int buffer5[] = { 3, 3, 10 };
class DecoderBuffer {
public:
@@ -318,16 +294,6 @@ namespace RandomX {
}
};
- //these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
- //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
- //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
- const int buffer0[] = { 4, 8, 4 };
- const int buffer1[] = { 7, 3, 3, 3 };
- const int buffer2[] = { 3, 7, 3, 3 };
- const int buffer3[] = { 4, 9, 3 };
- const int buffer4[] = { 4, 4, 4, 4 };
- const int buffer5[] = { 3, 3, 10 };
-
const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
@@ -344,13 +310,13 @@ namespace RandomX {
const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
- const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
- const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
- const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS };
- const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 };
- const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 };
- const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 };
- const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
+ const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R };
+ const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R };
+ const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS };
+ const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 };
+ const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 };
+ const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 };
+ const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP;
static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
int index;
@@ -367,9 +333,19 @@ namespace RandomX {
return true;
}
- class LightInstruction {
+ class RegisterInfo {
public:
- void toInstr(Instruction& instr) {
+ RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {}
+ int latency;
+ int lastOpGroup;
+ int lastOpPar;
+ int value;
+ };
+
+ //"SuperscalarInstruction" consists of one or more macro-ops
+ class SuperscalarInstruction {
+ public:
+ void toInstr(Instruction& instr) { //translate to a RandomX instruction format
instr.opcode = getType();
instr.dst = dst_;
instr.src = src_ >= 0 ? src_ : dst_;
@@ -392,7 +368,7 @@ namespace RandomX {
case 4:
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
if (fetchType == 4 && !isLast) {
- create(&LightInstructionInfo::IMUL_R, gen);
+ create(&SuperscalarInstructionInfo::IMUL_R, gen);
}
else {
create(slot_4[gen.getByte() & 1], gen);
@@ -415,7 +391,7 @@ namespace RandomX {
}
}
- void create(const LightInstructionInfo* info, Blake2Generator& gen) {
+ void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) {
info_ = info;
reset();
switch (info->getType())
@@ -445,7 +421,7 @@ namespace RandomX {
mod_ = 0;
imm32_ = 0;
opGroup_ = SuperscalarInstructionType::IMUL_R;
- opGroupPar_ = -1;
+ groupParIsSource_ = true;
} break;
case SuperscalarInstructionType::IROR_C: {
@@ -505,18 +481,22 @@ namespace RandomX {
}
}
- bool selectDestination(int cycle, RegisterInfo (&registers)[8], Blake2Generator& gen) {
+ bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator& gen) {
+ /*if (allowChainedMul && opGroup_ == SuperscalarInstructionType::IMUL_R)
+ std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/
std::vector<int> availableRegisters;
//Conditions for the destination register:
// * value must be ready at the required cycle
// * cannot be the same as the source register unless the instruction allows it
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
+ // * register cannot be multiplied twice in a row unless allowChainedMul is true
+ // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
+ // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
// * either the last instruction applied to the register or its source must be different than this instruction
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
- // - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
for (unsigned i = 0; i < 8; ++i) {
- if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
+ if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
availableRegisters.push_back(i);
}
return selectRegister(availableRegisters, gen, dst_);
@@ -560,14 +540,14 @@ namespace RandomX {
return opGroupPar_;
}
- const LightInstructionInfo& getInfo() const {
+ const SuperscalarInstructionInfo& getInfo() const {
return *info_;
}
- static const LightInstruction Null;
+ static const SuperscalarInstruction Null;
private:
- const LightInstructionInfo* info_;
+ const SuperscalarInstructionInfo* info_;
int src_ = -1;
int dst_ = -1;
int mod_;
@@ -582,15 +562,16 @@ namespace RandomX {
canReuse_ = groupParIsSource_ = false;
}
- LightInstruction(const LightInstructionInfo* info) : info_(info) {
+ SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) {
}
};
- const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP);
+ const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
- constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3;
+ constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4;
constexpr int LOOK_FORWARD_CYCLES = 4;
constexpr int MAX_THROWAWAY_COUNT = 256;
+
#ifndef _DEBUG
constexpr bool TRACE = false;
constexpr bool INFO = false;
@@ -602,7 +583,7 @@ namespace RandomX {
template<bool commit>
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
- //P1 (multiplication port) by instructions that can go to any port.
+ //port P1 (multiplication) by instructions that can go to any port.
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
if (commit) {
@@ -666,14 +647,14 @@ namespace RandomX {
return -1;
}
- double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) {
+ void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) {
ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
memset(portBusy, 0, sizeof(portBusy));
RegisterInfo registers[8];
const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
- LightInstruction currentInstruction = LightInstruction::Null;
+ SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null;
int macroOpIndex = 0;
int codeSize = 0;
int macroOpCount = 0;
@@ -719,7 +700,9 @@ namespace RandomX {
int scheduleCycle = scheduleMop(mop, portBusy, cycle, depCycle);
if (scheduleCycle < 0) {
/*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
- return 0;
+ //__debugbreak();
+ portsSaturated = true;
+ break;
}
//find a source register (if applicable) that will be ready when this instruction executes
@@ -737,20 +720,20 @@ namespace RandomX {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
+ //cycle = topCycle;
continue;
}
//abort this decode buffer
- /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl;
- currentInstruction = LightInstruction::Null;
+ /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
+ currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
}
- throwAwayCount = 0;
//find a destination register that will be ready when this instruction executes
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
int forward;
- for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) {
+ for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
++scheduleCycle;
++cycle;
@@ -760,16 +743,18 @@ namespace RandomX {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
+ //cycle = topCycle;
continue;
}
//abort this decode buffer
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
- currentInstruction = LightInstruction::Null;
+ currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
}
throwAwayCount = 0;
+
//recalculate when the instruction can be scheduled for execution based on operand availability
scheduleCycle = scheduleMop(mop, portBusy, scheduleCycle, scheduleCycle);
@@ -809,67 +794,53 @@ namespace RandomX {
++cycle;
}
- if(INFO) std::cout << "; ALU port utilization:" << std::endl;
- if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
-
- int portCycles = 0;
- for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
- //std::cout << "; " << std::setw(3) << i << " ";
- for (int j = 0; j < 3; ++j) {
- //std::cout << (portBusy[i][j] ? '*' : '_');
- portCycles += !!portBusy[i][j];
- }
- //std::cout << std::endl;
- }
-
double ipc = (macroOpCount / (double)retireCycle);
- if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl;
- if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
- if (INFO) std::cout << "; fetch cycles: " << decodeCycle << std::endl;
- if (INFO) std::cout << "; RandomX instructions: " << programSize << std::endl;
- if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
- if (INFO) std::cout << "; IPC = " << ipc << std::endl;
- if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl;
- if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl;
-
- int asicLatency[8];
- memset(asicLatency, 0, sizeof(asicLatency));
+ memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies));
//Calculate ASIC latency:
//Assumes 1 cycle latency for all operations and unlimited parallelization.
for (int i = 0; i < programSize; ++i) {
Instruction& instr = prog(i);
- int latDst = asicLatency[instr.dst] + 1;
- int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0;
- asicLatency[instr.dst] = std::max(latDst, latSrc);
+ int latDst = prog.asicLatencies[instr.dst] + 1;
+ int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0;
+ prog.asicLatencies[instr.dst] = std::max(latDst, latSrc);
}
//address register is the register with the highest ASIC latency
int asicLatencyMax = 0;
int addressReg = 0;
for (int i = 0; i < 8; ++i) {
- if (asicLatency[i] > asicLatencyMax) {
- asicLatencyMax = asicLatency[i];
+ if (prog.asicLatencies[i] > asicLatencyMax) {
+ asicLatencyMax = prog.asicLatencies[i];
addressReg = i;
}
- }
-
- if (INFO) std::cout << "; ASIC latency: " << asicLatencyMax << std::endl;
-
- if (INFO) {
- std::cout << "; ASIC latency:" << std::endl;
- for (int i = 0; i < 8; ++i) {
- std::cout << "; r" << i << " = " << asicLatency[i] << std::endl;
- }
- if (INFO) std::cout << "; CPU latency:" << std::endl;
- for (int i = 0; i < 8; ++i) {
- std::cout << "; r" << i << " = " << registers[i].latency << std::endl;
- }
+ prog.cpuLatencies[i] = registers[i].latency;
}
prog.setSize(programSize);
prog.setAddressRegister(addressReg);
- return ipc;
+
+ prog.cpuLatency = retireCycle;
+ prog.asicLatency = asicLatencyMax;
+ prog.codeSize = codeSize;
+ prog.macroOps = macroOpCount;
+ prog.decodeCycles = decodeCycle;
+ prog.ipc = ipc;
+ prog.mulCount = mulCount;
+
+
+ /*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
+ if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
+
+ int portCycles = 0;
+ for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
+ std::cout << "; " << std::setw(3) << i << " ";
+ for (int j = 0; j < 3; ++j) {
+ std::cout << (portBusy[i][j] ? '*' : '_');
+ portCycles += !!portBusy[i][j];
+ }
+ std::cout << std::endl;
+ }*/
}
}
\ No newline at end of file
diff --git a/src/superscalarGenerator.hpp b/src/superscalarGenerator.hpp
new file mode 100644
index 0000000..a64e80d
--- /dev/null
+++ b/src/superscalarGenerator.hpp
@@ -0,0 +1,47 @@
+/*
+Copyright (c) 2019 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see.
+*/
+
+#pragma once
+#include "Program.hpp"
+#include "Blake2Generator.hpp"
+
+namespace RandomX {
+ // Intel Ivy Bridge reference
+ namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
+ constexpr int ISUB_R = 0; //1 p015 1 3 (sub)
+ constexpr int IXOR_R = 1; //1 p015 1 3 (xor)
+ constexpr int IADD_RS = 2; //1 p01 1 4 (lea)
+ constexpr int IMUL_R = 3; //1 p1 3 4 (imul)
+ constexpr int IROR_C = 4; //1 p05 1 4 (ror)
+ constexpr int IADD_C7 = 5; //1 p015 1 7 (add)
+ constexpr int IXOR_C7 = 6; //1 p015 1 7 (xor)
+ constexpr int IADD_C8 = 7; //1+0 p015 1 7+1 (add+nop)
+ constexpr int IXOR_C8 = 8; //1+0 p015 1 7+1 (xor+nop)
+ constexpr int IADD_C9 = 9; //1+0 p015 1 7+2 (add+nop)
+ constexpr int IXOR_C9 = 10; //1+0 p015 1 7+2 (xor+nop)
+ constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov)
+ constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov)
+ constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 (mov+imul)
+
+ constexpr int COUNT = 14;
+ constexpr int INVALID = -1;
+ }
+
+ void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
+}
\ No newline at end of file
diff --git a/src/tests/superscalar-avalanche.cpp b/src/tests/superscalar-avalanche.cpp
index 9c91a88..9fa1613 100644
--- a/src/tests/superscalar-avalanche.cpp
+++ b/src/tests/superscalar-avalanche.cpp
@@ -20,9 +20,10 @@ along with RandomX. If not, see.
#include
#include
#include
-#include "../LightProgramGenerator.hpp"
+#include "../superscalarGenerator.hpp"
#include "../InterpretedVirtualMachine.hpp"
#include "../intrinPortable.h"
+#include "../Blake2Generator.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@@ -45,9 +46,9 @@ int main() {
uint64_t rb[8];
memcpy(rb, ra, sizeof rb);
rb[0] ^= (1ULL << bit);
- RandomX::LightProgram p;
+ RandomX::SuperscalarProgram p;
RandomX::Blake2Generator gen(seed, i);
- RandomX::generateLightProg2(p, gen);
+ RandomX::generateSuperscalar(p, gen);
RandomX::InterpretedVirtualMachine::executeSuperscalar(ra, p, dummy);
RandomX::InterpretedVirtualMachine::executeSuperscalar(rb, p, dummy);
uint64_t diff = 0;
diff --git a/src/tests/superscalar-init.cpp b/src/tests/superscalar-init.cpp
index b366355..a7c1208 100644
--- a/src/tests/superscalar-init.cpp
+++ b/src/tests/superscalar-init.cpp
@@ -21,7 +21,7 @@ along with RandomX. If not, see.
#include
#include
#include
-#include "../LightProgramGenerator.hpp"
+#include "../superscalarGenerator.hpp"
#include "../InterpretedVirtualMachine.hpp"
#include "../intrinPortable.h"
#include "../configuration.h"
diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj
index 1c1cae0..d646143 100644
--- a/vcxproj/randomx.vcxproj
+++ b/vcxproj/randomx.vcxproj
@@ -127,6 +127,7 @@
+
@@ -137,8 +138,7 @@
-
-
+
@@ -153,6 +153,7 @@
+
@@ -167,8 +168,7 @@
-
-
+
diff --git a/vcxproj/randomx.vcxproj.filters b/vcxproj/randomx.vcxproj.filters
index 5b821c8..77939bd 100644
--- a/vcxproj/randomx.vcxproj.filters
+++ b/vcxproj/randomx.vcxproj.filters
@@ -54,12 +54,6 @@
Source Files
-
- Source Files
-
-
- Source Files
-
Source Files
@@ -75,6 +69,12 @@
Source Files
+
+ Source Files
+
+
+ Source Files
+
@@ -136,12 +136,6 @@
Header Files
-
- Header Files
-
-
- Header Files
-
Header Files
@@ -166,5 +160,11 @@
Header Files
+
+ Header Files
+
+
+ Header Files
+
\ No newline at end of file
diff --git a/vcxproj/superscalar-avalanche.vcxproj b/vcxproj/superscalar-avalanche.vcxproj
index dab0311..1cac62b 100644
--- a/vcxproj/superscalar-avalanche.vcxproj
+++ b/vcxproj/superscalar-avalanche.vcxproj
@@ -118,6 +118,7 @@
+
@@ -125,9 +126,9 @@
-
+
diff --git a/vcxproj/superscalar-avalanche.vcxproj.filters b/vcxproj/superscalar-avalanche.vcxproj.filters
index 9984ed1..93b3838 100644
--- a/vcxproj/superscalar-avalanche.vcxproj.filters
+++ b/vcxproj/superscalar-avalanche.vcxproj.filters
@@ -45,9 +45,6 @@
Source Files
-
- Source Files
-
Source Files
@@ -60,6 +57,12 @@
Source Files
+
+ Source Files
+
+
+ Source Files
+
diff --git a/vcxproj/superscalar-init.vcxproj b/vcxproj/superscalar-init.vcxproj
index 4c4794c..d765f85 100644
--- a/vcxproj/superscalar-init.vcxproj
+++ b/vcxproj/superscalar-init.vcxproj
@@ -118,6 +118,7 @@
+
@@ -125,9 +126,9 @@
-
+
diff --git a/vcxproj/superscalar-init.vcxproj.filters b/vcxproj/superscalar-init.vcxproj.filters
index 4666d07..cad6e2b 100644
--- a/vcxproj/superscalar-init.vcxproj.filters
+++ b/vcxproj/superscalar-init.vcxproj.filters
@@ -42,9 +42,6 @@
Source Files
-
- Source Files
-
Source Files
@@ -60,6 +57,12 @@
Source Files
+
+ Source Files
+
+
+ Source Files
+