More refactoring

2024-12-22 15:58:53 +00:00 · 2019-04-12 19:36:08 +02:00 · 2019-04-12 19:36:08 +02:00 · 8c37d4aac3
commit 8c37d4aac3
parent 9404516dd8
28 changed files with 347 additions and 453 deletions
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@ -23,7 +23,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "common.hpp"
 #include "reciprocal.h"
 #include "Program.hpp"
-#include "./LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
 namespace RandomX {
@ -62,7 +62,7 @@ namespace RandomX {
 		}
 	}
-	void AssemblyGeneratorX86::generateAsm(LightProgram& prog) {
+	void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
 		asmCode.str(std::string()); //clear
 		asmCode << "ALIGN 16" << std::endl;
 		for (unsigned i = 0; i < prog.getSize(); ++i) {
@ -126,7 +126,7 @@ namespace RandomX {
 		}
 	}
-	void AssemblyGeneratorX86::generateC(LightProgram& prog) {
+	void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
 		asmCode.str(std::string()); //clear
 		asmCode << "#include <stdint.h>" << std::endl;
 		asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
--- a/src/AssemblyGeneratorX86.hpp
+++ b/src/AssemblyGeneratorX86.hpp
@ -27,7 +27,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 namespace RandomX {
 	class Program;
-	class LightProgram;
+	class SuperscalarProgram;
 	class AssemblyGeneratorX86;
 	typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
@ -35,8 +35,8 @@ namespace RandomX {
 	class AssemblyGeneratorX86 {
 	public:
 		void generateProgram(Program& prog);
-		void generateAsm(LightProgram& prog);
+		void generateAsm(SuperscalarProgram& prog);
-		void generateC(LightProgram& prog);
+		void generateC(SuperscalarProgram& prog);
 		void printCode(std::ostream& os) {
 			os << asmCode.rdbuf();
 		}
--- a/src/Blake2Generator.cpp
+++ b/src/Blake2Generator.cpp
@ -0,0 +1,51 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #include "blake2/blake2.h"
 #include "blake2/endian.h"
 #include "Blake2Generator.hpp"
 #include "common.hpp"
 namespace RandomX {
 	// Prepares the 64-byte generator state: zero-filled, with SeedSize bytes of
 	// `seed` copied to the front and `nonce` stored at byte offset 60.
 	Blake2Generator::Blake2Generator(const void* seed, int nonce)
 		: dataIndex(sizeof(data)) // start with the read position at the end of the buffer
 	{
 		memset(data, 0, sizeof(data));
 		memcpy(data, seed, SeedSize);
 		store32(data + 60, nonce);
 	}
 	// Returns the byte at the current read position and advances past it;
 	// checkData() refills the buffer first when no byte is left.
 	uint8_t Blake2Generator::getByte() {
 		checkData(sizeof(uint8_t));
 		const uint8_t next = data[dataIndex];
 		++dataIndex;
 		return next;
 	}
 	// Reads a 32-bit value with load32() from the current read position and
 	// advances the position by four bytes; checkData() refills the buffer first
 	// when fewer than four bytes are left.
 	uint32_t Blake2Generator::getInt32() {
 		checkData(sizeof(uint32_t));
 		const uint8_t* cursor = data + dataIndex;
 		dataIndex += sizeof(uint32_t);
 		return load32(cursor);
 	}
 	// Makes sure `bytesNeeded` bytes can be read from the buffer. When they
 	// cannot, the buffer is replaced by the blake2b hash of itself and the read
 	// position is reset to the start.
 	void Blake2Generator::checkData(const size_t bytesNeeded) {
 		const bool enoughLeft = dataIndex + bytesNeeded <= sizeof(data);
 		if (enoughLeft)
 			return;
 		blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
 		dataIndex = 0;
 	}
 }
--- a/src/Blake2Generator.hpp
+++ b/src/Blake2Generator.hpp
@ -0,0 +1,36 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #pragma once
 #include <cstdint>
 namespace RandomX {
 	// Generator of bytes and 32-bit words backed by a 64-byte buffer.
 	// All member functions are defined out of line.
 	class Blake2Generator {
 	public:
 		// Constructed from a seed buffer and an integer nonce.
 		Blake2Generator(const void* seed, int nonce);
 		uint8_t getByte();
 		uint32_t getInt32();
 	private:
 		// Called by the getters with the number of bytes they are about to read.
 		void checkData(const size_t bytesNeeded);
 		uint8_t data[64];
 		size_t dataIndex; // current read position inside `data`
 	};
 }
--- a/src/CompiledLightVirtualMachine.cpp
+++ b/src/CompiledLightVirtualMachine.cpp
@ -24,7 +24,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 namespace RandomX {
 	template<bool superscalar>
-	void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+	void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
 		mem.ds = ds;
 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
 		if(superscalar)
@ -32,8 +32,8 @@ namespace RandomX {
 		//datasetBasePtr = ds.dataset.memory;
 	}
-	template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+	template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
-	template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+	template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
 	template<bool superscalar>
 	void CompiledLightVirtualMachine<superscalar>::initialize() {
--- a/src/CompiledLightVirtualMachine.hpp
+++ b/src/CompiledLightVirtualMachine.hpp
@ -39,7 +39,7 @@ namespace RandomX {
 			_mm_free(ptr);
 		}
 		CompiledLightVirtualMachine() {}
-		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
 		void initialize() override;
 	};
 }
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@ -29,7 +29,7 @@ namespace RandomX {
 	CompiledVirtualMachine::CompiledVirtualMachine() {
 	}
-	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
 		mem.ds = ds;
 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
 		datasetBasePtr = ds.dataset.memory;
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@ -42,7 +42,7 @@ namespace RandomX {
 			_mm_free(ptr);
 		}
 		CompiledVirtualMachine();
-		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
 		void initialize() override;
 		virtual void execute() override;
 		void* getProgram() {
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@ -22,7 +22,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "InterpretedVirtualMachine.hpp"
 #include "dataset.hpp"
 #include "Cache.hpp"
 #include "LightClientAsyncWorker.hpp"
 #include <iostream>
 #include <iomanip>
 #include <stdexcept>
@ -36,7 +35,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #ifdef STATS
 #include <algorithm>
 #endif
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
 #ifdef FPUCHECK
 constexpr bool fpuCheck = true;
@ -47,7 +46,7 @@ constexpr bool fpuCheck = false;
 namespace RandomX {
 	template<bool superscalar>
-	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
 		mem.ds = ds;
 		readDataset = &datasetReadLight;
 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
@ -55,8 +54,8 @@ namespace RandomX {
 			precompileSuperscalar(programs);
 	}
-	template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+	template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
-	template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+	template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
 	template<bool superscalar>
 	void InterpretedVirtualMachine<superscalar>::initialize() {
@ -475,7 +474,7 @@ namespace RandomX {
 	}
 	template<bool superscalar>
-	void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals) {
+	void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals) {
 		for (unsigned j = 0; j < prog.getSize(); ++j) {
 			Instruction& instr = prog(j);
 			switch (instr.opcode)
@ -539,7 +538,7 @@ namespace RandomX {
 		Cache& cache = mem.ds.cache;
 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
 			mixBlock = getMixBlock(registerValue, cache);
-			LightProgram& prog = superScalarPrograms[i];
+			SuperscalarProgram& prog = superScalarPrograms[i];
 			executeSuperscalar(rl, prog, reciprocals);
@ -554,7 +553,7 @@ namespace RandomX {
 	}
 	template<bool superscalar>
-	void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) {
+	void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(SuperscalarProgram* programs) {
 		memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
 		reciprocals.clear();
 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
--- a/src/InterpretedVirtualMachine.hpp
+++ b/src/InterpretedVirtualMachine.hpp
@ -70,17 +70,17 @@ namespace RandomX {
 		}
 		InterpretedVirtualMachine(bool soft) : softAes(soft) {}
 		~InterpretedVirtualMachine() {}
-		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
+		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
 		void initialize() override;
 		void execute() override;
-		static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals);
+		static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals);
 	private:
 		static InstructionHandler<superscalar> engine[256];
 		DatasetReadFunc readDataset;
 		bool softAes;
 		InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
 		std::vector<uint64_t> reciprocals;
-		alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
+		alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
 #ifdef STATS
 		int count_ADD_64 = 0;
 		int count_ADD_32 = 0;
@ -128,7 +128,7 @@ namespace RandomX {
 		int datasetAccess[256] = { 0 };
 #endif
 		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
-		void precompileSuperscalar(LightProgram*);
+		void precompileSuperscalar(SuperscalarProgram*);
 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
 		void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@ -87,7 +87,7 @@ namespace RandomX {
 	*/
 #include "JitCompilerX86-static.hpp"
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
 #define NOP_TEST true
@ -261,16 +261,16 @@ namespace RandomX {
 	template void JitCompilerX86::generateProgramLight<false>(Program& prog);
 	template<size_t N>
-	void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) {
+	void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
 		memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
 		codePos = superScalarHashOffset + codeSshInitSize;
 		for (unsigned j = 0; j < N; ++j) {
-			LightProgram& prog = programs[j];
+			SuperscalarProgram& prog = programs[j];
 			for (unsigned i = 0; i < prog.getSize(); ++i) {
 				Instruction& instr = prog(i);
 				instr.src %= RegistersCount;
 				instr.dst %= RegistersCount;
-				generateCode<LightProgram>(instr, i);
+				generateCode<SuperscalarProgram>(instr, i);
 			}
 			emit(codeShhLoad, codeSshLoadSize);
 			if (j < N - 1) {
@ -290,7 +290,7 @@ namespace RandomX {
 	}
 	template
-	void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+	void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
 	void JitCompilerX86::generateDatasetInitCode() {
 		memcpy(code, codeDatasetInit, datasetInitSize);
@ -345,7 +345,7 @@ namespace RandomX {
 	}
 	template<>
-	void JitCompilerX86::generateCode<LightProgram>(Instruction& instr, int i) {
+	void JitCompilerX86::generateCode<SuperscalarProgram>(Instruction& instr, int i) {
 		switch (instr.opcode)
 		{
 		case RandomX::SuperscalarInstructionType::ISUB_R:
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@ -27,7 +27,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 namespace RandomX {
 	class Program;
-	class LightProgram;
+	class SuperscalarProgram;
 	class JitCompilerX86;
 	typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
@ -42,7 +42,7 @@ namespace RandomX {
 		template<bool superscalar>
 		void generateProgramLight(Program&);
 		template<size_t N>
-		void generateSuperScalarHash(LightProgram (&programs)[N]);
+		void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
 		ProgramFunc getProgramFunc() {
 			return (ProgramFunc)code;
 		}
--- a/src/LightClientAsyncWorker.cpp
+++ b/src/LightClientAsyncWorker.cpp
@ -1,113 +0,0 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #include "LightClientAsyncWorker.hpp"
 #include "dataset.hpp"
 #include "Cache.hpp"
 namespace RandomX {
 	// Forwards the cache to the base class, starts with no pending work and no
 	// output buffer, and launches the worker thread on runWorker().
 	LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), 
 #ifdef TRACE
 		sw(true),
 #endif
 		// The thread begins running runWorker() as soon as this member is constructed.
 		workerThread(&LightClientAsyncWorker::runWorker, this) {
 	}
 	// Asks the worker thread to compute the single block containing `addr`
 	// into the internal `currentLine` buffer. Returns without waiting; the
 	// result is picked up later through getBlock().
 	void LightClientAsyncWorker::prepareBlock(addr_t addr) {
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr / CacheLineSize << std::endl;
 #endif
 		{
 			// Publish the request under the mutex so the worker sees a consistent set of fields.
 			std::lock_guard<std::mutex> lk(mutex);
 			startBlock = addr / CacheLineSize;
 			blockCount = 1;
 			output = currentLine.data();
 			hasWork = true;
 		}
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
 #endif
 		// The lock is already released here, so the woken worker can take it right away.
 		notifier.notify_one();
 	}
 	// Returns a pointer to `currentLine` holding the block that contains `addr`.
 	// If that block is the one most recently requested via prepareBlock(), the
 	// call waits for the worker to finish; otherwise the block is computed
 	// synchronously on the calling thread.
 	const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) {
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl;
 #endif
 		uint32_t currentBlock = addr / CacheLineSize;
 		// A mismatch means either a different block was prefetched or the last
 		// request targeted an external buffer (prepareBlocks), so the prefetch
 		// cannot be used.
 		if (currentBlock != startBlock || output != currentLine.data()) {
 			initBlock(cache, (uint8_t*)currentLine.data(), currentBlock, RANDOMX_CACHE_ACCESSES / 8);
 		}
 		else {
 			sync();
 		}
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": getBlock-return " << addr / CacheLineSize << std::endl;
 #endif
 		return currentLine.data();
 	}
 	// Posts a request for the worker thread: records the caller-supplied output
 	// buffer and the requested block range, then wakes the worker. Returns
 	// without waiting; call sync() to wait for completion.
 	// NOTE(review): runWorker() currently calls initBlock() for `startBlock`
 	// only (its getBlocks call is commented out), so `blockCount` is recorded
 	// but not acted upon there.
 	void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
 #endif
 		{
 			// The parameters shadow the members of the same name, hence `this->`.
 			std::lock_guard<std::mutex> lk(mutex);
 			this->startBlock = startBlock;
 			this->blockCount = blockCount;
 			output = out;
 			hasWork = true;
 		}
 		// Notify after the lock is released, as prepareBlock() does, so the
 		// woken worker does not immediately block on the mutex.
 		notifier.notify_one();
 	}
 	// Synchronously computes `blockCount` consecutive blocks, starting at
 	// `startBlock`, on the calling thread. Block i is written to `out` at byte
 	// offset CacheLineSize * i.
 	void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
 		uint8_t* const dst = static_cast<uint8_t*>(out);
 		for (uint32_t i = 0; i < blockCount; ++i) {
 			// Widen before multiplying so the byte offset cannot wrap in 32-bit
 			// arithmetic when the block count is large.
 			const uint64_t offset = static_cast<uint64_t>(CacheLineSize) * i;
 			initBlock(cache, dst + offset, startBlock + i, RANDOMX_CACHE_ACCESSES / 8);
 		}
 	}
 	// Blocks the caller until no request is pending (hasWork is false).
 	void LightClientAsyncWorker::sync() {
 		std::unique_lock<std::mutex> guard(mutex);
 		// Re-check after every wake-up: wake-ups may be spurious.
 		while (hasWork) {
 			notifier.wait(guard);
 		}
 	}
 	// Body of the worker thread: waits for a posted request, computes one
 	// block into `output`, clears the request flag and signals completion.
 	// NOTE(review): the loop has no exit condition, so this function never returns.
 	void LightClientAsyncWorker::runWorker() {
 #ifdef TRACE
 		std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
 #endif
 		for (;;) {
 			std::unique_lock<std::mutex> lk(mutex);
 			// Sleep until prepareBlock()/prepareBlocks() sets hasWork.
 			notifier.wait(lk, [this] { return hasWork; });
 #ifdef TRACE
 			std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
 #endif
 			// Only `startBlock` is computed; the multi-block call is disabled.
 			// The mutex stays held for the duration of initBlock().
 			//getBlocks(output, startBlock, blockCount);
 			initBlock(cache, (uint8_t*)output, startBlock, RANDOMX_CACHE_ACCESSES / 8);
 			hasWork = false;
 #ifdef TRACE
 			std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
 #endif
 			// Release the lock before notifying so a thread waiting in sync() can proceed at once.
 			lk.unlock();
 			notifier.notify_one();
 		}
 	}
 }
--- a/src/LightClientAsyncWorker.hpp
+++ b/src/LightClientAsyncWorker.hpp
@ -1,57 +0,0 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 //#define TRACE
 #include "common.hpp"
 #include <thread>
 #include <mutex>
 #include <condition_variable>
 #include <array>
 #ifdef TRACE
 #include "Stopwatch.hpp"
 #include <iostream>
 #endif
 namespace RandomX {
 	using DatasetLine = std::array<uint64_t, CacheLineSize / sizeof(uint64_t)>;
 	// Implementation of ILightClientAsyncWorker that owns a background thread
 	// running runWorker(). Every public method is a `final` override.
 	// NOTE(review): no destructor is declared here and runWorker() loops
 	// forever, so workerThread is still joinable when an instance is destroyed
 	// — confirm how instances are expected to be torn down.
 	class LightClientAsyncWorker : public ILightClientAsyncWorker {
 	public:
 		LightClientAsyncWorker(const Cache&);
 		void prepareBlock(addr_t) final;
 		void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
 		const uint64_t* getBlock(addr_t) final;
 		void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
 		void sync() final;
 	private:
 		// Entry point of workerThread.
 		void runWorker();
 		// Signalled both when a request is posted and when the worker finishes one.
 		std::condition_variable notifier;
 		// Held while the request fields below are written and while the worker processes a request.
 		std::mutex mutex;
 		// Buffer used by the single-block prepareBlock()/getBlock() path.
 		alignas(16) DatasetLine currentLine;
 		// Destination of the pending request.
 		void* output;
 		uint32_t startBlock, blockCount;
 		// True from the moment a request is posted until the worker has completed it.
 		bool hasWork;
 #ifdef TRACE
 		Stopwatch sw;
 #endif
 		// Declared last: members are initialized in declaration order, and the
 		// thread starts using the members above as soon as it is constructed.
 		std::thread workerThread;
 	};
 }
--- a/src/LightProgramGenerator.hpp
+++ b/src/LightProgramGenerator.hpp
@ -1,58 +0,0 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #include "Program.hpp"
 namespace RandomX {
 	//                             Intel Ivy Bridge reference
 	// Integer identifiers of the superscalar instruction types. The comment
 	// columns to the right of each constant follow the header on the first
 	// line: uOPs (decode), execution ports, latency and code size.
 	namespace SuperscalarInstructionType {        //uOPs (decode)   execution ports         latency       code size
 		constexpr int ISUB_R = 0;           //1               p015                    1               3
 		constexpr int IXOR_R = 1;           //1               p015                    1               3
 		constexpr int IADD_RS = 2;          //1               p01                     1               4
 		constexpr int IMUL_R = 3;           //1               p1                      3               4
 		constexpr int IROR_C = 4;           //1               p05                     1               4
 		constexpr int IADD_C7 = 5;          //1               p015                    1               7
 		constexpr int IXOR_C7 = 6;          //1               p015                    1               7
 		constexpr int IADD_C8 = 7;          //1+0             p015                    1               8
 		constexpr int IXOR_C8 = 8;          //1+0             p015                    1               8
 		constexpr int IADD_C9 = 9;          //1+0             p015                    1               9
 		constexpr int IXOR_C9 = 10;         //1+0             p015                    1               9
 		constexpr int IMULH_R = 11;         //1+2+1           0+(p1,p5)+0             3               3+3+3
 		constexpr int ISMULH_R = 12;        //1+2+1           0+(p1,p5)+0             3               3+3+3
 		constexpr int IMUL_RCP = 13;        //1+1             p015+p1                 4              10+4
 		// One past the highest identifier above (the identifiers are 0..13).
 		constexpr int COUNT = 14;
 		// Value outside the 0..COUNT-1 range.
 		constexpr int INVALID = -1;
 	}
 	// Generator of bytes and 32-bit words backed by a 64-byte buffer.
 	// Only declarations live here; the member functions are defined out of line.
 	class Blake2Generator {
 	public:
 		// Constructed from a seed buffer and an integer nonce.
 		Blake2Generator(const void* seed, int nonce);
 		uint8_t getByte();
 		uint32_t getInt32();
 	private:
 		// Takes the number of bytes the caller is about to read.
 		void checkData(const size_t bytesNeeded);
 		uint8_t data[64];
 		size_t dataIndex; // current read position inside `data`
 	};
 	double generateSuperscalar(LightProgram& prog, Blake2Generator& gen);
 }
--- a/src/Program.hpp
+++ b/src/Program.hpp
@ -53,12 +53,14 @@ namespace RandomX {
 		Instruction programBuffer[RANDOMX_PROGRAM_SIZE];
 	};
-	class LightProgram {
+	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
 	class SuperscalarProgram {
 	public:
 		Instruction& operator()(int pc) {
 			return programBuffer[pc];
 		}
-		friend std::ostream& operator<<(std::ostream& os, const LightProgram& p) {
+		friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
 			p.print(os);
 			return os;
 		}
@ -74,6 +76,15 @@ namespace RandomX {
 		void setAddressRegister(uint32_t val) {
 			addrReg = val;
 		}
 		double ipc;
 		int codeSize;
 		int macroOps;
 		int decodeCycles;
 		int cpuLatency;
 		int asicLatency;
 		int mulCount;
 		int cpuLatencies[8];
 		int asicLatencies[8];
 	private:
 		void print(std::ostream& os) const {
 			for (unsigned i = 0; i < size; ++i) {
@ -85,6 +96,4 @@ namespace RandomX {
 		uint32_t size;
 		int addrReg;
 	};
 	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
 }
--- a/src/VirtualMachine.hpp
+++ b/src/VirtualMachine.hpp
@ -28,7 +28,7 @@ namespace RandomX {
 	public:
 		VirtualMachine();
 		virtual ~VirtualMachine() {}
-		virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
+		virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
 		void setScratchpad(void* ptr) {
 			scratchpad = (uint8_t*)ptr;
 		}
--- a/src/main.cpp
+++ b/src/main.cpp
@ -36,7 +36,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "dataset.hpp"
 #include "Cache.hpp"
 #include "hashAes1Rx4.hpp"
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
 #include "JitCompilerX86.hpp"
 const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@ -226,13 +226,13 @@ int main(int argc, char** argv) {
 	readOption("--legacy", argc, argv, legacy);
 	if (genSuperscalar) {
-		RandomX::LightProgram p;
+		RandomX::SuperscalarProgram p;
 		RandomX::Blake2Generator gen(seed, programCount);
 		RandomX::generateSuperscalar(p, gen);
 		RandomX::AssemblyGeneratorX86 asmX86;
 		asmX86.generateAsm(p);
 		//std::ofstream file("lightProg2.asm");
-		//asmX86.printCode(std::cout);
+		asmX86.printCode(std::cout);
 		return 0;
 	}
@ -268,7 +268,7 @@ int main(int argc, char** argv) {
 	const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize;
 	const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch);
 	dataset.cache.size = cacheSize;
-	RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
+	RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
 	std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl;
--- a/src/LightProgramGenerator.cpp
+++ b/src/LightProgramGenerator.cpp
@ -18,7 +18,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #include <stddef.h>
 #include "blake2/blake2.h"
 #include "configuration.h"
 #include "Program.hpp"
 #include "blake2/endian.h"
@ -27,7 +26,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <algorithm>
 #include <stdexcept>
 #include <iomanip>
-#include "LightProgramGenerator.hpp"
+#include "superscalarGenerator.hpp"
 namespace RandomX {
@ -35,6 +34,7 @@ namespace RandomX {
 		return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP;
 	}
 	//uOPs (micro-ops) are represented only by the execution port they can go to
 	namespace ExecutionPort {
 		using type = int;
 		constexpr type Null = 0;
@ -46,40 +46,9 @@ namespace RandomX {
 		constexpr type P015 = P0 | P1 | P5;
 	}
-	Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) {
+	//Macro-operation as output of the x86 decoder
-		memset(data, 0, sizeof(data));
+	//Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op
-		memcpy(data, seed, SeedSize);
+	//Macro-op can consist of 1 or 2 uOPs.
 		store32(&data[60], nonce);
 	}
 	uint8_t Blake2Generator::getByte() {
 		checkData(1);
 		return data[dataIndex++];
 	}
 	uint32_t Blake2Generator::getInt32() {
 		checkData(4);
 		auto ret = load32(&data[dataIndex]);
 		dataIndex += 4;
 		return ret;
 	}
 	void Blake2Generator::checkData(const size_t bytesNeeded) {
 		if (dataIndex + bytesNeeded > sizeof(data))	{
 			blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
 			dataIndex = 0;
 		}
 	}
 	// Record of four public ints. A default-constructed instance has
 	// latency 0, lastOpGroup -1, lastOpPar -1 and value 0.
 	class RegisterInfo {
 	public:
 		int latency = 0;
 		int lastOpGroup = -1;
 		int lastOpPar = -1;
 		int value = 0;
 		// Kept user-provided so the class stays a non-aggregate, as before.
 		RegisterInfo() {}
 	};
 	class MacroOp {
 	public:
 		MacroOp(const char* name, int size)
@ -137,10 +106,7 @@ namespace RandomX {
 		int latency_;
 		ExecutionPort::type uop1_;
 		ExecutionPort::type uop2_;
 		int cycle_;
 		bool dependent_ = false;
 		MacroOp* depDst_ = nullptr;
 		MacroOp* depSrc_ = nullptr;
 	};
 	//Size: 3 bytes
@ -174,7 +140,7 @@ namespace RandomX {
 	const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
 	const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
-	class LightInstructionInfo {
+	class SuperscalarInstructionInfo {
 	public:
 		const char* getName() const {
 			return name_;
@ -203,21 +169,21 @@ namespace RandomX {
 		int getSrcOp() const {
 			return srcOp_;
 		}
-		static const LightInstructionInfo ISUB_R;
+		static const SuperscalarInstructionInfo ISUB_R;
-		static const LightInstructionInfo IXOR_R;
+		static const SuperscalarInstructionInfo IXOR_R;
-		static const LightInstructionInfo IADD_RS;
+		static const SuperscalarInstructionInfo IADD_RS;
-		static const LightInstructionInfo IMUL_R;
+		static const SuperscalarInstructionInfo IMUL_R;
-		static const LightInstructionInfo IROR_C;
+		static const SuperscalarInstructionInfo IROR_C;
-		static const LightInstructionInfo IADD_C7;
+		static const SuperscalarInstructionInfo IADD_C7;
-		static const LightInstructionInfo IXOR_C7;
+		static const SuperscalarInstructionInfo IXOR_C7;
-		static const LightInstructionInfo IADD_C8;
+		static const SuperscalarInstructionInfo IADD_C8;
-		static const LightInstructionInfo IXOR_C8;
+		static const SuperscalarInstructionInfo IXOR_C8;
-		static const LightInstructionInfo IADD_C9;
+		static const SuperscalarInstructionInfo IADD_C9;
-		static const LightInstructionInfo IXOR_C9;
+		static const SuperscalarInstructionInfo IXOR_C9;
-		static const LightInstructionInfo IMULH_R;
+		static const SuperscalarInstructionInfo IMULH_R;
-		static const LightInstructionInfo ISMULH_R;
+		static const SuperscalarInstructionInfo ISMULH_R;
-		static const LightInstructionInfo IMUL_RCP;
+		static const SuperscalarInstructionInfo IMUL_RCP;
-		static const LightInstructionInfo NOP;
+		static const SuperscalarInstructionInfo NOP;
 	private:
 		const char* name_;
 		int type_;
@ -227,14 +193,14 @@ namespace RandomX {
 		int dstOp_ = 0;
 		int srcOp_;
-		LightInstructionInfo(const char* name)
+		SuperscalarInstructionInfo(const char* name)
 			: name_(name), type_(-1), latency_(0) {}
-		LightInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
+		SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
 			: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) {
 			ops_.push_back(MacroOp(op));
 		}
 		template <size_t N>
-		LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
+		SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
 			: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) {
 			for (unsigned i = 0; i < N; ++i) {
 				ops_.push_back(MacroOp(arr[i]));
@ -244,24 +210,34 @@ namespace RandomX {
 		}
 	};
 	//Definitions of the static instruction descriptors.
 	//Single macro-op form:   (name, type, macro-op, srcOp)
 	//Multiple macro-op form: (name, type, macro-op array, resultOp, dstOp, srcOp)
 	//A srcOp of -1 is passed for the descriptors that operate on a constant (IROR_C, IADD_C*, IXOR_C*, IMUL_RCP).
 	//NOTE(review): presumably -1 means that no source register is selected for the instruction - confirm against the generator loop.
-	const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
-	const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
-	const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
-	const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
-	const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
-	const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
-	const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
-	const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
-	const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
 	//These are some of the ways a 16-byte window can be split into 3 or 4 x86 instructions (the sizes in each array add up to 16).
 	//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
 	//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
 	const int buffer0[] = { 4, 8, 4 };
 	const int buffer1[] = { 7, 3, 3, 3 };
 	const int buffer2[] = { 3, 7, 3, 3 };
 	const int buffer3[] = { 4, 9, 3 };
 	const int buffer4[] = { 4, 4, 4, 4 };
 	const int buffer5[] = { 3, 3, 10 };
 	class DecoderBuffer {
 	public:
@ -318,16 +294,6 @@ namespace RandomX {
 		}
 	};
 	//These are some of the ways a 16-byte window can be split into 3 or 4 x86 instructions (the sizes in each array add up to 16).
 	//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
 	//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
 	const int buffer0[] = { 4, 8, 4 };
 	const int buffer1[] = { 7, 3, 3, 3 };
 	const int buffer2[] = { 3, 7, 3, 3 };
 	const int buffer3[] = { 4, 9, 3 };
 	const int buffer4[] = { 4, 4, 4, 4 };
 	const int buffer5[] = { 3, 3, 10 };
 	const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
 	const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
 	const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
@ -344,13 +310,13 @@ namespace RandomX {
 	const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
 	//Candidate instruction descriptors for each slot size (the number in the name is the slot size in bytes).
 	//slot_3L extends slot_3 with IMULH_R and ISMULH_R; slot_10 is a single descriptor rather than an array.
 	//The generator indexes these arrays with bits of a generated byte, e.g. slot_4[gen.getByte() & 1].
-	const LightInstructionInfo* slot_3[]  = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
+	const SuperscalarInstructionInfo* slot_3[]  = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R };
-	const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
+	const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R };
-	const LightInstructionInfo* slot_4[]  = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS };
+	const SuperscalarInstructionInfo* slot_4[]  = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS };
-	const LightInstructionInfo* slot_7[]  = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 };
+	const SuperscalarInstructionInfo* slot_7[]  = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 };
-	const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 };
+	const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 };
-	const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 };
+	const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 };
-	const LightInstructionInfo* slot_10   = &LightInstructionInfo::IMUL_RCP;
+	const SuperscalarInstructionInfo* slot_10   = &SuperscalarInstructionInfo::IMUL_RCP;
 	static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
 		int index;
@ -367,9 +333,19 @@ namespace RandomX {
 		return true;
 	}
-	class LightInstruction {
 	//Per-register bookkeeping kept by the generator (it holds an array of 8 of these, one per register).
+	class RegisterInfo {
 	public:
-		void toInstr(Instruction& instr) {
+		RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {}
 		//compared against the scheduling cycle when a destination register is selected
 		int latency;
 		//compared against the opGroup_ of the instruction when a destination register is selected
 		int lastOpGroup;
 		//compared against the opGroupPar_ of the instruction when a destination register is selected
 		int lastOpPar;
 		//NOTE(review): initialised to 0; how this field is used is not visible in this part of the file
 		int value;
 	};
 	//"SuperscalarInstruction" consists of one or more macro-ops
 	class SuperscalarInstruction {
 	public:
 		void toInstr(Instruction& instr) { //translate to a RandomX instruction format
 			instr.opcode = getType();
 			instr.dst = dst_;
 			instr.src = src_ >= 0 ? src_ : dst_;
@ -392,7 +368,7 @@ namespace RandomX {
 			case 4:
 				//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
 				if (fetchType == 4 && !isLast) {
-					create(&LightInstructionInfo::IMUL_R, gen);
+					create(&SuperscalarInstructionInfo::IMUL_R, gen);
 				}
 				else {
 					create(slot_4[gen.getByte() & 1], gen);
@ -415,7 +391,7 @@ namespace RandomX {
 			}
 		}
-		void create(const LightInstructionInfo* info, Blake2Generator& gen) {
+		void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) {
 			info_ = info;
 			reset();
 			switch (info->getType())
@ -445,7 +421,7 @@ namespace RandomX {
 				mod_ = 0;
 				imm32_ = 0;
 				opGroup_ = SuperscalarInstructionType::IMUL_R;
-				opGroupPar_ = -1;
+				groupParIsSource_ = true;
 			} break;
 			case SuperscalarInstructionType::IROR_C: {
@ -505,18 +481,22 @@ namespace RandomX {
 			}
 		}
-		bool selectDestination(int cycle, RegisterInfo (&registers)[8], Blake2Generator& gen) {
+		bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator& gen) {
 			/*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R)
 				std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/
 			std::vector<int> availableRegisters;
 			//Conditions for the destination register:
 			// * value must be ready at the required cycle
 			// * cannot be the same as the source register unless the instruction allows it
 			//   - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
 			// * register cannot be multiplied twice in a row unless allowChainedMul is true 
 			//   - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
 			//   - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
 			// * either the last instruction applied to the register or its source must be different than this instruction
 			//   - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
 			//   - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication
 			// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
 			for (unsigned i = 0; i < 8; ++i) {
-				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
+				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
 					availableRegisters.push_back(i);
 			}
 			return selectRegister(availableRegisters, gen, dst_);
@ -560,14 +540,14 @@ namespace RandomX {
 			return opGroupPar_;
 		}
-		const LightInstructionInfo& getInfo() const {
+		const SuperscalarInstructionInfo& getInfo() const {
 			return *info_;
 		}
-		static const LightInstruction Null;
+		static const SuperscalarInstruction Null;
 	private:
-		const LightInstructionInfo* info_;
+		const SuperscalarInstructionInfo* info_;
 		int src_ = -1;
 		int dst_ = -1;
 		int mod_;
@ -582,15 +562,16 @@ namespace RandomX {
 			canReuse_ = groupParIsSource_ = false;
 		}
-		LightInstruction(const LightInstructionInfo* info) : info_(info) {
+		SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) {
 		}
 	};
-	const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP);
+	const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
 	//number of cycles covered by the port-busy map (first dimension of the portBusy array)
-	constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3;
+	constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4;
 	//upper bound on how many successive cycles are tried when selecting a destination register
 	constexpr int LOOK_FORWARD_CYCLES = 4;
 	//NOTE(review): presumably the limit for throwAwayCount (discarded instructions) - the comparison is not visible in this part of the file
 	constexpr int MAX_THROWAWAY_COUNT = 256;
 #ifndef _DEBUG
 	constexpr bool TRACE = false;
 	constexpr bool INFO = false;
@ -602,7 +583,7 @@ namespace RandomX {
 	template<bool commit>
 	static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
 		//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
-		//P1 (multiplication port) by instructions that can go to any port.
+		//port P1 (multiplication) by instructions that can go to any port.
 		for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
 			if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
 				if (commit) {
@ -666,14 +647,14 @@ namespace RandomX {
 		return -1;
 	}
-	double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) {
+	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) {
 		ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
 		memset(portBusy, 0, sizeof(portBusy));
 		RegisterInfo registers[8];
 		const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
-		LightInstruction currentInstruction = LightInstruction::Null;
+		SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null;
 		int macroOpIndex = 0;
 		int codeSize = 0;
 		int macroOpCount = 0;
@ -719,7 +700,9 @@ namespace RandomX {
 				int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
 				if (scheduleCycle < 0) {
 					/*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
-					return 0;
+					//__debugbreak();
 					portsSaturated = true;
 					break;
 				}
 				//find a source register (if applicable) that will be ready when this instruction executes
@ -737,20 +720,20 @@ namespace RandomX {
 							throwAwayCount++;
 							macroOpIndex = currentInstruction.getInfo().getSize();
 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
 							//cycle = topCycle;
 							continue;
 						}
 						//abort this decode buffer
-						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl;
+						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
-						currentInstruction = LightInstruction::Null;
+						currentInstruction = SuperscalarInstruction::Null;
 						break;
 					}
 					if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
 				}
 				throwAwayCount = 0;
 				//find a destination register that will be ready when this instruction executes
 				if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
 					int forward;
-					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) {
+					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
 						if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
 						++scheduleCycle;
 						++cycle;
@ -760,16 +743,18 @@ namespace RandomX {
 							throwAwayCount++;
 							macroOpIndex = currentInstruction.getInfo().getSize();
 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
 							//cycle = topCycle;
 							continue;
 						}
 						//abort this decode buffer
 						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
-						currentInstruction = LightInstruction::Null;
+						currentInstruction = SuperscalarInstruction::Null;
 						break;
 					}
 					if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
 				}
 				throwAwayCount = 0;
 				//recalculate when the instruction can be scheduled for execution based on operand availability
 				scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);
@ -809,67 +794,53 @@ namespace RandomX {
 			++cycle;
 		}
 		if(INFO) std::cout << "; ALU port utilization:" << std::endl;
 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
 		int portCycles = 0;
 		for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
 			//std::cout << "; " << std::setw(3) << i << " ";
 			for (int j = 0; j < 3; ++j) {
 				//std::cout << (portBusy[i][j] ? '*' : '_');
 				portCycles += !!portBusy[i][j];
 			}
 			//std::cout << std::endl;
 		}
 		double ipc = (macroOpCount / (double)retireCycle);
-		if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl;
+		memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies));
 		if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
 		if (INFO) std::cout << "; fetch cycles: " << decodeCycle << std::endl;
 		if (INFO) std::cout << "; RandomX instructions: " << programSize << std::endl;
 		if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
 		if (INFO) std::cout << "; IPC = " << ipc << std::endl;
 		if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl;
 		if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl;
 		int asicLatency[8];
 		memset(asicLatency, 0, sizeof(asicLatency));
 		//Calculate ASIC latency:
 		//Assumes 1 cycle latency for all operations and unlimited parallelization.
 		for (int i = 0; i < programSize; ++i) {
 			Instruction& instr = prog(i);
-			int latDst = asicLatency[instr.dst] + 1;
+			int latDst = prog.asicLatencies[instr.dst] + 1;
-			int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0;
+			int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0;
-			asicLatency[instr.dst] = std::max(latDst, latSrc);
+			prog.asicLatencies[instr.dst] = std::max(latDst, latSrc);
 		}
 		//address register is the register with the highest ASIC latency
 		int asicLatencyMax = 0;
 		int addressReg = 0;
 		for (int i = 0; i < 8; ++i) {
-			if (asicLatency[i] > asicLatencyMax) {
+			if (prog.asicLatencies[i] > asicLatencyMax) {
-				asicLatencyMax = asicLatency[i];
+				asicLatencyMax = prog.asicLatencies[i];
 				addressReg = i;
 			}
-		}
+			prog.cpuLatencies[i] = registers[i].latency;
 		if (INFO) std::cout << "; ASIC latency: " << asicLatencyMax << std::endl;
 		if (INFO) {
 			std::cout << "; ASIC latency:" << std::endl;
 			for (int i = 0; i < 8; ++i) {
 				std::cout << ";  r" << i << " = " << asicLatency[i] << std::endl;
 			}
 			if (INFO) std::cout << "; CPU latency:" << std::endl;
 			for (int i = 0; i < 8; ++i) {
 				std::cout << ";  r" << i << " = " << registers[i].latency << std::endl;
 			}
 		}
 		prog.setSize(programSize);
 		prog.setAddressRegister(addressReg);
-		return ipc;
+
 		prog.cpuLatency = retireCycle;
 		prog.asicLatency = asicLatencyMax;
 		prog.codeSize = codeSize;
 		prog.macroOps = macroOpCount;
 		prog.decodeCycles = decodeCycle;
 		prog.ipc = ipc;
 		prog.mulCount = mulCount;
 		/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
 		int portCycles = 0;
 		for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
 			std::cout << "; " << std::setw(3) << i << " ";
 			for (int j = 0; j < 3; ++j) {
 				std::cout << (portBusy[i][j] ? '*' : '_');
 				portCycles += !!portBusy[i][j];
 			}
 			std::cout << std::endl;
 		}*/
 	}
 }
--- a/src/superscalarGenerator.hpp
+++ b/src/superscalarGenerator.hpp
@ -0,0 +1,47 @@
 /*
 Copyright (c) 2019 tevador
 This file is part of RandomX.
 RandomX is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 RandomX is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 #pragma once
 #include "Program.hpp"
 #include "Blake2Generator.hpp"
 namespace RandomX {
 	                                              //                  Intel Ivy Bridge reference
 	namespace SuperscalarInstructionType {        //uOPs (decode)   execution ports         latency       code size
 		constexpr int ISUB_R = 0;                 //1               p015                    1               3 (sub)
 		constexpr int IXOR_R = 1;                 //1               p015                    1               3 (xor)
 		constexpr int IADD_RS = 2;                //1               p01                     1               4 (lea)
 		constexpr int IMUL_R = 3;                 //1               p1                      3               4 (imul)
 		constexpr int IROR_C = 4;                 //1               p05                     1               4 (ror)
 		constexpr int IADD_C7 = 5;                //1               p015                    1               7 (add)
 		constexpr int IXOR_C7 = 6;                //1               p015                    1               7 (xor)
 		constexpr int IADD_C8 = 7;                //1+0             p015                    1               7+1 (add+nop)
 		constexpr int IXOR_C8 = 8;                //1+0             p015                    1               7+1 (xor+nop)
 		constexpr int IADD_C9 = 9;                //1+0             p015                    1               7+2 (add+nop)
 		constexpr int IXOR_C9 = 10;               //1+0             p015                    1               7+2 (xor+nop)
 		constexpr int IMULH_R = 11;               //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+mul+mov)
 		constexpr int ISMULH_R = 12;              //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+imul+mov)
 		constexpr int IMUL_RCP = 13;              //1+1             p015+p1                 4              10+4   (mov+imul)
 		constexpr int COUNT = 14;
 		constexpr int INVALID = -1;
 	}
 	//Generates a superscalar program into 'prog', drawing the generated bytes it needs from 'gen'.
 	//Besides the instructions, the definition sets the size, the address register and the statistics fields
 	//(latencies, code size, macro-op count, decode cycles, IPC, multiplication count) of 'prog'.
 	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
 }
--- a/src/tests/superscalar-avalanche.cpp
+++ b/src/tests/superscalar-avalanche.cpp
@ -20,9 +20,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <iostream>
 #include <cstdint>
 #include <vector>
-#include "../LightProgramGenerator.hpp"
+#include "../superscalarGenerator.hpp"
 #include "../InterpretedVirtualMachine.hpp"
 #include "../intrinPortable.h"
 #include "../Blake2Generator.hpp"
 const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@ -45,9 +46,9 @@ int main() {
 			uint64_t rb[8];
 			memcpy(rb, ra, sizeof rb);
 			rb[0] ^= (1ULL << bit);
-			RandomX::LightProgram p;
+			RandomX::SuperscalarProgram p;
 			RandomX::Blake2Generator gen(seed, i);
-			RandomX::generateLightProg2(p, gen);
+			RandomX::generateSuperscalar(p, gen);
 			RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(ra, p, dummy);
 			RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(rb, p, dummy);
 			uint64_t diff = 0;
--- a/src/tests/superscalar-init.cpp
+++ b/src/tests/superscalar-init.cpp
@ -21,7 +21,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <cstdint>
 #include <vector>
 #include <unordered_set>
-#include "../LightProgramGenerator.hpp"
+#include "../superscalarGenerator.hpp"
 #include "../InterpretedVirtualMachine.hpp"
 #include "../intrinPortable.h"
 #include "../configuration.h"
--- a/vcxproj/randomx.vcxproj
+++ b/vcxproj/randomx.vcxproj
@ -127,6 +127,7 @@
    <ClCompile Include="..\src\argon2_core.c" />
    <ClCompile Include="..\src\argon2_ref.c" />
    <ClCompile Include="..\src\AssemblyGeneratorX86.cpp" />
    <ClCompile Include="..\src\Blake2Generator.cpp" />
    <ClCompile Include="..\src\blake2\blake2b.c" />
    <ClCompile Include="..\src\Cache.cpp" />
    <ClCompile Include="..\src\CompiledLightVirtualMachine.cpp" />
@ -137,8 +138,7 @@
    <ClCompile Include="..\src\instructionsPortable.cpp" />
    <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
    <ClCompile Include="..\src\JitCompilerX86.cpp" />
-    <ClCompile Include="..\src\LightClientAsyncWorker.cpp" />
+    <ClCompile Include="..\src\superscalarGenerator.cpp" />
    <ClCompile Include="..\src\LightProgramGenerator.cpp" />
    <ClCompile Include="..\src\main.cpp" />
    <ClCompile Include="..\src\reciprocal.c" />
    <ClCompile Include="..\src\softAes.cpp" />
@ -153,6 +153,7 @@
    <ClInclude Include="..\src\argon2.h" />
    <ClInclude Include="..\src\argon2_core.h" />
    <ClInclude Include="..\src\AssemblyGeneratorX86.hpp" />
    <ClInclude Include="..\src\Blake2Generator.hpp" />
    <ClInclude Include="..\src\Cache.hpp" />
    <ClInclude Include="..\src\catch.hpp" />
    <ClInclude Include="..\src\common.hpp" />
@ -167,8 +168,7 @@
    <ClInclude Include="..\src\intrinPortable.h" />
    <ClInclude Include="..\src\JitCompilerX86-static.hpp" />
    <ClInclude Include="..\src\JitCompilerX86.hpp" />
-    <ClInclude Include="..\src\LightClientAsyncWorker.hpp" />
+    <ClInclude Include="..\src\superscalarGenerator.hpp" />
    <ClInclude Include="..\src\LightProgramGenerator.hpp" />
    <ClInclude Include="..\src\Program.hpp" />
    <ClInclude Include="..\src\reciprocal.h" />
    <ClInclude Include="..\src\softAes.h" />
--- a/vcxproj/randomx.vcxproj.filters
+++ b/vcxproj/randomx.vcxproj.filters
@ -54,12 +54,6 @@
    <ClCompile Include="..\src\JitCompilerX86.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\LightClientAsyncWorker.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\LightProgramGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\main.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
@ -75,6 +69,12 @@
    <ClCompile Include="..\src\blake2\blake2b.c">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\Blake2Generator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\superscalarGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <MASM Include="..\src\JitCompilerX86-static.asm">
@ -136,12 +136,6 @@
    <ClInclude Include="..\src\JitCompilerX86-static.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\LightClientAsyncWorker.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\LightProgramGenerator.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\Program.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
@ -166,5 +160,11 @@
    <ClInclude Include="..\src\virtualMemory.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\Blake2Generator.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\superscalarGenerator.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
 </Project>
--- a/vcxproj/superscalar-avalanche.vcxproj
+++ b/vcxproj/superscalar-avalanche.vcxproj
@ -118,6 +118,7 @@
  <ItemGroup>
    <ClCompile Include="..\src\argon2_core.c" />
    <ClCompile Include="..\src\argon2_ref.c" />
    <ClCompile Include="..\src\Blake2Generator.cpp" />
    <ClCompile Include="..\src\blake2\blake2b.c" />
    <ClCompile Include="..\src\Cache.cpp" />
    <ClCompile Include="..\src\dataset.cpp" />
@ -125,9 +126,9 @@
    <ClCompile Include="..\src\Instruction.cpp" />
    <ClCompile Include="..\src\instructionsPortable.cpp" />
    <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
    <ClCompile Include="..\src\LightProgramGenerator.cpp" />
    <ClCompile Include="..\src\reciprocal.c" />
    <ClCompile Include="..\src\softAes.cpp" />
    <ClCompile Include="..\src\superscalarGenerator.cpp" />
    <ClCompile Include="..\src\tests\superscalar-avalanche.cpp" />
    <ClCompile Include="..\src\VirtualMachine.cpp" />
    <ClCompile Include="..\src\virtualMemory.cpp" />
--- a/vcxproj/superscalar-avalanche.vcxproj.filters
+++ b/vcxproj/superscalar-avalanche.vcxproj.filters
@ -45,9 +45,6 @@
    <ClCompile Include="..\src\blake2\blake2b.c">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\LightProgramGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\hashAes1Rx4.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
@ -60,6 +57,12 @@
    <ClCompile Include="..\src\virtualMemory.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\superscalarGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\Blake2Generator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <MASM Include="..\src\squareHash.asm">
--- a/vcxproj/superscalar-init.vcxproj
+++ b/vcxproj/superscalar-init.vcxproj
@ -118,6 +118,7 @@
  <ItemGroup>
    <ClCompile Include="..\src\argon2_core.c" />
    <ClCompile Include="..\src\argon2_ref.c" />
    <ClCompile Include="..\src\Blake2Generator.cpp" />
    <ClCompile Include="..\src\blake2\blake2b.c" />
    <ClCompile Include="..\src\Cache.cpp" />
    <ClCompile Include="..\src\dataset.cpp" />
@ -125,9 +126,9 @@
    <ClCompile Include="..\src\Instruction.cpp" />
    <ClCompile Include="..\src\instructionsPortable.cpp" />
    <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
    <ClCompile Include="..\src\LightProgramGenerator.cpp" />
    <ClCompile Include="..\src\reciprocal.c" />
    <ClCompile Include="..\src\softAes.cpp" />
    <ClCompile Include="..\src\superscalarGenerator.cpp" />
    <ClCompile Include="..\src\tests\superscalar-init.cpp" />
    <ClCompile Include="..\src\VirtualMachine.cpp" />
    <ClCompile Include="..\src\virtualMemory.cpp" />
--- a/vcxproj/superscalar-init.vcxproj.filters
+++ b/vcxproj/superscalar-init.vcxproj.filters
@ -42,9 +42,6 @@
    <ClCompile Include="..\src\InterpretedVirtualMachine.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\LightProgramGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\reciprocal.c">
      <Filter>Source Files</Filter>
    </ClCompile>
@ -60,6 +57,12 @@
    <ClCompile Include="..\src\virtualMemory.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\superscalarGenerator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\Blake2Generator.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <MASM Include="..\src\squareHash.asm">