From 296e77eebcaa6250e692d71427c24f4708b46e81 Mon Sep 17 00:00:00 2001 From: tevador Date: Sat, 20 Apr 2019 11:08:01 +0200 Subject: [PATCH] C API - first working version --- src/AssemblyGeneratorX86.cpp | 58 +++--- src/AssemblyGeneratorX86.hpp | 2 +- src/Blake2Generator.cpp | 2 +- src/Blake2Generator.hpp | 2 +- src/Cache.cpp | 82 -------- src/Cache.hpp | 52 ----- src/CompiledLightVirtualMachine.cpp | 30 ++- src/CompiledLightVirtualMachine.hpp | 19 +- src/CompiledVirtualMachine.cpp | 44 ++-- src/CompiledVirtualMachine.hpp | 27 +-- src/Instruction.cpp | 2 +- src/Instruction.hpp | 2 +- src/InterpretedLightVirtualMachine.cpp | 47 +++++ src/InterpretedLightVirtualMachine.hpp | 53 +++++ src/InterpretedVirtualMachine.cpp | 184 +++-------------- src/InterpretedVirtualMachine.hpp | 93 ++------- src/JitCompilerX86-static.S | 2 +- src/JitCompilerX86-static.asm | 2 +- src/JitCompilerX86.cpp | 61 +++--- src/JitCompilerX86.hpp | 22 +- src/Program.hpp | 44 +--- src/VirtualMachine.cpp | 127 +++++++----- src/VirtualMachine.hpp | 56 ++--- src/allocator.cpp | 52 +++++ src/allocator.hpp | 37 ++++ src/common.hpp | 41 +--- src/dataset.cpp | 272 +++++++++++++++---------- src/dataset.hpp | 60 ++++-- src/main.cpp | 246 +++++++++------------- src/randomx.cpp | 209 +++++++++++++++++++ src/randomx.h | 130 ++++++++++++ src/superscalarGenerator.cpp | 52 ++++- src/superscalarGenerator.hpp | 6 +- src/superscalar_program.hpp | 70 +++++++ vcxproj/randomx.vcxproj | 9 +- vcxproj/randomx.vcxproj.filters | 27 ++- 36 files changed, 1286 insertions(+), 938 deletions(-) delete mode 100644 src/Cache.cpp delete mode 100644 src/Cache.hpp create mode 100644 src/InterpretedLightVirtualMachine.cpp create mode 100644 src/InterpretedLightVirtualMachine.hpp create mode 100644 src/allocator.cpp create mode 100644 src/allocator.hpp create mode 100644 src/randomx.cpp create mode 100644 src/randomx.h create mode 100644 src/superscalar_program.hpp diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp 
index 0c75461..be91988 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -25,7 +25,7 @@ along with RandomX. If not, see. #include "Program.hpp" #include "superscalarGenerator.hpp" -namespace RandomX { +namespace randomx { static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; @@ -69,54 +69,54 @@ namespace RandomX { Instruction& instr = prog(i); switch (instr.opcode) { - case RandomX::SuperscalarInstructionType::ISUB_R: + case SuperscalarInstructionType::ISUB_R: asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_R: + case SuperscalarInstructionType::IXOR_R: asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_RS: + case SuperscalarInstructionType::IADD_RS: asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMUL_R: + case SuperscalarInstructionType::IMUL_R: asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; break; - case RandomX::SuperscalarInstructionType::IROR_C: + case SuperscalarInstructionType::IROR_C: asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C7: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C7: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C8: asmCode << 
"add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "nop" << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C8: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "nop" << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_C9: + case SuperscalarInstructionType::IADD_C9: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "xchg ax, ax ;nop" << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_C9: + case SuperscalarInstructionType::IXOR_C9: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "xchg ax, ax ;nop" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMULH_R: + case SuperscalarInstructionType::IMULH_R: asmCode << "mov rax, " << regR[instr.dst] << std::endl; asmCode << "mul " << regR[instr.src] << std::endl; asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; break; - case RandomX::SuperscalarInstructionType::ISMULH_R: + case SuperscalarInstructionType::ISMULH_R: asmCode << "mov rax, " << regR[instr.dst] << std::endl; asmCode << "imul " << regR[instr.src] << std::endl; asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMUL_RCP: + case SuperscalarInstructionType::IMUL_RCP: asmCode << "mov rax, " << (int64_t)reciprocal(instr.getImm32()) << std::endl; asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl; break; @@ -178,38 +178,38 @@ namespace RandomX { Instruction& instr = prog(i); switch (instr.opcode) { - case RandomX::SuperscalarInstructionType::ISUB_R: + case SuperscalarInstructionType::ISUB_R: asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_R: + case SuperscalarInstructionType::IXOR_R: asmCode << 
regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_RS: + case SuperscalarInstructionType::IADD_RS: asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMUL_R: + case SuperscalarInstructionType::IMUL_R: asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IROR_C: + case SuperscalarInstructionType::IROR_C: asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl; break; - case RandomX::SuperscalarInstructionType::IADD_C7: - case RandomX::SuperscalarInstructionType::IADD_C8: - case RandomX::SuperscalarInstructionType::IADD_C9: + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IXOR_C7: - case RandomX::SuperscalarInstructionType::IXOR_C8: - case RandomX::SuperscalarInstructionType::IXOR_C9: + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMULH_R: + case SuperscalarInstructionType::IMULH_R: asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; break; - case RandomX::SuperscalarInstructionType::ISMULH_R: + case SuperscalarInstructionType::ISMULH_R: asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; break; - case RandomX::SuperscalarInstructionType::IMUL_RCP: + case SuperscalarInstructionType::IMUL_RCP: asmCode << 
regR[instr.dst] << " *= " << (int64_t)reciprocal(instr.getImm32()) << ";" << std::endl; break; default: diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index 4b777e6..417491c 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -24,7 +24,7 @@ along with RandomX. If not, see. #include "common.hpp" #include -namespace RandomX { +namespace randomx { class Program; class SuperscalarProgram; diff --git a/src/Blake2Generator.cpp b/src/Blake2Generator.cpp index 98f0869..2cc0129 100644 --- a/src/Blake2Generator.cpp +++ b/src/Blake2Generator.cpp @@ -23,7 +23,7 @@ along with RandomX. If not, see. #include "Blake2Generator.hpp" #include "common.hpp" -namespace RandomX { +namespace randomx { Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { memset(data, 0, sizeof(data)); diff --git a/src/Blake2Generator.hpp b/src/Blake2Generator.hpp index 24f2fca..fa85548 100644 --- a/src/Blake2Generator.hpp +++ b/src/Blake2Generator.hpp @@ -20,7 +20,7 @@ along with RandomX. If not, see. #pragma once #include -namespace RandomX { +namespace randomx { class Blake2Generator { public: diff --git a/src/Cache.cpp b/src/Cache.cpp deleted file mode 100644 index 2de6023..0000000 --- a/src/Cache.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* -Copyright (c) 2018 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. 
-*/ - -#include -#include "Cache.hpp" -#include "argon2.h" -#include "argon2_core.h" - -namespace RandomX { - - static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); - static_assert(RANDOMX_ARGON_GROWTH % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_GROWTH - invalid value"); - - void argonFill(Cache& cache, const void* seed, size_t seedSize) { - uint32_t memory_blocks, segment_length; - argon2_instance_t instance; - argon2_context context; - - context.out = nullptr; - context.outlen = 0; - context.pwd = CONST_CAST(uint8_t *)seed; - context.pwdlen = (uint32_t)seedSize; - context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; - context.saltlen = (uint32_t)ArgonSaltSize; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = RANDOMX_ARGON_ITERATIONS; - context.m_cost = cache.size / ArgonBlockSize; - context.lanes = RANDOMX_ARGON_LANES; - context.threads = 1; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_DEFAULT_FLAGS; - context.version = ARGON2_VERSION_NUMBER; - - /* 2. Align memory size */ - /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ - memory_blocks = context.m_cost; - - segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); - - instance.version = context.version; - instance.memory = NULL; - instance.passes = context.t_cost; - instance.memory_blocks = memory_blocks; - instance.segment_length = segment_length; - instance.lane_length = segment_length * ARGON2_SYNC_POINTS; - instance.lanes = context.lanes; - instance.threads = context.threads; - instance.type = Argon2_d; - instance.memory = (block*)cache.memory; - - if (instance.threads > instance.lanes) { - instance.threads = instance.lanes; - } - - /* 3. 
Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - argon_initialize(&instance, &context); - - fill_memory_blocks(&instance); - } -} \ No newline at end of file diff --git a/src/Cache.hpp b/src/Cache.hpp deleted file mode 100644 index bfc7ddf..0000000 --- a/src/Cache.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -Copyright (c) 2018 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. -*/ - -#pragma once - -#include -#include -#include "common.hpp" -#include "intrinPortable.h" -#include "virtualMemory.hpp" - -namespace RandomX { - - void argonFill(Cache& cache, const void* seed, size_t seedSize); - - inline uint8_t* allocCache(size_t size, bool largePages) { - if (largePages) { - return (uint8_t*)allocLargePagesMemory(size); - } - else { - void* ptr = _mm_malloc(size, CacheLineSize); - if (ptr == nullptr) - throw std::bad_alloc(); - return (uint8_t*)ptr; - } - } - - inline void deallocCache(Cache cache, bool largePages) { - if (largePages) { - freePagedMemory(cache.memory, cache.size); - } - else { - _mm_free(cache.memory); - } - } -} \ No newline at end of file diff --git a/src/CompiledLightVirtualMachine.cpp b/src/CompiledLightVirtualMachine.cpp index 0d0fa67..9012047 100644 --- a/src/CompiledLightVirtualMachine.cpp +++ b/src/CompiledLightVirtualMachine.cpp @@ -21,27 +21,25 @@ along with RandomX. If not, see. 
#include "common.hpp" #include -namespace RandomX { +namespace randomx { - template - void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { - mem.ds = ds; - datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; - if(superscalar) - compiler.generateSuperScalarHash(programs); + template + void CompiledLightVm::setCache(randomx_cache* cache) { + this->mem.memory = cache->memory; + //datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; + this->compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); //datasetBasePtr = ds.dataset.memory; } - template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - - template - void CompiledLightVirtualMachine::initialize() { - VirtualMachine::initialize(); - compiler.generateProgramLight(program, config); + template + void CompiledLightVm::initialize() { + randomx_vm::initialize(); + this->compiler.generateProgramLight(this->program, this->config); //mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); } - template void CompiledLightVirtualMachine::initialize(); - template void CompiledLightVirtualMachine::initialize(); + template class CompiledLightVm, false>; + template class CompiledLightVm, true>; + template class CompiledLightVm; + template class CompiledLightVm; } \ No newline at end of file diff --git a/src/CompiledLightVirtualMachine.hpp b/src/CompiledLightVirtualMachine.hpp index 1d4b78e..20ebdbc 100644 --- a/src/CompiledLightVirtualMachine.hpp +++ b/src/CompiledLightVirtualMachine.hpp @@ -24,22 +24,27 @@ along with RandomX. If not, see. 
#include "JitCompilerX86.hpp" #include "intrinPortable.h" -namespace RandomX { +namespace randomx { - template - class CompiledLightVirtualMachine : public CompiledVirtualMachine { + template + class CompiledLightVm : public CompiledVm { public: void* operator new(size_t size) { - void* ptr = _mm_malloc(size, 64); + void* ptr = AlignedAllocator::allocMemory(size); if (ptr == nullptr) throw std::bad_alloc(); return ptr; } void operator delete(void* ptr) { - _mm_free(ptr); + AlignedAllocator::freeMemory(ptr, sizeof(CompiledLightVm)); } - CompiledLightVirtualMachine() {} - void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; + void setCache(randomx_cache* cache) override; + void setDataset(randomx_dataset* dataset) override {} void initialize() override; }; + + using CompiledLightVmDefault = CompiledLightVm, true>; + using CompiledLightVmHardAes = CompiledLightVm, false>; + using CompiledLightVmLargePage = CompiledLightVm; + using CompiledLightVmLargePageHardAes = CompiledLightVm; } \ No newline at end of file diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index 14de68e..c0305ff 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -21,34 +21,34 @@ along with RandomX. If not, see. 
#include "common.hpp" #include -namespace RandomX { +namespace randomx { - //static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); - static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile"); + static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters"); + static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile"); - CompiledVirtualMachine::CompiledVirtualMachine() { + + template + void CompiledVm::setDataset(randomx_dataset* dataset) { + this->mem.memory = dataset->memory; + //datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; + //datasetBasePtr = ds.dataset.memory; } - void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { - mem.ds = ds; - datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; - datasetBasePtr = ds.dataset.memory; + template + void CompiledVm::initialize() { + randomx_vm::initialize(); + this->compiler.generateProgram(this->program, this->config); + //mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); } - void CompiledVirtualMachine::initialize() { - VirtualMachine::initialize(); - compiler.generateProgram(program, config); - mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); - } - - void CompiledVirtualMachine::execute() { + template + void CompiledVm::execute() { //executeProgram(reg, mem, scratchpad, InstructionCount); - compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS); -#ifdef TRACEVM - for (int32_t i = InstructionCount - 1; i >= 0; --i) { - std::cout << std::hex << tracepad[i].u64 << std::endl; - } -#endif - + compiler.getProgramFunc()(this->reg, this->mem, this->scratchpad, RANDOMX_PROGRAM_ITERATIONS); } + + template class 
CompiledVm, false>; + template class CompiledVm, true>; + template class CompiledVm; + template class CompiledVm; } \ No newline at end of file diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index a2866ca..8f8b427 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -18,38 +18,39 @@ along with RandomX. If not, see. */ #pragma once -//#define TRACEVM + #include #include "VirtualMachine.hpp" #include "JitCompilerX86.hpp" -#include "intrinPortable.h" -namespace RandomX { +namespace randomx { extern "C" { void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); } - class CompiledVirtualMachine : public VirtualMachine { + template + class CompiledVm : public VmBase { public: void* operator new(size_t size) { - void* ptr = _mm_malloc(size, 64); + void* ptr = AlignedAllocator::allocMemory(size); if (ptr == nullptr) throw std::bad_alloc(); return ptr; } void operator delete(void* ptr) { - _mm_free(ptr); + AlignedAllocator::freeMemory(ptr, sizeof(CompiledVm)); } - CompiledVirtualMachine(); - void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; + void setDataset(randomx_dataset* dataset) override; + void execute() override; void initialize() override; - virtual void execute() override; - void* getProgram() { - return compiler.getCode(); - } protected: JitCompilerX86 compiler; uint8_t* datasetBasePtr; }; -} \ No newline at end of file + + using CompiledVmDefault = CompiledVm, true>; + using CompiledVmHardAes = CompiledVm, false>; + using CompiledVmLargePage = CompiledVm; + using CompiledVmLargePageHardAes = CompiledVm; +} diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 528798d..ac687d0 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -20,7 +20,7 @@ along with RandomX. If not, see. 
#include "Instruction.hpp" #include "common.hpp" -namespace RandomX { +namespace randomx { void Instruction::print(std::ostream& os) const { os << names[opcode] << " "; diff --git a/src/Instruction.hpp b/src/Instruction.hpp index e6b9d2b..89db7e3 100644 --- a/src/Instruction.hpp +++ b/src/Instruction.hpp @@ -23,7 +23,7 @@ along with RandomX. If not, see. #include #include "blake2/endian.h" -namespace RandomX { +namespace randomx { class Instruction; diff --git a/src/InterpretedLightVirtualMachine.cpp b/src/InterpretedLightVirtualMachine.cpp new file mode 100644 index 0000000..76f633f --- /dev/null +++ b/src/InterpretedLightVirtualMachine.cpp @@ -0,0 +1,47 @@ +/* +Copyright (c) 2018 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. 
+*/ + +#include "InterpretedLightVirtualMachine.hpp" +#include "dataset.hpp" + +namespace randomx { + + template + void InterpretedLightVm::setCache(randomx_cache* cache) { + mem.memory = cache->memory; + //datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; + cachePtr = cache; + } + + template + void InterpretedLightVm::datasetRead(uint32_t address, int_reg_t(&r)[8]) { + uint32_t blockNumber = address / CacheLineSize; + int_reg_t rl[8]; + + initDatasetBlock(cachePtr, (uint8_t*)rl, blockNumber); + + for (unsigned q = 0; q < 8; ++q) + r[q] ^= rl[q]; + } + + template class InterpretedLightVm, false>; + template class InterpretedLightVm, true>; + template class InterpretedLightVm; + template class InterpretedLightVm; +} diff --git a/src/InterpretedLightVirtualMachine.hpp b/src/InterpretedLightVirtualMachine.hpp new file mode 100644 index 0000000..ab94a07 --- /dev/null +++ b/src/InterpretedLightVirtualMachine.hpp @@ -0,0 +1,53 @@ +/* +Copyright (c) 2018 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. 
+*/ + +#pragma once + +#include +#include "InterpretedVirtualMachine.hpp" +#include "superscalar_program.hpp" + +namespace randomx { + + template + class InterpretedLightVm : public InterpretedVm { + public: + using VmBase::mem; + void* operator new(size_t size) { + void* ptr = AlignedAllocator::allocMemory(size); + if (ptr == nullptr) + throw std::bad_alloc(); + return ptr; + } + void operator delete(void* ptr) { + AlignedAllocator::freeMemory(ptr, sizeof(InterpretedLightVm)); + } + void setDataset(randomx_dataset* dataset) override { } + void setCache(randomx_cache* cache) override; + protected: + virtual void datasetRead(uint32_t address, int_reg_t(&r)[8]); + private: + randomx_cache* cachePtr; + }; + + using InterpretedLightVmDefault = InterpretedLightVm, true>; + using InterpretedLightVmHardAes = InterpretedLightVm, false>; + using InterpretedLightVmLargePage = InterpretedLightVm; + using InterpretedLightVmLargePageHardAes = InterpretedLightVm; +} diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 2fa1018..259d2ef 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -21,7 +21,6 @@ along with RandomX. If not, see. #define RANDOMX_JUMP #include "InterpretedVirtualMachine.hpp" #include "dataset.hpp" -#include "Cache.hpp" #include #include #include @@ -32,10 +31,6 @@ along with RandomX. If not, see. 
#include #include "intrinPortable.h" #include "reciprocal.h" -#ifdef STATS -#include -#endif -#include "superscalarGenerator.hpp" #ifdef FPUCHECK constexpr bool fpuCheck = true; @@ -43,44 +38,31 @@ constexpr bool fpuCheck = true; constexpr bool fpuCheck = false; #endif -namespace RandomX { +namespace randomx { static int_reg_t Zero = 0; - template - void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { - mem.ds = ds; - readDataset = &datasetReadLight; - datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; - if(superscalar) - precompileSuperscalar(programs); + template + void InterpretedVm::setDataset(randomx_dataset* dataset) { + mem.memory = dataset->memory; } - template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - - template - void InterpretedVirtualMachine::initialize() { - VirtualMachine::initialize(); + template + void InterpretedVm::initialize() { + randomx_vm::initialize(); for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { program(i).src %= RegistersCount; program(i).dst %= RegistersCount; } } - template void InterpretedVirtualMachine::initialize(); - template void InterpretedVirtualMachine::initialize(); - - template - void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { + template + void InterpretedVm::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) { executeBytecode(ic, r, f, e, a); } } - template void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); - template void 
InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); - static void print(int_reg_t r) { std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl; } @@ -110,14 +92,14 @@ namespace RandomX { return std::fpclassify(x) == FP_SUBNORMAL; } - template - FORCE_INLINE void* InterpretedVirtualMachine::getScratchpadAddress(InstructionByteCode& ibc) { + template + FORCE_INLINE void* InterpretedVm::getScratchpadAddress(InstructionByteCode& ibc) { uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; return scratchpad + addr; } - template - FORCE_INLINE __m128d InterpretedVirtualMachine::maskRegisterExponentMantissa(__m128d x) { + template + FORCE_INLINE __m128d InterpretedVm::maskRegisterExponentMantissa(__m128d x) { constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1; const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64)); const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask); @@ -126,8 +108,8 @@ namespace RandomX { return x; } - template - FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { + template + void InterpretedVm::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { auto& ibc = byteCode[ic]; if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); //if(trace) printState(r, f, e, a); @@ -318,8 +300,8 @@ namespace RandomX { #endif } - template - void InterpretedVirtualMachine::execute() { + template + void InterpretedVm::execute() { int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; __m128d f[4]; __m128d e[4]; @@ -380,16 +362,8 @@ namespace RandomX { mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; mem.mx &= CacheLineAlignMask; - if (superscalar) { - executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); - } - else { - Cache& cache = mem.ds.cache; - uint64_t 
datasetLine[CacheLineSize / sizeof(uint64_t)]; - initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); - for (int i = 0; i < RegistersCount; ++i) - r[i] ^= datasetLine[i]; - } + datasetRead(mem.ma, r); + //executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); std::swap(mem.mx, mem.ma); if (trace) { @@ -454,9 +428,6 @@ namespace RandomX { _mm_store_pd(®.e[3].lo, e[3]); } - template void InterpretedVirtualMachine::execute(); - template void InterpretedVirtualMachine::execute(); - static int getConditionRegister(int(®isterUsage)[8]) { int min = INT_MAX; int minIndex; @@ -469,108 +440,14 @@ namespace RandomX { return minIndex; } - constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; - constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; - constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; - constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; - constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; - constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; - constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; - - static uint8_t* getMixBlock(uint64_t registerValue, Cache& cache) { - uint8_t* mixBlock; - if (RANDOMX_ARGON_GROWTH == 0) { - constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); - mixBlock = cache.memory + (registerValue & mask) * CacheLineSize; - } - else { - const uint32_t modulus = cache.size / CacheLineSize; - mixBlock = cache.memory + (registerValue % modulus) * CacheLineSize; - } - return mixBlock; + template + void InterpretedVm::datasetRead(uint32_t address, int_reg_t(&r)[8]) { + uint64_t* datasetLine = (uint64_t*)(mem.memory + address); + for (int i = 0; i < RegistersCount; ++i) + r[i] ^= datasetLine[i]; } - template - void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals) { - 
for (unsigned j = 0; j < prog.getSize(); ++j) { - Instruction& instr = prog(j); - switch (instr.opcode) - { - case RandomX::SuperscalarInstructionType::ISUB_R: - r[instr.dst] -= r[instr.src]; - break; - case RandomX::SuperscalarInstructionType::IXOR_R: - r[instr.dst] ^= r[instr.src]; - break; - case RandomX::SuperscalarInstructionType::IADD_RS: - r[instr.dst] += r[instr.src] << instr.getModShift2(); - break; - case RandomX::SuperscalarInstructionType::IMUL_R: - r[instr.dst] *= r[instr.src]; - break; - case RandomX::SuperscalarInstructionType::IROR_C: - r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); - break; - case RandomX::SuperscalarInstructionType::IADD_C7: - case RandomX::SuperscalarInstructionType::IADD_C8: - case RandomX::SuperscalarInstructionType::IADD_C9: - r[instr.dst] += signExtend2sCompl(instr.getImm32()); - break; - case RandomX::SuperscalarInstructionType::IXOR_C7: - case RandomX::SuperscalarInstructionType::IXOR_C8: - case RandomX::SuperscalarInstructionType::IXOR_C9: - r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); - break; - case RandomX::SuperscalarInstructionType::IMULH_R: - r[instr.dst] = mulh(r[instr.dst], r[instr.src]); - break; - case RandomX::SuperscalarInstructionType::ISMULH_R: - r[instr.dst] = smulh(r[instr.dst], r[instr.src]); - break; - case RandomX::SuperscalarInstructionType::IMUL_RCP: - if(superscalar) - r[instr.dst] *= reciprocals[instr.getImm32()]; - else - r[instr.dst] *= reciprocal(instr.getImm32()); - break; - default: - UNREACHABLE; - } - } - } - - template - void InterpretedVirtualMachine::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) { - int_reg_t rl[8]; - uint8_t* mixBlock; - uint64_t registerValue = blockNumber; - rl[0] = (blockNumber + 1) * superscalarMul0; - rl[1] = rl[0] ^ superscalarAdd1; - rl[2] = rl[0] ^ superscalarAdd2; - rl[3] = rl[0] ^ superscalarAdd3; - rl[4] = rl[0] ^ superscalarAdd4; - rl[5] = rl[0] ^ superscalarAdd5; - rl[6] = rl[0] ^ superscalarAdd6; - rl[7] = rl[0] ^ superscalarAdd7; - 
Cache& cache = mem.ds.cache; - for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - mixBlock = getMixBlock(registerValue, cache); - SuperscalarProgram& prog = superScalarPrograms[i]; - - executeSuperscalar(rl, prog, reciprocals); - - for(unsigned q = 0; q < 8; ++q) - rl[q] ^= load64(mixBlock + 8 * q); - - registerValue = rl[prog.getAddressRegister()]; - } - - for (unsigned q = 0; q < 8; ++q) - r[q] ^= rl[q]; - } - - template + /*template void InterpretedVirtualMachine::precompileSuperscalar(SuperscalarProgram* programs) { memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms)); reciprocals.clear(); @@ -584,12 +461,12 @@ namespace RandomX { } } } - } + }*/ #include "instructionWeights.hpp" - template - void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { + template + void InterpretedVm::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { int registerUsage[8]; for (unsigned i = 0; i < 8; ++i) { registerUsage[i] = -1; @@ -1007,4 +884,9 @@ namespace RandomX { } } } + + template class InterpretedVm, false>; + template class InterpretedVm, true>; + template class InterpretedVm; + template class InterpretedVm; } \ No newline at end of file diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index d7cb340..62d2f25 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -18,20 +18,14 @@ along with RandomX. If not, see. 
*/ #pragma once -//#define STATS + #include #include "VirtualMachine.hpp" #include "Program.hpp" #include "intrinPortable.h" #include -namespace RandomX { - - struct InstructionByteCode; - template class InterpretedVirtualMachine; - - template - using InstructionHandler = void(InterpretedVirtualMachine::*)(Instruction&); +namespace randomx { struct InstructionByteCode { union { @@ -56,83 +50,40 @@ namespace RandomX { constexpr int asedwfagdewsa = sizeof(InstructionByteCode); - template - class InterpretedVirtualMachine : public VirtualMachine { + template + class InterpretedVm : public VmBase { public: + using VmBase::mem; + using VmBase::scratchpad; + using VmBase::program; + using VmBase::config; + using VmBase::reg; void* operator new(size_t size) { - void* ptr = _mm_malloc(size, 64); + void* ptr = AlignedAllocator::allocMemory(size); if (ptr == nullptr) throw std::bad_alloc(); return ptr; } void operator delete(void* ptr) { - _mm_free(ptr); + AlignedAllocator::freeMemory(ptr, sizeof(InterpretedVm)); } - InterpretedVirtualMachine(bool soft) : softAes(soft) {} - ~InterpretedVirtualMachine() {} - void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; - void initialize() override; void execute() override; - static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals); + void setDataset(randomx_dataset* dataset) override; + void initialize() override; + protected: + virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]); private: - static InstructionHandler engine[256]; - DatasetReadFunc readDataset; - bool softAes; - InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; - std::vector reciprocals; - alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; -#ifdef STATS - int count_ADD_64 = 0; - int count_ADD_32 = 0; - int count_SUB_64 = 0; - int count_SUB_32 = 0; - int count_MUL_64 = 0; - int count_MULH_64 = 0; - int count_MUL_32 = 0; - int 
count_IMUL_32 = 0; - int count_IMULH_64 = 0; - int count_DIV_64 = 0; - int count_IDIV_64 = 0; - int count_AND_64 = 0; - int count_AND_32 = 0; - int count_OR_64 = 0; - int count_OR_32 = 0; - int count_XOR_64 = 0; - int count_XOR_32 = 0; - int count_SHL_64 = 0; - int count_SHR_64 = 0; - int count_SAR_64 = 0; - int count_ROL_64 = 0; - int count_ROR_64 = 0; - int count_FADD = 0; - int count_FSUB = 0; - int count_FMUL = 0; - int count_FDIV = 0; - int count_FSQRT = 0; - int count_FPROUND = 0; - int count_JUMP_taken = 0; - int count_JUMP_not_taken = 0; - int count_jump_taken[8] = { 0 }; - int count_jump_not_taken[8] = { 0 }; - int count_max_stack = 0; - int count_retdepth = 0; - int count_retdepth_max = 0; - int count_endstack = 0; - int count_instructions[RANDOMX_PROGRAM_SIZE] = { 0 }; - int count_FADD_nop = 0; - int count_FADD_nop2 = 0; - int count_FSUB_nop = 0; - int count_FSUB_nop2 = 0; - int count_FMUL_nop = 0; - int count_FMUL_nop2 = 0; - int datasetAccess[256] = { 0 }; -#endif void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); - void precompileSuperscalar(SuperscalarProgram*); void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); - void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); void* getScratchpadAddress(InstructionByteCode& ibc); __m128d maskRegisterExponentMantissa(__m128d); + + InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; }; + + using InterpretedVmDefault = InterpretedVm, true>; + using InterpretedVmHardAes = InterpretedVm, false>; + using InterpretedVmLargePage = InterpretedVm; + using InterpretedVmLargePageHardAes = InterpretedVm; } \ No newline at end of file diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S index e78dbe7..233eac6 100644 --- a/src/JitCompilerX86-static.S +++ b/src/JitCompilerX86-static.S @@ -97,7 +97,7 @@ 
DECL(randomx_dataset_init): push r13 push r14 push r15 - ;# cache in rdi + mov rdi, qword ptr [rdi+8] ;# after virtual method table pointer ;# dataset in rsi mov rbp, rdx ;# block index push rcx ;# max. block index diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm index ab29312..3e66398 100644 --- a/src/JitCompilerX86-static.asm +++ b/src/JitCompilerX86-static.asm @@ -100,7 +100,7 @@ randomx_dataset_init PROC push r13 push r14 push r15 - mov rdi, rcx ;# cache + mov rdi, qword ptr [rcx+8] ;# after virtual method table pointer mov rsi, rdx ;# dataset mov rbp, r8 ;# block index push r9 ;# max. block index diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 8c70041..3b423a0 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -28,7 +28,7 @@ along with RandomX. If not, see. #define RANDOMX_JUMP -namespace RandomX { +namespace randomx { #if !defined(_M_X64) && !defined(__x86_64__) JitCompilerX86::JitCompilerX86() { @@ -238,33 +238,29 @@ namespace RandomX { generateProgramEpilogue(prog); } - template void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { if (RANDOMX_CACHE_ACCESSES != 8) throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); if (RANDOMX_ARGON_GROWTH != 0) throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); generateProgramPrologue(prog, pcfg); - if (superscalar) { + //if (superscalar) { emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); emitByte(CALL); emit32(superScalarHashOffset - (codePos + 4)); emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); - } + /*} else { memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); codePos += readDatasetLightSize; emitByte(CALL); emit32(readDatasetLightSubOffset - (codePos + 4)); - } + }*/ generateProgramEpilogue(prog); } - template void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg); - template void 
JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg); - template - void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) { + void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &reciprocalCache) { memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); codePos = superScalarHashOffset + codeSshInitSize; for (unsigned j = 0; j < N; ++j) { @@ -273,7 +269,7 @@ namespace RandomX { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr, i); + generateSuperscalarCode(instr, reciprocalCache); } emit(codeShhLoad, codeSshLoadSize); if (j < N - 1) { @@ -293,7 +289,7 @@ namespace RandomX { } template - void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector &reciprocalCache); void JitCompilerX86::generateDatasetInitCode() { memcpy(code, codeDatasetInit, datasetInitSize); @@ -314,7 +310,12 @@ namespace RandomX { emitByte(0xc0 + pcfg.readReg1); memcpy(code + codePos, codeLoopLoad, loopLoadSize); codePos += loopLoadSize; - generateCode(prog); + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + instr.src %= RegistersCount; + instr.dst %= RegistersCount; + generateCode(instr, i); + } emit(REX_MOV_RR); emitByte(0xc0 + pcfg.readReg2); emit(REX_XOR_EAX); @@ -331,7 +332,6 @@ namespace RandomX { emit32(epilogueOffset - codePos - 4); } - template void JitCompilerX86::generateCode(Instruction& instr, int i) { #ifdef RANDOMX_JUMP instructionOffsets.push_back(codePos); @@ -340,67 +340,66 @@ namespace RandomX { (this->*generator)(instr, i); } - template<> - void JitCompilerX86::generateCode(Instruction& instr, int i) { + void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { switch (instr.opcode) { - case 
RandomX::SuperscalarInstructionType::ISUB_R: + case randomx::SuperscalarInstructionType::ISUB_R: emit(REX_SUB_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); break; - case RandomX::SuperscalarInstructionType::IXOR_R: + case randomx::SuperscalarInstructionType::IXOR_R: emit(REX_XOR_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); break; - case RandomX::SuperscalarInstructionType::IADD_RS: + case randomx::SuperscalarInstructionType::IADD_RS: emit(REX_LEA); emitByte(0x04 + 8 * instr.dst); genSIB(instr.getModShift2(), instr.src, instr.dst); break; - case RandomX::SuperscalarInstructionType::IMUL_R: + case randomx::SuperscalarInstructionType::IMUL_R: emit(REX_IMUL_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); break; - case RandomX::SuperscalarInstructionType::IROR_C: + case randomx::SuperscalarInstructionType::IROR_C: emit(REX_ROT_I8); emitByte(0xc8 + instr.dst); emitByte(instr.getImm32() & 63); break; - case RandomX::SuperscalarInstructionType::IADD_C7: + case randomx::SuperscalarInstructionType::IADD_C7: emit(REX_81); emitByte(0xc0 + instr.dst); emit32(instr.getImm32()); break; - case RandomX::SuperscalarInstructionType::IXOR_C7: + case randomx::SuperscalarInstructionType::IXOR_C7: emit(REX_XOR_RI); emitByte(0xf0 + instr.dst); emit32(instr.getImm32()); break; - case RandomX::SuperscalarInstructionType::IADD_C8: + case randomx::SuperscalarInstructionType::IADD_C8: emit(REX_81); emitByte(0xc0 + instr.dst); emit32(instr.getImm32()); emit(NOP1); break; - case RandomX::SuperscalarInstructionType::IXOR_C8: + case randomx::SuperscalarInstructionType::IXOR_C8: emit(REX_XOR_RI); emitByte(0xf0 + instr.dst); emit32(instr.getImm32()); emit(NOP1); break; - case RandomX::SuperscalarInstructionType::IADD_C9: + case randomx::SuperscalarInstructionType::IADD_C9: emit(REX_81); emitByte(0xc0 + instr.dst); emit32(instr.getImm32()); emit(NOP2); break; - case RandomX::SuperscalarInstructionType::IXOR_C9: + case randomx::SuperscalarInstructionType::IXOR_C9: emit(REX_XOR_RI); 
emitByte(0xf0 + instr.dst); emit32(instr.getImm32()); emit(NOP2); break; - case RandomX::SuperscalarInstructionType::IMULH_R: + case randomx::SuperscalarInstructionType::IMULH_R: emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -408,7 +407,7 @@ namespace RandomX { emit(REX_MOV_R64R); emitByte(0xc2 + 8 * instr.dst); break; - case RandomX::SuperscalarInstructionType::ISMULH_R: + case randomx::SuperscalarInstructionType::ISMULH_R: emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -416,9 +415,9 @@ namespace RandomX { emit(REX_MOV_R64R); emitByte(0xc2 + 8 * instr.dst); break; - case RandomX::SuperscalarInstructionType::IMUL_RCP: + case randomx::SuperscalarInstructionType::IMUL_RCP: emit(MOV_RAX_I); - emit64(reciprocal(instr.getImm32())); + emit64(reciprocalCache[instr.getImm32()]); emit(REX_IMUL_RM); emitByte(0xc0 + 8 * instr.dst); break; @@ -427,8 +426,6 @@ namespace RandomX { } } - template void JitCompilerX86::generateCode(Instruction& instr, int i); - void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { emit(LEA_32); emitByte(0x80 + instr.src + (rax ? 0 : 8)); diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index 9c15ac7..fd02434 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -21,10 +21,11 @@ along with RandomX. If not, see. 
#include "common.hpp" #include "Instruction.hpp" +#include "superscalar_program.hpp" #include #include -namespace RandomX { +namespace randomx { class Program; class ProgramConfiguration; @@ -40,15 +41,14 @@ namespace RandomX { JitCompilerX86(); ~JitCompilerX86(); void generateProgram(Program&, ProgramConfiguration&); - template void generateProgramLight(Program&, ProgramConfiguration&); template - void generateSuperScalarHash(SuperscalarProgram (&programs)[N]); + void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); + void generateDatasetInitCode(); ProgramFunc getProgramFunc() { return (ProgramFunc)code; } DatasetInitFunc getDatasetInitFunc() { - generateDatasetInitCode(); return (DatasetInitFunc)code; } uint8_t* getCode() { @@ -62,18 +62,6 @@ namespace RandomX { uint8_t* code; int32_t codePos; - template - void generateCode(P& prog) { - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; - generateCode

(instr, i); - } - } - - void generateDatasetInitCode(); - void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&); int getConditionRegister(); @@ -84,8 +72,8 @@ namespace RandomX { void handleCondition(Instruction&, int); - template void generateCode(Instruction&, int); + void generateSuperscalarCode(Instruction &, std::vector &); void emitByte(uint8_t val) { code[codePos] = val; diff --git a/src/Program.hpp b/src/Program.hpp index 854a557..c7ba0f9 100644 --- a/src/Program.hpp +++ b/src/Program.hpp @@ -25,7 +25,7 @@ along with RandomX. If not, see. #include "Instruction.hpp" #include "blake2/endian.h" -namespace RandomX { +namespace randomx { struct ProgramConfiguration { uint64_t eMask[2]; @@ -59,46 +59,4 @@ namespace RandomX { }; static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); - - class SuperscalarProgram { - public: - Instruction& operator()(int pc) { - return programBuffer[pc]; - } - friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) { - p.print(os); - return os; - } - uint32_t getSize() { - return size; - } - void setSize(uint32_t val) { - size = val; - } - int getAddressRegister() { - return addrReg; - } - void setAddressRegister(uint32_t val) { - addrReg = val; - } - double ipc; - int codeSize; - int macroOps; - int decodeCycles; - int cpuLatency; - int asicLatency; - int mulCount; - int cpuLatencies[8]; - int asicLatencies[8]; - private: - void print(std::ostream& os) const { - for (unsigned i = 0; i < size; ++i) { - auto instr = programBuffer[i]; - os << instr; - } - } - Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE]; - uint32_t size; - int addrReg; - }; } diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp index 4af0374..05bf79b 100644 --- a/src/VirtualMachine.cpp +++ b/src/VirtualMachine.cpp @@ -24,9 +24,60 @@ along with RandomX. If not, see. 
#include #include #include "intrinPortable.h" +#include "allocator.hpp" -std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { - for (int i = 0; i < RandomX::RegistersCount; ++i) +randomx_vm::~randomx_vm() { + +} + +void randomx_vm::resetRoundingMode() { + initFpu(); +} + +constexpr int mantissaSize = 52; +constexpr int exponentSize = 11; +constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; +constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; +constexpr int exponentBias = 1023; + +static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { + auto exponent = entropy >> 59; //0..31 + auto mantissa = entropy & mantissaMask; + exponent += exponentBias; + exponent &= exponentMask; + exponent <<= mantissaSize; + return exponent | mantissa; +} + +void randomx_vm::initialize() { + store64(&reg.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0))); + store64(&reg.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1))); + store64(&reg.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2))); + store64(&reg.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3))); + store64(&reg.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4))); + store64(&reg.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5))); + store64(&reg.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6))); + store64(&reg.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7))); + mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask; + mem.mx = program.getEntropy(10); + auto addressRegisters = program.getEntropy(12); + config.readReg0 = 0 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg1 = 2 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg2 = 4 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg3 = 6 + (addressRegisters & 1); + //datasetBase = program.getEntropy(13) % datasetRange; + constexpr uint64_t mask22bit = (1ULL << 22) - 1; + constexpr uint64_t maskExp240 = 
ieee_get_exponent_mask<-240>(); + store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); + store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240); +} + +//TODO +std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) { + for (int i = 0; i < randomx::RegistersCount; ++i) os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec; for (int i = 0; i < 4; ++i) os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl @@ -40,66 +91,32 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { return os; } -namespace RandomX { +namespace randomx { - constexpr int mantissaSize = 52; - constexpr int exponentSize = 11; - constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; - constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; - constexpr int exponentBias = 1023; - - static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { - auto exponent = entropy >> 59; //0..31 - auto mantissa = entropy & mantissaMask; - exponent += exponentBias; - exponent &= exponentMask; - exponent <<= mantissaSize; - return exponent | mantissa; + template + VmBase::~VmBase() { + Allocator::freeMemory(scratchpad, ScratchpadSize); } - VirtualMachine::VirtualMachine() { - mem.ds.dataset.memory = nullptr; + template + bool VmBase::allocate() { + scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize); + return scratchpad != nullptr; } - void VirtualMachine::resetRoundingMode() { - initFpu(); + template + void VmBase::generate(void* seed, void* buffer, size_t bufferSize) { + fillAes1Rx4(seed, bufferSize, buffer); } - void VirtualMachine::initialize() { - store64(&reg.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0))); - store64(&reg.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1))); - store64(&reg.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2))); - store64(&reg.a[1].hi, 
getSmallPositiveFloatBits(program.getEntropy(3))); - store64(&reg.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4))); - store64(&reg.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5))); - store64(&reg.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6))); - store64(&reg.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7))); - mem.ma = program.getEntropy(8) & CacheLineAlignMask; - mem.mx = program.getEntropy(10); - auto addressRegisters = program.getEntropy(12); - config.readReg0 = 0 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg1 = 2 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg2 = 4 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg3 = 6 + (addressRegisters & 1); - datasetBase = program.getEntropy(13) % datasetRange; - constexpr uint64_t mask22bit = (1ULL << 22) - 1; - constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>(); - store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); - store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240); + template + void VmBase::getFinalResult(void* out, size_t outSize) { + hashAes1Rx4(scratchpad, ScratchpadSize, &reg.a); + blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0); } - template - void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash) { - if (scratchpadSize > 0) { - hashAes1Rx4(scratchpad, scratchpadSize, &reg.a); - } - blake2b(outHash, ResultSize, &reg, sizeof(RegisterFile), nullptr, 0); - } - - template void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash); - template void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash); - + template class VmBase, false>; + template class VmBase, true>; + template class VmBase; + template class VmBase; } \ No newline at end of file diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp index b8382f6..15aa6b9 100644 --- a/src/VirtualMachine.hpp +++ 
b/src/VirtualMachine.hpp @@ -18,38 +18,40 @@ along with RandomX. If not, see. */ #pragma once + #include #include "common.hpp" +#include "dataset.hpp" #include "Program.hpp" -namespace RandomX { +/* Global namespace for C binding */ +struct randomx_vm { + virtual ~randomx_vm() = 0; + virtual bool allocate() = 0; + virtual void generate(void* seed, void* buffer, size_t bufferSize) = 0; + void resetRoundingMode(); + virtual void initialize(); + virtual void execute() = 0; + virtual void getFinalResult(void* out, size_t outSize) = 0; + virtual void setDataset(randomx_dataset* dataset) { } + virtual void setCache(randomx_cache* cache) { } - class VirtualMachine { + alignas(64) randomx::Program program; + alignas(64) randomx::RegisterFile reg; + alignas(16) randomx::ProgramConfiguration config; + randomx::MemoryRegisters mem; + uint8_t* scratchpad; +}; + +namespace randomx { + + template + class VmBase : public randomx_vm { public: - VirtualMachine(); - virtual ~VirtualMachine() {} - virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; - void setScratchpad(void* ptr) { - scratchpad = (uint8_t*)ptr; - } - void resetRoundingMode(); - virtual void initialize(); - virtual void execute() = 0; - template - void getResult(void* scratchpad, size_t scratchpadSize, void* outHash); - const RegisterFile& getRegisterFile() { - return reg; - } - Program* getProgramBuffer() { - return &program; - } - protected: - alignas(64) Program program; - alignas(64) RegisterFile reg; - alignas(16) ProgramConfiguration config; - MemoryRegisters mem; - uint8_t* scratchpad; - uint32_t datasetRange; - uint32_t datasetBase; + ~VmBase() override; + bool allocate() override; + void generate(void* seed, void* buffer, size_t bufferSize) override; + void getFinalResult(void* out, size_t outSize) override; }; + } \ No newline at end of file diff --git a/src/allocator.cpp b/src/allocator.cpp new file mode 100644 index 0000000..f5a9a06 --- 
/dev/null +++ b/src/allocator.cpp @@ -0,0 +1,52 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#pragma once + +#include "allocator.hpp" +#include "virtualMemory.hpp" +#include "intrinPortable.h" +#include "common.hpp" + +namespace randomx { + + template + void* AlignedAllocator::allocMemory(size_t count) { + return _mm_malloc(count, alignment); + } + + template + void AlignedAllocator::freeMemory(void* ptr, size_t count) { + _mm_free(ptr); + } + + template void* AlignedAllocator::allocMemory(size_t count); + template void AlignedAllocator::freeMemory(void* ptr, size_t count); + template void* AlignedAllocator::allocMemory(size_t count); + template void AlignedAllocator::freeMemory(void* ptr, size_t count); + + void* LargePageAllocator::allocMemory(size_t count) { + return allocLargePagesMemory(count); + } + + void LargePageAllocator::freeMemory(void* ptr, size_t count) { + freePagedMemory(ptr, count); + }; + +} \ No newline at end of file diff --git a/src/allocator.hpp b/src/allocator.hpp new file mode 100644 index 0000000..8d7402c --- /dev/null +++ b/src/allocator.hpp @@ -0,0 +1,37 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. 
+ +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#pragma once + +#include + +namespace randomx { + + template + struct AlignedAllocator { + static void* allocMemory(size_t); + static void freeMemory(void*, size_t); + }; + + struct LargePageAllocator { + static void* allocMemory(size_t); + static void freeMemory(void*, size_t); + }; + +} \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index ade8abc..4de76b2 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -23,8 +23,9 @@ along with RandomX. If not, see. 
#include #include "blake2/endian.h" #include "configuration.h" +#include "randomx.h" -namespace RandomX { +namespace randomx { static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); @@ -58,6 +59,7 @@ namespace RandomX { constexpr int ArgonBlockSize = 1024; constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; constexpr int CacheLineSize = 64; + constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024; constexpr int CacheBlockCount = CacheSize / CacheLineSize; @@ -98,39 +100,9 @@ namespace RandomX { constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register constexpr int RegisterNeedsSib = 4; //x86 r12 register - struct Cache { - uint8_t* memory; - uint64_t size; - }; - - struct Dataset : public Cache { - }; - - class ILightClientAsyncWorker { - public: - virtual ~ILightClientAsyncWorker() {} - virtual void prepareBlock(addr_t) = 0; - virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0; - virtual const uint64_t* getBlock(addr_t) = 0; - virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0; - virtual void sync() = 0; - const Cache& getCache() { - return cache; - } - protected: - ILightClientAsyncWorker(const Cache& c) : cache(c) {} - const Cache& cache; - }; - - union dataset_t { - Dataset dataset; - Cache cache; - ILightClientAsyncWorker* asyncWorker; - }; - struct MemoryRegisters { addr_t mx, ma; - dataset_t ds; + uint8_t* memory = nullptr; }; struct RegisterFile { @@ -141,9 +113,8 @@ namespace RandomX { }; typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(&reg)[RegistersCount]); - typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* 
scratchpad */, uint64_t); - typedef void(*DatasetInitFunc)(uint8_t* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); + typedef void(*DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); } -std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf); +std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf); diff --git a/src/dataset.cpp b/src/dataset.cpp index 40e72b1..e632547 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -22,14 +22,17 @@ along with RandomX. If not, see. #include #include #include +#include #include "common.hpp" #include "dataset.hpp" -#include "Cache.hpp" #include "virtualMemory.hpp" -#include "softAes.h" -#include "squareHash.h" +#include "superscalarGenerator.hpp" +#include "Blake2Generator.hpp" +#include "reciprocal.h" #include "blake2/endian.h" +#include "argon2.h" +#include "argon2_core.h" #if defined(__SSE2__) #include @@ -38,113 +41,174 @@ along with RandomX. If not, see. 
#define PREFETCH(memory) #endif -namespace RandomX { +randomx_dataset::~randomx_dataset() { -#if true //RANDOMX_ARGON_GROWTH != 0 || (!defined(_M_X64) && !defined(__x86_64__)) - static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) { +} + +static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); + +void randomx_cache::initialize(const void *seed, size_t seedSize) { + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + argon2_context context; + + context.out = nullptr; + context.outlen = 0; + context.pwd = CONST_CAST(uint8_t *)seed; + context.pwdlen = (uint32_t)seedSize; + context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; + context.saltlen = (uint32_t)randomx::ArgonSaltSize; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = RANDOMX_ARGON_ITERATIONS; + context.m_cost = RANDOMX_ARGON_MEMORY; + context.lanes = RANDOMX_ARGON_LANES; + context.threads = 1; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + context.version = ARGON2_VERSION_NUMBER; + + /* 2. Align memory size */ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + memory_blocks = context.m_cost; + + segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); + + instance.version = context.version; + instance.memory = NULL; + instance.passes = context.t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context.lanes; + instance.threads = context.threads; + instance.type = Argon2_d; + instance.memory = (block*)memory; + + if (instance.threads > instance.lanes) { + instance.threads = instance.lanes; + } + + /* 3. 
Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + argon_initialize(&instance, &context); + + fill_memory_blocks(&instance); + + reciprocalCache.clear(); + randomx::Blake2Generator gen(seed, 1000); + for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { + randomx::generateSuperscalar(programs[i], gen); + for (unsigned j = 0; j < programs[i].getSize(); ++j) { + auto& instr = programs[i](j); + if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) { + auto rcp = reciprocal(instr.getImm32()); + instr.setImm32(reciprocalCache.size()); + reciprocalCache.push_back(rcp); + } + } + } +} + +namespace randomx { + + template + bool Dataset::allocate() { + memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE); + return true; + } + + template + Dataset::~Dataset() { + Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE); + } + + template + bool Cache::allocate() { + memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); + return true; + } + + template + Cache::~Cache() { + Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); + } + + template + DatasetInitFunc Cache::getInitFunc() { + return &initDataset; + } + + template + DatasetInitFunc CacheWithJit::getInitFunc() { + return jit.getDatasetInitFunc(); + } + + template + void CacheWithJit::initialize(const void *seed, size_t seedSize) { + randomx_cache::initialize(seed, seedSize); + jit.generateSuperscalarHash(programs, reciprocalCache); + jit.generateDatasetInitCode(); + } + + template class Dataset>; + template class Dataset; + template class Cache>; + template class Cache; + template class CacheWithJit>; + template class CacheWithJit; + + constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; + constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; + constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; + constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; + constexpr uint64_t superscalarAdd4 = 
5281919268842080866ULL; + constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; + constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; + constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; + + static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) { + constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); + return memory + (registerValue & mask) * CacheLineSize; + } + + void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber) { + int_reg_t rl[8]; uint8_t* mixBlock; - if (RANDOMX_ARGON_GROWTH == 0) { - constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); - mixBlock = cache.memory + (currentIndex & mask) * CacheLineSize; - } - else { - const uint32_t modulus = cache.size / CacheLineSize; - mixBlock = cache.memory + (currentIndex % modulus) * CacheLineSize; - } - PREFETCHNTA(mixBlock); - nextIndex = squareHash(currentIndex + nextIndex); - return mixBlock; - } + uint64_t registerValue = blockNumber; + rl[0] = (blockNumber + 1) * superscalarMul0; + rl[1] = rl[0] ^ superscalarAdd1; + rl[2] = rl[0] ^ superscalarAdd2; + rl[3] = rl[0] ^ superscalarAdd3; + rl[4] = rl[0] ^ superscalarAdd4; + rl[5] = rl[0] ^ superscalarAdd5; + rl[6] = rl[0] ^ superscalarAdd6; + rl[7] = rl[0] ^ superscalarAdd7; + for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { + mixBlock = getMixBlock(registerValue, cache->memory); + SuperscalarProgram& prog = cache->programs[i]; - static FORCE_INLINE void mixCache(uint8_t* mixBlock, uint64_t& c0, uint64_t& c1, uint64_t& c2, uint64_t& c3, uint64_t& c4, uint64_t& c5, uint64_t& c6, uint64_t& c7) { - c0 ^= load64(mixBlock + 0); - c1 ^= load64(mixBlock + 8); - c2 ^= load64(mixBlock + 16); - c3 ^= load64(mixBlock + 24); - c4 ^= load64(mixBlock + 32); - c5 ^= load64(mixBlock + 40); - c6 ^= load64(mixBlock + 48); - c7 ^= load64(mixBlock + 56); - } + executeSuperscalar(rl, prog, &cache->reciprocalCache); - void initBlock(const 
Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations) { - uint64_t c0, c1, c2, c3, c4, c5, c6, c7; + for (unsigned q = 0; q < 8; ++q) + rl[q] ^= load64(mixBlock + 8 * q); - c0 = blockNumber; - c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0; - - uint8_t* mixBlock; - - for (auto i = 0; i < iterations; ++i) { - mixBlock = selectMixBlock(cache, c0, c1); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c1, c2); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c2, c3); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c3, c4); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c4, c5); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c5, c6); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c6, c7); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); - - mixBlock = selectMixBlock(cache, c7, c0); - mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7); + registerValue = rl[prog.getAddressRegister()]; } - store64(out + 0, c0); - store64(out + 8, c1); - store64(out + 16, c2); - store64(out + 24, c3); - store64(out + 32, c4); - store64(out + 40, c5); - store64(out + 48, c6); - store64(out + 56, c7); - } -#endif - - void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) { - uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset.memory + memory.ma); - memory.mx ^= addr; - memory.mx &= -64; //align to cache line - std::swap(memory.mx, memory.ma); - PREFETCHNTA(memory.ds.dataset.memory + memory.ma); - for (int i = 0; i < RegistersCount; ++i) - reg.r[i] ^= datasetLine[i]; + memcpy(out, &rl, CacheLineSize); } - void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t (®)[RegistersCount]) { - memory.mx ^= addr; - memory.mx &= CacheLineAlignMask; //align to cache line - Cache& cache = memory.ds.cache; 
- uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; - initBlock(cache, (uint8_t*)datasetLine, memory.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); - for (int i = 0; i < RegistersCount; ++i) - reg[i] ^= datasetLine[i]; - std::swap(memory.mx, memory.ma); + void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock) { + for (uint32_t blockNumber = startBlock; blockNumber < endBlock; ++blockNumber, dataset += CacheLineSize) + initDatasetBlock(cache, dataset, blockNumber); } - - void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]) { - ILightClientAsyncWorker* aw = memory.ds.asyncWorker; - const uint64_t* datasetLine = aw->getBlock(memory.ma); - for (int i = 0; i < RegistersCount; ++i) - reg[i] ^= datasetLine[i]; - memory.mx ^= addr; - memory.mx &= CacheLineAlignMask; //align to cache line - std::swap(memory.mx, memory.ma); - aw->prepareBlock(memory.ma); - } - - void datasetAlloc(dataset_t& ds, bool largePages) { + + /*void datasetAlloc(dataset_t& ds, bool largePages) { if (std::numeric_limits::max() < RANDOMX_DATASET_SIZE) throw std::runtime_error("Platform doesn't support enough memory for the dataset"); if (largePages) { @@ -158,14 +222,8 @@ namespace RandomX { } } - void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount) { - for (uint64_t i = startBlock; i < startBlock + blockCount; ++i) { - initBlock(cache, ds.memory + i * CacheLineSize, i, RANDOMX_CACHE_ACCESSES / 8); - } - } - void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) { ds.cache.memory = allocCache(ds.cache.size, largePages); argonFill(ds.cache, seed, SeedSize); - } + }*/ } diff --git a/src/dataset.hpp b/src/dataset.hpp index fcc863b..12c2021 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -20,26 +20,62 @@ along with RandomX. If not, see. 
#pragma once #include +#include #include "intrinPortable.h" #include "common.hpp" +#include "randomx.h" +#include "Program.hpp" +#include "superscalar_program.hpp" +#include "JitCompilerX86.hpp" +#include "allocator.hpp" -namespace RandomX { +struct randomx_dataset { + virtual ~randomx_dataset() = 0; + virtual bool allocate() = 0; + uint8_t* memory = nullptr; +}; -#if false //RANDOMX_ARGON_GROWTH == 0 && (defined(_M_X64) || defined(__x86_64__)) - extern "C" -#endif - void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations); +struct randomx_cache : public randomx_dataset { + virtual randomx::DatasetInitFunc getInitFunc() = 0; + virtual void initialize(const void *seed, size_t seedSize); //argon2 + randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; + std::vector reciprocalCache; +}; - void datasetAlloc(dataset_t& ds, bool largePages); - void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount); - void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&); +namespace randomx { - void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages); + template + struct Dataset : public randomx_dataset { + ~Dataset() override; + bool allocate() override; + }; - void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]); + using DatasetDefault = Dataset>; + using DatasetLargePage = Dataset; - void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]); + template + struct Cache : public randomx_cache { + ~Cache() override; + bool allocate() override; + DatasetInitFunc getInitFunc() override; + }; + + template + struct CacheWithJit : public Cache { + using Cache::programs; + using Cache::reciprocalCache; + void initialize(const void *seed, size_t seedSize) override; + DatasetInitFunc getInitFunc() override; + JitCompilerX86 jit; + }; + + using CacheDefault = Cache>; + using CacheWithJitDefault = CacheWithJit>; + 
using CacheLargePage = Cache; + using CacheWithJitLargePage = CacheWithJit; + + void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber); + void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); } - diff --git a/src/main.cpp b/src/main.cpp index 2b653ae..cbf2a61 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,31 +17,28 @@ You should have received a copy of the GNU General Public License along with RandomX. If not, see. */ //#define TRACE -#include "InterpretedVirtualMachine.hpp" -#include "CompiledVirtualMachine.hpp" -#include "CompiledLightVirtualMachine.hpp" -#include "AssemblyGeneratorX86.hpp" + +//#include "AssemblyGeneratorX86.hpp" #include "Stopwatch.hpp" -#include "blake2/blake2.h" +//#include "blake2/blake2.h" #include "blake2/endian.h" #include #include #include #include #include -#include "Program.hpp" +//#include "Program.hpp" #include +#include #include #include -#include "dataset.hpp" -#include "Cache.hpp" -#include "hashAes1Rx4.hpp" -#include "superscalarGenerator.hpp" -#include "JitCompilerX86.hpp" +//#include "hashAes1Rx4.hpp" +//#include "JitCompilerX86.hpp" +#include "randomx.h" const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; -const uint8_t blockTemplate__[] = { +const uint8_t blockTemplate_[] = { 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, 0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e, 0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca, @@ -131,77 +128,57 @@ void printUsage(const char* executable) { template void generateAsm(uint32_t nonce) { - alignas(16) uint64_t hash[8]; - uint8_t blockTemplate[sizeof(blockTemplate__)]; - memcpy(blockTemplate, 
blockTemplate__, sizeof(blockTemplate)); + /*alignas(16) uint64_t hash[8]; + uint8_t blockTemplate[sizeof(blockTemplate_)]; + memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); store32(blockTemplate + 39, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3]; fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); - RandomX::AssemblyGeneratorX86 asmX86; - RandomX::Program p; + randomx::AssemblyGeneratorX86 asmX86; + randomx::Program p; fillAes1Rx4(hash, sizeof(p), &p); asmX86.generateProgram(p); - asmX86.printCode(std::cout); + asmX86.printCode(std::cout);*/ } template void generateNative(uint32_t nonce) { - alignas(16) uint64_t hash[8]; - uint8_t blockTemplate[sizeof(blockTemplate__)]; - memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); + /*alignas(16) uint64_t hash[8]; + uint8_t blockTemplate[sizeof(blockTemplate_)]; + memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); store32(blockTemplate + 39, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3]; fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); - alignas(16) RandomX::Program prog; + alignas(16) randomx::Program prog; fillAes1Rx4((void*)hash, sizeof(prog), &prog); for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { prog(i).dst %= 8; prog(i).src %= 8; } - std::cout << prog << std::endl; + std::cout << prog << std::endl;*/ } -template -void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, uint8_t* scratchpad) { - alignas(16) uint64_t hash[8]; - uint8_t blockTemplate[sizeof(blockTemplate__)]; - memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); +void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) { + uint64_t hash[RANDOMX_HASH_SIZE / 4]; + uint8_t blockTemplate[sizeof(blockTemplate_)]; + 
memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); void* noncePtr = blockTemplate + 39; auto nonce = atomicNonce.fetch_add(1); while (nonce < noncesCount) { //std::cout << "Thread " << thread << " nonce " << nonce << std::endl; store32(noncePtr, nonce); - blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); - //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt"); - vm->resetRoundingMode(); - vm->setScratchpad(scratchpad); - for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); - vm->initialize(); - vm->execute(); - vm->getResult(nullptr, 0, hash); - } - fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); - vm->initialize(); - vm->execute(); - /*if (RandomX::trace) { - for (int j = 0; j < RandomX::ProgramLength; ++j) { - uint64_t res = *(uint64_t*)(scratchpad + 8 * (RandomX::ProgramLength - 1 - j)); - std::cout << std::hex << std::setw(16) << std::setfill('0') << res << std::endl; - } - }*/ - vm->getResult(scratchpad, RANDOMX_SCRATCHPAD_L3, hash); - //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-after.txt"); + + randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash); + result.xorWith(hash); - if (RandomX::trace) { + /*if (randomx::trace) { std::cout << "Nonce: " << nonce << " "; outputHex(std::cout, (char*)hash, 16); std::cout << std::endl; - } + }*/ nonce = atomicNonce.fetch_add(1); } } @@ -227,16 +204,16 @@ int main(int argc, char** argv) { readOption("--genSuperscalar", argc, argv, genSuperscalar); readOption("--legacy", argc, argv, legacy); - if (genSuperscalar) { - RandomX::SuperscalarProgram p; - RandomX::Blake2Generator gen(seed, programCount); - RandomX::generateSuperscalar(p, gen); - RandomX::AssemblyGeneratorX86 asmX86; + /*if (genSuperscalar) { + randomx::SuperscalarProgram p; + randomx::Blake2Generator 
gen(seed, programCount); + randomx::generateSuperscalar(p, gen); + randomx::AssemblyGeneratorX86 asmX86; asmX86.generateAsm(p); //std::ofstream file("lightProg2.asm"); asmX86.printCode(std::cout); return 0; - } + }*/ if (genAsm) { if (softAes) @@ -264,15 +241,42 @@ int main(int argc, char** argv) { std::atomic atomicNonce(0); AtomicHash result; - std::vector vms; + std::vector vms; std::vector threads; - RandomX::dataset_t dataset; - const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; - const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch); - dataset.cache.size = cacheSize; - RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; + randomx_dataset* dataset; + randomx_cache* cache; + randomx_flags flags = RANDOMX_FLAG_DEFAULT; - std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl; + if (miningMode) { + flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM); + std::cout << "RandomX - full memory mode (2 GiB)" << std::endl; + } else { + std::cout << "RandomX - light memory mode (256 MiB)" << std::endl; + } + + if (jit) { + flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT); + std::cout << "RandomX - JIT compiled mode" << std::endl; + } + else { + std::cout << "RandomX - interpreted mode" << std::endl; + } + + if (softAes) { + std::cout << "RandomX - software AES mode" << std::endl; + } + else { + flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES); + std::cout << "RandomX - hardware AES mode" << std::endl; + } + + if (largePages) { + flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES); + std::cout << "RandomX - large pages mode" << std::endl; + } + else { + std::cout << "RandomX - small pages mode" << std::endl; + } std::cout << "Initializing"; if(miningMode) @@ -281,116 +285,60 @@ int main(int argc, char** argv) { try { Stopwatch sw(true); - RandomX::datasetInitCache(seed, dataset, largePages); - if (RandomX::trace) { + cache = 
randomx_alloc_cache(flags); + randomx_init_cache(cache, seed, sizeof(seed)); + /*if (randomx::trace) { std::cout << "Cache: " << std::endl; outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i)); std::cout << std::endl; - } - if (!legacy) { - RandomX::Blake2Generator gen(seed, programCount); - for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - RandomX::generateSuperscalar(programs[i], gen); - } - } - if (!miningMode) { - std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; - } - else { - auto cache = dataset.cache; - dataset.dataset.size = datasetSize; - RandomX::datasetAlloc(dataset, largePages); - const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize; - if (!legacy) { - RandomX::JitCompilerX86 jit86; - jit86.generateSuperScalarHash(programs); - RandomX::DatasetInitFunc dsfunc = jit86.getDatasetInitFunc(); - if (initThreadCount > 1) { - auto perThread = datasetBlockCount / initThreadCount; - auto remainder = datasetBlockCount % initThreadCount; - uint32_t startBlock = 0; - uint32_t endBlock = 0; - for (int i = 0; i < initThreadCount; ++i) { - auto count = perThread + (i == initThreadCount - 1 ? remainder : 0); - endBlock += count; - threads.push_back(std::thread(dsfunc, cache.memory, dataset.dataset.memory + startBlock * RandomX::CacheLineSize, startBlock, endBlock)); - startBlock += count; - } - for (unsigned i = 0; i < threads.size(); ++i) { - threads[i].join(); - } + }*/ + if (miningMode) { + dataset = randomx_alloc_dataset(flags); + if (initThreadCount > 1) { + auto perThread = RANDOMX_DATASET_BLOCKS / initThreadCount; + auto remainder = RANDOMX_DATASET_BLOCKS % initThreadCount; + uint32_t startBlock = 0; + for (int i = 0; i < initThreadCount; ++i) { + auto count = perThread + (i == initThreadCount - 1 ? 
remainder : 0); + threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startBlock, count)); + startBlock += count; } - else { - dsfunc(cache.memory, dataset.dataset.memory, 0, datasetBlockCount); + for (unsigned i = 0; i < threads.size(); ++i) { + threads[i].join(); } - //dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat"); } else { - if (initThreadCount > 1) { - auto perThread = datasetBlockCount / initThreadCount; - auto remainder = datasetBlockCount % initThreadCount; - for (int i = 0; i < initThreadCount; ++i) { - auto count = perThread + (i == initThreadCount - 1 ? remainder : 0); - threads.push_back(std::thread(&RandomX::datasetInit, std::ref(cache), std::ref(dataset.dataset), i * perThread, count)); - } - for (unsigned i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - } - else { - RandomX::datasetInit(cache, dataset.dataset, 0, datasetBlockCount); - } + randomx_init_dataset(dataset, cache, 0, RANDOMX_DATASET_BLOCKS); } - RandomX::deallocCache(cache, largePages); + //dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat"); + randomx_release_cache(cache); threads.clear(); - std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; } + std::cout << "Memory initialized in " << sw.getElapsed() << " s" << std::endl; std::cout << "Initializing " << threadCount << " virtual machine(s) ..." 
<< std::endl; for (int i = 0; i < threadCount; ++i) { - RandomX::VirtualMachine* vm; - if (miningMode) { - vm = new RandomX::CompiledVirtualMachine(); - } - else { - if (jit && !legacy) - vm = new RandomX::CompiledLightVirtualMachine(); - else if (jit) - vm = new RandomX::CompiledLightVirtualMachine(); - else if (!legacy) - vm = new RandomX::InterpretedVirtualMachine(softAes); - else - vm = new RandomX::InterpretedVirtualMachine(softAes); - } - vm->setDataset(dataset, datasetSize, programs); + randomx_vm *vm = randomx_create_vm(flags); + if (miningMode) + randomx_vm_set_dataset(vm, dataset); + else + randomx_vm_set_cache(vm, cache); vms.push_back(vm); } - uint8_t* scratchpadMem; - if (largePages) { - scratchpadMem = (uint8_t*)allocLargePagesMemory(threadCount * RANDOMX_SCRATCHPAD_L3); - } - else { - scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RANDOMX_SCRATCHPAD_L3, RandomX::CacheLineSize); - } std::cout << "Running benchmark (" << programCount << " nonces) ..." << std::endl; sw.restart(); if (threadCount > 1) { for (unsigned i = 0; i < vms.size(); ++i) { if (softAes) - threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i)); + threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i)); else - threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i)); + threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i)); } for (unsigned i = 0; i < threads.size(); ++i) { threads[i].join(); } } else { - if(softAes) - mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem); - else - mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem); - /*if (miningMode) - std::cout << "Average program size: " << 
((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount / RandomX::ChainLength << std::endl;*/ + mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0); } double elapsed = sw.getElapsed(); std::cout << "Calculated result: "; diff --git a/src/randomx.cpp b/src/randomx.cpp new file mode 100644 index 0000000..87953cf --- /dev/null +++ b/src/randomx.cpp @@ -0,0 +1,209 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. 
+*/ + +#include "randomx.h" +#include "dataset.hpp" +#include "VirtualMachine.hpp" +#include "./InterpretedVirtualMachine.hpp" +#include "./InterpretedLightVirtualMachine.hpp" +#include "./CompiledVirtualMachine.hpp" +#include "./CompiledLightVirtualMachine.hpp" +#include "virtualMemory.hpp" +#include "blake2/blake2.h" + +extern "C" { + + randomx_cache *randomx_alloc_cache(randomx_flags flags) { + randomx_cache *cache; + switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) + { + case RANDOMX_FLAG_DEFAULT: + cache = new randomx::CacheDefault(); + break; + + case RANDOMX_FLAG_JIT: + cache = new randomx::CacheWithJitDefault(); + break; + + case RANDOMX_FLAG_LARGE_PAGES: + cache = new randomx::CacheLargePage(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: + cache = new randomx::CacheWithJitLargePage(); + break; + + default: + UNREACHABLE; + } + + if (!cache->allocate()) { + delete cache; + cache = nullptr; + } + + return cache; + } + + void randomx_init_cache(randomx_cache *cache, const void *seed, size_t seedSize) { + cache->initialize(seed, seedSize); + } + + void randomx_release_cache(randomx_cache* cache) { + delete cache; + } + + randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { + randomx_dataset *dataset; + if (flags & RANDOMX_FLAG_LARGE_PAGES) { + dataset = new randomx::DatasetLargePage(); + } + else { + dataset = new randomx::DatasetDefault(); + } + if (!dataset->allocate()) { + delete dataset; + dataset = nullptr; + } + + return dataset; + } + + void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startBlock, unsigned long blockCount) { + randomx::DatasetInitFunc dsfunc = cache->getInitFunc(); + dsfunc(cache, dataset->memory + startBlock * randomx::CacheLineSize, startBlock, startBlock + blockCount); + } + + void randomx_release_dataset(randomx_dataset *dataset) { + delete dataset; + } + + randomx_vm *randomx_create_vm(randomx_flags flags) { + randomx_vm *vm; + switch (flags & 
(RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES)) { + case RANDOMX_FLAG_DEFAULT: //0 + vm = new randomx::InterpretedLightVmDefault(); + break; + + case RANDOMX_FLAG_FULL_MEM: //1 + vm = new randomx::InterpretedVmDefault(); + break; + + case RANDOMX_FLAG_JIT: //2 + vm = new randomx::CompiledLightVmDefault(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT: //3 + vm = new randomx::CompiledVmDefault(); + break; + + case RANDOMX_FLAG_HARD_AES: //4 + vm = new randomx::InterpretedLightVmHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES: //5 + vm = new randomx::InterpretedVmHardAes(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: //6 + vm = new randomx::CompiledLightVmHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: //7 + vm = new randomx::CompiledVmHardAes(); + break; + + case RANDOMX_FLAG_LARGE_PAGES: //8 + vm = new randomx::InterpretedLightVmLargePage(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES: //9 + vm = new randomx::InterpretedVmLargePage(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: //10 + vm = new randomx::CompiledLightVmLargePage(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: //11 + vm = new randomx::CompiledVmLargePage(); + break; + + case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //12 + vm = new randomx::InterpretedLightVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //13 + vm = new randomx::InterpretedVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //14 + vm = new randomx::CompiledLightVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //15 + vm = new randomx::CompiledVmLargePageHardAes(); + break; + + default: + 
UNREACHABLE; + } + + if (!vm->allocate()) { + delete vm; + vm = nullptr; + } + + return vm; + } + + void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) { + machine->setCache(cache); + } + + void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) { + machine->setDataset(dataset); + } + + void randomx_destroy_vm(randomx_vm *machine) { + delete machine; + } + + void randomx_calculate_hash(randomx_vm *machine, void *input, size_t inputSize, void *output) { + alignas(16) uint64_t hash[8]; + blake2b(hash, sizeof(hash), input, inputSize, nullptr, 0); + machine->generate(&hash, machine->scratchpad, randomx::ScratchpadSize); + //fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, machine->scratchpad); + //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt"); + machine->resetRoundingMode(); + for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { + machine->generate(&hash, &machine->program, sizeof(randomx::Program)); + //fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); + machine->initialize(); + machine->execute(); + blake2b(hash, sizeof(hash), &machine->reg, sizeof(machine->reg), nullptr, 0); + } + machine->generate((void*)hash, &machine->program, sizeof(randomx::Program)); + //fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); + machine->initialize(); + machine->execute(); + machine->getFinalResult(output, 64); + } + +} diff --git a/src/randomx.h b/src/randomx.h new file mode 100644 index 0000000..43f232e --- /dev/null +++ b/src/randomx.h @@ -0,0 +1,130 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#ifndef RANDOMX_H +#define RANDOMX_H + +/* + +Minimal usage example: +---------------------- + +#include "randomx.h" +#include + +int main() { + const char mySeed[] = "RandomX example seed"; + const char myInput[] = "RandomX example input"; + char hash[RANDOMX_HASH_SIZE]; + + randomx_cache *myCache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); + randomx_init_cache(myCache, mySeed, sizeof mySeed); + randomx_vm *myMachine = randomx_create_vm(RANDOMX_FLAG_DEFAULT); + randomx_vm_set_cache(myMachine, myCache); + + randomx_calculate_hash(myMachine, myInput, sizeof myInput, hash); + + randomx_destroy_vm(myMachine); + randomx_release_cache(myCache); + + for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i) + printf("%02x", hash[i]); + + printf("\n"); + + return 0; +} + +Optimized usage example: +------------------------ + +#include "randomx.h" +#include + +int main() { + const char mySeed[] = "RandomX example seed"; + const char myInput[] = "RandomX example input"; + char hash[RANDOMX_HASH_SIZE]; + + randomx_cache *myCache = randomx_alloc_cache(RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES); + randomx_init_cache(myCache, mySeed, sizeof mySeed); + + randomx_dataset *myDataset = randomx_alloc_dataset(RANDOMX_FLAG_LARGE_PAGES); + randomx_init_dataset(myDataset, myCache, 0, RANDOMX_DATASET_BLOCKS); + randomx_release_cache(myCache); + + randomx_vm *myMachine = randomx_create_vm(RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES); + randomx_vm_set_dataset(myMachine, myDataset); + + randomx_calculate_hash(myMachine, myInput, sizeof myInput, hash); + + randomx_destroy_vm(myMachine); + 
randomx_release_dataset(myDataset); + + for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i) + printf("%02x", hash[i]); + + printf("\n"); + + return 0; +} + +*/ + +#include + +#define RANDOMX_HASH_SIZE 32 +#define RANDOMX_DATASET_BLOCKS 33554432UL + +typedef enum { + RANDOMX_FLAG_DEFAULT = 0, + RANDOMX_FLAG_FULL_MEM = 1, + RANDOMX_FLAG_JIT = 2, + RANDOMX_FLAG_HARD_AES = 4, + RANDOMX_FLAG_LARGE_PAGES = 8, +} randomx_flags; + +typedef struct randomx_dataset randomx_dataset; +typedef struct randomx_cache randomx_cache; +typedef struct randomx_vm randomx_vm; + +#if defined(__cplusplus) +extern "C" { +#endif + +randomx_cache *randomx_alloc_cache(randomx_flags flags); +void randomx_init_cache(randomx_cache *cache, const void *seed, size_t seedSize); +void randomx_release_cache(randomx_cache* cache); + +randomx_dataset *randomx_alloc_dataset(randomx_flags flags); +void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startBlock, unsigned long blockCount); +void randomx_release_dataset(randomx_dataset *dataset); + +randomx_vm *randomx_create_vm(randomx_flags flags); +void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); +void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset); +void randomx_destroy_vm(randomx_vm *machine); + +void randomx_calculate_hash(randomx_vm *machine, void *input, size_t inputSize, void *output); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/superscalarGenerator.cpp b/src/superscalarGenerator.cpp index 8184045..98d8658 100644 --- a/src/superscalarGenerator.cpp +++ b/src/superscalarGenerator.cpp @@ -26,8 +26,10 @@ along with RandomX. If not, see. 
#include #include #include "superscalarGenerator.hpp" +#include "intrinPortable.h" +#include "reciprocal.h" -namespace RandomX { +namespace randomx { static bool isMultiplication(int type) { return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; @@ -842,4 +844,52 @@ namespace RandomX { std::cout << std::endl; }*/ } + + void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals) { + for (unsigned j = 0; j < prog.getSize(); ++j) { + Instruction& instr = prog(j); + switch (instr.opcode) + { + case randomx::SuperscalarInstructionType::ISUB_R: + r[instr.dst] -= r[instr.src]; + break; + case randomx::SuperscalarInstructionType::IXOR_R: + r[instr.dst] ^= r[instr.src]; + break; + case randomx::SuperscalarInstructionType::IADD_RS: + r[instr.dst] += r[instr.src] << instr.getModShift2(); + break; + case randomx::SuperscalarInstructionType::IMUL_R: + r[instr.dst] *= r[instr.src]; + break; + case randomx::SuperscalarInstructionType::IROR_C: + r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); + break; + case randomx::SuperscalarInstructionType::IADD_C7: + case randomx::SuperscalarInstructionType::IADD_C8: + case randomx::SuperscalarInstructionType::IADD_C9: + r[instr.dst] += signExtend2sCompl(instr.getImm32()); + break; + case randomx::SuperscalarInstructionType::IXOR_C7: + case randomx::SuperscalarInstructionType::IXOR_C8: + case randomx::SuperscalarInstructionType::IXOR_C9: + r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); + break; + case randomx::SuperscalarInstructionType::IMULH_R: + r[instr.dst] = mulh(r[instr.dst], r[instr.src]); + break; + case randomx::SuperscalarInstructionType::ISMULH_R: + r[instr.dst] = smulh(r[instr.dst], r[instr.src]); + break; + case randomx::SuperscalarInstructionType::IMUL_RCP: + if (reciprocals != nullptr) + r[instr.dst] *= (*reciprocals)[instr.getImm32()]; + else + 
r[instr.dst] *= reciprocal(instr.getImm32()); + break; + default: + UNREACHABLE; + } + } + } } \ No newline at end of file diff --git a/src/superscalarGenerator.hpp b/src/superscalarGenerator.hpp index a64e80d..71d38b1 100644 --- a/src/superscalarGenerator.hpp +++ b/src/superscalarGenerator.hpp @@ -18,10 +18,11 @@ along with RandomX. If not, see. */ #pragma once -#include "Program.hpp" +#include "superscalar_program.hpp" #include "Blake2Generator.hpp" +#include -namespace RandomX { +namespace randomx { // Intel Ivy Bridge reference namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size constexpr int ISUB_R = 0; //1 p015 1 3 (sub) @@ -44,4 +45,5 @@ namespace RandomX { } void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); + void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals = nullptr); } \ No newline at end of file diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp new file mode 100644 index 0000000..d4296ca --- /dev/null +++ b/src/superscalar_program.hpp @@ -0,0 +1,70 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. 
+*/
+
+#pragma once
+
+#include <cstdint> // for uint32_t
+#include "Instruction.hpp"
+#include "configuration.h"
+
+namespace randomx {
+
+	class SuperscalarProgram { // fixed-capacity instruction buffer (RANDOMX_SUPERSCALAR_MAX_SIZE slots) plus public bookkeeping fields
+	public:
+		Instruction& operator()(int pc) { // unchecked access to the instruction stored at index pc
+			return programBuffer[pc];
+		}
+		friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) { // streams the first 'size' instructions via print()
+			p.print(os);
+			return os;
+		}
+		uint32_t getSize() const { // const: only reads 'size'
+			return size;
+		}
+		void setSize(uint32_t val) {
+			size = val;
+		}
+		int getAddressRegister() const { // const: only reads 'addrReg'
+			return addrReg;
+		}
+		void setAddressRegister(uint32_t val) {
+			addrReg = static_cast<int>(val); // addrReg is an int; make the narrowing conversion explicit
+		}
+		double ipc; // this and the public fields below are never read or written by the class itself
+		int codeSize;
+		int macroOps;
+		int decodeCycles;
+		int cpuLatency;
+		int asicLatency;
+		int mulCount;
+		int cpuLatencies[8];
+		int asicLatencies[8];
+	private:
+		void print(std::ostream& os) const { // writes programBuffer[0 .. size-1] to os, in order
+			for (unsigned i = 0; i < size; ++i) {
+				auto instr = programBuffer[i];
+				os << instr;
+			}
+		}
+		Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE];
+		uint32_t size; // count of instructions print() emits; no initializer here, so call setSize() before reading it
+		int addrReg; // no initializer here either; set through setAddressRegister()
+	};
+
+}
\ No newline at end of file
diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj index 60504e9..1ed504b 100644 --- a/vcxproj/randomx.vcxproj +++ b/vcxproj/randomx.vcxproj @@ -124,20 +124,22 @@ + - + + @@ -150,11 +152,11 @@ + - @@ -164,16 +166,19 @@ + + + diff --git a/vcxproj/randomx.vcxproj.filters b/vcxproj/randomx.vcxproj.filters index 30d2109..2e6958a 100644 --- a/vcxproj/randomx.vcxproj.filters +++ b/vcxproj/randomx.vcxproj.filters @@ -27,9 +27,6 @@ Source Files - - Source Files - Source Files @@ -75,6 +72,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + @@ -94,9 +100,6 @@ Header Files - - Header Files - Header Files @@ -163,5 +166,17 @@ Header Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + \ No newline at end of file