mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-21 23:38:54 +00:00
C API - first working version
This commit is contained in:
parent
67046a9f38
commit
296e77eebc
@ -25,7 +25,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "Program.hpp"
|
||||
#include "superscalarGenerator.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
@ -69,54 +69,54 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
switch (instr.opcode)
|
||||
{
|
||||
case RandomX::SuperscalarInstructionType::ISUB_R:
|
||||
case SuperscalarInstructionType::ISUB_R:
|
||||
asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_R:
|
||||
case SuperscalarInstructionType::IXOR_R:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_RS:
|
||||
case SuperscalarInstructionType::IADD_RS:
|
||||
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_R:
|
||||
case SuperscalarInstructionType::IMUL_R:
|
||||
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IROR_C:
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C7:
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C7:
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C8:
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "nop" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C8:
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "nop" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C9:
|
||||
case SuperscalarInstructionType::IADD_C9:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "xchg ax, ax ;nop" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C9:
|
||||
case SuperscalarInstructionType::IXOR_C9:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "xchg ax, ax ;nop" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMULH_R:
|
||||
case SuperscalarInstructionType::IMULH_R:
|
||||
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "mul " << regR[instr.src] << std::endl;
|
||||
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::ISMULH_R:
|
||||
case SuperscalarInstructionType::ISMULH_R:
|
||||
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "imul " << regR[instr.src] << std::endl;
|
||||
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_RCP:
|
||||
case SuperscalarInstructionType::IMUL_RCP:
|
||||
asmCode << "mov rax, " << (int64_t)reciprocal(instr.getImm32()) << std::endl;
|
||||
asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
|
||||
break;
|
||||
@ -178,38 +178,38 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
switch (instr.opcode)
|
||||
{
|
||||
case RandomX::SuperscalarInstructionType::ISUB_R:
|
||||
case SuperscalarInstructionType::ISUB_R:
|
||||
asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_R:
|
||||
case SuperscalarInstructionType::IXOR_R:
|
||||
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_RS:
|
||||
case SuperscalarInstructionType::IADD_RS:
|
||||
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_R:
|
||||
case SuperscalarInstructionType::IMUL_R:
|
||||
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IROR_C:
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C7:
|
||||
case RandomX::SuperscalarInstructionType::IADD_C8:
|
||||
case RandomX::SuperscalarInstructionType::IADD_C9:
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
case SuperscalarInstructionType::IADD_C9:
|
||||
asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C7:
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C8:
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C9:
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
case SuperscalarInstructionType::IXOR_C9:
|
||||
asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMULH_R:
|
||||
case SuperscalarInstructionType::IMULH_R:
|
||||
asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::ISMULH_R:
|
||||
case SuperscalarInstructionType::ISMULH_R:
|
||||
asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_RCP:
|
||||
case SuperscalarInstructionType::IMUL_RCP:
|
||||
asmCode << regR[instr.dst] << " *= " << (int64_t)reciprocal(instr.getImm32()) << ";" << std::endl;
|
||||
break;
|
||||
default:
|
||||
|
@ -24,7 +24,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "common.hpp"
|
||||
#include <sstream>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class SuperscalarProgram;
|
||||
|
@ -23,7 +23,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "Blake2Generator.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) {
|
||||
memset(data, 0, sizeof(data));
|
||||
|
@ -20,7 +20,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
class Blake2Generator {
|
||||
public:
|
||||
|
@ -1,82 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include "Cache.hpp"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
|
||||
static_assert(RANDOMX_ARGON_GROWTH % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_GROWTH - invalid value");
|
||||
|
||||
/*
	Fills cache.memory using the Argon2 routines argon_initialize() and
	fill_memory_blocks().

	cache    - destination buffer; cache.size / ArgonBlockSize gives the memory cost
	seed     - bytes passed to Argon2 as the password
	seedSize - length of seed in bytes
*/
void argonFill(Cache& cache, const void* seed, size_t seedSize) {
	// Hashing inputs: the seed acts as the password, RANDOMX_ARGON_SALT as the salt.
	// No output buffer, secret or associated data is supplied.
	argon2_context context;
	context.out = nullptr;
	context.outlen = 0;
	context.pwd = CONST_CAST(uint8_t *)seed;
	context.pwdlen = (uint32_t)seedSize;
	context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
	context.saltlen = (uint32_t)ArgonSaltSize;
	context.secret = nullptr;
	context.secretlen = 0;
	context.ad = nullptr;
	context.adlen = 0;
	context.t_cost = RANDOMX_ARGON_ITERATIONS;
	context.m_cost = cache.size / ArgonBlockSize;
	context.lanes = RANDOMX_ARGON_LANES;
	context.threads = 1;
	context.allocate_cbk = nullptr;
	context.free_cbk = nullptr;
	context.flags = ARGON2_DEFAULT_FLAGS;
	context.version = ARGON2_VERSION_NUMBER;

	// Memory geometry: the block count is split evenly across lanes and sync points.
	const uint32_t blockCount = context.m_cost;
	const uint32_t blocksPerSegment = blockCount / (context.lanes * ARGON2_SYNC_POINTS);

	// The instance works directly on the caller's buffer.
	argon2_instance_t instance;
	instance.version = context.version;
	instance.passes = context.t_cost;
	instance.memory_blocks = blockCount;
	instance.segment_length = blocksPerSegment;
	instance.lane_length = blocksPerSegment * ARGON2_SYNC_POINTS;
	instance.lanes = context.lanes;
	// Never use more threads than there are lanes.
	instance.threads = (context.threads > context.lanes) ? context.lanes : context.threads;
	instance.type = Argon2_d;
	instance.memory = (block*)cache.memory;

	// Initialize from the context, then run the fill over all blocks.
	argon_initialize(&instance, &context);
	fill_memory_blocks(&instance);
}
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
#include "common.hpp"
|
||||
#include "intrinPortable.h"
|
||||
#include "virtualMemory.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
void argonFill(Cache& cache, const void* seed, size_t seedSize);
|
||||
|
||||
inline uint8_t* allocCache(size_t size, bool largePages) {
|
||||
if (largePages) {
|
||||
return (uint8_t*)allocLargePagesMemory(size);
|
||||
}
|
||||
else {
|
||||
void* ptr = _mm_malloc(size, CacheLineSize);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return (uint8_t*)ptr;
|
||||
}
|
||||
}
|
||||
|
||||
/*
	Releases cache.memory. largePages must match the value that was passed to
	allocCache: paged buffers go to freePagedMemory() together with cache.size,
	all others go to _mm_free().
*/
inline void deallocCache(Cache cache, bool largePages) {
	if (!largePages) {
		_mm_free(cache.memory);
		return;
	}
	freePagedMemory(cache.memory, cache.size);
}
|
||||
}
|
@ -21,27 +21,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "common.hpp"
|
||||
#include <stdexcept>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
template<bool superscalar>
|
||||
void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
if(superscalar)
|
||||
compiler.generateSuperScalarHash(programs);
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
|
||||
this->mem.memory = cache->memory;
|
||||
//datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
this->compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
//datasetBasePtr = ds.dataset.memory;
|
||||
}
|
||||
|
||||
template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
|
||||
template<bool superscalar>
|
||||
void CompiledLightVirtualMachine<superscalar>::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
compiler.generateProgramLight<superscalar>(program, config);
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledLightVm<Allocator, softAes>::initialize() {
|
||||
randomx_vm::initialize();
|
||||
this->compiler.generateProgramLight(this->program, this->config);
|
||||
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
template void CompiledLightVirtualMachine<true>::initialize();
|
||||
template void CompiledLightVirtualMachine<false>::initialize();
|
||||
template class CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class CompiledLightVm<LargePageAllocator, false>;
|
||||
template class CompiledLightVm<LargePageAllocator, true>;
|
||||
}
|
@ -24,22 +24,27 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "JitCompilerX86.hpp"
|
||||
#include "intrinPortable.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
template<bool superscalar>
|
||||
class CompiledLightVirtualMachine : public CompiledVirtualMachine {
|
||||
template<class Allocator, bool softAes>
|
||||
class CompiledLightVm : public CompiledVm<Allocator, softAes> {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, 64);
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledLightVm));
|
||||
}
|
||||
CompiledLightVirtualMachine() {}
|
||||
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void setCache(randomx_cache* cache) override;
|
||||
void setDataset(randomx_dataset* dataset) override {}
|
||||
void initialize() override;
|
||||
};
|
||||
|
||||
// Convenience aliases. The second template argument is `softAes`:
// true selects software AES, false selects hardware AES.
// The LargePage pair follows the same convention as the Default/HardAes pair
// (previously its two boolean arguments were swapped relative to the alias names).
using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
using CompiledLightVmHardAes = CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
using CompiledLightVmLargePage = CompiledLightVm<LargePageAllocator, true>;
using CompiledLightVmLargePageHardAes = CompiledLightVm<LargePageAllocator, false>;
|
||||
}
|
@ -21,34 +21,34 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "common.hpp"
|
||||
#include <stdexcept>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
//static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");
|
||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile");
|
||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
||||
|
||||
CompiledVirtualMachine::CompiledVirtualMachine() {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
|
||||
this->mem.memory = dataset->memory;
|
||||
//datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
//datasetBasePtr = ds.dataset.memory;
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
datasetBasePtr = ds.dataset.memory;
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::initialize() {
|
||||
randomx_vm::initialize();
|
||||
this->compiler.generateProgram(this->program, this->config);
|
||||
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
compiler.generateProgram(program, config);
|
||||
mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::execute() {
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::execute() {
|
||||
//executeProgram(reg, mem, scratchpad, InstructionCount);
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS);
|
||||
#ifdef TRACEVM
|
||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
compiler.getProgramFunc()(this->reg, this->mem, this->scratchpad, RANDOMX_PROGRAM_ITERATIONS);
|
||||
}
|
||||
|
||||
template class CompiledVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class CompiledVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class CompiledVm<LargePageAllocator, false>;
|
||||
template class CompiledVm<LargePageAllocator, true>;
|
||||
}
|
@ -18,38 +18,39 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
//#define TRACEVM
|
||||
|
||||
#include <new>
|
||||
#include "VirtualMachine.hpp"
|
||||
#include "JitCompilerX86.hpp"
|
||||
#include "intrinPortable.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
extern "C" {
|
||||
void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
|
||||
}
|
||||
|
||||
class CompiledVirtualMachine : public VirtualMachine {
|
||||
template<class Allocator, bool softAes>
|
||||
class CompiledVm : public VmBase<Allocator, softAes> {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, 64);
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledVm));
|
||||
}
|
||||
CompiledVirtualMachine();
|
||||
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void setDataset(randomx_dataset* dataset) override;
|
||||
void execute() override;
|
||||
void initialize() override;
|
||||
virtual void execute() override;
|
||||
void* getProgram() {
|
||||
return compiler.getCode();
|
||||
}
|
||||
protected:
|
||||
JitCompilerX86 compiler;
|
||||
uint8_t* datasetBasePtr;
|
||||
};
|
||||
}
|
||||
|
||||
// Convenience aliases. The second template argument is `softAes`:
// true selects software AES, false selects hardware AES.
// The LargePage pair follows the same convention as the Default/HardAes pair
// (previously its two boolean arguments were swapped relative to the alias names).
using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true>;
using CompiledVmHardAes = CompiledVm<AlignedAllocator<CacheLineSize>, false>;
using CompiledVmLargePage = CompiledVm<LargePageAllocator, true>;
using CompiledVmLargePageHardAes = CompiledVm<LargePageAllocator, false>;
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "Instruction.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
void Instruction::print(std::ostream& os) const {
|
||||
os << names[opcode] << " ";
|
||||
|
@ -23,7 +23,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <iostream>
|
||||
#include "blake2/endian.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
class Instruction;
|
||||
|
||||
|
47
src/InterpretedLightVirtualMachine.cpp
Normal file
47
src/InterpretedLightVirtualMachine.cpp
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "InterpretedLightVirtualMachine.hpp"
|
||||
#include "dataset.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
|
||||
mem.memory = cache->memory;
|
||||
//datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
cachePtr = cache;
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedLightVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
|
||||
uint32_t blockNumber = address / CacheLineSize;
|
||||
int_reg_t rl[8];
|
||||
|
||||
initDatasetBlock(cachePtr, (uint8_t*)rl, blockNumber);
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
r[q] ^= rl[q];
|
||||
}
|
||||
|
||||
template class InterpretedLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class InterpretedLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class InterpretedLightVm<LargePageAllocator, false>;
|
||||
template class InterpretedLightVm<LargePageAllocator, true>;
|
||||
}
|
53
src/InterpretedLightVirtualMachine.hpp
Normal file
53
src/InterpretedLightVirtualMachine.hpp
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class InterpretedLightVm : public InterpretedVm<Allocator, softAes> {
|
||||
public:
|
||||
using VmBase<Allocator, softAes>::mem;
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedLightVm));
|
||||
}
|
||||
void setDataset(randomx_dataset* dataset) override { }
|
||||
void setCache(randomx_cache* cache) override;
|
||||
protected:
|
||||
virtual void datasetRead(uint32_t address, int_reg_t(&r)[8]);
|
||||
private:
|
||||
randomx_cache* cachePtr;
|
||||
};
|
||||
|
||||
// Convenience aliases. The second template argument is `softAes`:
// true selects software AES, false selects hardware AES.
// The LargePage pair follows the same convention as the Default/HardAes pair
// (previously its two boolean arguments were swapped relative to the alias names).
using InterpretedLightVmDefault = InterpretedLightVm<AlignedAllocator<CacheLineSize>, true>;
using InterpretedLightVmHardAes = InterpretedLightVm<AlignedAllocator<CacheLineSize>, false>;
using InterpretedLightVmLargePage = InterpretedLightVm<LargePageAllocator, true>;
using InterpretedLightVmLargePageHardAes = InterpretedLightVm<LargePageAllocator, false>;
|
||||
}
|
@ -21,7 +21,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#define RANDOMX_JUMP
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
@ -32,10 +31,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <climits>
|
||||
#include "intrinPortable.h"
|
||||
#include "reciprocal.h"
|
||||
#ifdef STATS
|
||||
#include <algorithm>
|
||||
#endif
|
||||
#include "superscalarGenerator.hpp"
|
||||
|
||||
#ifdef FPUCHECK
|
||||
constexpr bool fpuCheck = true;
|
||||
@ -43,44 +38,31 @@ constexpr bool fpuCheck = true;
|
||||
constexpr bool fpuCheck = false;
|
||||
#endif
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
static int_reg_t Zero = 0;
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetReadLight;
|
||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||
if(superscalar)
|
||||
precompileSuperscalar(programs);
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
|
||||
mem.memory = dataset->memory;
|
||||
}
|
||||
|
||||
template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::initialize() {
|
||||
VirtualMachine::initialize();
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::initialize() {
|
||||
randomx_vm::initialize();
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
program(i).src %= RegistersCount;
|
||||
program(i).dst %= RegistersCount;
|
||||
}
|
||||
}
|
||||
|
||||
template void InterpretedVirtualMachine<true>::initialize();
|
||||
template void InterpretedVirtualMachine<false>::initialize();
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
|
||||
executeBytecode(ic, r, f, e, a);
|
||||
}
|
||||
}
|
||||
|
||||
template void InterpretedVirtualMachine<true>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
template void InterpretedVirtualMachine<false>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
|
||||
static void print(int_reg_t r) {
|
||||
std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
|
||||
}
|
||||
@ -110,14 +92,14 @@ namespace RandomX {
|
||||
return std::fpclassify(x) == FP_SUBNORMAL;
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
FORCE_INLINE void* InterpretedVirtualMachine<superscalar>::getScratchpadAddress(InstructionByteCode& ibc) {
|
||||
template<class Allocator, bool softAes>
|
||||
FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
|
||||
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
||||
return scratchpad + addr;
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
FORCE_INLINE __m128d InterpretedVirtualMachine<superscalar>::maskRegisterExponentMantissa(__m128d x) {
|
||||
template<class Allocator, bool softAes>
|
||||
FORCE_INLINE __m128d InterpretedVm<Allocator, softAes>::maskRegisterExponentMantissa(__m128d x) {
|
||||
constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
|
||||
const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
|
||||
const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask);
|
||||
@ -126,8 +108,8 @@ namespace RandomX {
|
||||
return x;
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[ic];
|
||||
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||
//if(trace) printState(r, f, e, a);
|
||||
@ -318,8 +300,8 @@ namespace RandomX {
|
||||
#endif
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::execute() {
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::execute() {
|
||||
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
__m128d f[4];
|
||||
__m128d e[4];
|
||||
@ -380,16 +362,8 @@ namespace RandomX {
|
||||
|
||||
mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
|
||||
mem.mx &= CacheLineAlignMask;
|
||||
if (superscalar) {
|
||||
executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
|
||||
}
|
||||
else {
|
||||
Cache& cache = mem.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
}
|
||||
datasetRead(mem.ma, r);
|
||||
//executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
||||
if (trace) {
|
||||
@ -454,9 +428,6 @@ namespace RandomX {
|
||||
_mm_store_pd(®.e[3].lo, e[3]);
|
||||
}
|
||||
|
||||
template void InterpretedVirtualMachine<true>::execute();
|
||||
template void InterpretedVirtualMachine<false>::execute();
|
||||
|
||||
static int getConditionRegister(int(®isterUsage)[8]) {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
@ -469,108 +440,14 @@ namespace RandomX {
|
||||
return minIndex;
|
||||
}
|
||||
|
||||
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||
constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL;
|
||||
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
|
||||
constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
|
||||
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
|
||||
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
|
||||
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
|
||||
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
|
||||
|
||||
static uint8_t* getMixBlock(uint64_t registerValue, Cache& cache) {
|
||||
uint8_t* mixBlock;
|
||||
if (RANDOMX_ARGON_GROWTH == 0) {
|
||||
constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
|
||||
mixBlock = cache.memory + (registerValue & mask) * CacheLineSize;
|
||||
}
|
||||
else {
|
||||
const uint32_t modulus = cache.size / CacheLineSize;
|
||||
mixBlock = cache.memory + (registerValue % modulus) * CacheLineSize;
|
||||
}
|
||||
return mixBlock;
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
|
||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals) {
|
||||
for (unsigned j = 0; j < prog.getSize(); ++j) {
|
||||
Instruction& instr = prog(j);
|
||||
switch (instr.opcode)
|
||||
{
|
||||
case RandomX::SuperscalarInstructionType::ISUB_R:
|
||||
r[instr.dst] -= r[instr.src];
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_R:
|
||||
r[instr.dst] ^= r[instr.src];
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_RS:
|
||||
r[instr.dst] += r[instr.src] << instr.getModShift2();
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_R:
|
||||
r[instr.dst] *= r[instr.src];
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IROR_C:
|
||||
r[instr.dst] = rotr(r[instr.dst], instr.getImm32());
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C7:
|
||||
case RandomX::SuperscalarInstructionType::IADD_C8:
|
||||
case RandomX::SuperscalarInstructionType::IADD_C9:
|
||||
r[instr.dst] += signExtend2sCompl(instr.getImm32());
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C7:
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C8:
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C9:
|
||||
r[instr.dst] ^= signExtend2sCompl(instr.getImm32());
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMULH_R:
|
||||
r[instr.dst] = mulh(r[instr.dst], r[instr.src]);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::ISMULH_R:
|
||||
r[instr.dst] = smulh(r[instr.dst], r[instr.src]);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_RCP:
|
||||
if(superscalar)
|
||||
r[instr.dst] *= reciprocals[instr.getImm32()];
|
||||
else
|
||||
r[instr.dst] *= reciprocal(instr.getImm32());
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) {
|
||||
int_reg_t rl[8];
|
||||
uint8_t* mixBlock;
|
||||
uint64_t registerValue = blockNumber;
|
||||
rl[0] = (blockNumber + 1) * superscalarMul0;
|
||||
rl[1] = rl[0] ^ superscalarAdd1;
|
||||
rl[2] = rl[0] ^ superscalarAdd2;
|
||||
rl[3] = rl[0] ^ superscalarAdd3;
|
||||
rl[4] = rl[0] ^ superscalarAdd4;
|
||||
rl[5] = rl[0] ^ superscalarAdd5;
|
||||
rl[6] = rl[0] ^ superscalarAdd6;
|
||||
rl[7] = rl[0] ^ superscalarAdd7;
|
||||
Cache& cache = mem.ds.cache;
|
||||
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
mixBlock = getMixBlock(registerValue, cache);
|
||||
SuperscalarProgram& prog = superScalarPrograms[i];
|
||||
|
||||
executeSuperscalar(rl, prog, reciprocals);
|
||||
|
||||
for(unsigned q = 0; q < 8; ++q)
|
||||
rl[q] ^= load64(mixBlock + 8 * q);
|
||||
|
||||
registerValue = rl[prog.getAddressRegister()];
|
||||
}
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
r[q] ^= rl[q];
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
/*template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(SuperscalarProgram* programs) {
|
||||
memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
|
||||
reciprocals.clear();
|
||||
@ -584,12 +461,12 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
#include "instructionWeights.hpp"
|
||||
|
||||
template<bool superscalar>
|
||||
void InterpretedVirtualMachine<superscalar>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
int registerUsage[8];
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
@ -1007,4 +884,9 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class InterpretedVm<LargePageAllocator, false>;
|
||||
template class InterpretedVm<LargePageAllocator, true>;
|
||||
}
|
@ -18,20 +18,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
//#define STATS
|
||||
|
||||
#include <new>
|
||||
#include "VirtualMachine.hpp"
|
||||
#include "Program.hpp"
|
||||
#include "intrinPortable.h"
|
||||
#include <vector>
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
struct InstructionByteCode;
|
||||
template<bool superscalar> class InterpretedVirtualMachine;
|
||||
|
||||
template<bool superscalar>
|
||||
using InstructionHandler = void(InterpretedVirtualMachine<superscalar>::*)(Instruction&);
|
||||
namespace randomx {
|
||||
|
||||
struct InstructionByteCode {
|
||||
union {
|
||||
@ -56,83 +50,40 @@ namespace RandomX {
|
||||
|
||||
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
|
||||
|
||||
template<bool superscalar>
|
||||
class InterpretedVirtualMachine : public VirtualMachine {
|
||||
template<class Allocator, bool softAes>
|
||||
class InterpretedVm : public VmBase<Allocator, softAes> {
|
||||
public:
|
||||
using VmBase<Allocator, softAes>::mem;
|
||||
using VmBase<Allocator, softAes>::scratchpad;
|
||||
using VmBase<Allocator, softAes>::program;
|
||||
using VmBase<Allocator, softAes>::config;
|
||||
using VmBase<Allocator, softAes>::reg;
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, 64);
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedVm));
|
||||
}
|
||||
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
|
||||
~InterpretedVirtualMachine() {}
|
||||
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||
void initialize() override;
|
||||
void execute() override;
|
||||
static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals);
|
||||
void setDataset(randomx_dataset* dataset) override;
|
||||
void initialize() override;
|
||||
protected:
|
||||
virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]);
|
||||
private:
|
||||
static InstructionHandler<superscalar> engine[256];
|
||||
DatasetReadFunc readDataset;
|
||||
bool softAes;
|
||||
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
|
||||
std::vector<uint64_t> reciprocals;
|
||||
alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
|
||||
#ifdef STATS
|
||||
int count_ADD_64 = 0;
|
||||
int count_ADD_32 = 0;
|
||||
int count_SUB_64 = 0;
|
||||
int count_SUB_32 = 0;
|
||||
int count_MUL_64 = 0;
|
||||
int count_MULH_64 = 0;
|
||||
int count_MUL_32 = 0;
|
||||
int count_IMUL_32 = 0;
|
||||
int count_IMULH_64 = 0;
|
||||
int count_DIV_64 = 0;
|
||||
int count_IDIV_64 = 0;
|
||||
int count_AND_64 = 0;
|
||||
int count_AND_32 = 0;
|
||||
int count_OR_64 = 0;
|
||||
int count_OR_32 = 0;
|
||||
int count_XOR_64 = 0;
|
||||
int count_XOR_32 = 0;
|
||||
int count_SHL_64 = 0;
|
||||
int count_SHR_64 = 0;
|
||||
int count_SAR_64 = 0;
|
||||
int count_ROL_64 = 0;
|
||||
int count_ROR_64 = 0;
|
||||
int count_FADD = 0;
|
||||
int count_FSUB = 0;
|
||||
int count_FMUL = 0;
|
||||
int count_FDIV = 0;
|
||||
int count_FSQRT = 0;
|
||||
int count_FPROUND = 0;
|
||||
int count_JUMP_taken = 0;
|
||||
int count_JUMP_not_taken = 0;
|
||||
int count_jump_taken[8] = { 0 };
|
||||
int count_jump_not_taken[8] = { 0 };
|
||||
int count_max_stack = 0;
|
||||
int count_retdepth = 0;
|
||||
int count_retdepth_max = 0;
|
||||
int count_endstack = 0;
|
||||
int count_instructions[RANDOMX_PROGRAM_SIZE] = { 0 };
|
||||
int count_FADD_nop = 0;
|
||||
int count_FADD_nop2 = 0;
|
||||
int count_FSUB_nop = 0;
|
||||
int count_FSUB_nop2 = 0;
|
||||
int count_FMUL_nop = 0;
|
||||
int count_FMUL_nop2 = 0;
|
||||
int datasetAccess[256] = { 0 };
|
||||
#endif
|
||||
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void precompileSuperscalar(SuperscalarProgram*);
|
||||
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
|
||||
void* getScratchpadAddress(InstructionByteCode& ibc);
|
||||
__m128d maskRegisterExponentMantissa(__m128d);
|
||||
|
||||
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
|
||||
};
|
||||
|
||||
// Convenience aliases for the four InterpretedVm configurations.
// The second template argument is `softAes`: the aliases without a "HardAes"
// suffix pass true, the "HardAes" aliases pass false. The large-page pair
// previously had these two values swapped relative to their names.
using InterpretedVmDefault = InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
using InterpretedVmHardAes = InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
using InterpretedVmLargePage = InterpretedVm<LargePageAllocator, true>;
using InterpretedVmLargePageHardAes = InterpretedVm<LargePageAllocator, false>;
|
||||
}
|
@ -97,7 +97,7 @@ DECL(randomx_dataset_init):
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
;# cache in rdi
|
||||
mov rdi, qword ptr [rdi+8] ;# after virtual method table pointer
|
||||
;# dataset in rsi
|
||||
mov rbp, rdx ;# block index
|
||||
push rcx ;# max. block index
|
||||
|
@ -100,7 +100,7 @@ randomx_dataset_init PROC
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdi, rcx ;# cache
|
||||
mov rdi, qword ptr [rcx+8] ;# after virtual method table pointer
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
|
@ -28,7 +28,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
#define RANDOMX_JUMP
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
#if !defined(_M_X64) && !defined(__x86_64__)
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
@ -238,33 +238,29 @@ namespace RandomX {
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template<bool superscalar>
|
||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
|
||||
if (RANDOMX_CACHE_ACCESSES != 8)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
|
||||
if (RANDOMX_ARGON_GROWTH != 0)
|
||||
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
if (superscalar) {
|
||||
//if (superscalar) {
|
||||
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
|
||||
emitByte(CALL);
|
||||
emit32(superScalarHashOffset - (codePos + 4));
|
||||
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
||||
}
|
||||
/*}
|
||||
else {
|
||||
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
|
||||
codePos += readDatasetLightSize;
|
||||
emitByte(CALL);
|
||||
emit32(readDatasetLightSubOffset - (codePos + 4));
|
||||
}
|
||||
}*/
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateProgramLight<true>(Program& prog, ProgramConfiguration& pcfg);
|
||||
template void JitCompilerX86::generateProgramLight<false>(Program& prog, ProgramConfiguration& pcfg);
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
|
||||
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
codePos = superScalarHashOffset + codeSshInitSize;
|
||||
for (unsigned j = 0; j < N; ++j) {
|
||||
@ -273,7 +269,7 @@ namespace RandomX {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode<SuperscalarProgram>(instr, i);
|
||||
generateSuperscalarCode(instr, reciprocalCache);
|
||||
}
|
||||
emit(codeShhLoad, codeSshLoadSize);
|
||||
if (j < N - 1) {
|
||||
@ -293,7 +289,7 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
template
|
||||
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
|
||||
|
||||
void JitCompilerX86::generateDatasetInitCode() {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
@ -314,7 +310,12 @@ namespace RandomX {
|
||||
emitByte(0xc0 + pcfg.readReg1);
|
||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||
codePos += loopLoadSize;
|
||||
generateCode(prog);
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + pcfg.readReg2);
|
||||
emit(REX_XOR_EAX);
|
||||
@ -331,7 +332,6 @@ namespace RandomX {
|
||||
emit32(epilogueOffset - codePos - 4);
|
||||
}
|
||||
|
||||
template<class P>
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.push_back(codePos);
|
||||
@ -340,67 +340,66 @@ namespace RandomX {
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
template<>
|
||||
void JitCompilerX86::generateCode<SuperscalarProgram>(Instruction& instr, int i) {
|
||||
void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
|
||||
switch (instr.opcode)
|
||||
{
|
||||
case RandomX::SuperscalarInstructionType::ISUB_R:
|
||||
case randomx::SuperscalarInstructionType::ISUB_R:
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_R:
|
||||
case randomx::SuperscalarInstructionType::IXOR_R:
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_RS:
|
||||
case randomx::SuperscalarInstructionType::IADD_RS:
|
||||
emit(REX_LEA);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.getModShift2(), instr.src, instr.dst);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_R:
|
||||
case randomx::SuperscalarInstructionType::IMUL_R:
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IROR_C:
|
||||
case randomx::SuperscalarInstructionType::IROR_C:
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C7:
|
||||
case randomx::SuperscalarInstructionType::IADD_C7:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C7:
|
||||
case randomx::SuperscalarInstructionType::IXOR_C7:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C8:
|
||||
case randomx::SuperscalarInstructionType::IADD_C8:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP1);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C8:
|
||||
case randomx::SuperscalarInstructionType::IXOR_C8:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP1);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IADD_C9:
|
||||
case randomx::SuperscalarInstructionType::IADD_C9:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP2);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IXOR_C9:
|
||||
case randomx::SuperscalarInstructionType::IXOR_C9:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
emit(NOP2);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMULH_R:
|
||||
case randomx::SuperscalarInstructionType::IMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
@ -408,7 +407,7 @@ namespace RandomX {
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::ISMULH_R:
|
||||
case randomx::SuperscalarInstructionType::ISMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
@ -416,9 +415,9 @@ namespace RandomX {
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case RandomX::SuperscalarInstructionType::IMUL_RCP:
|
||||
case randomx::SuperscalarInstructionType::IMUL_RCP:
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocal(instr.getImm32()));
|
||||
emit64(reciprocalCache[instr.getImm32()]);
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
break;
|
||||
@ -427,8 +426,6 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i);
|
||||
|
||||
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||
emit(LEA_32);
|
||||
emitByte(0x80 + instr.src + (rax ? 0 : 8));
|
||||
|
@ -21,10 +21,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "common.hpp"
|
||||
#include "Instruction.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
@ -40,15 +41,14 @@ namespace RandomX {
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&, ProgramConfiguration&);
|
||||
template<bool superscalar>
|
||||
void generateProgramLight(Program&, ProgramConfiguration&);
|
||||
template<size_t N>
|
||||
void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
|
||||
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
|
||||
void generateDatasetInitCode();
|
||||
ProgramFunc getProgramFunc() {
|
||||
return (ProgramFunc)code;
|
||||
}
|
||||
DatasetInitFunc getDatasetInitFunc() {
|
||||
generateDatasetInitCode();
|
||||
return (DatasetInitFunc)code;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
@ -62,18 +62,6 @@ namespace RandomX {
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
|
||||
template<class P>
|
||||
void generateCode(P& prog) {
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode<P>(instr, i);
|
||||
}
|
||||
}
|
||||
|
||||
void generateDatasetInitCode();
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&);
|
||||
int getConditionRegister();
|
||||
@ -84,8 +72,8 @@ namespace RandomX {
|
||||
|
||||
void handleCondition(Instruction&, int);
|
||||
|
||||
template<class P>
|
||||
void generateCode(Instruction&, int);
|
||||
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
|
||||
|
||||
void emitByte(uint8_t val) {
|
||||
code[codePos] = val;
|
||||
|
@ -25,7 +25,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "Instruction.hpp"
|
||||
#include "blake2/endian.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
struct ProgramConfiguration {
|
||||
uint64_t eMask[2];
|
||||
@ -59,46 +59,4 @@ namespace RandomX {
|
||||
};
|
||||
|
||||
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
|
||||
|
||||
class SuperscalarProgram {
|
||||
public:
|
||||
Instruction& operator()(int pc) {
|
||||
return programBuffer[pc];
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
|
||||
p.print(os);
|
||||
return os;
|
||||
}
|
||||
uint32_t getSize() {
|
||||
return size;
|
||||
}
|
||||
void setSize(uint32_t val) {
|
||||
size = val;
|
||||
}
|
||||
int getAddressRegister() {
|
||||
return addrReg;
|
||||
}
|
||||
void setAddressRegister(uint32_t val) {
|
||||
addrReg = val;
|
||||
}
|
||||
double ipc;
|
||||
int codeSize;
|
||||
int macroOps;
|
||||
int decodeCycles;
|
||||
int cpuLatency;
|
||||
int asicLatency;
|
||||
int mulCount;
|
||||
int cpuLatencies[8];
|
||||
int asicLatencies[8];
|
||||
private:
|
||||
void print(std::ostream& os) const {
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
auto instr = programBuffer[i];
|
||||
os << instr;
|
||||
}
|
||||
}
|
||||
Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE];
|
||||
uint32_t size;
|
||||
int addrReg;
|
||||
};
|
||||
}
|
||||
|
@ -24,9 +24,60 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include "intrinPortable.h"
|
||||
#include "allocator.hpp"
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
|
||||
for (int i = 0; i < RandomX::RegistersCount; ++i)
|
||||
// The destructor is declared pure virtual in the class definition; a pure
// virtual destructor still needs a definition because derived-class
// destructors call it.
randomx_vm::~randomx_vm() {

}
|
||||
|
||||
// Resets the floating point environment by delegating to initFpu().
// NOTE(review): the exact state initFpu() establishes is defined elsewhere.
void randomx_vm::resetRoundingMode() {
	initFpu();
}
|
||||
|
||||
// Field sizes and masks of the IEEE-754 double precision format.
constexpr int mantissaSize = 52;
constexpr int exponentSize = 11;
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
constexpr int exponentBias = 1023;

/*
	Builds the bit pattern of a positive double from 64 bits of entropy.

	The top 5 bits of `entropy` select an unbiased exponent of 0..31 and the
	low 52 bits become the mantissa; the sign bit is always zero. The encoded
	value therefore lies in the range [1, 2^32).

	Returns the raw 64-bit representation (not a double).
*/
static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
	auto exponent = entropy >> 59; //0..31
	auto mantissa = entropy & mantissaMask;
	exponent += exponentBias;
	exponent &= exponentMask;
	exponent <<= mantissaSize;
	return exponent | mantissa;
}
|
||||
|
||||
void randomx_vm::initialize() {
|
||||
store64(®.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0)));
|
||||
store64(®.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1)));
|
||||
store64(®.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2)));
|
||||
store64(®.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3)));
|
||||
store64(®.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4)));
|
||||
store64(®.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5)));
|
||||
store64(®.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6)));
|
||||
store64(®.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7)));
|
||||
mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask;
|
||||
mem.mx = program.getEntropy(10);
|
||||
auto addressRegisters = program.getEntropy(12);
|
||||
config.readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg1 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg2 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg3 = 6 + (addressRegisters & 1);
|
||||
//datasetBase = program.getEntropy(13) % datasetRange;
|
||||
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
||||
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
|
||||
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
|
||||
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
|
||||
}
|
||||
|
||||
//TODO
|
||||
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
|
||||
for (int i = 0; i < randomx::RegistersCount; ++i)
|
||||
os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
|
||||
@ -40,66 +91,32 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
|
||||
return os;
|
||||
}
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
constexpr int mantissaSize = 52;
|
||||
constexpr int exponentSize = 11;
|
||||
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
||||
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
||||
constexpr int exponentBias = 1023;
|
||||
|
||||
static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
||||
auto exponent = entropy >> 59; //0..31
|
||||
auto mantissa = entropy & mantissaMask;
|
||||
exponent += exponentBias;
|
||||
exponent &= exponentMask;
|
||||
exponent <<= mantissaSize;
|
||||
return exponent | mantissa;
|
||||
template<class Allocator, bool softAes>
|
||||
VmBase<Allocator, softAes>::~VmBase() {
|
||||
Allocator::freeMemory(scratchpad, ScratchpadSize);
|
||||
}
|
||||
|
||||
VirtualMachine::VirtualMachine() {
|
||||
mem.ds.dataset.memory = nullptr;
|
||||
template<class Allocator, bool softAes>
|
||||
bool VmBase<Allocator, softAes>::allocate() {
|
||||
scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize);
|
||||
return scratchpad != nullptr;
|
||||
}
|
||||
|
||||
void VirtualMachine::resetRoundingMode() {
|
||||
initFpu();
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::generate(void* seed, void* buffer, size_t bufferSize) {
|
||||
fillAes1Rx4<softAes>(seed, bufferSize, buffer);
|
||||
}
|
||||
|
||||
void VirtualMachine::initialize() {
|
||||
store64(®.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0)));
|
||||
store64(®.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1)));
|
||||
store64(®.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2)));
|
||||
store64(®.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3)));
|
||||
store64(®.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4)));
|
||||
store64(®.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5)));
|
||||
store64(®.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6)));
|
||||
store64(®.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7)));
|
||||
mem.ma = program.getEntropy(8) & CacheLineAlignMask;
|
||||
mem.mx = program.getEntropy(10);
|
||||
auto addressRegisters = program.getEntropy(12);
|
||||
config.readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg1 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg2 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg3 = 6 + (addressRegisters & 1);
|
||||
datasetBase = program.getEntropy(13) % datasetRange;
|
||||
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
||||
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
|
||||
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
|
||||
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::getFinalResult(void* out, size_t outSize) {
|
||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||
blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* outHash) {
|
||||
if (scratchpadSize > 0) {
|
||||
hashAes1Rx4<softAes>(scratchpad, scratchpadSize, ®.a);
|
||||
}
|
||||
blake2b(outHash, ResultSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template void VirtualMachine::getResult<false>(void* scratchpad, size_t scratchpadSize, void* outHash);
|
||||
template void VirtualMachine::getResult<true>(void* scratchpad, size_t scratchpadSize, void* outHash);
|
||||
|
||||
template class VmBase<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class VmBase<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class VmBase<LargePageAllocator, false>;
|
||||
template class VmBase<LargePageAllocator, true>;
|
||||
}
|
@ -18,38 +18,40 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Program.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
/* Global namespace for C binding */
|
||||
struct randomx_vm {
|
||||
virtual ~randomx_vm() = 0;
|
||||
virtual bool allocate() = 0;
|
||||
virtual void generate(void* seed, void* buffer, size_t bufferSize) = 0;
|
||||
void resetRoundingMode();
|
||||
virtual void initialize();
|
||||
virtual void execute() = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
|
||||
class VirtualMachine {
|
||||
alignas(64) randomx::Program program;
|
||||
alignas(64) randomx::RegisterFile reg;
|
||||
alignas(16) randomx::ProgramConfiguration config;
|
||||
randomx::MemoryRegisters mem;
|
||||
uint8_t* scratchpad;
|
||||
};
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class VmBase : public randomx_vm {
|
||||
public:
|
||||
VirtualMachine();
|
||||
virtual ~VirtualMachine() {}
|
||||
virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
|
||||
void setScratchpad(void* ptr) {
|
||||
scratchpad = (uint8_t*)ptr;
|
||||
}
|
||||
void resetRoundingMode();
|
||||
virtual void initialize();
|
||||
virtual void execute() = 0;
|
||||
template<bool softAes>
|
||||
void getResult(void* scratchpad, size_t scratchpadSize, void* outHash);
|
||||
const RegisterFile& getRegisterFile() {
|
||||
return reg;
|
||||
}
|
||||
Program* getProgramBuffer() {
|
||||
return &program;
|
||||
}
|
||||
protected:
|
||||
alignas(64) Program program;
|
||||
alignas(64) RegisterFile reg;
|
||||
alignas(16) ProgramConfiguration config;
|
||||
MemoryRegisters mem;
|
||||
uint8_t* scratchpad;
|
||||
uint32_t datasetRange;
|
||||
uint32_t datasetBase;
|
||||
~VmBase() override;
|
||||
bool allocate() override;
|
||||
void generate(void* seed, void* buffer, size_t bufferSize) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
};
|
||||
|
||||
}
|
52
src/allocator.cpp
Normal file
52
src/allocator.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Implementation file: no include guard here. `#pragma once` only has meaning in
// headers and makes compilers warn when it appears in the main source file.
#include "allocator.hpp"
#include "virtualMemory.hpp"
#include "intrinPortable.h"
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<size_t alignment>
|
||||
void* AlignedAllocator<alignment>::allocMemory(size_t count) {
|
||||
return _mm_malloc(count, alignment);
|
||||
}
|
||||
|
||||
template<size_t alignment>
|
||||
void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
|
||||
// Explicit instantiations: the member definitions above live in this .cpp file,
// so the alignments instantiated here (CacheLineSize and sizeof(__m128i)) are
// the ones other translation units can link against.
template void* AlignedAllocator<CacheLineSize>::allocMemory(size_t count);
|
||||
template void AlignedAllocator<CacheLineSize>::freeMemory(void* ptr, size_t count);
|
||||
template void* AlignedAllocator<sizeof(__m128i)>::allocMemory(size_t count);
|
||||
template void AlignedAllocator<sizeof(__m128i)>::freeMemory(void* ptr, size_t count);
|
||||
|
||||
void* LargePageAllocator::allocMemory(size_t count) {
|
||||
return allocLargePagesMemory(count);
|
||||
}
|
||||
|
||||
// Releases `count` bytes starting at `ptr` by forwarding to freePagedMemory.
// (The stray ';' that followed the function body was an empty declaration and
// has been dropped.)
void LargePageAllocator::freeMemory(void* ptr, size_t count) {
	freePagedMemory(ptr, count);
}
|
||||
|
||||
}
|
37
src/allocator.hpp
Normal file
37
src/allocator.hpp
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace randomx {

	// Allocator policy types. Each one exposes the same pair of static
	// functions, so either can be plugged in as a template argument wherever
	// code calls Allocator::allocMemory / Allocator::freeMemory.

	// Policy parameterised on the requested alignment (in bytes).
	template<size_t alignment>
	struct AlignedAllocator {
		// Returns a block of `count` bytes.
		static void* allocMemory(size_t count);
		// Releases `ptr`; `count` is the size that was requested for it.
		static void freeMemory(void* ptr, size_t count);
	};

	// Policy whose definitions forward to the large-page memory helpers.
	struct LargePageAllocator {
		// Returns a block of `count` bytes.
		static void* allocMemory(size_t count);
		// Releases `count` bytes starting at `ptr`.
		static void freeMemory(void* ptr, size_t count);
	};

}
|
@ -23,8 +23,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <iostream>
|
||||
#include "blake2/endian.h"
|
||||
#include "configuration.h"
|
||||
#include "randomx.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
|
||||
static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2.");
|
||||
@ -58,6 +59,7 @@ namespace RandomX {
|
||||
constexpr int ArgonBlockSize = 1024;
|
||||
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
|
||||
constexpr int CacheLineSize = 64;
|
||||
constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
|
||||
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1);
|
||||
constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024;
|
||||
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
||||
@ -98,39 +100,9 @@ namespace RandomX {
|
||||
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
|
||||
constexpr int RegisterNeedsSib = 4; //x86 r12 register
|
||||
|
||||
struct Cache {
|
||||
uint8_t* memory;
|
||||
uint64_t size;
|
||||
};
|
||||
|
||||
struct Dataset : public Cache {
|
||||
};
|
||||
|
||||
class ILightClientAsyncWorker {
|
||||
public:
|
||||
virtual ~ILightClientAsyncWorker() {}
|
||||
virtual void prepareBlock(addr_t) = 0;
|
||||
virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
|
||||
virtual const uint64_t* getBlock(addr_t) = 0;
|
||||
virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
|
||||
virtual void sync() = 0;
|
||||
const Cache& getCache() {
|
||||
return cache;
|
||||
}
|
||||
protected:
|
||||
ILightClientAsyncWorker(const Cache& c) : cache(c) {}
|
||||
const Cache& cache;
|
||||
};
|
||||
|
||||
union dataset_t {
|
||||
Dataset dataset;
|
||||
Cache cache;
|
||||
ILightClientAsyncWorker* asyncWorker;
|
||||
};
|
||||
|
||||
struct MemoryRegisters {
|
||||
addr_t mx, ma;
|
||||
dataset_t ds;
|
||||
uint8_t* memory = nullptr;
|
||||
};
|
||||
|
||||
struct RegisterFile {
|
||||
@ -141,9 +113,8 @@ namespace RandomX {
|
||||
};
|
||||
|
||||
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(®)[RegistersCount]);
|
||||
|
||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
|
||||
typedef void(*DatasetInitFunc)(uint8_t* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
typedef void(*DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf);
|
||||
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);
|
||||
|
272
src/dataset.cpp
272
src/dataset.cpp
@ -22,14 +22,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <cstring>
|
||||
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "virtualMemory.hpp"
|
||||
#include "softAes.h"
|
||||
#include "squareHash.h"
|
||||
#include "superscalarGenerator.hpp"
|
||||
#include "Blake2Generator.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "blake2/endian.h"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <wmmintrin.h>
|
||||
@ -38,113 +41,174 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#define PREFETCH(memory)
|
||||
#endif
|
||||
|
||||
namespace RandomX {
|
||||
randomx_dataset::~randomx_dataset() {
|
||||
|
||||
#if true //RANDOMX_ARGON_GROWTH != 0 || (!defined(_M_X64) && !defined(__x86_64__))
|
||||
static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) {
|
||||
}
|
||||
|
||||
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
|
||||
|
||||
// Fills the cache memory from `seed` and prepares the superscalar programs.
//
// Phase 1: builds an Argon2 context/instance (type Argon2_d) from the
// RANDOMX_ARGON_* configuration, points it at `memory`, and runs
// argon_initialize + fill_memory_blocks on it.
// Phase 2: generates RANDOMX_CACHE_ACCESSES superscalar programs and, for every
// IMUL_RCP instruction, stores the reciprocal of its immediate in
// `reciprocalCache` and rewrites the immediate to that entry's index.
//
// seed      input bytes used as the Argon2 password and as the generator seed
// seedSize  number of bytes in `seed` (used for the Argon2 password length)
//
// NOTE(review): `memory` is used as-is; presumably allocate() must have
// succeeded before this is called - confirm against the callers.
void randomx_cache::initialize(const void *seed, size_t seedSize) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
argon2_context context;
|
||||
|
||||
// No tag output is requested; only the filled memory is of interest here.
context.out = nullptr;
|
||||
context.outlen = 0;
|
||||
context.pwd = CONST_CAST(uint8_t *)seed;
|
||||
context.pwdlen = (uint32_t)seedSize;
|
||||
context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
|
||||
context.saltlen = (uint32_t)randomx::ArgonSaltSize;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = RANDOMX_ARGON_ITERATIONS;
|
||||
context.m_cost = RANDOMX_ARGON_MEMORY;
|
||||
context.lanes = RANDOMX_ARGON_LANES;
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context.m_cost;
|
||||
|
||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context.version;
|
||||
// Placeholder only: overwritten with the cache's own buffer a few lines below.
instance.memory = NULL;
|
||||
instance.passes = context.t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context.lanes;
|
||||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
// Argon2 writes directly into this object's `memory` buffer.
instance.memory = (block*)memory;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
argon_initialize(&instance, &context);
|
||||
|
||||
fill_memory_blocks(&instance);
|
||||
|
||||
// Start from an empty reciprocal table so repeated calls do not accumulate entries.
reciprocalCache.clear();
|
||||
// NOTE(review): the generator is built from `seed` and the constant 1000;
// `seedSize` is not passed to it - confirm this is intended.
randomx::Blake2Generator gen(seed, 1000);
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
randomx::generateSuperscalar(programs[i], gen);
|
||||
for (unsigned j = 0; j < programs[i].getSize(); ++j) {
|
||||
auto& instr = programs[i](j);
|
||||
if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) {
|
||||
// Replace the immediate with the index of its reciprocal in reciprocalCache.
auto rcp = reciprocal(instr.getImm32());
|
||||
instr.setImm32(reciprocalCache.size());
|
||||
reciprocalCache.push_back(rcp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator>
|
||||
bool Dataset<Allocator>::allocate() {
|
||||
memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
Dataset<Allocator>::~Dataset() {
|
||||
Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE);
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
bool Cache<Allocator>::allocate() {
|
||||
memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
Cache<Allocator>::~Cache() {
|
||||
Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
DatasetInitFunc Cache<Allocator>::getInitFunc() {
|
||||
return &initDataset;
|
||||
}
|
||||
|
||||
// Dataset initialisation routine for the JIT-backed cache: whatever the
// embedded JIT compiler object returns from getDatasetInitFunc().
template<class Allocator>
|
||||
DatasetInitFunc CacheWithJit<Allocator>::getInitFunc() {
|
||||
return jit.getDatasetInitFunc();
|
||||
}
|
||||
|
||||
// Runs the base-class initialisation first (it fills `programs` and
// `reciprocalCache`), then hands both to the JIT compiler and asks it to
// generate the dataset-init code.
template<class Allocator>
|
||||
void CacheWithJit<Allocator>::initialize(const void *seed, size_t seedSize) {
|
||||
randomx_cache::initialize(seed, seedSize);
|
||||
jit.generateSuperscalarHash(programs, reciprocalCache);
|
||||
jit.generateDatasetInitCode();
|
||||
}
|
||||
|
||||
// Explicit instantiations: the member definitions of these templates live in
// this .cpp file, so each class is instantiated here for both allocator
// policies (cache-line aligned and large pages).
template class Dataset<AlignedAllocator<CacheLineSize>>;
|
||||
template class Dataset<LargePageAllocator>;
|
||||
template class Cache<AlignedAllocator<CacheLineSize>>;
|
||||
template class Cache<LargePageAllocator>;
|
||||
template class CacheWithJit<AlignedAllocator<CacheLineSize>>;
|
||||
template class CacheWithJit<LargePageAllocator>;
|
||||
|
||||
// Constants used by initDatasetBlock to seed its eight working registers:
// rl[0] = (blockNumber + 1) * superscalarMul0, and rl[k] = rl[0] ^ superscalarAddk
// for k = 1..7 (note that, despite the "Add" in the names, they are combined
// with XOR there).
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||
constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL;
|
||||
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
|
||||
constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
|
||||
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
|
||||
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
|
||||
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
|
||||
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
|
||||
|
||||
static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
|
||||
constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
|
||||
return memory + (registerValue & mask) * CacheLineSize;
|
||||
}
|
||||
|
||||
void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber) {
|
||||
int_reg_t rl[8];
|
||||
uint8_t* mixBlock;
|
||||
if (RANDOMX_ARGON_GROWTH == 0) {
|
||||
constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
|
||||
mixBlock = cache.memory + (currentIndex & mask) * CacheLineSize;
|
||||
}
|
||||
else {
|
||||
const uint32_t modulus = cache.size / CacheLineSize;
|
||||
mixBlock = cache.memory + (currentIndex % modulus) * CacheLineSize;
|
||||
}
|
||||
PREFETCHNTA(mixBlock);
|
||||
nextIndex = squareHash(currentIndex + nextIndex);
|
||||
return mixBlock;
|
||||
}
|
||||
uint64_t registerValue = blockNumber;
|
||||
rl[0] = (blockNumber + 1) * superscalarMul0;
|
||||
rl[1] = rl[0] ^ superscalarAdd1;
|
||||
rl[2] = rl[0] ^ superscalarAdd2;
|
||||
rl[3] = rl[0] ^ superscalarAdd3;
|
||||
rl[4] = rl[0] ^ superscalarAdd4;
|
||||
rl[5] = rl[0] ^ superscalarAdd5;
|
||||
rl[6] = rl[0] ^ superscalarAdd6;
|
||||
rl[7] = rl[0] ^ superscalarAdd7;
|
||||
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
mixBlock = getMixBlock(registerValue, cache->memory);
|
||||
SuperscalarProgram& prog = cache->programs[i];
|
||||
|
||||
static FORCE_INLINE void mixCache(uint8_t* mixBlock, uint64_t& c0, uint64_t& c1, uint64_t& c2, uint64_t& c3, uint64_t& c4, uint64_t& c5, uint64_t& c6, uint64_t& c7) {
|
||||
c0 ^= load64(mixBlock + 0);
|
||||
c1 ^= load64(mixBlock + 8);
|
||||
c2 ^= load64(mixBlock + 16);
|
||||
c3 ^= load64(mixBlock + 24);
|
||||
c4 ^= load64(mixBlock + 32);
|
||||
c5 ^= load64(mixBlock + 40);
|
||||
c6 ^= load64(mixBlock + 48);
|
||||
c7 ^= load64(mixBlock + 56);
|
||||
}
|
||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||
|
||||
void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations) {
|
||||
uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
rl[q] ^= load64(mixBlock + 8 * q);
|
||||
|
||||
c0 = blockNumber;
|
||||
c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
|
||||
|
||||
uint8_t* mixBlock;
|
||||
|
||||
for (auto i = 0; i < iterations; ++i) {
|
||||
mixBlock = selectMixBlock(cache, c0, c1);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c1, c2);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c2, c3);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c3, c4);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c4, c5);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c5, c6);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c6, c7);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
mixBlock = selectMixBlock(cache, c7, c0);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
registerValue = rl[prog.getAddressRegister()];
|
||||
}
|
||||
|
||||
store64(out + 0, c0);
|
||||
store64(out + 8, c1);
|
||||
store64(out + 16, c2);
|
||||
store64(out + 24, c3);
|
||||
store64(out + 32, c4);
|
||||
store64(out + 40, c5);
|
||||
store64(out + 48, c6);
|
||||
store64(out + 56, c7);
|
||||
}
|
||||
#endif
|
||||
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||
uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset.memory + memory.ma);
|
||||
memory.mx ^= addr;
|
||||
memory.mx &= -64; //align to cache line
|
||||
std::swap(memory.mx, memory.ma);
|
||||
PREFETCHNTA(memory.ds.dataset.memory + memory.ma);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg.r[i] ^= datasetLine[i];
|
||||
memcpy(out, &rl, CacheLineSize);
|
||||
}
|
||||
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t (®)[RegistersCount]) {
|
||||
memory.mx ^= addr;
|
||||
memory.mx &= CacheLineAlignMask; //align to cache line
|
||||
Cache& cache = memory.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock(cache, (uint8_t*)datasetLine, memory.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg[i] ^= datasetLine[i];
|
||||
std::swap(memory.mx, memory.ma);
|
||||
// Initialises dataset blocks startBlock .. endBlock-1 (endBlock is exclusive).
// `dataset` points at the output location for startBlock; each following block
// is written CacheLineSize bytes further on. Every block is produced by
// initDatasetBlock from `cache`.
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock) {
	uint8_t* outputLine = dataset;
	uint32_t block = startBlock;
	while (block < endBlock) {
		initDatasetBlock(cache, outputLine, block);
		++block;
		outputLine += CacheLineSize;
	}
}
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]) {
|
||||
ILightClientAsyncWorker* aw = memory.ds.asyncWorker;
|
||||
const uint64_t* datasetLine = aw->getBlock(memory.ma);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg[i] ^= datasetLine[i];
|
||||
memory.mx ^= addr;
|
||||
memory.mx &= CacheLineAlignMask; //align to cache line
|
||||
std::swap(memory.mx, memory.ma);
|
||||
aw->prepareBlock(memory.ma);
|
||||
}
|
||||
|
||||
void datasetAlloc(dataset_t& ds, bool largePages) {
|
||||
|
||||
/*void datasetAlloc(dataset_t& ds, bool largePages) {
|
||||
if (std::numeric_limits<size_t>::max() < RANDOMX_DATASET_SIZE)
|
||||
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
||||
if (largePages) {
|
||||
@ -158,14 +222,8 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount) {
|
||||
for (uint64_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||
initBlock(cache, ds.memory + i * CacheLineSize, i, RANDOMX_CACHE_ACCESSES / 8);
|
||||
}
|
||||
}
|
||||
|
||||
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
||||
ds.cache.memory = allocCache(ds.cache.size, largePages);
|
||||
argonFill(ds.cache, seed, SeedSize);
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
@ -20,26 +20,62 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include "intrinPortable.h"
|
||||
#include "common.hpp"
|
||||
#include "randomx.h"
|
||||
#include "Program.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "JitCompilerX86.hpp"
|
||||
#include "allocator.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
// Abstract base for objects that own a byte buffer (`memory`).
// Declared outside namespace randomx, alongside the C API handle types.
struct randomx_dataset {
|
||||
// Pure virtual destructor; a definition is still provided in dataset.cpp.
virtual ~randomx_dataset() = 0;
|
||||
// Implemented by the allocator-specific subclasses; returns a success flag.
virtual bool allocate() = 0;
|
||||
// Backing buffer; null until a subclass assigns it in allocate().
uint8_t* memory = nullptr;
|
||||
};
|
||||
|
||||
#if false //RANDOMX_ARGON_GROWTH == 0 && (defined(_M_X64) || defined(__x86_64__))
|
||||
extern "C"
|
||||
#endif
|
||||
void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations);
|
||||
// Cache object: a randomx_dataset buffer plus the superscalar programs and the
// reciprocal table that initialize() derives from the seed.
struct randomx_cache : public randomx_dataset {
|
||||
// Returns the routine used to build dataset blocks from this cache.
virtual randomx::DatasetInitFunc getInitFunc() = 0;
|
||||
// Fills `memory`, `programs` and `reciprocalCache` from the given seed bytes.
virtual void initialize(const void *seed, size_t seedSize); //argon2
|
||||
// One superscalar program per cache access, generated by initialize().
randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
// Reciprocals for IMUL_RCP instructions; those instructions' immediates
// are rewritten by initialize() to index into this vector.
std::vector<uint64_t> reciprocalCache;
|
||||
};
|
||||
|
||||
void datasetAlloc(dataset_t& ds, bool largePages);
|
||||
|
||||
void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount);
|
||||
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
namespace randomx {
|
||||
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
|
||||
// Concrete dataset whose buffer is obtained from and returned to the given
// Allocator policy (Allocator::allocMemory / Allocator::freeMemory).
template<class Allocator>
|
||||
struct Dataset : public randomx_dataset {
|
||||
// Releases the buffer through the Allocator policy.
~Dataset() override;
|
||||
// Requests RANDOMX_DATASET_SIZE bytes from the Allocator policy.
bool allocate() override;
|
||||
};
|
||||
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
|
||||
using DatasetDefault = Dataset<AlignedAllocator<CacheLineSize>>;
|
||||
using DatasetLargePage = Dataset<LargePageAllocator>;
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(®)[RegistersCount]);
|
||||
// Concrete cache whose buffer is obtained from and returned to the given
// Allocator policy. Uses the portable initDataset routine for dataset
// initialisation.
template<class Allocator>
|
||||
struct Cache : public randomx_cache {
|
||||
// Releases the buffer through the Allocator policy.
~Cache() override;
|
||||
// Requests RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE bytes from the Allocator policy.
bool allocate() override;
|
||||
// Returns a pointer to initDataset.
DatasetInitFunc getInitFunc() override;
|
||||
};
|
||||
|
||||
// Cache variant that owns a JitCompilerX86 and uses it to produce the dataset
// initialisation routine instead of the portable initDataset.
template<class Allocator>
|
||||
struct CacheWithJit : public Cache<Allocator> {
|
||||
// Bring the dependent-base members into scope so they can be named unqualified.
using Cache<Allocator>::programs;
|
||||
using Cache<Allocator>::reciprocalCache;
|
||||
// Runs the base initialisation, then feeds the programs and reciprocals to the JIT.
void initialize(const void *seed, size_t seedSize) override;
|
||||
// Returns the routine produced by the JIT compiler.
DatasetInitFunc getInitFunc() override;
|
||||
// JIT compiler instance owned by this cache.
JitCompilerX86 jit;
|
||||
};
|
||||
|
||||
using CacheDefault = Cache<AlignedAllocator<CacheLineSize>>;
|
||||
using CacheWithJitDefault = CacheWithJit<AlignedAllocator<CacheLineSize>>;
|
||||
using CacheLargePage = Cache<LargePageAllocator>;
|
||||
using CacheWithJitLargePage = CacheWithJit<LargePageAllocator>;
|
||||
|
||||
void initDatasetBlock(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
}
|
||||
|
||||
|
246
src/main.cpp
246
src/main.cpp
@ -17,31 +17,28 @@ You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
//#define TRACE
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
#include "CompiledVirtualMachine.hpp"
|
||||
#include "CompiledLightVirtualMachine.hpp"
|
||||
#include "AssemblyGeneratorX86.hpp"
|
||||
|
||||
//#include "AssemblyGeneratorX86.hpp"
|
||||
#include "Stopwatch.hpp"
|
||||
#include "blake2/blake2.h"
|
||||
//#include "blake2/blake2.h"
|
||||
#include "blake2/endian.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <exception>
|
||||
#include <cstring>
|
||||
#include "Program.hpp"
|
||||
//#include "Program.hpp"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "hashAes1Rx4.hpp"
|
||||
#include "superscalarGenerator.hpp"
|
||||
#include "JitCompilerX86.hpp"
|
||||
//#include "hashAes1Rx4.hpp"
|
||||
//#include "JitCompilerX86.hpp"
|
||||
#include "randomx.h"
|
||||
|
||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||
|
||||
const uint8_t blockTemplate__[] = {
|
||||
const uint8_t blockTemplate_[] = {
|
||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
||||
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
|
||||
@ -131,77 +128,57 @@ void printUsage(const char* executable) {
|
||||
|
||||
template<bool softAes>
|
||||
void generateAsm(uint32_t nonce) {
|
||||
alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||
/*alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate_)];
|
||||
memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
|
||||
store32(blockTemplate + 39, nonce);
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3];
|
||||
fillAes1Rx4<softAes>((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
RandomX::Program p;
|
||||
randomx::AssemblyGeneratorX86 asmX86;
|
||||
randomx::Program p;
|
||||
fillAes1Rx4<softAes>(hash, sizeof(p), &p);
|
||||
asmX86.generateProgram(p);
|
||||
asmX86.printCode(std::cout);
|
||||
asmX86.printCode(std::cout);*/
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void generateNative(uint32_t nonce) {
|
||||
alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||
/*alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate_)];
|
||||
memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
|
||||
store32(blockTemplate + 39, nonce);
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3];
|
||||
fillAes1Rx4<softAes>((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad);
|
||||
alignas(16) RandomX::Program prog;
|
||||
alignas(16) randomx::Program prog;
|
||||
fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
|
||||
for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
prog(i).dst %= 8;
|
||||
prog(i).src %= 8;
|
||||
}
|
||||
std::cout << prog << std::endl;
|
||||
std::cout << prog << std::endl;*/
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, uint8_t* scratchpad) {
|
||||
alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) {
|
||||
uint64_t hash[RANDOMX_HASH_SIZE / 4];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate_)];
|
||||
memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
|
||||
void* noncePtr = blockTemplate + 39;
|
||||
auto nonce = atomicNonce.fetch_add(1);
|
||||
|
||||
while (nonce < noncesCount) {
|
||||
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
||||
store32(noncePtr, nonce);
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
fillAes1Rx4<softAes>((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad);
|
||||
//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt");
|
||||
vm->resetRoundingMode();
|
||||
vm->setScratchpad(scratchpad);
|
||||
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
|
||||
fillAes1Rx4<softAes>((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer());
|
||||
vm->initialize();
|
||||
vm->execute();
|
||||
vm->getResult<false>(nullptr, 0, hash);
|
||||
}
|
||||
fillAes1Rx4<softAes>((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer());
|
||||
vm->initialize();
|
||||
vm->execute();
|
||||
/*if (RandomX::trace) {
|
||||
for (int j = 0; j < RandomX::ProgramLength; ++j) {
|
||||
uint64_t res = *(uint64_t*)(scratchpad + 8 * (RandomX::ProgramLength - 1 - j));
|
||||
std::cout << std::hex << std::setw(16) << std::setfill('0') << res << std::endl;
|
||||
}
|
||||
}*/
|
||||
vm->getResult<softAes>(scratchpad, RANDOMX_SCRATCHPAD_L3, hash);
|
||||
//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-after.txt");
|
||||
|
||||
randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
|
||||
|
||||
result.xorWith(hash);
|
||||
if (RandomX::trace) {
|
||||
/*if (randomx::trace) {
|
||||
std::cout << "Nonce: " << nonce << " ";
|
||||
outputHex(std::cout, (char*)hash, 16);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}*/
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
}
|
||||
}
|
||||
@ -227,16 +204,16 @@ int main(int argc, char** argv) {
|
||||
readOption("--genSuperscalar", argc, argv, genSuperscalar);
|
||||
readOption("--legacy", argc, argv, legacy);
|
||||
|
||||
if (genSuperscalar) {
|
||||
RandomX::SuperscalarProgram p;
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
RandomX::generateSuperscalar(p, gen);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
/*if (genSuperscalar) {
|
||||
randomx::SuperscalarProgram p;
|
||||
randomx::Blake2Generator gen(seed, programCount);
|
||||
randomx::generateSuperscalar(p, gen);
|
||||
randomx::AssemblyGeneratorX86 asmX86;
|
||||
asmX86.generateAsm(p);
|
||||
//std::ofstream file("lightProg2.asm");
|
||||
asmX86.printCode(std::cout);
|
||||
return 0;
|
||||
}
|
||||
}*/
|
||||
|
||||
if (genAsm) {
|
||||
if (softAes)
|
||||
@ -264,15 +241,42 @@ int main(int argc, char** argv) {
|
||||
|
||||
std::atomic<uint32_t> atomicNonce(0);
|
||||
AtomicHash result;
|
||||
std::vector<RandomX::VirtualMachine*> vms;
|
||||
std::vector<randomx_vm*> vms;
|
||||
std::vector<std::thread> threads;
|
||||
RandomX::dataset_t dataset;
|
||||
const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize;
|
||||
const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch);
|
||||
dataset.cache.size = cacheSize;
|
||||
RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
randomx_dataset* dataset;
|
||||
randomx_cache* cache;
|
||||
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
|
||||
|
||||
std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl;
|
||||
if (miningMode) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM);
|
||||
std::cout << "RandomX - full memory mode (2 GiB)" << std::endl;
|
||||
} else {
|
||||
std::cout << "RandomX - light memory mode (256 MiB)" << std::endl;
|
||||
}
|
||||
|
||||
if (jit) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT);
|
||||
std::cout << "RandomX - JIT compiled mode" << std::endl;
|
||||
}
|
||||
else {
|
||||
std::cout << "RandomX - interpreted mode" << std::endl;
|
||||
}
|
||||
|
||||
if (softAes) {
|
||||
std::cout << "RandomX - software AES mode" << std::endl;
|
||||
}
|
||||
else {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES);
|
||||
std::cout << "RandomX - hardware AES mode" << std::endl;
|
||||
}
|
||||
|
||||
if (largePages) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES);
|
||||
std::cout << "RandomX - large pages mode" << std::endl;
|
||||
}
|
||||
else {
|
||||
std::cout << "RandomX - small pages mode" << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Initializing";
|
||||
if(miningMode)
|
||||
@ -281,116 +285,60 @@ int main(int argc, char** argv) {
|
||||
|
||||
try {
|
||||
Stopwatch sw(true);
|
||||
RandomX::datasetInitCache(seed, dataset, largePages);
|
||||
if (RandomX::trace) {
|
||||
cache = randomx_alloc_cache(flags);
|
||||
randomx_init_cache(cache, seed, sizeof(seed));
|
||||
/*if (randomx::trace) {
|
||||
std::cout << "Cache: " << std::endl;
|
||||
outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i));
|
||||
std::cout << std::endl;
|
||||
}
|
||||
if (!legacy) {
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
RandomX::generateSuperscalar(programs[i], gen);
|
||||
}
|
||||
}
|
||||
if (!miningMode) {
|
||||
std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
else {
|
||||
auto cache = dataset.cache;
|
||||
dataset.dataset.size = datasetSize;
|
||||
RandomX::datasetAlloc(dataset, largePages);
|
||||
const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize;
|
||||
if (!legacy) {
|
||||
RandomX::JitCompilerX86 jit86;
|
||||
jit86.generateSuperScalarHash(programs);
|
||||
RandomX::DatasetInitFunc dsfunc = jit86.getDatasetInitFunc();
|
||||
if (initThreadCount > 1) {
|
||||
auto perThread = datasetBlockCount / initThreadCount;
|
||||
auto remainder = datasetBlockCount % initThreadCount;
|
||||
uint32_t startBlock = 0;
|
||||
uint32_t endBlock = 0;
|
||||
for (int i = 0; i < initThreadCount; ++i) {
|
||||
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
|
||||
endBlock += count;
|
||||
threads.push_back(std::thread(dsfunc, cache.memory, dataset.dataset.memory + startBlock * RandomX::CacheLineSize, startBlock, endBlock));
|
||||
startBlock += count;
|
||||
}
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
}*/
|
||||
if (miningMode) {
|
||||
dataset = randomx_alloc_dataset(flags);
|
||||
if (initThreadCount > 1) {
|
||||
auto perThread = RANDOMX_DATASET_BLOCKS / initThreadCount;
|
||||
auto remainder = RANDOMX_DATASET_BLOCKS % initThreadCount;
|
||||
uint32_t startBlock = 0;
|
||||
for (int i = 0; i < initThreadCount; ++i) {
|
||||
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
|
||||
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startBlock, count));
|
||||
startBlock += count;
|
||||
}
|
||||
else {
|
||||
dsfunc(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
//dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat");
|
||||
}
|
||||
else {
|
||||
if (initThreadCount > 1) {
|
||||
auto perThread = datasetBlockCount / initThreadCount;
|
||||
auto remainder = datasetBlockCount % initThreadCount;
|
||||
for (int i = 0; i < initThreadCount; ++i) {
|
||||
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
|
||||
threads.push_back(std::thread(&RandomX::datasetInit, std::ref(cache), std::ref(dataset.dataset), i * perThread, count));
|
||||
}
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
else {
|
||||
RandomX::datasetInit(cache, dataset.dataset, 0, datasetBlockCount);
|
||||
}
|
||||
randomx_init_dataset(dataset, cache, 0, RANDOMX_DATASET_BLOCKS);
|
||||
}
|
||||
RandomX::deallocCache(cache, largePages);
|
||||
//dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat");
|
||||
randomx_release_cache(cache);
|
||||
threads.clear();
|
||||
std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
std::cout << "Memory initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl;
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
RandomX::VirtualMachine* vm;
|
||||
if (miningMode) {
|
||||
vm = new RandomX::CompiledVirtualMachine();
|
||||
}
|
||||
else {
|
||||
if (jit && !legacy)
|
||||
vm = new RandomX::CompiledLightVirtualMachine<true>();
|
||||
else if (jit)
|
||||
vm = new RandomX::CompiledLightVirtualMachine<false>();
|
||||
else if (!legacy)
|
||||
vm = new RandomX::InterpretedVirtualMachine<true>(softAes);
|
||||
else
|
||||
vm = new RandomX::InterpretedVirtualMachine<false>(softAes);
|
||||
}
|
||||
vm->setDataset(dataset, datasetSize, programs);
|
||||
randomx_vm *vm = randomx_create_vm(flags);
|
||||
if (miningMode)
|
||||
randomx_vm_set_dataset(vm, dataset);
|
||||
else
|
||||
randomx_vm_set_cache(vm, cache);
|
||||
vms.push_back(vm);
|
||||
}
|
||||
uint8_t* scratchpadMem;
|
||||
if (largePages) {
|
||||
scratchpadMem = (uint8_t*)allocLargePagesMemory(threadCount * RANDOMX_SCRATCHPAD_L3);
|
||||
}
|
||||
else {
|
||||
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RANDOMX_SCRATCHPAD_L3, RandomX::CacheLineSize);
|
||||
}
|
||||
std::cout << "Running benchmark (" << programCount << " nonces) ..." << std::endl;
|
||||
sw.restart();
|
||||
if (threadCount > 1) {
|
||||
for (unsigned i = 0; i < vms.size(); ++i) {
|
||||
if (softAes)
|
||||
threads.push_back(std::thread(&mine<true>, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i));
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
||||
else
|
||||
threads.push_back(std::thread(&mine<false>, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i));
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
||||
}
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(softAes)
|
||||
mine<true>(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
|
||||
else
|
||||
mine<false>(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
|
||||
/*if (miningMode)
|
||||
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount / RandomX::ChainLength << std::endl;*/
|
||||
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0);
|
||||
}
|
||||
double elapsed = sw.getElapsed();
|
||||
std::cout << "Calculated result: ";
|
||||
|
209
src/randomx.cpp
Normal file
209
src/randomx.cpp
Normal file
@ -0,0 +1,209 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "randomx.h"
|
||||
#include "dataset.hpp"
|
||||
#include "VirtualMachine.hpp"
|
||||
#include "./InterpretedVirtualMachine.hpp"
|
||||
#include "./InterpretedLightVirtualMachine.hpp"
|
||||
#include "./CompiledVirtualMachine.hpp"
|
||||
#include "./CompiledLightVirtualMachine.hpp"
|
||||
#include "virtualMemory.hpp"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Creates a cache object of the variant selected by `flags` and allocates its memory.
//
// Only RANDOMX_FLAG_JIT and RANDOMX_FLAG_LARGE_PAGES are examined; all other
// flag bits are ignored here.
//
// Returns the new cache, or nullptr when the cache's allocate() call reports
// failure (the partially constructed object is deleted in that case).
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
	const bool withJit = (flags & RANDOMX_FLAG_JIT) != 0;
	const bool withLargePages = (flags & RANDOMX_FLAG_LARGE_PAGES) != 0;

	randomx_cache *result;
	if (withJit) {
		if (withLargePages)
			result = new randomx::CacheWithJitLargePage();
		else
			result = new randomx::CacheWithJitDefault();
	}
	else {
		if (withLargePages)
			result = new randomx::CacheLargePage();
		else
			result = new randomx::CacheDefault();
	}

	if (result->allocate())
		return result;

	// Allocation failed: do not hand out a half-initialized object.
	delete result;
	return nullptr;
}
|
||||
|
||||
// Initializes an allocated cache from a seed.
//
// `seed` and `seedSize` are forwarded unchanged to the cache's initialize()
// method. `cache` is dereferenced without a null check, so the result of a
// failed randomx_alloc_cache() must not be passed here.
void randomx_init_cache(randomx_cache *cache, const void *seed, size_t seedSize) {
	cache->initialize(seed, seedSize);
}
|
||||
|
||||
// Destroys a cache object obtained from randomx_alloc_cache().
// Passing nullptr is harmless: deleting a null pointer has no effect.
void randomx_release_cache(randomx_cache* cache) {
	delete cache;
}
|
||||
|
||||
// Creates a dataset object and allocates its memory.
//
// Only RANDOMX_FLAG_LARGE_PAGES is examined: when set, the large-page dataset
// variant is created, otherwise the default variant.
//
// Returns the new dataset, or nullptr when the dataset's allocate() call
// reports failure.
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
	randomx_dataset *result;
	if ((flags & RANDOMX_FLAG_LARGE_PAGES) != 0)
		result = new randomx::DatasetLargePage();
	else
		result = new randomx::DatasetDefault();

	if (result->allocate())
		return result;

	// Allocation failed: release the object and signal the error with nullptr.
	delete result;
	return nullptr;
}
|
||||
|
||||
// Initializes `blockCount` dataset blocks starting at block index `startBlock`.
//
// The work is delegated to the init function supplied by the cache. It receives
// the cache, a pointer to the first block to write (block index scaled by
// randomx::CacheLineSize), and the block bounds startBlock and
// startBlock + blockCount (presumably an exclusive end -- confirm against
// the DatasetInitFunc implementations).
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startBlock, unsigned long blockCount) {
	randomx::DatasetInitFunc initBlocks = cache->getInitFunc();
	auto *firstBlock = dataset->memory + startBlock * randomx::CacheLineSize;
	const unsigned long endBlock = startBlock + blockCount;
	initBlocks(cache, firstBlock, startBlock, endBlock);
}
|
||||
|
||||
// Destroys a dataset object obtained from randomx_alloc_dataset().
// Passing nullptr is harmless: deleting a null pointer has no effect.
void randomx_release_dataset(randomx_dataset *dataset) {
	delete dataset;
}
|
||||
|
||||
// Creates a virtual machine of the variant selected by `flags` and allocates
// its memory.
//
// The four flags combine independently, giving 16 concrete VM classes:
//   RANDOMX_FLAG_FULL_MEM    - non-"Light" VM classes instead of the "Light" ones
//   RANDOMX_FLAG_JIT         - "Compiled" VM classes instead of "Interpreted" ones
//   RANDOMX_FLAG_HARD_AES    - "HardAes" variants
//   RANDOMX_FLAG_LARGE_PAGES - "LargePage" variants
//
// Returns the new machine, or nullptr when the machine's allocate() call
// reports failure.
randomx_vm *randomx_create_vm(randomx_flags flags) {
	const int recognizedFlags = RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES;
	const int variant = flags & recognizedFlags;
	randomx_vm *machine;

	switch (variant) {
		// --- default pages, software AES ---
		case RANDOMX_FLAG_DEFAULT: //0
			machine = new randomx::InterpretedLightVmDefault();
			break;
		case RANDOMX_FLAG_FULL_MEM: //1
			machine = new randomx::InterpretedVmDefault();
			break;
		case RANDOMX_FLAG_JIT: //2
			machine = new randomx::CompiledLightVmDefault();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT: //3
			machine = new randomx::CompiledVmDefault();
			break;

		// --- default pages, hardware AES ---
		case RANDOMX_FLAG_HARD_AES: //4
			machine = new randomx::InterpretedLightVmHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES: //5
			machine = new randomx::InterpretedVmHardAes();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: //6
			machine = new randomx::CompiledLightVmHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: //7
			machine = new randomx::CompiledVmHardAes();
			break;

		// --- large pages, software AES ---
		case RANDOMX_FLAG_LARGE_PAGES: //8
			machine = new randomx::InterpretedLightVmLargePage();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES: //9
			machine = new randomx::InterpretedVmLargePage();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: //10
			machine = new randomx::CompiledLightVmLargePage();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: //11
			machine = new randomx::CompiledVmLargePage();
			break;

		// --- large pages, hardware AES ---
		case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //12
			machine = new randomx::InterpretedLightVmLargePageHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //13
			machine = new randomx::InterpretedVmLargePageHardAes();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //14
			machine = new randomx::CompiledLightVmLargePageHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: //15
			machine = new randomx::CompiledVmLargePageHardAes();
			break;

		// The mask above leaves only the 16 combinations handled here.
		default:
			UNREACHABLE;
	}

	if (machine->allocate())
		return machine;

	// Allocation failed: release the object and signal the error with nullptr.
	delete machine;
	return nullptr;
}
|
||||
|
||||
// Attaches `cache` to the virtual machine by forwarding it to the machine's
// setCache() method. `machine` is dereferenced without a null check.
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
	machine->setCache(cache);
}
|
||||
|
||||
// Attaches `dataset` to the virtual machine by forwarding it to the machine's
// setDataset() method. `machine` is dereferenced without a null check.
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
	machine->setDataset(dataset);
}
|
||||
|
||||
// Destroys a virtual machine obtained from randomx_create_vm().
// Passing nullptr is harmless: deleting a null pointer has no effect.
void randomx_destroy_vm(randomx_vm *machine) {
	delete machine;
}
|
||||
|
||||
// Computes the hash of `input` (`inputSize` bytes) on `machine` and stores
// it in `output`.
//
// Steps, as performed below:
//   1. A 64-byte Blake2b digest of the input becomes the generator state.
//   2. The scratchpad is filled from that state and the rounding mode is reset.
//   3. RANDOMX_PROGRAM_COUNT programs are generated and executed in sequence;
//      after every program except the last, the state is replaced by the
//      Blake2b digest of the machine's register file.
//   4. The final result is written to `output`.
//
// `output` must have room for RANDOMX_HASH_SIZE bytes. The previous code
// requested 64 bytes from getFinalResult(), which overruns a caller buffer
// declared as `char hash[RANDOMX_HASH_SIZE]` (RANDOMX_HASH_SIZE is 32).
// NOTE(review): this assumes the second argument of getFinalResult() is the
// number of output bytes -- confirm against VirtualMachine.
void randomx_calculate_hash(randomx_vm *machine, void *input, size_t inputSize, void *output) {
	alignas(16) uint64_t hash[8];

	// Initial generator state derived from the input.
	blake2b(hash, sizeof(hash), input, inputSize, nullptr, 0);
	machine->generate(&hash, machine->scratchpad, randomx::ScratchpadSize);
	machine->resetRoundingMode();

	// All programs of the chain except the last one feed their register
	// file back into the generator state.
	for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
		machine->generate(&hash, &machine->program, sizeof(randomx::Program));
		machine->initialize();
		machine->execute();
		blake2b(hash, sizeof(hash), &machine->reg, sizeof(machine->reg), nullptr, 0);
	}

	// Last program of the chain: its result is finalized instead of re-hashed.
	machine->generate((void*)hash, &machine->program, sizeof(randomx::Program));
	machine->initialize();
	machine->execute();

	// Write exactly the advertised hash size so the caller's buffer is not overrun.
	machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
|
||||
|
||||
}
|
130
src/randomx.h
Normal file
130
src/randomx.h
Normal file
@ -0,0 +1,130 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef RANDOMX_H
|
||||
#define RANDOMX_H
|
||||
|
||||
/*
|
||||
|
||||
Minimal usage example:
|
||||
----------------------
|
||||
|
||||
#include "randomx.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
const char mySeed[] = "RandomX example seed";
|
||||
const char myInput[] = "RandomX example input";
|
||||
char hash[RANDOMX_HASH_SIZE];
|
||||
|
||||
randomx_cache *myCache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
|
||||
randomx_init_cache(myCache, mySeed, sizeof mySeed);
|
||||
randomx_vm *myMachine = randomx_create_vm(RANDOMX_FLAG_DEFAULT);
|
||||
randomx_vm_set_cache(myMachine, myCache);
|
||||
|
||||
randomx_calculate_hash(myMachine, myInput, sizeof myInput, hash);
|
||||
|
||||
randomx_destroy_vm(myMachine);
|
||||
randomx_release_cache(myCache);
|
||||
|
||||
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
|
||||
printf("%02x", hash[i] & 0xff);
|
||||
|
||||
printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Optimized usage example:
|
||||
------------------------
|
||||
|
||||
#include "randomx.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
const char mySeed[] = "RandomX example seed";
|
||||
const char myInput[] = "RandomX example input";
|
||||
char hash[RANDOMX_HASH_SIZE];
|
||||
|
||||
randomx_cache *myCache = randomx_alloc_cache(RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES);
|
||||
randomx_init_cache(myCache, mySeed, sizeof mySeed);
|
||||
|
||||
randomx_dataset *myDataset = randomx_alloc_dataset(RANDOMX_FLAG_LARGE_PAGES);
|
||||
randomx_init_dataset(myDataset, myCache, 0, RANDOMX_DATASET_BLOCKS);
|
||||
randomx_release_cache(myCache);
|
||||
|
||||
randomx_vm *myMachine = randomx_create_vm(RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES);
|
||||
randomx_vm_set_dataset(myMachine, myDataset);
|
||||
|
||||
randomx_calculate_hash(myMachine, myInput, sizeof myInput, hash);
|
||||
|
||||
randomx_destroy_vm(myMachine);
|
||||
randomx_release_dataset(myDataset);
|
||||
|
||||
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
|
||||
printf("%02x", hash[i] & 0xff);
|
||||
|
||||
printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#define RANDOMX_HASH_SIZE 32
|
||||
#define RANDOMX_DATASET_BLOCKS 33554432UL
|
||||
|
||||
/*
	Option flags for the allocation and creation functions. The values are
	distinct bits and may be combined with bitwise OR.

	RANDOMX_FLAG_DEFAULT     - no option selected
	RANDOMX_FLAG_FULL_MEM    - randomx_create_vm: use the non-"Light" VM classes
	RANDOMX_FLAG_JIT         - randomx_alloc_cache / randomx_create_vm: use the
	                           JIT ("WithJit" cache, "Compiled" VM) classes
	RANDOMX_FLAG_HARD_AES    - randomx_create_vm: use the "HardAes" VM classes
	RANDOMX_FLAG_LARGE_PAGES - all three allocators: use the "LargePage" classes
*/
typedef enum {
	RANDOMX_FLAG_DEFAULT = 0x0,
	RANDOMX_FLAG_FULL_MEM = 0x1,
	RANDOMX_FLAG_JIT = 0x2,
	RANDOMX_FLAG_HARD_AES = 0x4,
	RANDOMX_FLAG_LARGE_PAGES = 0x8
} randomx_flags;
|
||||
|
||||
typedef struct randomx_dataset randomx_dataset;
|
||||
typedef struct randomx_cache randomx_cache;
|
||||
typedef struct randomx_vm randomx_vm;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
randomx_cache *randomx_alloc_cache(randomx_flags flags);
|
||||
void randomx_init_cache(randomx_cache *cache, const void *seed, size_t seedSize);
|
||||
void randomx_release_cache(randomx_cache* cache);
|
||||
|
||||
randomx_dataset *randomx_alloc_dataset(randomx_flags flags);
|
||||
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startBlock, unsigned long blockCount);
|
||||
void randomx_release_dataset(randomx_dataset *dataset);
|
||||
|
||||
randomx_vm *randomx_create_vm(randomx_flags flags);
|
||||
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache);
|
||||
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset);
|
||||
void randomx_destroy_vm(randomx_vm *machine);
|
||||
|
||||
void randomx_calculate_hash(randomx_vm *machine, void *input, size_t inputSize, void *output);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -26,8 +26,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <stdexcept>
|
||||
#include <iomanip>
|
||||
#include "superscalarGenerator.hpp"
|
||||
#include "intrinPortable.h"
|
||||
#include "reciprocal.h"
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
|
||||
static bool isMultiplication(int type) {
|
||||
return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP;
|
||||
@ -842,4 +844,52 @@ namespace RandomX {
|
||||
std::cout << std::endl;
|
||||
}*/
|
||||
}
|
||||
|
||||
// Interprets a superscalar program, updating the eight integer registers in
// `r` in place.
//
// Each instruction names a destination register and, for the two-operand
// forms, a source register; the constant forms take their operand from the
// instruction's 32-bit immediate. For IMUL_RCP the multiplier is read from
// `reciprocals` (indexed by the immediate) when that table is provided, and
// is otherwise computed with reciprocal() from the immediate.
void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals) {
	namespace Op = randomx::SuperscalarInstructionType;

	const unsigned instructionCount = prog.getSize();
	for (unsigned pc = 0; pc < instructionCount; ++pc) {
		Instruction& current = prog(pc);
		// Every opcode writes to the destination register.
		int_reg_t& dst = r[current.dst];

		switch (current.opcode) {
			case Op::ISUB_R:
				dst -= r[current.src];
				break;

			case Op::IXOR_R:
				dst ^= r[current.src];
				break;

			case Op::IADD_RS:
				dst += r[current.src] << current.getModShift2();
				break;

			case Op::IMUL_R:
				dst *= r[current.src];
				break;

			case Op::IROR_C:
				dst = rotr(dst, current.getImm32());
				break;

			// The three IADD_C* opcodes execute identically.
			case Op::IADD_C7:
			case Op::IADD_C8:
			case Op::IADD_C9:
				dst += signExtend2sCompl(current.getImm32());
				break;

			// The three IXOR_C* opcodes execute identically.
			case Op::IXOR_C7:
			case Op::IXOR_C8:
			case Op::IXOR_C9:
				dst ^= signExtend2sCompl(current.getImm32());
				break;

			case Op::IMULH_R:
				dst = mulh(dst, r[current.src]);
				break;

			case Op::ISMULH_R:
				dst = smulh(dst, r[current.src]);
				break;

			case Op::IMUL_RCP:
				if (reciprocals == nullptr)
					dst *= reciprocal(current.getImm32());
				else
					dst *= (*reciprocals)[current.getImm32()];
				break;

			default:
				UNREACHABLE;
		}
	}
}
|
||||
}
|
@ -18,10 +18,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "Program.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "Blake2Generator.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace RandomX {
|
||||
namespace randomx {
|
||||
// Intel Ivy Bridge reference
|
||||
namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
|
||||
constexpr int ISUB_R = 0; //1 p015 1 3 (sub)
|
||||
@ -44,4 +45,5 @@ namespace RandomX {
|
||||
}
|
||||
|
||||
void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
|
||||
void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals = nullptr);
|
||||
}
|
70
src/superscalar_program.hpp
Normal file
70
src/superscalar_program.hpp
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "Instruction.hpp"
|
||||
#include "configuration.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class SuperscalarProgram {
|
||||
public:
|
||||
Instruction& operator()(int pc) {
|
||||
return programBuffer[pc];
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
|
||||
p.print(os);
|
||||
return os;
|
||||
}
|
||||
uint32_t getSize() {
|
||||
return size;
|
||||
}
|
||||
void setSize(uint32_t val) {
|
||||
size = val;
|
||||
}
|
||||
int getAddressRegister() {
|
||||
return addrReg;
|
||||
}
|
||||
void setAddressRegister(uint32_t val) {
|
||||
addrReg = val;
|
||||
}
|
||||
double ipc;
|
||||
int codeSize;
|
||||
int macroOps;
|
||||
int decodeCycles;
|
||||
int cpuLatency;
|
||||
int asicLatency;
|
||||
int mulCount;
|
||||
int cpuLatencies[8];
|
||||
int asicLatencies[8];
|
||||
private:
|
||||
void print(std::ostream& os) const {
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
auto instr = programBuffer[i];
|
||||
os << instr;
|
||||
}
|
||||
}
|
||||
Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE];
|
||||
uint32_t size;
|
||||
int addrReg;
|
||||
};
|
||||
|
||||
}
|
@ -124,20 +124,22 @@
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\src\allocator.cpp" />
|
||||
<ClCompile Include="..\src\argon2_core.c" />
|
||||
<ClCompile Include="..\src\argon2_ref.c" />
|
||||
<ClCompile Include="..\src\AssemblyGeneratorX86.cpp" />
|
||||
<ClCompile Include="..\src\Blake2Generator.cpp" />
|
||||
<ClCompile Include="..\src\blake2\blake2b.c" />
|
||||
<ClCompile Include="..\src\Cache.cpp" />
|
||||
<ClCompile Include="..\src\CompiledLightVirtualMachine.cpp" />
|
||||
<ClCompile Include="..\src\CompiledVirtualMachine.cpp" />
|
||||
<ClCompile Include="..\src\dataset.cpp" />
|
||||
<ClCompile Include="..\src\hashAes1Rx4.cpp" />
|
||||
<ClCompile Include="..\src\Instruction.cpp" />
|
||||
<ClCompile Include="..\src\instructionsPortable.cpp" />
|
||||
<ClCompile Include="..\src\InterpretedLightVirtualMachine.cpp" />
|
||||
<ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
|
||||
<ClCompile Include="..\src\JitCompilerX86.cpp" />
|
||||
<ClCompile Include="..\src\randomx.cpp" />
|
||||
<ClCompile Include="..\src\superscalarGenerator.cpp" />
|
||||
<ClCompile Include="..\src\main.cpp" />
|
||||
<ClCompile Include="..\src\reciprocal.c" />
|
||||
@ -150,11 +152,11 @@
|
||||
<MASM Include="..\src\squareHash.asm" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\src\allocator.hpp" />
|
||||
<ClInclude Include="..\src\argon2.h" />
|
||||
<ClInclude Include="..\src\argon2_core.h" />
|
||||
<ClInclude Include="..\src\AssemblyGeneratorX86.hpp" />
|
||||
<ClInclude Include="..\src\Blake2Generator.hpp" />
|
||||
<ClInclude Include="..\src\Cache.hpp" />
|
||||
<ClInclude Include="..\src\catch.hpp" />
|
||||
<ClInclude Include="..\src\common.hpp" />
|
||||
<ClInclude Include="..\src\CompiledLightVirtualMachine.hpp" />
|
||||
@ -164,16 +166,19 @@
|
||||
<ClInclude Include="..\src\hashAes1Rx4.hpp" />
|
||||
<ClInclude Include="..\src\Instruction.hpp" />
|
||||
<ClInclude Include="..\src\instructionWeights.hpp" />
|
||||
<ClInclude Include="..\src\InterpretedLightVirtualMachine.hpp" />
|
||||
<ClInclude Include="..\src\InterpretedVirtualMachine.hpp" />
|
||||
<ClInclude Include="..\src\intrinPortable.h" />
|
||||
<ClInclude Include="..\src\JitCompilerX86-static.hpp" />
|
||||
<ClInclude Include="..\src\JitCompilerX86.hpp" />
|
||||
<ClInclude Include="..\src\randomx.h" />
|
||||
<ClInclude Include="..\src\superscalarGenerator.hpp" />
|
||||
<ClInclude Include="..\src\Program.hpp" />
|
||||
<ClInclude Include="..\src\reciprocal.h" />
|
||||
<ClInclude Include="..\src\softAes.h" />
|
||||
<ClInclude Include="..\src\squareHash.h" />
|
||||
<ClInclude Include="..\src\Stopwatch.hpp" />
|
||||
<ClInclude Include="..\src\superscalar_program.hpp" />
|
||||
<ClInclude Include="..\src\VirtualMachine.hpp" />
|
||||
<ClInclude Include="..\src\virtualMemory.hpp" />
|
||||
</ItemGroup>
|
||||
|
@ -27,9 +27,6 @@
|
||||
<ClCompile Include="..\src\AssemblyGeneratorX86.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\Cache.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\CompiledLightVirtualMachine.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
@ -75,6 +72,15 @@
|
||||
<ClCompile Include="..\src\superscalarGenerator.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\randomx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\allocator.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\InterpretedLightVirtualMachine.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<MASM Include="..\src\JitCompilerX86-static.asm">
|
||||
@ -94,9 +100,6 @@
|
||||
<ClInclude Include="..\src\AssemblyGeneratorX86.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\Cache.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\catch.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
@ -163,5 +166,17 @@
|
||||
<ClInclude Include="..\src\superscalarGenerator.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\randomx.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\allocator.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\InterpretedLightVirtualMachine.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\superscalar_program.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue
Block a user