mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 15:58:53 +00:00
Interpreter + async mode
This commit is contained in:
parent
a7ffe8c19a
commit
8b1102ee05
292
src/AddressTransform.cpp
Normal file
292
src/AddressTransform.cpp
Normal file
@ -0,0 +1,292 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "common.hpp"
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
class Mul9Transform : public ITransform {
|
||||
public:
|
||||
Mul9Transform(int32_t cc) : c(cc) {
|
||||
std::ostringstream oss;
|
||||
oss << "mul9_" << std::hex << (cc & 255);
|
||||
name = oss.str();
|
||||
}
|
||||
int32_t apply(int32_t x) const override {
|
||||
return 9 * x + c;
|
||||
}
|
||||
const char* getName() const override {
|
||||
return name.c_str();
|
||||
}
|
||||
std::ostream& printAsm(std::ostream& os) const override {
|
||||
os << "lea ecx, [rcx+rcx*8" << std::showpos << c << "]" << std::noshowpos << std::endl;
|
||||
return os;
|
||||
}
|
||||
std::ostream& printCxx(std::ostream& os) const override {
|
||||
os << "static const Mul9Transform " << name << "(" << c << ");" << std::endl;
|
||||
return os;
|
||||
}
|
||||
private:
|
||||
int32_t c;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
class AddTransform : public ITransform {
|
||||
public:
|
||||
AddTransform(int32_t cc) : c(cc) {
|
||||
std::ostringstream oss;
|
||||
oss << "add_" << std::hex << (cc & 255);
|
||||
name = oss.str();
|
||||
}
|
||||
int32_t apply(int32_t x) const override {
|
||||
return x + c;
|
||||
}
|
||||
const char* getName() const override {
|
||||
return name.c_str();
|
||||
}
|
||||
std::ostream& printAsm(std::ostream& os) const override {
|
||||
os << "db 64" << std::endl;
|
||||
os << "add ecx, " << c << std::endl;
|
||||
return os;
|
||||
}
|
||||
std::ostream& printCxx(std::ostream& os) const override {
|
||||
os << "static const AddTransform " << name << "(" << c << ");" << std::endl;
|
||||
return os;
|
||||
}
|
||||
private:
|
||||
int32_t c;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
class XorTransform : public ITransform {
|
||||
public:
|
||||
XorTransform(int32_t cc) : c(cc) {
|
||||
std::ostringstream oss;
|
||||
oss << "xor_" << std::hex << (cc & 255);
|
||||
name = oss.str();
|
||||
}
|
||||
int32_t apply(int32_t x) const override {
|
||||
return x ^ c;
|
||||
}
|
||||
const char* getName() const override {
|
||||
return name.c_str();
|
||||
}
|
||||
std::ostream& printAsm(std::ostream& os) const override {
|
||||
os << "db 64" << std::endl;
|
||||
os << "xor ecx, " << c << std::endl;
|
||||
return os;
|
||||
}
|
||||
std::ostream& printCxx(std::ostream& os) const override {
|
||||
os << "static const XorTransform " << name << "(" << c << ");" << std::endl;
|
||||
return os;
|
||||
}
|
||||
private:
|
||||
int32_t c;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
static const Mul9Transform mul9_6d(109);
|
||||
static const XorTransform xor_60(96);
|
||||
static const Mul9Transform mul9_ed(-19);
|
||||
static const AddTransform add_9e(-98);
|
||||
static const AddTransform add_eb(-21);
|
||||
static const XorTransform xor_b0(-80);
|
||||
static const Mul9Transform mul9_a4(-92);
|
||||
static const AddTransform add_71(113);
|
||||
static const Mul9Transform mul9_64(100);
|
||||
static const AddTransform add_d9(-39);
|
||||
static const XorTransform xor_78(120);
|
||||
static const Mul9Transform mul9_89(-119);
|
||||
static const AddTransform add_8f(-113);
|
||||
static const AddTransform add_6f(111);
|
||||
static const XorTransform xor_68(104);
|
||||
static const Mul9Transform mul9_ad(-83);
|
||||
static const Mul9Transform mul9_7f(127);
|
||||
static const XorTransform xor_90(-112);
|
||||
static const AddTransform add_59(89);
|
||||
static const AddTransform add_e0(-32);
|
||||
static const AddTransform add_68(104);
|
||||
static const XorTransform xor_88(-120);
|
||||
static const XorTransform xor_18(24);
|
||||
static const Mul9Transform mul9_9(9);
|
||||
static const AddTransform add_e1(-31);
|
||||
static const XorTransform xor_f0(-16);
|
||||
static const AddTransform add_44(68);
|
||||
static const Mul9Transform mul9_92(-110);
|
||||
static const XorTransform xor_40(64);
|
||||
static const XorTransform xor_d8(-40);
|
||||
static const XorTransform xor_f8(-8);
|
||||
static const AddTransform add_f6(-10);
|
||||
static const XorTransform xor_e0(-32);
|
||||
static const AddTransform add_e(14);
|
||||
static const Mul9Transform mul9_d2(-46);
|
||||
static const XorTransform xor_98(-104);
|
||||
static const Mul9Transform mul9_24(36);
|
||||
static const AddTransform add_64(100);
|
||||
static const Mul9Transform mul9_bf(-65);
|
||||
static const Mul9Transform mul9_1b(27);
|
||||
static const Mul9Transform mul9_5b(91);
|
||||
static const AddTransform add_9b(-101);
|
||||
static const AddTransform add_a2(-94);
|
||||
static const Mul9Transform mul9_f6(-10);
|
||||
static const XorTransform xor_50(80);
|
||||
static const AddTransform add_94(-108);
|
||||
static const AddTransform add_c6(-58);
|
||||
static const XorTransform xor_30(48);
|
||||
static const Mul9Transform mul9_49(73);
|
||||
static const XorTransform xor_d0(-48);
|
||||
static const XorTransform xor_20(32);
|
||||
static const XorTransform xor_a0(-96);
|
||||
static const AddTransform add_76(118);
|
||||
static const AddTransform add_5b(91);
|
||||
static const Mul9Transform mul9_12(18);
|
||||
static const AddTransform add_f5(-11);
|
||||
static const Mul9Transform mul9_3f(63);
|
||||
static const AddTransform add_72(114);
|
||||
static const Mul9Transform mul9_2d(45);
|
||||
static const AddTransform add_bd(-67);
|
||||
static const AddTransform add_35(53);
|
||||
static const Mul9Transform mul9_9b(-101);
|
||||
static const Mul9Transform mul9_ff(-1);
|
||||
static const XorTransform xor_10(16);
|
||||
static const Mul9Transform mul9_db(-37);
|
||||
static const Mul9Transform mul9_e4(-28);
|
||||
static const Mul9Transform mul9_c9(-55);
|
||||
static const XorTransform xor_a8(-88);
|
||||
static const XorTransform xor_b8(-72);
|
||||
static const AddTransform add_24(36);
|
||||
static const XorTransform xor_c8(-56);
|
||||
static const AddTransform add_74(116);
|
||||
static const XorTransform xor_58(88);
|
||||
static const XorTransform xor_80(-128);
|
||||
static const AddTransform add_32(50);
|
||||
static const AddTransform add_69(105);
|
||||
static const AddTransform add_db(-37);
|
||||
static const XorTransform xor_70(112);
|
||||
static const XorTransform xor_8(8);
|
||||
static const XorTransform xor_e8(-24);
|
||||
static const Mul9Transform mul9_76(118);
|
||||
static const XorTransform xor_48(72);
|
||||
static const XorTransform xor_c0(-64);
|
||||
static const AddTransform add_28(40);
|
||||
static const Mul9Transform mul9_b6(-74);
|
||||
static const Mul9Transform mul9_52(82);
|
||||
static const Mul9Transform mul9_36(54);
|
||||
static const XorTransform xor_38(56);
|
||||
static const XorTransform xor_28(40);
|
||||
static const AddTransform add_57(87);
|
||||
|
||||
// Pool of address transformations; initializeProgram() selects one entry per
// program with a uniformly distributed index in [0, TransformationCount - 1].
// The entries are const objects, so their addresses convert implicitly to
// const ITransform*; no cast (and in particular no const-stripping C-style
// cast) is needed.
const ITransform* InterpretedVirtualMachine::addressTransformations[TransformationCount] = {
	&mul9_6d,
	&xor_60,
	&mul9_ed,
	&add_9e,
	&add_eb,
	&xor_b0,
	&mul9_a4,
	&add_71,
	&mul9_64,
	&add_d9,
	&xor_78,
	&mul9_89,
	&add_8f,
	&add_6f,
	&xor_68,
	&mul9_ad,
	&mul9_7f,
	&xor_90,
	&add_59,
	&add_e0,
	&add_68,
	&xor_88,
	&xor_18,
	&mul9_9,
	&add_e1,
	&xor_f0,
	&add_44,
	&mul9_92,
	&xor_40,
	&xor_d8,
	&xor_f8,
	&add_f6,
	&xor_e0,
	&add_e,
	&mul9_d2,
	&xor_98,
	&mul9_24,
	&add_64,
	&mul9_bf,
	&mul9_1b,
	&mul9_5b,
	&add_9b,
	&add_a2,
	&mul9_f6,
	&xor_50,
	&add_94,
	&add_c6,
	&xor_30,
	&mul9_49,
	&xor_d0,
	&xor_20,
	&xor_a0,
	&add_76,
	&add_5b,
	&mul9_12,
	&add_f5,
	&mul9_3f,
	&add_72,
	&mul9_2d,
	&add_bd,
	&add_35,
	&mul9_9b,
	&mul9_ff,
	&xor_10,
	&mul9_db,
	&mul9_e4,
	&mul9_c9,
	&xor_a8,
	&xor_b8,
	&add_24,
	&xor_c8,
	&add_74,
	&xor_58,
	&xor_80,
	&add_32,
	&add_69,
	&add_db,
	&xor_70,
	&xor_8,
	&xor_e8,
	&mul9_76,
	&xor_48,
	&xor_c0,
	&add_28,
	&mul9_b6,
	&mul9_52,
	&mul9_36,
	&xor_38,
	&xor_28,
	&add_57,
};
|
||||
}
|
@ -67,12 +67,12 @@ namespace RandomX {
|
||||
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rax" << std::endl;
|
||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
asmCode << "\tcall rx_read" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rax" << std::endl;
|
||||
if (instr.loca & 3) {
|
||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ namespace RandomX {
|
||||
return keys;
|
||||
}
|
||||
|
||||
const uint8_t* getCache() {
|
||||
const uint8_t* getCache() const {
|
||||
return memory;
|
||||
}
|
||||
private:
|
||||
|
@ -25,15 +25,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
CompiledVirtualMachine::CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {
|
||||
CompiledVirtualMachine::CompiledVirtualMachine() {
|
||||
totalSize = 0;
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
|
||||
if (lightClient) {
|
||||
throw std::runtime_error("Compiled VM does not support light-client mode");
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds) {
|
||||
mem.ds = ds;
|
||||
}
|
||||
VirtualMachine::setDataset(ds, lightClient);
|
||||
|
||||
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
||||
|
@ -37,8 +37,9 @@ namespace RandomX {
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
CompiledVirtualMachine(bool softAes);
|
||||
void setDataset(dataset_t ds, bool light = false) override;
|
||||
CompiledVirtualMachine();
|
||||
void setDataset(dataset_t ds) override;
|
||||
void initializeScratchpad(uint32_t index) override;
|
||||
void initializeProgram(const void* seed) override;
|
||||
virtual void execute() override;
|
||||
void* getProgram() {
|
||||
|
@ -21,11 +21,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include "InterpretedVirtualMachine.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include "instructions.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "LightClientAsyncWorker.hpp"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
#include <thread>
|
||||
#ifdef STATS
|
||||
#include <algorithm>
|
||||
#endif
|
||||
@ -38,6 +42,57 @@ constexpr bool fpuCheck = false;
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
// Releases the asynchronous light-client worker, which exists only when the
// machine was constructed in async mode.
InterpretedVirtualMachine::~InterpretedVirtualMachine() {
	if (!asyncWorker) {
		return;
	}
	delete mem.ds.asyncWorker;
}
|
||||
|
||||
// Attaches the dataset/cache and selects the matching dataset read routine.
// Synchronous mode stores ds as-is; async mode creates a worker on top of
// ds.cache instead. softAes picks the template variant in both modes.
void InterpretedVirtualMachine::setDataset(dataset_t ds) {
	if (!asyncWorker) {
		// Synchronous light-client mode.
		mem.ds = ds;
		if (softAes)
			readDataset = &datasetReadLight<true>;
		else
			readDataset = &datasetReadLight<false>;
		return;
	}

	// Asynchronous mode: the worker takes the place of the raw cache pointer.
	if (softAes)
		mem.ds.asyncWorker = new LightClientAsyncWorker<true>(ds.cache);
	else
		mem.ds.asyncWorker = new LightClientAsyncWorker<false>(ds.cache);
	readDataset = &datasetReadLightAsync;
}
|
||||
|
||||
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
|
||||
if (asyncWorker) {
|
||||
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;
|
||||
const uint32_t blocksPerThread = (ScratchpadSize / CacheLineSize) / 2;
|
||||
worker->prepareBlocks(scratchpad, startingBlock, blocksPerThread); //async first half
|
||||
worker->getBlocks(scratchpad + ScratchpadLength / 2, startingBlock + blocksPerThread, blocksPerThread); //sync second half
|
||||
worker->sync();
|
||||
}
|
||||
else {
|
||||
auto cache = mem.ds.cache;
|
||||
if (softAes) {
|
||||
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::initializeProgram(const void* seed) {
|
||||
Pcg32 gen(seed);
|
||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||
@ -50,6 +105,7 @@ namespace RandomX {
|
||||
}
|
||||
//std::cout << reg;
|
||||
p.initialize(gen);
|
||||
currentTransform = addressTransformations[gen.getUniform(0, TransformationCount - 1)];
|
||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
||||
mem.mx = *(((uint32_t*)seed) + 5);
|
||||
pc = 0;
|
||||
@ -74,64 +130,64 @@ namespace RandomX {
|
||||
#endif
|
||||
}
|
||||
|
||||
convertible_t InterpretedVirtualMachine::loada(Instruction& inst) {
|
||||
convertible_t& rega = reg.r[inst.rega % RegistersCount];
|
||||
rega.i64 ^= inst.addra; //sign-extend addra
|
||||
convertible_t InterpretedVirtualMachine::loada(Instruction& instr) {
|
||||
convertible_t& rega = reg.r[instr.rega % RegistersCount];
|
||||
rega.i64 ^= instr.addra; //sign-extend addra
|
||||
addr_t addr = rega.u32;
|
||||
switch (inst.loca & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
return readDataset(addr, mem);
|
||||
|
||||
case 4:
|
||||
return scratchpad[addr % ScratchpadL2];
|
||||
if ((ic % 64) == 0) {
|
||||
addr = currentTransform->apply(addr);
|
||||
#ifdef STATS
|
||||
datasetAccess[mem.ma / (DatasetBlockCount / 256) / CacheLineSize]++;
|
||||
#endif
|
||||
readDataset(addr, mem, reg);
|
||||
}
|
||||
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
if ((instr.loca & 192) == 0) {
|
||||
mem.mx ^= addr;
|
||||
}
|
||||
|
||||
if (instr.loca & 3) {
|
||||
return scratchpad[addr % ScratchpadL1];
|
||||
}
|
||||
else {
|
||||
return scratchpad[addr % ScratchpadL2];
|
||||
}
|
||||
}
|
||||
|
||||
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
|
||||
switch (inst.locb & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
return reg.r[inst.regb % RegistersCount];
|
||||
case 6:
|
||||
case 7:
|
||||
convertible_t InterpretedVirtualMachine::loadbia(Instruction& instr) {
|
||||
if (instr.locb & 3) {
|
||||
return reg.r[instr.regb % RegistersCount];
|
||||
}
|
||||
else {
|
||||
convertible_t temp;
|
||||
temp.i64 = inst.imm32; //sign-extend imm32
|
||||
temp.i64 = instr.imm32; //sign-extend imm32
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
|
||||
convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) {
|
||||
switch (inst.locb & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
return reg.r[inst.regb % RegistersCount];
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
convertible_t InterpretedVirtualMachine::loadbiashift(Instruction& instr) {
|
||||
if (instr.locb & 1) {
|
||||
return reg.r[instr.regb % RegistersCount];
|
||||
}
|
||||
else {
|
||||
convertible_t temp;
|
||||
temp.u64 = inst.imm8;
|
||||
temp.u64 = instr.imm8;
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
|
||||
convertible_t InterpretedVirtualMachine::loadbiadiv(Instruction& instr) {
|
||||
if (instr.locb & 3) {
|
||||
convertible_t temp;
|
||||
temp.u64 = instr.imm32;
|
||||
return temp;
|
||||
}
|
||||
else {
|
||||
return reg.r[instr.regb % RegistersCount];
|
||||
}
|
||||
}
|
||||
|
||||
convertible_t& InterpretedVirtualMachine::getcr(Instruction& inst) {
|
||||
addr_t addr;
|
||||
switch (inst.locc & 7)
|
||||
@ -174,26 +230,6 @@ namespace RandomX {
|
||||
}
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::writecflo(Instruction& inst, fpu_reg_t& regc) {
|
||||
addr_t addr;
|
||||
switch (inst.locc & 7)
|
||||
{
|
||||
case 4:
|
||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
||||
scratchpad[addr % ScratchpadL2] = regc.lo;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
|
||||
scratchpad[addr % ScratchpadL1] = regc.lo;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#define ALU_RETIRE(x) x(a, b, c); \
|
||||
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
|
||||
|
||||
@ -242,7 +278,7 @@ namespace RandomX {
|
||||
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
||||
INC_COUNT(x) \
|
||||
convertible_t a = loada(inst); \
|
||||
convertible_t b = loadbr1(inst); \
|
||||
convertible_t b = loadbia(inst); \
|
||||
convertible_t& c = getcr(inst); \
|
||||
ALU_RETIRE(x) \
|
||||
}
|
||||
@ -250,7 +286,15 @@ namespace RandomX {
|
||||
#define ALU_INST_SR(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
||||
INC_COUNT(x) \
|
||||
convertible_t a = loada(inst); \
|
||||
convertible_t b = loadbr0(inst); \
|
||||
convertible_t b = loadbiashift(inst); \
|
||||
convertible_t& c = getcr(inst); \
|
||||
ALU_RETIRE(x) \
|
||||
}
|
||||
|
||||
#define ALU_INST_DIV(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
||||
INC_COUNT(x) \
|
||||
convertible_t a = loada(inst); \
|
||||
convertible_t b = loadbiadiv(inst); \
|
||||
convertible_t& c = getcr(inst); \
|
||||
ALU_RETIRE(x) \
|
||||
}
|
||||
@ -282,8 +326,8 @@ namespace RandomX {
|
||||
ALU_INST(MUL_32)
|
||||
ALU_INST(IMUL_32)
|
||||
ALU_INST(IMULH_64)
|
||||
ALU_INST(DIV_64)
|
||||
ALU_INST(IDIV_64)
|
||||
ALU_INST_DIV(DIV_64)
|
||||
ALU_INST_DIV(IDIV_64)
|
||||
ALU_INST(AND_64)
|
||||
ALU_INST(AND_32)
|
||||
ALU_INST(OR_64)
|
||||
@ -301,42 +345,68 @@ namespace RandomX {
|
||||
FPU_INST(FPSUB)
|
||||
FPU_INST(FPMUL)
|
||||
FPU_INST(FPDIV)
|
||||
|
||||
FPU_INST_NB(FPSQRT)
|
||||
FPU_INST_NB(FPROUND)
|
||||
|
||||
void InterpretedVirtualMachine::h_FPROUND(Instruction& inst) {
|
||||
convertible_t a = loada(inst);
|
||||
convertible_t& c = getcr(inst);
|
||||
c.u64 = a.u64;
|
||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||
FPROUND(a, inst.imm8);
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::h_JUMP(Instruction& inst) {
|
||||
convertible_t a = loada(inst);
|
||||
convertible_t& c = getcr(inst);
|
||||
c.u64 = a.u64;
|
||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
||||
#ifdef STATS
|
||||
count_JUMP_taken++;
|
||||
count_jump_taken[inst.locb & 7]++;
|
||||
#endif
|
||||
pc += (inst.imm8 & 127) + 1;
|
||||
pc = pc % ProgramLength;
|
||||
}
|
||||
#ifdef STATS
|
||||
else {
|
||||
count_JUMP_not_taken++;
|
||||
count_jump_not_taken[inst.locb & 7]++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
|
||||
convertible_t a = loada(inst);
|
||||
convertible_t& c = getcr(inst);
|
||||
c.u64 = a.u64;
|
||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
||||
#ifdef STATS
|
||||
count_CALL_taken++;
|
||||
count_jump_taken[inst.locb & 7]++;
|
||||
count_retdepth = std::max(0, count_retdepth - 1);
|
||||
#endif
|
||||
stackPush(a);
|
||||
stackPush(pc);
|
||||
#ifdef STATS
|
||||
count_max_stack = std::max(count_max_stack, (int)stack.size());
|
||||
#endif
|
||||
pc += (inst.imm8 & 127) + 1;
|
||||
pc = pc % ProgramLength;
|
||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||
}
|
||||
else {
|
||||
convertible_t& c = getcr(inst);
|
||||
#ifdef STATS
|
||||
else {
|
||||
count_CALL_not_taken++;
|
||||
count_jump_not_taken[inst.locb & 7]++;
|
||||
#endif
|
||||
c.u64 = a.u64;
|
||||
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void InterpretedVirtualMachine::h_RET(Instruction& inst) {
|
||||
convertible_t a = loada(inst);
|
||||
convertible_t b = loadbr1(inst);
|
||||
convertible_t& c = getcr(inst);
|
||||
c.u64 = a.u64;
|
||||
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||
if (stack.size() > 0) {
|
||||
#ifdef STATS
|
||||
count_RET_taken++;
|
||||
@ -344,22 +414,13 @@ namespace RandomX {
|
||||
count_retdepth_max = std::max(count_retdepth_max, count_retdepth);
|
||||
#endif
|
||||
auto raddr = stackPopAddress();
|
||||
auto retval = stackPopValue();
|
||||
c.u64 = a.u64 ^ retval.u64;
|
||||
pc = raddr;
|
||||
}
|
||||
else {
|
||||
#ifdef STATS
|
||||
if (stack.size() == 0)
|
||||
count_RET_stack_empty++;
|
||||
else {
|
||||
count_RET_not_taken++;
|
||||
count_jump_not_taken[inst.locb & 7]++;
|
||||
count_RET_stack_empty++;
|
||||
}
|
||||
#endif
|
||||
c.u64 = a.u64;
|
||||
}
|
||||
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||
}
|
||||
|
||||
#include "instructionWeights.hpp"
|
||||
@ -394,6 +455,7 @@ namespace RandomX {
|
||||
INST_HANDLE(FPDIV)
|
||||
INST_HANDLE(FPSQRT)
|
||||
INST_HANDLE(FPROUND)
|
||||
INST_HANDLE(JUMP)
|
||||
INST_HANDLE(CALL)
|
||||
INST_HANDLE(RET)
|
||||
};
|
||||
|
@ -25,23 +25,37 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
// Interface of an address transformation: a mapping of a 32-bit value that
// can also describe itself as assembly or as a C++ declaration.
class ITransform {
public:
	// Polymorphic base: a virtual destructor makes destruction through an
	// ITransform pointer well-defined.
	virtual ~ITransform() = default;
	// Applies the transformation to a 32-bit value.
	virtual int32_t apply(int32_t) const = 0;
	// Returns the identifier of this transformation.
	virtual const char* getName() const = 0;
	// Writes the assembly form of the transformation to the stream.
	virtual std::ostream& printAsm(std::ostream&) const = 0;
	// Writes a C++ declaration recreating the transformation to the stream.
	virtual std::ostream& printCxx(std::ostream&) const = 0;
};
|
||||
|
||||
class InterpretedVirtualMachine;
|
||||
|
||||
typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
|
||||
|
||||
class InterpretedVirtualMachine : public VirtualMachine {
|
||||
public:
|
||||
InterpretedVirtualMachine(bool softAes) : VirtualMachine(softAes) {}
|
||||
virtual void initializeProgram(const void* seed) override;
|
||||
virtual void execute() override;
|
||||
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
||||
~InterpretedVirtualMachine();
|
||||
void setDataset(dataset_t ds) override;
|
||||
void initializeScratchpad(uint32_t index) override;
|
||||
void initializeProgram(const void* seed) override;
|
||||
void execute() override;
|
||||
const Program& getProgam() {
|
||||
return p;
|
||||
}
|
||||
private:
|
||||
static InstructionHandler engine[256];
|
||||
static const ITransform* addressTransformations[TransformationCount];
|
||||
bool softAes, asyncWorker;
|
||||
Program p;
|
||||
std::vector<convertible_t> stack;
|
||||
uint64_t pc, ic;
|
||||
const ITransform* currentTransform;
|
||||
#ifdef STATS
|
||||
int count_ADD_64 = 0;
|
||||
int count_ADD_32 = 0;
|
||||
@ -71,11 +85,12 @@ namespace RandomX {
|
||||
int count_FPDIV = 0;
|
||||
int count_FPSQRT = 0;
|
||||
int count_FPROUND = 0;
|
||||
int count_JUMP_taken = 0;
|
||||
int count_JUMP_not_taken = 0;
|
||||
int count_CALL_taken = 0;
|
||||
int count_CALL_not_taken = 0;
|
||||
int count_RET_stack_empty = 0;
|
||||
int count_RET_taken = 0;
|
||||
int count_RET_not_taken = 0;
|
||||
int count_jump_taken[8] = { 0 };
|
||||
int count_jump_not_taken[8] = { 0 };
|
||||
int count_max_stack = 0;
|
||||
@ -89,14 +104,15 @@ namespace RandomX {
|
||||
int count_FPSUB_nop2 = 0;
|
||||
int count_FPMUL_nop = 0;
|
||||
int count_FPMUL_nop2 = 0;
|
||||
int datasetAccess[256] = { 0 };
|
||||
#endif
|
||||
|
||||
convertible_t loada(Instruction&);
|
||||
convertible_t loadbr0(Instruction&);
|
||||
convertible_t loadbr1(Instruction&);
|
||||
convertible_t loadbiashift(Instruction&);
|
||||
convertible_t loadbiadiv(Instruction&);
|
||||
convertible_t loadbia(Instruction&);
|
||||
convertible_t& getcr(Instruction&);
|
||||
void writecf(Instruction&, fpu_reg_t&);
|
||||
void writecflo(Instruction&, fpu_reg_t&);
|
||||
|
||||
void stackPush(convertible_t& c) {
|
||||
stack.push_back(c);
|
||||
@ -148,6 +164,7 @@ namespace RandomX {
|
||||
void h_FPDIV(Instruction&);
|
||||
void h_FPSQRT(Instruction&);
|
||||
void h_FPROUND(Instruction&);
|
||||
void h_JUMP(Instruction&);
|
||||
void h_CALL(Instruction&);
|
||||
void h_RET(Instruction&);
|
||||
};
|
||||
|
@ -170,13 +170,13 @@ namespace RandomX {
|
||||
emit(instr.addra);
|
||||
emit(uint16_t(0x8b41)); //mov
|
||||
emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
|
||||
emit(0x753fc3f6); //test bl,0x3f; jne
|
||||
emit(uint16_t(0xe805));
|
||||
emit(readDatasetOffset - (codePos + 4));
|
||||
if ((instr.loca & 192) == 0) { //A.LOC.X
|
||||
emit(uint16_t(0x3348));
|
||||
emitByte(0xe8); //xor rbp, rax
|
||||
}
|
||||
emit(0x753fc3f6); //test bl,0x3f; jne
|
||||
emit(uint16_t(0xe805));
|
||||
emit(readDatasetOffset - (codePos + 4));
|
||||
emitByte(0x25); //and eax,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
|
94
src/LightClientAsyncWorker.cpp
Normal file
94
src/LightClientAsyncWorker.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "LightClientAsyncWorker.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
template<bool softAes>
|
||||
LightClientAsyncWorker<softAes>::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), workerThread(&LightClientAsyncWorker::runWorker, this) {
|
||||
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::prepareBlock(addr_t addr) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
startBlock = addr / CacheLineSize;
|
||||
blockCount = 1;
|
||||
output = currentLine.data();
|
||||
hasWork = true;
|
||||
}
|
||||
notifier.notify_all();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
const uint64_t* LightClientAsyncWorker<softAes>::getBlock(addr_t addr) {
|
||||
uint32_t currentBlock = addr / CacheLineSize;
|
||||
if (currentBlock != startBlock || output != currentLine.data()) {
|
||||
initBlock<softAes>(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock, cache->getKeys());
|
||||
}
|
||||
else {
|
||||
sync();
|
||||
}
|
||||
return currentLine.data();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
startBlock = startBlock;
|
||||
blockCount = blockCount;
|
||||
output = out;
|
||||
hasWork = true;
|
||||
}
|
||||
notifier.notify_all();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
|
||||
for (uint32_t i = 0; i < blockCount; ++i) {
|
||||
initBlock<softAes>(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::sync() {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
notifier.wait(lk, [this] { return !hasWork; });
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void LightClientAsyncWorker<softAes>::runWorker() {
|
||||
for (;;) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
notifier.wait(lk, [this] { return hasWork; });
|
||||
getBlocks(output, startBlock, blockCount);
|
||||
hasWork = false;
|
||||
lk.unlock();
|
||||
notifier.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiations: the member functions are defined in this file, so
// both variants used by the virtual machine are generated here.
template class LightClientAsyncWorker<false>;
template class LightClientAsyncWorker<true>;
|
||||
}
|
52
src/LightClientAsyncWorker.hpp
Normal file
52
src/LightClientAsyncWorker.hpp
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
Copyright (c) 2019 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <array>
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
class Cache;
|
||||
|
||||
using DatasetLine = std::array<uint64_t, CacheLineSize / sizeof(uint64_t)>;
|
||||
|
||||
template<bool softAes>
|
||||
class LightClientAsyncWorker : public ILightClientAsyncWorker {
|
||||
public:
|
||||
LightClientAsyncWorker(const Cache*);
|
||||
void prepareBlock(addr_t) final;
|
||||
void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
|
||||
const uint64_t* getBlock(addr_t) final;
|
||||
void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
|
||||
void sync() final;
|
||||
private:
|
||||
void runWorker();
|
||||
std::condition_variable notifier;
|
||||
std::mutex mutex;
|
||||
DatasetLine currentLine;
|
||||
void* output;
|
||||
uint32_t startBlock, blockCount;
|
||||
bool hasWork;
|
||||
std::thread workerThread;
|
||||
};
|
||||
}
|
@ -19,8 +19,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "VirtualMachine.hpp"
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "t1ha/t1ha.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include <cstring>
|
||||
@ -37,62 +35,10 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
|
||||
VirtualMachine::VirtualMachine() {
|
||||
mem.ds.dataset = nullptr;
|
||||
}
|
||||
|
||||
VirtualMachine::~VirtualMachine() {
|
||||
if (lightClient) {
|
||||
delete mem.ds.lightDataset->block;
|
||||
delete mem.ds.lightDataset;
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualMachine::setDataset(dataset_t ds, bool light) {
|
||||
if (mem.ds.dataset != nullptr) {
|
||||
throw std::runtime_error("Dataset is already initialized");
|
||||
}
|
||||
lightClient = light;
|
||||
if (light) {
|
||||
auto lds = mem.ds.lightDataset = new LightClientDataset();
|
||||
lds->cache = ds.cache;
|
||||
//lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||
lds->blockNumber = -1;
|
||||
if (lds->block == nullptr) {
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
if (softAes) {
|
||||
readDataset = &datasetReadLight<true>;
|
||||
}
|
||||
else {
|
||||
readDataset = &datasetReadLight<false>;
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetRead;
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
if (lightClient) {
|
||||
auto cache = mem.ds.lightDataset->cache;
|
||||
if (softAes) {
|
||||
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ScratchpadSize / CacheLineSize; ++i) {
|
||||
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + CacheLineSize * i, (ScratchpadSize / CacheLineSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualMachine::getResult(void* out) {
|
||||
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2;
|
||||
uint64_t smallState[smallStateLength];
|
||||
|
@ -25,10 +25,10 @@ namespace RandomX {
|
||||
|
||||
class VirtualMachine {
|
||||
public:
|
||||
VirtualMachine(bool softAes);
|
||||
virtual ~VirtualMachine();
|
||||
virtual void setDataset(dataset_t ds, bool light = false);
|
||||
void initializeScratchpad(uint32_t index);
|
||||
VirtualMachine();
|
||||
virtual ~VirtualMachine() {}
|
||||
virtual void setDataset(dataset_t ds) = 0;
|
||||
virtual void initializeScratchpad(uint32_t index) = 0;
|
||||
virtual void initializeProgram(const void* seed) = 0;
|
||||
virtual void execute() = 0;
|
||||
void getResult(void*);
|
||||
@ -36,7 +36,6 @@ namespace RandomX {
|
||||
return reg;
|
||||
}
|
||||
protected:
|
||||
bool softAes, lightClient;
|
||||
DatasetReadFunc readDataset;
|
||||
alignas(16) RegisterFile reg;
|
||||
MemoryRegisters mem;
|
||||
|
@ -38,7 +38,7 @@ namespace RandomX {
|
||||
constexpr int CacheLineSize = 64;
|
||||
constexpr int BlockExpansionRatio = 64;
|
||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||
constexpr int DatasetIterations = 64;
|
||||
constexpr int DatasetIterations = 16;
|
||||
constexpr uint32_t CacheSize = CacheBlockCount * CacheLineSize;
|
||||
constexpr uint64_t DatasetSize = (uint64_t)CacheSize * BlockExpansionRatio;
|
||||
|
||||
@ -86,16 +86,25 @@ namespace RandomX {
|
||||
return i % RandomX::ProgramLength;
|
||||
}
|
||||
|
||||
struct LightClientDataset {
|
||||
Cache* cache;
|
||||
uint8_t* block;
|
||||
uint32_t blockNumber;
|
||||
class ILightClientAsyncWorker {
|
||||
public:
|
||||
virtual void prepareBlock(addr_t) = 0;
|
||||
virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
|
||||
virtual const uint64_t* getBlock(addr_t) = 0;
|
||||
virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
|
||||
virtual void sync() = 0;
|
||||
const Cache* getCache() {
|
||||
return cache;
|
||||
}
|
||||
protected:
|
||||
ILightClientAsyncWorker(const Cache* c) : cache(c) {}
|
||||
const Cache* cache;
|
||||
};
|
||||
|
||||
union dataset_t {
|
||||
uint8_t* dataset;
|
||||
Cache* cache;
|
||||
LightClientDataset* lightDataset;
|
||||
ILightClientAsyncWorker* asyncWorker;
|
||||
};
|
||||
|
||||
struct MemoryRegisters {
|
||||
@ -112,7 +121,7 @@ namespace RandomX {
|
||||
|
||||
static_assert(sizeof(RegisterFile) == 3 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile");
|
||||
|
||||
typedef convertible_t(*DatasetReadFunc)(addr_t, MemoryRegisters&);
|
||||
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
|
||||
|
||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);
|
||||
|
||||
|
@ -30,7 +30,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <wmmintrin.h>
|
||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
|
||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_NTA)
|
||||
#else
|
||||
#define PREFETCH(memory)
|
||||
#endif
|
||||
@ -106,32 +106,44 @@ namespace RandomX {
|
||||
template
|
||||
void initBlock<false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||
|
||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
||||
convertible_t data;
|
||||
data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
|
||||
memory.ma += 8;
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||
uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset + memory.ma);
|
||||
memory.mx ^= addr;
|
||||
if ((memory.mx & 0xFFF8) == 0) {
|
||||
memory.ma = memory.mx & ~7;
|
||||
memory.mx &= -64; //align to cache line
|
||||
std::swap(memory.mx, memory.ma);
|
||||
PREFETCH(memory);
|
||||
}
|
||||
return data;
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg.r[i].u64 ^= datasetLine[i];
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
||||
convertible_t data;
|
||||
LightClientDataset* lds = memory.ds.lightDataset;
|
||||
auto blockNumber = memory.ma / CacheLineSize;
|
||||
|
||||
return data;
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||
Cache* cache = memory.ds.cache;
|
||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||
initBlock<softAes>(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg.r[i].u64 ^= datasetLine[i];
|
||||
memory.mx ^= addr;
|
||||
memory.mx &= -64; //align to cache line
|
||||
std::swap(memory.mx, memory.ma);
|
||||
}
|
||||
|
||||
template
|
||||
convertible_t datasetReadLight<false>(addr_t addr, MemoryRegisters& memory);
|
||||
void datasetReadLight<false>(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
|
||||
|
||||
template
|
||||
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
||||
void datasetReadLight<true>(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
||||
ILightClientAsyncWorker* aw = memory.ds.asyncWorker;
|
||||
const uint64_t* datasetLine = aw->getBlock(memory.ma);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
reg.r[i].u64 ^= datasetLine[i];
|
||||
memory.mx ^= addr;
|
||||
memory.mx &= -64; //align to cache line
|
||||
std::swap(memory.mx, memory.ma);
|
||||
aw->prepareBlock(memory.ma);
|
||||
}
|
||||
|
||||
void datasetAlloc(dataset_t& ds, bool largePages) {
|
||||
if (sizeof(size_t) <= 4)
|
||||
|
@ -40,12 +40,14 @@ namespace RandomX {
|
||||
template<bool softAes>
|
||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
||||
|
||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
|
||||
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset);
|
||||
|
||||
template<bool softAes>
|
||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
|
||||
void datasetReadLight(addr_t addr, MemoryRegisters& memory, RegisterFile&);
|
||||
|
||||
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, RegisterFile& reg);
|
||||
}
|
||||
|
||||
|
@ -22,12 +22,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
//Clears the 11 least-significant bits before conversion. This is done so the number
|
||||
//fits exactly into the 52-bit mantissa without rounding.
|
||||
inline double convertSigned52(int64_t x) {
|
||||
return (double)(x & -2048L);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||
@ -53,11 +47,11 @@ namespace RandomX {
|
||||
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||
bool JMP_COND(uint8_t, convertible_t&, int32_t);
|
||||
void FPINIT();
|
||||
void FPROUND(convertible_t, uint8_t);
|
||||
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c);
|
||||
}
|
||||
}
|
@ -370,9 +370,9 @@ namespace RandomX {
|
||||
#endif
|
||||
}
|
||||
|
||||
void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||
c.lo.f64 = convertSigned52(a.i64);
|
||||
switch (a.u64 & 3) {
|
||||
void FPROUND(convertible_t a, uint8_t rot) {
|
||||
uint64_t flag = ror64(a.u64, rot);
|
||||
switch (flag & 3) {
|
||||
case RoundDown:
|
||||
#ifdef DEBUG
|
||||
std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " <<
|
||||
|
14
src/main.cpp
14
src/main.cpp
@ -162,7 +162,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, lightClient, genAsm, compiled, help, largePages;
|
||||
bool softAes, lightClient, genAsm, compiled, help, largePages, async;
|
||||
int programCount, threadCount;
|
||||
readOption("--help", argc, argv, help);
|
||||
|
||||
@ -178,6 +178,7 @@ int main(int argc, char** argv) {
|
||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||
readOption("--largePages", argc, argv, largePages);
|
||||
readOption("--async", argc, argv, async);
|
||||
|
||||
if (genAsm) {
|
||||
generateAsm(programCount);
|
||||
@ -250,12 +251,12 @@ int main(int argc, char** argv) {
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
RandomX::VirtualMachine* vm;
|
||||
if (compiled) {
|
||||
vm = new RandomX::CompiledVirtualMachine(softAes);
|
||||
vm = new RandomX::CompiledVirtualMachine();
|
||||
}
|
||||
else {
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes, async);
|
||||
}
|
||||
vm->setDataset(dataset, lightClient);
|
||||
vm->setDataset(dataset);
|
||||
vms.push_back(vm);
|
||||
}
|
||||
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
||||
@ -278,8 +279,13 @@ int main(int argc, char** argv) {
|
||||
result.print(std::cout);
|
||||
if(programCount == 1000)
|
||||
std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl;
|
||||
if (lightClient) {
|
||||
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per program" << std::endl;
|
||||
}
|
||||
else {
|
||||
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << "ERROR: " << e.what() << std::endl;
|
||||
return 1;
|
||||
|
Loading…
Reference in New Issue
Block a user