Port mapping

This commit is contained in:
tevador 2019-03-31 21:22:36 +02:00
parent 2fd0a125b5
commit acef5ea0d7

View File

@ -23,6 +23,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "blake2/endian.h"; #include "blake2/endian.h";
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <algorithm>
#include <stdexcept>
namespace RandomX { namespace RandomX {
// Intel Ivy Bridge reference // Intel Ivy Bridge reference
@ -101,9 +103,9 @@ namespace RandomX {
constexpr type Null = 0; constexpr type Null = 0;
constexpr type P0 = 1; constexpr type P0 = 1;
constexpr type P1 = 2; constexpr type P1 = 2;
constexpr type P5 = 4; constexpr type P5 = 3;
constexpr type P05 = 6; constexpr type P05 = 4;
constexpr type P015 = 7; constexpr type P015 = 5;
} }
class Blake2Generator { class Blake2Generator {
@ -138,6 +140,15 @@ namespace RandomX {
} }
}; };
// Plain bookkeeping record kept per register by the light-program generator.
// A freshly constructed record has no op group and no source recorded (-1)
// and a zero value and latency.
class RegisterInfo {
public:
	RegisterInfo() {}
	int lastOpGroup = -1;	// -1 until an op group is recorded
	int source = -1;	// -1 until a source is recorded
	int value = 0;
	int latency = 0;	// compared against the current cycle by the generator
};
class MacroOp { class MacroOp {
public: public:
MacroOp(const char* name, int size) MacroOp(const char* name, int size)
@ -146,6 +157,8 @@ namespace RandomX {
: name_(name), size_(size), latency_(latency), uop1_(uop), uop2_(ExecutionPort::Null) {} : name_(name), size_(size), latency_(latency), uop1_(uop), uop2_(ExecutionPort::Null) {}
MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2) MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2)
: name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {} : name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {}
MacroOp(const MacroOp& parent, bool dependent)
: name_(parent.name_), size_(parent.size_), latency_(parent.latency_), uop1_(parent.uop1_), uop2_(parent.uop2_), dependent_(dependent) {}
const char* getName() const { const char* getName() const {
return name_; return name_;
} }
@ -167,6 +180,27 @@ namespace RandomX {
bool isEliminated() const { bool isEliminated() const {
return uop1_ == ExecutionPort::Null; return uop1_ == ExecutionPort::Null;
} }
bool isDependent() const {
return dependent_;
}
int getCycle() const {
return cycle_;
}
void setCycle(int cycle) {
cycle_ = cycle;
}
MacroOp* getSrcDep() const {
return depSrc_;
}
void setSrcDep(MacroOp* src) {
depSrc_ = src;
}
MacroOp* getDstDep() const {
return depDst_;
}
void setDstDep(MacroOp* dst) {
depDst_ = dst;
}
static const MacroOp Add_rr; static const MacroOp Add_rr;
static const MacroOp Add_ri; static const MacroOp Add_ri;
static const MacroOp Lea_sib; static const MacroOp Lea_sib;
@ -191,6 +225,10 @@ namespace RandomX {
int latency_; int latency_;
ExecutionPort::type uop1_; ExecutionPort::type uop1_;
ExecutionPort::type uop2_; ExecutionPort::type uop2_;
int cycle_;
bool dependent_ = false;
MacroOp* depDst_ = nullptr;
MacroOp* depSrc_ = nullptr;
}; };
const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015); const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015);
@ -212,49 +250,56 @@ namespace RandomX {
const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05); const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05);
const MacroOp MacroOp::TestJmp_fused = MacroOp("testjmp r,i", 13, 0, ExecutionPort::P5); const MacroOp MacroOp::TestJmp_fused = MacroOp("testjmp r,i", 13, 0, ExecutionPort::P5);
template <typename T, size_t N> const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr };
T* begin(T(&arr)[N]) { return &arr[0]; } const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
template <typename T, size_t N> const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
T* end(T(&arr)[N]) { return &arr[0] + N; } const MacroOp IROR_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Ror_rcl };
const MacroOp COND_R_ops_array[] = { MacroOp::Add_ri, MacroOp(MacroOp::TestJmp_fused, true), MacroOp::Xor_self, MacroOp::Cmp_ri, MacroOp(MacroOp::Setcc_r, true), MacroOp(MacroOp::Add_rr, true) };
const MacroOp* IMULH_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Mul_r, &MacroOp::Mov_rr };
const MacroOp* ISMULH_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Imul_r, &MacroOp::Mov_rr };
const MacroOp* IMUL_RCP_ops_array[] = { &MacroOp::Mov_ri64, &MacroOp::Imul_rr };
const MacroOp* IROR_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Ror_rcl };
const MacroOp* COND_R_ops_array[] = { &MacroOp::Add_ri, &MacroOp::TestJmp_fused, &MacroOp::Xor_self, &MacroOp::Cmp_ri, &MacroOp::Setcc_r, &MacroOp::Add_rr };
class LightInstructionInfo { class LightInstructionInfo {
public: public:
LightInstructionInfo(const char* name, const MacroOp* op) LightInstructionInfo(const char* name, int type, const MacroOp& op)
: name_(name), op_(op), opsCount_(1), latency_(op->getLatency()) {} : name_(name), type_(type), latency_(op.getLatency()) {
ops_.push_back(MacroOp(op));
}
template <size_t N> template <size_t N>
LightInstructionInfo(const char* name, const MacroOp*(&arr)[N]) LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N])
: name_(name), ops_(arr), opsCount_(N), latency_(0) { : name_(name), type_(type), latency_(0) {
for (unsigned i = 0; i < N; ++i) { for (unsigned i = 0; i < N; ++i) {
latency_ += arr[i]->getLatency(); ops_.push_back(MacroOp(arr[i]));
latency_ += ops_.back().getLatency();
} }
static_assert(N > 1, "Invalid array size"); static_assert(N > 1, "Invalid array size");
} }
template <size_t N> template <size_t N>
LightInstructionInfo(const char* name, const MacroOp*(&arr)[N], int latency) LightInstructionInfo(const char* name, int type, const MacroOp*(&arr)[N], int latency)
: name_(name), ops_(arr), opsCount_(N), latency_(latency) { : name_(name), type_(type), latency_(latency) {
for (unsigned i = 0; i < N; ++i) {
ops_.push_back(MacroOp(arr[i]));
if (arr[i].isDependent()) {
ops_[i].setSrcDep(&ops_[i - 1]);
}
}
static_assert(N > 1, "Invalid array size"); static_assert(N > 1, "Invalid array size");
} }
const char* getName() const { const char* getName() const {
return name_; return name_;
} }
int getSize() const { int getSize() const {
return opsCount_; return ops_.size();
} }
bool isSimple() const { bool isSimple() const {
return opsCount_ == 1; return getSize() == 1;
} }
int getLatency() const { int getLatency() const {
return latency_; return latency_;
} }
const MacroOp* getOp(int index) const { MacroOp& getOp(int index) {
return opsCount_ > 1 ? ops_[index] : op_; return ops_[index];
}
int getType() const {
return type_;
} }
static const LightInstructionInfo IADD_R; static const LightInstructionInfo IADD_R;
static const LightInstructionInfo IADD_C; static const LightInstructionInfo IADD_C;
@ -274,32 +319,29 @@ namespace RandomX {
static const LightInstructionInfo NOP; static const LightInstructionInfo NOP;
private: private:
const char* name_; const char* name_;
union { int type_;
const MacroOp** ops_; std::vector<MacroOp> ops_;
const MacroOp* op_;
};
int opsCount_;
int latency_; int latency_;
LightInstructionInfo(const char* name) LightInstructionInfo(const char* name)
: name_(name), opsCount_(0), latency_(0) {} : name_(name), type_(-1), latency_(0) {}
}; };
const LightInstructionInfo LightInstructionInfo::IADD_R = LightInstructionInfo("IADD_R", &MacroOp::Add_rr); const LightInstructionInfo LightInstructionInfo::IADD_R = LightInstructionInfo("IADD_R", LightInstructionType::IADD_R, MacroOp::Add_rr);
const LightInstructionInfo LightInstructionInfo::IADD_C = LightInstructionInfo("IADD_C", &MacroOp::Add_ri); const LightInstructionInfo LightInstructionInfo::IADD_C = LightInstructionInfo("IADD_C", LightInstructionType::IADD_C, MacroOp::Add_ri);
const LightInstructionInfo LightInstructionInfo::IADD_RC = LightInstructionInfo("IADD_RC", &MacroOp::Lea_sib); const LightInstructionInfo LightInstructionInfo::IADD_RC = LightInstructionInfo("IADD_RC", LightInstructionType::IADD_RC, MacroOp::Lea_sib);
const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", &MacroOp::Sub_rr); const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", LightInstructionType::ISUB_R, MacroOp::Sub_rr);
const LightInstructionInfo LightInstructionInfo::IMUL_9C = LightInstructionInfo("IMUL_9C", &MacroOp::Lea_sib); const LightInstructionInfo LightInstructionInfo::IMUL_9C = LightInstructionInfo("IMUL_9C", LightInstructionType::IMUL_9C, MacroOp::Lea_sib);
const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", &MacroOp::Imul_rr); const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", LightInstructionType::IMUL_R, MacroOp::Imul_rr);
const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", &MacroOp::Imul_rri); const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", LightInstructionType::IMUL_C, MacroOp::Imul_rri);
const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", IMULH_R_ops_array); const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", LightInstructionType::IMULH_R, IMULH_R_ops_array);
const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", ISMULH_R_ops_array); const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", LightInstructionType::ISMULH_R, ISMULH_R_ops_array);
const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", IMUL_RCP_ops_array); const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", LightInstructionType::IMUL_RCP, IMUL_RCP_ops_array);
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", &MacroOp::Xor_rr); const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr);
const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", &MacroOp::Xor_ri); const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", LightInstructionType::IXOR_C, MacroOp::Xor_ri);
const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", IROR_R_ops_array); const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", LightInstructionType::IROR_R, IROR_R_ops_array);
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", &MacroOp::Ror_ri); const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri);
const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", COND_R_ops_array); const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", LightInstructionType::COND_R, COND_R_ops_array);
const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP"); const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
const int buffer0[] = { 3, 3, 10 }; const int buffer0[] = { 3, 3, 10 };
@ -375,20 +417,37 @@ namespace RandomX {
DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
const int slot_3[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IADD_R }; const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IADD_R };
const int slot_3L[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IMULH_R, LightInstructionType::ISMULH_R, LightInstructionType::IXOR_R, LightInstructionType::IMULH_R, LightInstructionType::ISMULH_R }; const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
const int slot_3F[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IROR_R }; const LightInstructionInfo* slot_3F[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R };
const int slot_4[] = { LightInstructionType::IMUL_R, LightInstructionType::IROR_C }; const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IMUL_R, &LightInstructionInfo::IROR_C };
const int slot_7[] = { LightInstructionType::IADD_C, LightInstructionType::IMUL_C, LightInstructionType::IXOR_C, LightInstructionType::IXOR_C }; const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IADD_C, &LightInstructionInfo::IMUL_C, &LightInstructionInfo::IXOR_C, &LightInstructionInfo::IXOR_C };
const int slot_7L = LightInstructionType::COND_R; const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R;
const int slot_8[] = { LightInstructionType::IADD_RC, LightInstructionType::IMUL_9C }; const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IADD_RC, &LightInstructionInfo::IMUL_9C };
const int slot_10 = LightInstructionType::IMUL_RCP; const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
// Picks one entry from availableRegisters and returns it.
//
// Template parameter:
//   erase - when true, the chosen entry is also removed from availableRegisters.
// Parameters:
//   availableRegisters - candidate register numbers; must not be empty.
//   gen                - generator used to pick among several candidates.
// Returns the selected register number.
// Throws std::runtime_error when availableRegisters is empty.
template<bool erase>
static int selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen) {
	if (availableRegisters.empty())
		throw std::runtime_error("No available registers");
	// The generator is only consumed when there is an actual choice to make;
	// with a single candidate, index 0 is used without drawing any data.
	int index = 0;
	if (availableRegisters.size() > 1) {
		index = gen.getInt32() % availableRegisters.size();
	}
	const int select = availableRegisters[index];
	if (erase)
		availableRegisters.erase(availableRegisters.begin() + index);
	return select;
}
class LightInstruction { class LightInstruction {
public: public:
Instruction toInstr() { Instruction toInstr() {
Instruction instr; Instruction instr;
instr.opcode = lightInstructionOpcode[type_]; instr.opcode = lightInstructionOpcode[getType()];
instr.dst = dst_; instr.dst = dst_;
instr.src = src_ >= 0 ? src_ : dst_; instr.src = src_ >= 0 ? src_ : dst_;
instr.mod = mod_; instr.mod = mod_;
@ -396,42 +455,40 @@ namespace RandomX {
return instr; return instr;
} }
static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, bool isLast = false, bool isFirst = false) { static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, std::vector<int>& availableRegisters, bool isLast = false, bool isFirst = false) {
switch (slotSize) switch (slotSize)
{ {
case 3: case 3:
if (isLast) { if (isLast) {
return create(slot_3L[gen.getByte() & 7], gen); return create(slot_3L[gen.getByte() & 7], availableRegisters, gen);
} }
else if (isFirst) { else if (isFirst) {
return create(slot_3F[gen.getByte() & 3], gen); return create(slot_3F[gen.getByte() & 3], availableRegisters, gen);
} }
else { else {
return create(slot_3[gen.getByte() & 3], gen); return create(slot_3[gen.getByte() & 3], availableRegisters, gen);
} }
case 4: case 4:
return create(slot_4[gen.getByte() & 1], gen); return create(slot_4[gen.getByte() & 1], availableRegisters, gen);
case 7: case 7:
if (isLast) { if (isLast) {
return create(slot_7L, gen); return create(slot_7L, availableRegisters, gen);
} }
else { else {
return create(slot_7[gen.getByte() & 3], gen); return create(slot_7[gen.getByte() & 3], availableRegisters, gen);
} }
case 8: case 8:
return create(slot_8[gen.getByte() & 1], gen); return create(slot_8[gen.getByte() & 1], availableRegisters, gen);
case 10: case 10:
return create(slot_10, gen); return create(slot_10, availableRegisters, gen);
default: default:
break; break;
} }
} }
static LightInstruction create(int type, Blake2Generator& gen) { static LightInstruction create(const LightInstructionInfo* info, std::vector<int>& availableRegisters, Blake2Generator& gen) {
LightInstruction li; LightInstruction li(info);
li.type_ = type; switch (info->getType())
li.opGroup_ = type;
switch (type)
{ {
case LightInstructionType::IADD_R: { case LightInstructionType::IADD_R: {
li.dst_ = gen.getByte() & 7; li.dst_ = gen.getByte() & 7;
@ -440,7 +497,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::IADD_R;
li.opGroup_ = LightInstructionType::IADD_R; li.opGroup_ = LightInstructionType::IADD_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -450,7 +506,6 @@ namespace RandomX {
li.src_ = -1; li.src_ = -1;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IADD_C;
li.opGroup_ = LightInstructionType::IADD_R; li.opGroup_ = LightInstructionType::IADD_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -462,7 +517,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IADD_RC;
li.opGroup_ = LightInstructionType::IADD_R; li.opGroup_ = LightInstructionType::IADD_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -474,7 +528,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::ISUB_R;
li.opGroup_ = LightInstructionType::IADD_R; li.opGroup_ = LightInstructionType::IADD_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -486,7 +539,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IMUL_9C;
li.opGroup_ = LightInstructionType::IMUL_C; li.opGroup_ = LightInstructionType::IMUL_C;
li.opGroupPar_ = -1; li.opGroupPar_ = -1;
} break; } break;
@ -498,7 +550,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::IMUL_R;
li.opGroup_ = LightInstructionType::IMUL_R; li.opGroup_ = LightInstructionType::IMUL_R;
li.opGroupPar_ = gen.getInt32(); li.opGroupPar_ = gen.getInt32();
} break; } break;
@ -508,7 +559,6 @@ namespace RandomX {
li.src_ = -1; li.src_ = -1;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IMUL_C;
li.opGroup_ = LightInstructionType::IMUL_C; li.opGroup_ = LightInstructionType::IMUL_C;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -518,7 +568,6 @@ namespace RandomX {
li.src_ = gen.getByte() & 7; li.src_ = gen.getByte() & 7;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::IMULH_R;
li.opGroup_ = LightInstructionType::IMULH_R; li.opGroup_ = LightInstructionType::IMULH_R;
li.opGroupPar_ = gen.getInt32(); li.opGroupPar_ = gen.getInt32();
} break; } break;
@ -528,7 +577,6 @@ namespace RandomX {
li.src_ = gen.getByte() & 7; li.src_ = gen.getByte() & 7;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::ISMULH_R;
li.opGroup_ = LightInstructionType::ISMULH_R; li.opGroup_ = LightInstructionType::ISMULH_R;
li.opGroupPar_ = gen.getInt32(); li.opGroupPar_ = gen.getInt32();
} break; } break;
@ -538,7 +586,6 @@ namespace RandomX {
li.src_ = -1; li.src_ = -1;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IMUL_RCP;
li.opGroup_ = LightInstructionType::IMUL_C; li.opGroup_ = LightInstructionType::IMUL_C;
li.opGroupPar_ = -1; li.opGroupPar_ = -1;
} break; } break;
@ -550,7 +597,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::IXOR_R;
li.opGroup_ = LightInstructionType::IXOR_R; li.opGroup_ = LightInstructionType::IXOR_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -560,7 +606,6 @@ namespace RandomX {
li.src_ = -1; li.src_ = -1;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::IXOR_C;
li.opGroup_ = LightInstructionType::IXOR_R; li.opGroup_ = LightInstructionType::IXOR_R;
li.opGroupPar_ = li.src_; li.opGroupPar_ = li.src_;
} break; } break;
@ -572,7 +617,6 @@ namespace RandomX {
} while (li.dst_ == li.src_); } while (li.dst_ == li.src_);
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = 0; li.imm32_ = 0;
li.info_ = &LightInstructionInfo::IROR_R;
li.opGroup_ = LightInstructionType::IROR_R; li.opGroup_ = LightInstructionType::IROR_R;
li.opGroupPar_ = -1; li.opGroupPar_ = -1;
} break; } break;
@ -582,7 +626,6 @@ namespace RandomX {
li.src_ = -1; li.src_ = -1;
li.mod_ = 0; li.mod_ = 0;
li.imm32_ = gen.getByte(); li.imm32_ = gen.getByte();
li.info_ = &LightInstructionInfo::IROR_C;
li.opGroup_ = LightInstructionType::IROR_R; li.opGroup_ = LightInstructionType::IROR_R;
li.opGroupPar_ = -1; li.opGroupPar_ = -1;
} break; } break;
@ -592,7 +635,6 @@ namespace RandomX {
li.src_ = gen.getByte() & 7; li.src_ = gen.getByte() & 7;
li.mod_ = gen.getByte(); li.mod_ = gen.getByte();
li.imm32_ = gen.getInt32(); li.imm32_ = gen.getInt32();
li.info_ = &LightInstructionInfo::COND_R;
li.opGroup_ = LightInstructionType::COND_R; li.opGroup_ = LightInstructionType::COND_R;
li.opGroupPar_ = li.imm32_; li.opGroupPar_ = li.imm32_;
} break; } break;
@ -605,7 +647,7 @@ namespace RandomX {
} }
int getType() { int getType() {
return type_; return info_.getType();
} }
int getSource() { int getSource() {
return src_; return src_;
@ -620,37 +662,32 @@ namespace RandomX {
return opGroupPar_; return opGroupPar_;
} }
const LightInstructionInfo* getInfo() { LightInstructionInfo& getInfo() {
return info_; return info_;
} }
static const LightInstruction Null; static const LightInstruction Null;
private: private:
int type_; LightInstructionInfo info_;
int src_; int src_;
int dst_; int dst_;
int mod_; int mod_;
uint32_t imm32_; uint32_t imm32_;
const LightInstructionInfo* info_;
int opGroup_; int opGroup_;
int opGroupPar_; int opGroupPar_;
LightInstruction() {} LightInstruction(const LightInstructionInfo* info) : info_(*info) {
LightInstruction(int type, const LightInstructionInfo* info) : type_(type), info_(info) {} for (unsigned i = 0; i < info_.getSize(); ++i) {
MacroOp& mop = info_.getOp(i);
if (mop.isDependent()) {
mop.setSrcDep(&info_.getOp(i - 1));
}
}
}
}; };
class RegisterInfo { const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP);
public:
RegisterInfo() : lastOpGroup(-1), source(-1), value(0), latency(0) {}
int lastOpGroup;
int source;
int value;
int latency;
};
const LightInstruction LightInstruction::Null = LightInstruction(-1, &LightInstructionInfo::NOP);
constexpr int ALU_COUNT_MUL = 1; constexpr int ALU_COUNT_MUL = 1;
constexpr int ALU_COUNT = 4; constexpr int ALU_COUNT = 4;
@ -660,6 +697,73 @@ namespace RandomX {
static int blakeCounter = 0; static int blakeCounter = 0;
static int scheduleUop(const MacroOp& mop, ExecutionPort::type(&portBusy)[RANDOMX_LPROG_LATENCY + 1][3], int cycle, int depCycle) {
if (mop.isDependent()) {
cycle = std::max(cycle, depCycle);
}
if (mop.isEliminated()) {
std::cout << "; (eliminated)" << std::endl;
return cycle;
}
else if (mop.isSimple()) {
if (mop.getUop1() <= ExecutionPort::P5) {
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
if (!portBusy[cycle][mop.getUop1() - 1]) {
std::cout << "; P" << mop.getUop1() - 1 << " at cycle " << cycle << std::endl;
portBusy[cycle][mop.getUop1() - 1] = mop.getUop1();
return cycle;
}
}
}
else if (mop.getUop1() == ExecutionPort::P05) {
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
if (!portBusy[cycle][0]) {
std::cout << "; P0 at cycle " << cycle << std::endl;
portBusy[cycle][0] = mop.getUop1();
return cycle;
}
if (!portBusy[cycle][2]) {
std::cout << "; P2 at cycle " << cycle << std::endl;
portBusy[cycle][2] = mop.getUop1();
return cycle;
}
}
}
else {
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
if (!portBusy[cycle][0]) {
std::cout << "; P0 at cycle " << cycle << std::endl;
portBusy[cycle][0] = mop.getUop1();
return cycle;
}
if (!portBusy[cycle][2]) {
std::cout << "; P2 at cycle " << cycle << std::endl;
portBusy[cycle][2] = mop.getUop1();
return cycle;
}
if (!portBusy[cycle][1]) {
std::cout << "; P1 at cycle " << cycle << std::endl;
portBusy[cycle][1] = mop.getUop1();
return cycle;
}
}
}
}
else {
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
if (!portBusy[cycle][mop.getUop1() - 1] && !portBusy[cycle][mop.getUop2() - 1]) {
std::cout << "; P" << mop.getUop1() - 1 << " P" << mop.getUop2() - 1 << " at cycle " << cycle << std::endl;
portBusy[cycle][mop.getUop1() - 1] = mop.getUop1();
portBusy[cycle][mop.getUop2() - 1] = mop.getUop2();
return cycle;
}
}
}
std::cout << "Unable to map operation '" << mop.getName() << "' to execution port";
return -1;
}
// If we don't have enough data available, generate more // If we don't have enough data available, generate more
static FORCE_INLINE void check_data(size_t& data_index, const size_t bytes_needed, uint8_t* data, const size_t data_size) static FORCE_INLINE void check_data(size_t& data_index, const size_t bytes_needed, uint8_t* data, const size_t data_size)
{ {
@ -673,11 +777,12 @@ namespace RandomX {
void generateLightProg2(LightProgram& prog, const void* seed, int indexRegister) { void generateLightProg2(LightProgram& prog, const void* seed, int indexRegister) {
bool portBusy[RANDOMX_LPROG_LATENCY][3]; ExecutionPort::type portBusy[RANDOMX_LPROG_LATENCY + 1][3];
memset(portBusy, 0, sizeof(portBusy));
RegisterInfo registers[8]; RegisterInfo registers[8];
bool decoderBusy[RANDOMX_LPROG_LATENCY][4];
Blake2Generator gen(seed); Blake2Generator gen(seed);
std::vector<LightInstruction> instructions; std::vector<LightInstruction> instructions;
std::vector<int> availableRegisters;
DecoderBuffer& fetchLine = DecoderBuffer::Default; DecoderBuffer& fetchLine = DecoderBuffer::Default;
LightInstruction currentInstruction = LightInstruction::Null; LightInstruction currentInstruction = LightInstruction::Null;
@ -685,35 +790,78 @@ namespace RandomX {
int codeSize = 0; int codeSize = 0;
int macroOpCount = 0; int macroOpCount = 0;
int rxOpCount = 0; int rxOpCount = 0;
int cycle = 0;
int depCycle = 0;
int mopIndex = 0;
bool portsSaturated = false;
for (int cycle = 0; cycle < 170; ++cycle) { while(!portsSaturated) {
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen); fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
std::cout << "; cycle " << cycle << " buffer " << fetchLine.getName() << std::endl; std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine.getName() << ")" << std::endl;
int mopIndex = 0; availableRegisters.clear();
for (unsigned i = 0; i < 8; ++i) {
if (registers[i].latency <= cycle)
availableRegisters.push_back(i);
}
while (mopIndex < fetchLine.getSize()) { mopIndex = 0;
if (instrIndex >= currentInstruction.getInfo()->getSize()) {
currentInstruction = LightInstruction::createForSlot(gen, fetchLine.getCounts()[mopIndex], fetchLine.getSize() == mopIndex + 1, fetchLine.getIndex() == 0 && mopIndex == 0); while (!portsSaturated && mopIndex < fetchLine.getSize()) {
if (instrIndex >= currentInstruction.getInfo().getSize()) {
currentInstruction = LightInstruction::createForSlot(gen, fetchLine.getCounts()[mopIndex], availableRegisters, fetchLine.getSize() == mopIndex + 1, fetchLine.getIndex() == 0 && mopIndex == 0);
instrIndex = 0; instrIndex = 0;
std::cout << "; " << currentInstruction.getInfo()->getName() << std::endl; std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
rxOpCount++; rxOpCount++;
} }
if (fetchLine.getCounts()[mopIndex] != currentInstruction.getInfo()->getOp(instrIndex)->getSize()) { MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
std::cout << "ERROR instruction " << currentInstruction.getInfo()->getOp(instrIndex)->getName() << " doesn't fit into slot of size " << fetchLine.getCounts()[mopIndex] << std::endl; if (fetchLine.getCounts()[mopIndex] != mop.getSize()) {
std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine.getCounts()[mopIndex] << std::endl;
return; return;
} }
std::cout << currentInstruction.getInfo()->getOp(instrIndex)->getName() << std::endl;
codeSize += currentInstruction.getInfo()->getOp(instrIndex)->getSize(); std::cout << mop.getName() << " ";
codeSize += mop.getSize();
mopIndex++; mopIndex++;
instrIndex++; instrIndex++;
macroOpCount++; macroOpCount++;
int scheduleCycle = scheduleUop(mop, portBusy, cycle, depCycle);
if (scheduleCycle >= RANDOMX_LPROG_LATENCY) {
portsSaturated = true;
}
mop.setCycle(scheduleCycle);
depCycle = scheduleCycle + mop.getLatency();
} }
++cycle;
}
while (instrIndex < currentInstruction.getInfo().getSize()) {
if (mopIndex >= fetchLine.getSize()) {
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
std::cout << "; cycle " << cycle++ << " buffer " << fetchLine.getName() << std::endl;
mopIndex = 0;
}
MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
std::cout << mop.getName() << " ";
codeSize += mop.getSize();
mopIndex++;
instrIndex++;
macroOpCount++;
int scheduleCycle = scheduleUop(mop, portBusy, cycle, depCycle);
mop.setCycle(scheduleCycle);
depCycle = scheduleCycle + mop.getLatency();
} }
std::cout << "; code size " << codeSize << std::endl; std::cout << "; code size " << codeSize << std::endl;
std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
std::cout << "; RandomX instructions: " << rxOpCount << std::endl; std::cout << "; RandomX instructions: " << rxOpCount << std::endl;
for (int i = 0; i < RANDOMX_LPROG_LATENCY + 1; ++i) {
for (int j = 0; j < 3; ++j) {
std::cout << (portBusy[i][j] ? '*' : '_');
}
std::cout << std::endl;
}
} }
void generateLightProgram(LightProgram& prog, const void* seed, int indexRegister) { void generateLightProgram(LightProgram& prog, const void* seed, int indexRegister) {