2019-03-28 14:27:10 +00:00
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
2019-04-10 22:01:22 +00:00
# include <stddef.h>
2019-03-28 14:27:10 +00:00
# include "blake2/blake2.h"
# include "configuration.h"
# include "Program.hpp"
2019-04-10 22:01:22 +00:00
# include "blake2/endian.h"
2019-03-28 14:27:10 +00:00
# include <iostream>
2019-03-31 11:32:16 +00:00
# include <vector>
2019-03-31 19:22:36 +00:00
# include <algorithm>
# include <stdexcept>
2019-03-31 22:38:17 +00:00
# include <iomanip>
2019-04-06 10:00:56 +00:00
# include "LightProgramGenerator.hpp"
2019-03-28 14:27:10 +00:00
namespace RandomX {
2019-04-12 11:32:22 +00:00
static bool isMultiplication ( int type ) {
return type = = SuperscalarInstructionType : : IMUL_R | | type = = SuperscalarInstructionType : : IMULH_R | | type = = SuperscalarInstructionType : : ISMULH_R | | type = = SuperscalarInstructionType : : IMUL_RCP ;
2019-04-03 07:53:25 +00:00
}
2019-03-31 11:32:16 +00:00
//Bit masks describing which execution ports can accept a uOP.
//A combined mask (e.g. P015) means the uOP may be issued to any of the listed ports.
namespace ExecutionPort {
    using type = int;

    //no port needed
    constexpr type Null = 0;

    //single ports, one bit each
    constexpr type P0 = 1 << 0;
    constexpr type P1 = 1 << 1;
    constexpr type P5 = 1 << 2;

    //port groups
    constexpr type P01 = P0 | P1;
    constexpr type P05 = P0 | P5;
    constexpr type P015 = P01 | P5;
}
2019-04-06 10:00:56 +00:00
//Builds the initial generator state from a seed and a nonce.
//dataIndex starts at sizeof(data), so the buffer counts as fully consumed and the
//first getByte()/getInt32() call makes checkData() hash the state before any byte is returned.
Blake2Generator::Blake2Generator(const void* seed, int nonce)
    : dataIndex(sizeof(data))
{
    //zero the whole buffer, then copy SeedSize bytes of the seed to its beginning
    memset(data, 0, sizeof(data));
    memcpy(data, seed, SeedSize);
    //the nonce is stored as a 32-bit value at byte offset 60
    store32(data + 60, nonce);
}
2019-03-31 11:32:16 +00:00
2019-04-06 10:00:56 +00:00
uint8_t Blake2Generator : : getByte ( ) {
checkData ( 1 ) ;
return data [ dataIndex + + ] ;
}
2019-03-31 11:32:16 +00:00
2019-04-06 10:00:56 +00:00
uint32_t Blake2Generator : : getInt32 ( ) {
checkData ( 4 ) ;
auto ret = load32 ( & data [ dataIndex ] ) ;
dataIndex + = 4 ;
return ret ;
}
2019-03-31 11:32:16 +00:00
2019-04-06 10:00:56 +00:00
void Blake2Generator : : checkData ( const size_t bytesNeeded ) {
if ( dataIndex + bytesNeeded > sizeof ( data ) ) {
blake2b ( data , sizeof ( data ) , data , sizeof ( data ) , nullptr , 0 ) ;
dataIndex = 0 ;
2019-03-31 11:32:16 +00:00
}
2019-04-06 10:00:56 +00:00
}
2019-03-31 11:32:16 +00:00
2019-03-31 19:22:36 +00:00
//Per-register bookkeeping used while generating a program.
class RegisterInfo {
public:
    RegisterInfo() {}

    //cycle at which the register value becomes ready (compared against the current cycle when selecting operands)
    int latency = 0;
    //group of the last instruction applied to this register, -1 if none
    int lastOpGroup = -1;
    //group parameter of the last instruction applied to this register, -1 if none
    int lastOpPar = -1;
    //NOTE(review): not used in the visible part of this file - confirm its meaning against the callers
    int value = 0;
};
2019-03-31 11:32:16 +00:00
class MacroOp {
public :
MacroOp ( const char * name , int size )
: name_ ( name ) , size_ ( size ) , latency_ ( 0 ) , uop1_ ( ExecutionPort : : Null ) , uop2_ ( ExecutionPort : : Null ) { }
MacroOp ( const char * name , int size , int latency , ExecutionPort : : type uop )
: name_ ( name ) , size_ ( size ) , latency_ ( latency ) , uop1_ ( uop ) , uop2_ ( ExecutionPort : : Null ) { }
MacroOp ( const char * name , int size , int latency , ExecutionPort : : type uop1 , ExecutionPort : : type uop2 )
: name_ ( name ) , size_ ( size ) , latency_ ( latency ) , uop1_ ( uop1 ) , uop2_ ( uop2 ) { }
2019-03-31 19:22:36 +00:00
MacroOp ( const MacroOp & parent , bool dependent )
: name_ ( parent . name_ ) , size_ ( parent . size_ ) , latency_ ( parent . latency_ ) , uop1_ ( parent . uop1_ ) , uop2_ ( parent . uop2_ ) , dependent_ ( dependent ) { }
2019-03-31 11:32:16 +00:00
const char * getName ( ) const {
return name_ ;
}
int getSize ( ) const {
return size_ ;
}
int getLatency ( ) const {
return latency_ ;
}
ExecutionPort : : type getUop1 ( ) const {
return uop1_ ;
}
ExecutionPort : : type getUop2 ( ) const {
return uop2_ ;
}
bool isSimple ( ) const {
return uop2_ = = ExecutionPort : : Null ;
}
bool isEliminated ( ) const {
return uop1_ = = ExecutionPort : : Null ;
}
2019-03-31 19:22:36 +00:00
bool isDependent ( ) const {
return dependent_ ;
}
2019-03-31 11:32:16 +00:00
static const MacroOp Add_rr ;
static const MacroOp Add_ri ;
static const MacroOp Lea_sib ;
static const MacroOp Sub_rr ;
static const MacroOp Imul_rr ;
static const MacroOp Imul_r ;
static const MacroOp Mul_r ;
static const MacroOp Mov_rr ;
static const MacroOp Mov_ri64 ;
static const MacroOp Xor_rr ;
static const MacroOp Xor_ri ;
static const MacroOp Ror_rcl ;
static const MacroOp Ror_ri ;
2019-04-06 10:00:56 +00:00
static const MacroOp TestJz_fused ;
2019-03-31 11:32:16 +00:00
static const MacroOp Xor_self ;
static const MacroOp Cmp_ri ;
static const MacroOp Setcc_r ;
private :
const char * name_ ;
int size_ ;
int latency_ ;
ExecutionPort : : type uop1_ ;
ExecutionPort : : type uop2_ ;
2019-03-31 19:22:36 +00:00
int cycle_ ;
bool dependent_ = false ;
MacroOp * depDst_ = nullptr ;
MacroOp * depSrc_ = nullptr ;
2019-03-31 11:32:16 +00:00
} ;
2019-04-11 22:02:22 +00:00
//Macro-ops with a size of 3 bytes
const MacroOp MacroOp::Add_rr(" add r,r ", 3, 1, ExecutionPort::P015);
const MacroOp MacroOp::Sub_rr(" sub r,r ", 3, 1, ExecutionPort::P015);
const MacroOp MacroOp::Xor_rr(" xor r,r ", 3, 1, ExecutionPort::P015);
const MacroOp MacroOp::Imul_r(" imul r ", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
const MacroOp MacroOp::Mul_r(" mul r ", 3, 3, ExecutionPort::P1, ExecutionPort::P5);
const MacroOp MacroOp::Mov_rr(" mov r,r ", 3);

//Macro-ops with a size of 4 bytes
const MacroOp MacroOp::Lea_sib(" lea r,r+r*s ", 4, 1, ExecutionPort::P01);
const MacroOp MacroOp::Imul_rr(" imul r,r ", 4, 3, ExecutionPort::P1);
const MacroOp MacroOp::Ror_ri(" ror r,i ", 4, 1, ExecutionPort::P05);

//Macro-ops with a size of 7 bytes (can be optionally padded with nop to 8 or 9 bytes)
const MacroOp MacroOp::Add_ri(" add r,i ", 7, 1, ExecutionPort::P015);
const MacroOp MacroOp::Xor_ri(" xor r,i ", 7, 1, ExecutionPort::P015);

//Macro-ops with a size of 10 bytes
const MacroOp MacroOp::Mov_ri64(" mov rax,i64 ", 10, 1, ExecutionPort::P015);

//Currently unused macro-ops
const MacroOp MacroOp::Ror_rcl(" ror r,cl ", 3, 1, ExecutionPort::P0, ExecutionPort::P5);
const MacroOp MacroOp::Xor_self(" xor rcx,rcx ", 3);
const MacroOp MacroOp::Cmp_ri(" cmp r,i ", 7, 1, ExecutionPort::P015);
const MacroOp MacroOp::Setcc_r(" setcc cl ", 3, 1, ExecutionPort::P05);
const MacroOp MacroOp::TestJz_fused(" testjz r,i ", 13, 0, ExecutionPort::P5);

//Macro-op sequences of the instructions that consist of more than one macro-op.
//They must be defined after the MacroOp constants above because they copy them.
const MacroOp IMULH_R_ops_array[] = {
    MacroOp::Mov_rr,
    MacroOp::Mul_r,
    MacroOp::Mov_rr,
};
const MacroOp ISMULH_R_ops_array[] = {
    MacroOp::Mov_rr,
    MacroOp::Imul_r,
    MacroOp::Mov_rr,
};
//the multiplication is flagged as dependent on the preceding 64-bit immediate load
const MacroOp IMUL_RCP_ops_array[] = {
    MacroOp::Mov_ri64,
    MacroOp(MacroOp::Imul_rr, true),
};
2019-03-31 11:32:16 +00:00
class LightInstructionInfo {
public :
const char * getName ( ) const {
return name_ ;
}
int getSize ( ) const {
2019-03-31 19:22:36 +00:00
return ops_ . size ( ) ;
2019-03-31 11:32:16 +00:00
}
bool isSimple ( ) const {
2019-03-31 19:22:36 +00:00
return getSize ( ) = = 1 ;
2019-03-31 11:32:16 +00:00
}
int getLatency ( ) const {
return latency_ ;
}
2019-04-11 22:02:22 +00:00
const MacroOp & getOp ( int index ) const {
2019-03-31 19:22:36 +00:00
return ops_ [ index ] ;
}
int getType ( ) const {
return type_ ;
2019-03-31 11:32:16 +00:00
}
2019-03-31 22:38:17 +00:00
int getResultOp ( ) const {
return resultOp_ ;
}
int getDstOp ( ) const {
return dstOp_ ;
}
int getSrcOp ( ) const {
return srcOp_ ;
}
2019-03-31 11:32:16 +00:00
static const LightInstructionInfo ISUB_R ;
2019-04-07 13:38:51 +00:00
static const LightInstructionInfo IXOR_R ;
static const LightInstructionInfo IADD_RS ;
2019-03-31 11:32:16 +00:00
static const LightInstructionInfo IMUL_R ;
2019-04-07 13:38:51 +00:00
static const LightInstructionInfo IROR_C ;
static const LightInstructionInfo IADD_C7 ;
static const LightInstructionInfo IXOR_C7 ;
static const LightInstructionInfo IADD_C8 ;
static const LightInstructionInfo IXOR_C8 ;
static const LightInstructionInfo IADD_C9 ;
static const LightInstructionInfo IXOR_C9 ;
2019-03-31 11:32:16 +00:00
static const LightInstructionInfo IMULH_R ;
static const LightInstructionInfo ISMULH_R ;
static const LightInstructionInfo IMUL_RCP ;
static const LightInstructionInfo NOP ;
private :
const char * name_ ;
2019-03-31 19:22:36 +00:00
int type_ ;
std : : vector < MacroOp > ops_ ;
2019-03-31 11:32:16 +00:00
int latency_ ;
2019-03-31 22:38:17 +00:00
int resultOp_ = 0 ;
int dstOp_ = 0 ;
2019-04-01 16:31:02 +00:00
int srcOp_ ;
2019-03-31 11:32:16 +00:00
LightInstructionInfo ( const char * name )
2019-03-31 19:22:36 +00:00
: name_ ( name ) , type_ ( - 1 ) , latency_ ( 0 ) { }
2019-04-11 22:02:22 +00:00
LightInstructionInfo ( const char * name , int type , const MacroOp & op , int srcOp )
: name_ ( name ) , type_ ( type ) , latency_ ( op . getLatency ( ) ) , srcOp_ ( srcOp ) {
ops_ . push_back ( MacroOp ( op ) ) ;
}
template < size_t N >
LightInstructionInfo ( const char * name , int type , const MacroOp ( & arr ) [ N ] , int resultOp , int dstOp , int srcOp )
: name_ ( name ) , type_ ( type ) , latency_ ( 0 ) , resultOp_ ( resultOp ) , dstOp_ ( dstOp ) , srcOp_ ( srcOp ) {
for ( unsigned i = 0 ; i < N ; + + i ) {
ops_ . push_back ( MacroOp ( arr [ i ] ) ) ;
latency_ + = ops_ . back ( ) . getLatency ( ) ;
}
static_assert ( N > 1 , " Invalid array size " ) ;
}
2019-03-31 11:32:16 +00:00
} ;
2019-04-12 11:32:22 +00:00
//Instructions compiled to a single macro-op. The last argument is the macro-op index at which
//the source register is handled (-1 for instructions without a source register).
const LightInstructionInfo LightInstructionInfo::ISUB_R(" ISUB_R ", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
const LightInstructionInfo LightInstructionInfo::IXOR_R(" IXOR_R ", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
const LightInstructionInfo LightInstructionInfo::IADD_RS(" IADD_RS ", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
const LightInstructionInfo LightInstructionInfo::IMUL_R(" IMUL_R ", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
const LightInstructionInfo LightInstructionInfo::IROR_C(" IROR_C ", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);

//Instructions with a 32-bit immediate; the C7/C8/C9 variants share the same macro-op
const LightInstructionInfo LightInstructionInfo::IADD_C7(" IADD_C7 ", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C7(" IXOR_C7 ", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
const LightInstructionInfo LightInstructionInfo::IADD_C8(" IADD_C8 ", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C8(" IXOR_C8 ", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
const LightInstructionInfo LightInstructionInfo::IADD_C9(" IADD_C9 ", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C9(" IXOR_C9 ", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);

//Instructions compiled to several macro-ops. The trailing arguments are the macro-op indices
//of the result, the destination and the source (in this order).
const LightInstructionInfo LightInstructionInfo::IMULH_R(" IMULH_R ", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
const LightInstructionInfo LightInstructionInfo::ISMULH_R(" ISMULH_R ", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
const LightInstructionInfo LightInstructionInfo::IMUL_RCP(" IMUL_RCP ", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);

//Placeholder without any macro-ops
const LightInstructionInfo LightInstructionInfo::NOP(" NOP ");
class DecoderBuffer {
public :
2019-04-06 10:00:56 +00:00
static const DecoderBuffer Default ;
2019-03-31 11:32:16 +00:00
template < size_t N >
DecoderBuffer ( const char * name , int index , const int ( & arr ) [ N ] )
: name_ ( name ) , index_ ( index ) , counts_ ( arr ) , opsCount_ ( N ) { }
const int * getCounts ( ) const {
return counts_ ;
}
int getSize ( ) const {
return opsCount_ ;
}
int getIndex ( ) const {
return index_ ;
}
const char * getName ( ) const {
return name_ ;
}
2019-04-06 15:07:40 +00:00
const DecoderBuffer * fetchNext ( int instrType , int cycle , int mulCount , Blake2Generator & gen ) const {
2019-04-11 22:02:22 +00:00
//If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10
//because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs.
//Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops.
2019-04-12 11:32:22 +00:00
if ( instrType = = SuperscalarInstructionType : : IMULH_R | | instrType = = SuperscalarInstructionType : : ISMULH_R )
2019-04-11 22:02:22 +00:00
return & decodeBuffer3310 ;
//To make sure that the multiplication port is saturated, a 4-4-4-4 configuration is generated if the number of multiplications
//is lower than the number of cycles.
2019-04-07 13:38:51 +00:00
if ( mulCount < cycle + 1 )
return & decodeBuffer4444 ;
2019-04-11 22:02:22 +00:00
//If the current RandomX instruction is "IMUL_RCP", the next buffer must begin with a 4-byte slot for multiplication.
2019-04-12 11:32:22 +00:00
if ( instrType = = SuperscalarInstructionType : : IMUL_RCP )
2019-04-07 13:38:51 +00:00
return ( gen . getByte ( ) & 1 ) ? & decodeBuffer484 : & decodeBuffer493 ;
2019-04-11 22:02:22 +00:00
//Default: select a random fetch configuration.
2019-03-31 11:32:16 +00:00
return fetchNextDefault ( gen ) ;
}
private :
const char * name_ ;
int index_ ;
const int * counts_ ;
int opsCount_ ;
DecoderBuffer ( ) : index_ ( - 1 ) { }
2019-04-07 13:38:51 +00:00
static const DecoderBuffer decodeBuffer484 ;
2019-04-03 12:06:59 +00:00
static const DecoderBuffer decodeBuffer7333 ;
static const DecoderBuffer decodeBuffer3733 ;
2019-04-07 13:38:51 +00:00
static const DecoderBuffer decodeBuffer493 ;
static const DecoderBuffer decodeBuffer4444 ;
static const DecoderBuffer decodeBuffer3310 ;
static const DecoderBuffer * decodeBuffers [ 4 ] ;
2019-04-06 10:00:56 +00:00
const DecoderBuffer * fetchNextDefault ( Blake2Generator & gen ) const {
2019-04-07 13:38:51 +00:00
return decodeBuffers [ gen . getByte ( ) & 3 ] ;
2019-03-31 11:32:16 +00:00
}
} ;
2019-04-11 22:02:22 +00:00
//Some of the possible ways to split a 16-byte window into 3 or 4 x86 instructions.
//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul),
//7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
const int buffer0[] = { 4, 8, 4 };
const int buffer1[] = { 7, 3, 3, 3 };
const int buffer2[] = { 3, 7, 3, 3 };
const int buffer3[] = { 4, 9, 3 };
const int buffer4[] = { 4, 4, 4, 4 };
const int buffer5[] = { 3, 3, 10 };

//Decode configurations; the second argument is the configuration index.
const DecoderBuffer DecoderBuffer::decodeBuffer484(" 4,8,4 ", 0, buffer0);
const DecoderBuffer DecoderBuffer::decodeBuffer7333(" 7,3,3,3 ", 1, buffer1);
const DecoderBuffer DecoderBuffer::decodeBuffer3733(" 3,7,3,3 ", 2, buffer2);
const DecoderBuffer DecoderBuffer::decodeBuffer493(" 4,9,3 ", 3, buffer3);
const DecoderBuffer DecoderBuffer::decodeBuffer4444(" 4,4,4,4 ", 4, buffer4);
const DecoderBuffer DecoderBuffer::decodeBuffer3310(" 3,3,10 ", 5, buffer5);

//Configurations eligible for the default (random) selection, indexed by 2 random bits.
const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = {
    &DecoderBuffer::decodeBuffer484,
    &DecoderBuffer::decodeBuffer7333,
    &DecoderBuffer::decodeBuffer3733,
    &DecoderBuffer::decodeBuffer493,
};

//Placeholder built by the private default constructor.
const DecoderBuffer DecoderBuffer::Default;
2019-03-31 11:32:16 +00:00
2019-04-03 12:06:59 +00:00
//Candidate instructions for each slot size (in bytes). Entries are selected by
//random bits, so the order of the entries must not be changed.

//3-byte slot
const LightInstructionInfo* slot_3[] = {
    &LightInstructionInfo::ISUB_R,
    &LightInstructionInfo::IXOR_R,
};
//3-byte slot at the end of a decode buffer (IMULH variants are allowed as well)
const LightInstructionInfo* slot_3L[] = {
    &LightInstructionInfo::ISUB_R,
    &LightInstructionInfo::IXOR_R,
    &LightInstructionInfo::IMULH_R,
    &LightInstructionInfo::ISMULH_R,
};
//4-byte slot
const LightInstructionInfo* slot_4[] = {
    &LightInstructionInfo::IROR_C,
    &LightInstructionInfo::IADD_RS,
};
//7-byte slot
const LightInstructionInfo* slot_7[] = {
    &LightInstructionInfo::IXOR_C7,
    &LightInstructionInfo::IADD_C7,
};
//8-byte slot
const LightInstructionInfo* slot_8[] = {
    &LightInstructionInfo::IXOR_C8,
    &LightInstructionInfo::IADD_C8,
};
//9-byte slot
const LightInstructionInfo* slot_9[] = {
    &LightInstructionInfo::IXOR_C9,
    &LightInstructionInfo::IADD_C9,
};
//10-byte slot (single candidate)
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
2019-03-31 22:38:17 +00:00
static bool selectRegister ( std : : vector < int > & availableRegisters , Blake2Generator & gen , int & reg ) {
2019-03-31 19:22:36 +00:00
int index ;
2019-03-31 22:38:17 +00:00
if ( availableRegisters . size ( ) = = 0 )
return false ;
2019-03-31 19:22:36 +00:00
if ( availableRegisters . size ( ) > 1 ) {
index = gen . getInt32 ( ) % availableRegisters . size ( ) ;
}
else {
index = 0 ;
}
2019-03-31 22:38:17 +00:00
reg = availableRegisters [ index ] ;
return true ;
2019-03-31 19:22:36 +00:00
}
2019-03-31 11:32:16 +00:00
class LightInstruction {
public :
2019-03-31 22:38:17 +00:00
void toInstr ( Instruction & instr ) {
2019-04-07 13:38:51 +00:00
instr . opcode = getType ( ) ;
2019-03-31 11:32:16 +00:00
instr . dst = dst_ ;
instr . src = src_ > = 0 ? src_ : dst_ ;
instr . mod = mod_ ;
instr . setImm32 ( imm32_ ) ;
}
2019-04-11 22:02:22 +00:00
void createForSlot ( Blake2Generator & gen , int slotSize , int fetchType , bool isLast , bool isFirst ) {
2019-03-31 11:32:16 +00:00
switch ( slotSize )
{
case 3 :
2019-04-12 11:32:22 +00:00
//if this is the last slot, we can also select "IMULH" instructions
2019-03-31 11:32:16 +00:00
if ( isLast ) {
2019-04-11 22:02:22 +00:00
create ( slot_3L [ gen . getByte ( ) & 3 ] , gen ) ;
2019-03-31 11:32:16 +00:00
}
else {
2019-04-11 22:02:22 +00:00
create ( slot_3 [ gen . getByte ( ) & 1 ] , gen ) ;
2019-03-31 11:32:16 +00:00
}
2019-04-11 22:02:22 +00:00
break ;
2019-03-31 11:32:16 +00:00
case 4 :
2019-04-12 11:32:22 +00:00
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
2019-04-07 13:38:51 +00:00
if ( fetchType = = 4 & & ! isLast ) {
2019-04-11 22:02:22 +00:00
create ( & LightInstructionInfo : : IMUL_R , gen ) ;
2019-04-06 15:07:40 +00:00
}
else {
2019-04-11 22:02:22 +00:00
create ( slot_4 [ gen . getByte ( ) & 1 ] , gen ) ;
2019-04-06 15:07:40 +00:00
}
2019-04-11 22:02:22 +00:00
break ;
2019-03-31 11:32:16 +00:00
case 7 :
2019-04-11 22:02:22 +00:00
create ( slot_7 [ gen . getByte ( ) & 1 ] , gen ) ;
break ;
2019-04-07 13:38:51 +00:00
case 8 :
2019-04-11 22:02:22 +00:00
create ( slot_8 [ gen . getByte ( ) & 1 ] , gen ) ;
break ;
2019-04-07 13:38:51 +00:00
case 9 :
2019-04-11 22:02:22 +00:00
create ( slot_9 [ gen . getByte ( ) & 1 ] , gen ) ;
break ;
2019-03-31 11:32:16 +00:00
case 10 :
2019-04-11 22:02:22 +00:00
create ( slot_10 , gen ) ;
break ;
2019-03-31 11:32:16 +00:00
default :
2019-04-11 22:02:22 +00:00
UNREACHABLE ;
2019-03-31 11:32:16 +00:00
}
}
2019-04-11 22:02:22 +00:00
void create ( const LightInstructionInfo * info , Blake2Generator & gen ) {
info_ = info ;
reset ( ) ;
2019-03-31 19:22:36 +00:00
switch ( info - > getType ( ) )
2019-03-31 11:32:16 +00:00
{
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : ISUB_R : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IADD_RS ;
2019-04-11 22:02:22 +00:00
groupParIsSource_ = true ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IXOR_R : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IXOR_R ;
2019-04-11 22:02:22 +00:00
groupParIsSource_ = true ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IADD_RS : {
2019-04-11 22:02:22 +00:00
mod_ = gen . getByte ( ) ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IADD_RS ;
2019-04-11 22:02:22 +00:00
groupParIsSource_ = true ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IMUL_R : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IMUL_R ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = - 1 ;
2019-04-07 13:38:51 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IROR_C : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
2019-04-07 13:38:51 +00:00
do {
2019-04-11 22:02:22 +00:00
imm32_ = gen . getByte ( ) & 63 ;
} while ( imm32_ = = 0 ) ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IROR_C ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = - 1 ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IADD_C7 :
case SuperscalarInstructionType : : IADD_C8 :
case SuperscalarInstructionType : : IADD_C9 : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
imm32_ = gen . getInt32 ( ) ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IADD_C7 ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = - 1 ;
2019-04-07 13:38:51 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IXOR_C7 :
case SuperscalarInstructionType : : IXOR_C8 :
case SuperscalarInstructionType : : IXOR_C9 : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
imm32_ = gen . getInt32 ( ) ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IXOR_C7 ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = - 1 ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IMULH_R : {
2019-04-11 22:02:22 +00:00
canReuse_ = true ;
mod_ = 0 ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IMULH_R ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = gen . getInt32 ( ) ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : ISMULH_R : {
2019-04-11 22:02:22 +00:00
canReuse_ = true ;
mod_ = 0 ;
imm32_ = 0 ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : ISMULH_R ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = gen . getInt32 ( ) ;
2019-03-31 11:32:16 +00:00
} break ;
2019-04-12 11:32:22 +00:00
case SuperscalarInstructionType : : IMUL_RCP : {
2019-04-11 22:02:22 +00:00
mod_ = 0 ;
2019-04-01 16:31:02 +00:00
do {
2019-04-11 22:02:22 +00:00
imm32_ = gen . getInt32 ( ) ;
} while ( ( imm32_ & ( imm32_ - 1 ) ) = = 0 ) ;
2019-04-12 11:32:22 +00:00
opGroup_ = SuperscalarInstructionType : : IMUL_RCP ;
2019-04-11 22:02:22 +00:00
opGroupPar_ = - 1 ;
2019-03-31 11:32:16 +00:00
} break ;
default :
break ;
}
}
2019-03-31 22:38:17 +00:00
bool selectDestination ( int cycle , RegisterInfo ( & registers ) [ 8 ] , Blake2Generator & gen ) {
std : : vector < int > availableRegisters ;
2019-04-12 12:56:20 +00:00
//Conditions for the destination register:
// * value must be ready at the required cycle
// * cannot be the same as the source register unless the instruction allows it
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
// * either the last instruction applied to the register or its source must be different than this instruction
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
// - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
2019-03-31 22:38:17 +00:00
for ( unsigned i = 0 ; i < 8 ; + + i ) {
2019-04-12 12:56:20 +00:00
if ( registers [ i ] . latency < = cycle & & ( canReuse_ | | i ! = src_ ) & & ( registers [ i ] . lastOpGroup ! = opGroup_ | | registers [ i ] . lastOpPar ! = opGroupPar_ ) & & ( info_ - > getType ( ) ! = SuperscalarInstructionType : : IADD_RS | | i ! = LimitedAddressRegister ) )
2019-03-31 22:38:17 +00:00
availableRegisters . push_back ( i ) ;
}
return selectRegister ( availableRegisters , gen , dst_ ) ;
}
bool selectSource ( int cycle , RegisterInfo ( & registers ) [ 8 ] , Blake2Generator & gen ) {
std : : vector < int > availableRegisters ;
2019-04-12 12:56:20 +00:00
//all registers that are ready at the cycle
2019-03-31 22:38:17 +00:00
for ( unsigned i = 0 ; i < 8 ; + + i ) {
2019-04-01 16:31:02 +00:00
if ( registers [ i ] . latency < = cycle )
2019-03-31 22:38:17 +00:00
availableRegisters . push_back ( i ) ;
}
2019-04-12 12:56:20 +00:00
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
2019-04-12 11:32:22 +00:00
if ( availableRegisters . size ( ) = = 2 & & info_ - > getType ( ) = = SuperscalarInstructionType : : IADD_RS ) {
2019-04-12 12:56:20 +00:00
if ( availableRegisters [ 0 ] = = LimitedAddressRegister | | availableRegisters [ 1 ] = = LimitedAddressRegister ) {
opGroupPar_ = src_ = LimitedAddressRegister ;
2019-04-06 10:00:56 +00:00
return true ;
}
}
2019-04-01 16:31:02 +00:00
if ( selectRegister ( availableRegisters , gen , src_ ) ) {
if ( groupParIsSource_ )
opGroupPar_ = src_ ;
return true ;
}
return false ;
2019-03-31 22:38:17 +00:00
}
2019-03-31 11:32:16 +00:00
int getType ( ) {
2019-04-11 22:02:22 +00:00
return info_ - > getType ( ) ;
2019-03-31 11:32:16 +00:00
}
int getSource ( ) {
return src_ ;
}
int getDestination ( ) {
return dst_ ;
}
int getGroup ( ) {
return opGroup_ ;
}
int getGroupPar ( ) {
return opGroupPar_ ;
}
2019-04-11 22:02:22 +00:00
const LightInstructionInfo & getInfo ( ) const {
return * info_ ;
2019-03-31 11:32:16 +00:00
}
static const LightInstruction Null ;
private :
2019-04-11 22:02:22 +00:00
const LightInstructionInfo * info_ ;
2019-03-31 22:38:17 +00:00
int src_ = - 1 ;
int dst_ = - 1 ;
2019-03-31 11:32:16 +00:00
int mod_ ;
uint32_t imm32_ ;
int opGroup_ ;
int opGroupPar_ ;
2019-03-31 22:38:17 +00:00
bool canReuse_ = false ;
2019-04-01 16:31:02 +00:00
bool groupParIsSource_ = false ;
2019-03-31 11:32:16 +00:00
2019-04-11 22:02:22 +00:00
void reset ( ) {
src_ = dst_ = - 1 ;
canReuse_ = groupParIsSource_ = false ;
}
LightInstruction ( const LightInstructionInfo * info ) : info_ ( info ) {
2019-03-31 19:22:36 +00:00
}
2019-03-31 11:32:16 +00:00
} ;
2019-03-31 19:22:36 +00:00
const LightInstruction LightInstruction : : Null = LightInstruction ( & LightInstructionInfo : : NOP ) ;
2019-03-31 11:32:16 +00:00
2019-04-11 22:02:22 +00:00
//number of cycles tracked by the port occupancy map (first dimension of portBusy)
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3;
//NOTE(review): the two limits below are not used in the visible part of this file - confirm against generateSuperscalar
constexpr int LOOK_FORWARD_CYCLES = 4;
constexpr int MAX_THROWAWAY_COUNT = 256;

//Diagnostic output is enabled only when _DEBUG is defined.
#ifdef _DEBUG
constexpr bool TRACE = true;
constexpr bool INFO = true;
#else
constexpr bool TRACE = false;
constexpr bool INFO = false;
#endif
2019-03-28 14:27:10 +00:00
2019-04-12 11:32:22 +00:00
template < bool commit >
static int scheduleUop ( ExecutionPort : : type uop , ExecutionPort : : type ( & portBusy ) [ CYCLE_MAP_SIZE ] [ 3 ] , int cycle ) {
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
//P1 (multiplication port) by instructions that can go to any port.
for ( ; cycle < CYCLE_MAP_SIZE ; + + cycle ) {
if ( ( uop & ExecutionPort : : P5 ) ! = 0 & & ! portBusy [ cycle ] [ 2 ] ) {
if ( commit ) {
if ( TRACE ) std : : cout < < " ; P5 at cycle " < < cycle < < std : : endl ;
portBusy [ cycle ] [ 2 ] = uop ;
}
return cycle ;
}
if ( ( uop & ExecutionPort : : P0 ) ! = 0 & & ! portBusy [ cycle ] [ 0 ] ) {
if ( commit ) {
if ( TRACE ) std : : cout < < " ; P0 at cycle " < < cycle < < std : : endl ;
portBusy [ cycle ] [ 0 ] = uop ;
}
return cycle ;
}
if ( ( uop & ExecutionPort : : P1 ) ! = 0 & & ! portBusy [ cycle ] [ 1 ] ) {
if ( commit ) {
if ( TRACE ) std : : cout < < " ; P1 at cycle " < < cycle < < std : : endl ;
portBusy [ cycle ] [ 1 ] = uop ;
}
return cycle ;
}
}
return - 1 ;
}
2019-03-28 14:27:10 +00:00
2019-04-03 07:53:25 +00:00
template < bool commit >
2019-04-12 11:32:22 +00:00
static int scheduleMop ( const MacroOp & mop , ExecutionPort : : type ( & portBusy ) [ CYCLE_MAP_SIZE ] [ 3 ] , int cycle , int depCycle ) {
//if this macro-op depends on the previous one, increase the starting cycle if needed
//this handles an explicit dependency chain in IMUL_RCP
2019-03-31 19:22:36 +00:00
if ( mop . isDependent ( ) ) {
cycle = std : : max ( cycle , depCycle ) ;
}
2019-04-12 11:32:22 +00:00
//move instructions are eliminated and don't need an execution unit
2019-03-31 19:22:36 +00:00
if ( mop . isEliminated ( ) ) {
2019-04-03 07:53:25 +00:00
if ( commit )
if ( TRACE ) std : : cout < < " ; (eliminated) " < < std : : endl ;
2019-03-31 19:22:36 +00:00
return cycle ;
}
else if ( mop . isSimple ( ) ) {
2019-04-12 11:32:22 +00:00
//this macro-op has only one uOP
return scheduleUop < commit > ( mop . getUop1 ( ) , portBusy , cycle ) ;
2019-03-31 19:22:36 +00:00
}
else {
2019-04-12 11:32:22 +00:00
//macro-ops with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle
2019-04-03 07:53:25 +00:00
for ( ; cycle < CYCLE_MAP_SIZE ; + + cycle ) {
2019-04-12 11:32:22 +00:00
int cycle1 = scheduleUop < false > ( mop . getUop1 ( ) , portBusy , cycle ) ;
int cycle2 = scheduleUop < false > ( mop . getUop2 ( ) , portBusy , cycle ) ;
if ( cycle1 = = cycle2 ) {
2019-04-03 07:53:25 +00:00
if ( commit ) {
2019-04-12 11:32:22 +00:00
scheduleUop < true > ( mop . getUop1 ( ) , portBusy , cycle1 ) ;
scheduleUop < true > ( mop . getUop2 ( ) , portBusy , cycle2 ) ;
2019-04-03 07:53:25 +00:00
}
2019-04-12 11:32:22 +00:00
return cycle1 ;
2019-03-31 19:22:36 +00:00
}
}
}
return - 1 ;
}
2019-04-12 12:56:20 +00:00
//Generates a program into 'prog', taking all random choices from 'gen'.
//The function walks through decode cycles; in each one it picks a decode buffer, fills its slots
//with macro-ops of newly created instructions, selects source/destination registers and
//schedules every macro-op into the port occupancy map. Finished instructions are appended to 'prog'.
//At the end the program size and the address register (the register with the highest
//"ASIC latency") are stored into 'prog'.
//Returns the number of macro-ops per cycle (macroOpCount / retireCycle).
//Returns 0 if a macro-op cannot be mapped to an execution port (in that case 'prog' size and
//address register are not set) or if nothing was retired.
double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) {

    //port occupancy map indexed as [cycle][port]; starts out all idle
    ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3] = {};
    RegisterInfo registers[8];

    const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
    LightInstruction currentInstruction = LightInstruction::Null;
    int macroOpIndex = 0;
    int codeSize = 0;
    int macroOpCount = 0;
    int cycle = 0;
    int depCycle = 0;
    int retireCycle = 0;
    bool portsSaturated = false;
    int programSize = 0;
    int mulCount = 0;
    int decodeCycle;
    int throwAwayCount = 0;

    //decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated.
    //Each decode cycle decodes 16 bytes of x86 code.
    //Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always
    //saturated first. The cycle limit is present only to guarantee loop termination.
    //Program size is limited to RANDOMX_SUPERSCALAR_MAX_SIZE instructions.
    for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < RANDOMX_SUPERSCALAR_MAX_SIZE; ++decodeCycle) {

        //select a decode configuration
        decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
        if (TRACE) std::cout << " ; ------------- fetch cycle " << cycle << " ( " << decodeBuffer->getName() << " ) " << std::endl;

        int bufferIndex = 0;

        //fill all instruction slots in the current decode buffer
        while (bufferIndex < decodeBuffer->getSize()) {
            //remember the cycle so that stalls of this macro-op don't delay the following ones
            int topCycle = cycle;

            //if we have issued all macro-ops for the current RandomX instruction, create a new instruction
            if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
                //the size limit must be checked here as well: a single decode buffer can complete
                //several instructions, so the check in the outer loop alone would allow
                //writing past RANDOMX_SUPERSCALAR_MAX_SIZE
                if (portsSaturated || programSize >= RANDOMX_SUPERSCALAR_MAX_SIZE)
                    break;
                //select an instruction so that the first macro-op fits into the current slot
                currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
                macroOpIndex = 0;
                if (TRACE) std::cout << " ; " << currentInstruction.getInfo().getName() << std::endl;
            }

            const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
            if (TRACE) std::cout << mop.getName() << " ";

            //calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
            int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
            if (scheduleCycle < 0) {
                /*if (TRACE)*/ std::cout << " Unable to map operation ' " << mop.getName() << " ' to execution port (cycle " << cycle << " ) " << std::endl;
                return 0;
            }

            //find a source register (if applicable) that will be ready when this instruction executes
            if (macroOpIndex == currentInstruction.getInfo().getSrcOp()) {
                int forward;
                //if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
                for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
                    if (TRACE) std::cout << " ; src STALL at cycle " << cycle << std::endl;
                    ++scheduleCycle;
                    ++cycle;
                }
                //if no register was found, throw the instruction away and try another one
                if (forward == LOOK_FORWARD_CYCLES) {
                    if (throwAwayCount < MAX_THROWAWAY_COUNT) {
                        throwAwayCount++;
                        //marking all macro-ops as issued forces a new instruction for the same slot
                        macroOpIndex = currentInstruction.getInfo().getSize();
                        if (TRACE) std::cout << " ; THROW away " << currentInstruction.getInfo().getName() << std::endl;
                        continue;
                    }
                    //abort this decode buffer
                    /*if (TRACE)*/ std::cout << " Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available " << std::endl;
                    currentInstruction = LightInstruction::Null;
                    break;
                }
                if (TRACE) std::cout << " ; src = r " << currentInstruction.getSource() << std::endl;
            }
            throwAwayCount = 0;

            //find a destination register that will be ready when this instruction executes
            if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
                int forward;
                for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) {
                    if (TRACE) std::cout << " ; dst STALL at cycle " << cycle << std::endl;
                    ++scheduleCycle;
                    ++cycle;
                }
                if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away
                    if (throwAwayCount < MAX_THROWAWAY_COUNT) {
                        throwAwayCount++;
                        macroOpIndex = currentInstruction.getInfo().getSize();
                        if (TRACE) std::cout << " ; THROW away " << currentInstruction.getInfo().getName() << std::endl;
                        continue;
                    }
                    //abort this decode buffer
                    /*if (TRACE)*/ std::cout << " Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available " << std::endl;
                    currentInstruction = LightInstruction::Null;
                    break;
                }
                if (TRACE) std::cout << " ; dst = r " << currentInstruction.getDestination() << std::endl;
            }
            throwAwayCount = 0;

            //recalculate when the instruction can be scheduled for execution based on operand availability
            scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);

            //calculate when the result will be ready
            depCycle = scheduleCycle + mop.getLatency();

            //if this instruction writes the result, modify register information
            //  RegisterInfo.latency - which cycle the register will be ready
            //  RegisterInfo.lastOpGroup - the last operation that was applied to the register
            //  RegisterInfo.lastOpPar - the last operation source value (-1 = constant, 0-7 = register)
            if (macroOpIndex == currentInstruction.getInfo().getResultOp()) {
                int dst = currentInstruction.getDestination();
                RegisterInfo& ri = registers[dst];
                retireCycle = depCycle;
                ri.latency = retireCycle;
                ri.lastOpGroup = currentInstruction.getGroup();
                ri.lastOpPar = currentInstruction.getGroupPar();
                if (TRACE) std::cout << " ; RETIRED at cycle " << retireCycle << std::endl;
            }

            codeSize += mop.getSize();
            bufferIndex++;
            macroOpIndex++;
            macroOpCount++;

            //terminating condition
            if (scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY) {
                portsSaturated = true;
            }
            cycle = topCycle;

            //when all macro-ops of the current instruction have been issued, add the instruction into the program
            if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
                currentInstruction.toInstr(prog(programSize++));
                mulCount += isMultiplication(currentInstruction.getType());
            }
        }
        ++cycle;
    }

    if (INFO) std::cout << " ; ALU port utilization: " << std::endl;
    if (INFO) std::cout << " ; (* = in use, _ = idle) " << std::endl;

    //count the (cycle, port) pairs that are in use
    int portCycles = 0;
    for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
        for (int j = 0; j < 3; ++j) {
            portCycles += portBusy[i][j] != 0 ? 1 : 0;
        }
    }

    //guard against division by zero when no instruction has retired
    const double ipc = retireCycle > 0 ? macroOpCount / static_cast<double>(retireCycle) : 0.0;

    if (INFO) std::cout << " ; code size " << codeSize << " bytes " << std::endl;
    if (INFO) std::cout << " ; x86 macro-ops: " << macroOpCount << std::endl;
    if (INFO) std::cout << " ; fetch cycles: " << decodeCycle << std::endl;
    if (INFO) std::cout << " ; RandomX instructions: " << programSize << std::endl;
    if (INFO) std::cout << " ; Execution time: " << retireCycle << " cycles " << std::endl;
    if (INFO) std::cout << " ; IPC = " << ipc << std::endl;
    if (INFO) std::cout << " ; Port-cycles: " << portCycles << std::endl;
    if (INFO) std::cout << " ; Multiplications: " << mulCount << std::endl;

    int asicLatency[8] = {};

    //Calculate ASIC latency:
    //Assumes 1 cycle latency for all operations and unlimited parallelization.
    for (int i = 0; i < programSize; ++i) {
        Instruction& instr = prog(i);
        int latDst = asicLatency[instr.dst] + 1;
        int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0;
        asicLatency[instr.dst] = std::max(latDst, latSrc);
    }

    //address register is the register with the highest ASIC latency
    //(the lowest-numbered one in case of a tie; r0 if all latencies are zero)
    int asicLatencyMax = 0;
    int addressReg = 0;
    for (int i = 0; i < 8; ++i) {
        if (asicLatency[i] > asicLatencyMax) {
            asicLatencyMax = asicLatency[i];
            addressReg = i;
        }
    }

    if (INFO) std::cout << " ; ASIC latency: " << asicLatencyMax << std::endl;

    if (INFO) {
        std::cout << " ; ASIC latency: " << std::endl;
        for (int i = 0; i < 8; ++i) {
            std::cout << " ; r " << i << " = " << asicLatency[i] << std::endl;
        }
        std::cout << " ; CPU latency: " << std::endl;
        for (int i = 0; i < 8; ++i) {
            std::cout << " ; r " << i << " = " << registers[i].latency << std::endl;
        }
    }

    prog.setSize(programSize);
    prog.setAddressRegister(addressReg);
    return ipc;
}
2019-03-28 14:27:10 +00:00
}