Fixed a chance of CBRANCH looping

Fixed CBRANCH jump probability being lower than expected
This commit is contained in:
tevador 2019-05-03 14:02:40 +02:00
parent 5241cb902e
commit 9e5eac8645
8 changed files with 73 additions and 46 deletions

View File

@ -18,8 +18,8 @@ randomx_isn_5:
xchg r12, r8 xchg r12, r8
randomx_isn_6: randomx_isn_6:
; CBRANCH -188214077, COND 5 ; CBRANCH -188214077, COND 5
add r9, -188214045 add r9, -188209981
test r9, 8160 test r9, 2088960
jz randomx_isn_0 jz randomx_isn_0
randomx_isn_7: randomx_isn_7:
; ISTORE L3[r0-784322734], r3 ; ISTORE L3[r0-784322734], r3
@ -52,13 +52,13 @@ randomx_isn_12:
imul r15, r12 imul r15, r12
randomx_isn_13: randomx_isn_13:
; CBRANCH 179989705, COND 3 ; CBRANCH 179989705, COND 3
add r8, 179989705 add r8, 179988681
test r8, 2040 test r8, 522240
jz randomx_isn_7 jz randomx_isn_7
randomx_isn_14: randomx_isn_14:
; CBRANCH 1801296358, COND 3 ; CBRANCH 1801296358, COND 3
add r10, 1801296366 add r10, 1801296358
test r10, 2040 test r10, 522240
jz randomx_isn_14 jz randomx_isn_14
randomx_isn_15: randomx_isn_15:
; IADD_RS r6, r2, SHFT 3 ; IADD_RS r6, r2, SHFT 3
@ -80,8 +80,8 @@ randomx_isn_19:
mulpd xmm5, xmm10 mulpd xmm5, xmm10
randomx_isn_20: randomx_isn_20:
; CBRANCH 1593588996, COND 3 ; CBRANCH 1593588996, COND 3
add r11, 1593589004 add r11, 1593587972
test r11, 2040 test r11, 522240
jz randomx_isn_15 jz randomx_isn_15
randomx_isn_21: randomx_isn_21:
; IROR_R r7, r2 ; IROR_R r7, r2
@ -102,7 +102,7 @@ randomx_isn_23:
randomx_isn_24: randomx_isn_24:
; CBRANCH 149087159, COND 13 ; CBRANCH 149087159, COND 13
add r12, 149087159 add r12, 149087159
test r12, 2088960 test r12, 534773760
jz randomx_isn_21 jz randomx_isn_21
randomx_isn_25: randomx_isn_25:
; FADD_R f3, a0 ; FADD_R f3, a0
@ -208,8 +208,8 @@ randomx_isn_50:
subpd xmm3, xmm8 subpd xmm3, xmm8
randomx_isn_51: randomx_isn_51:
; CBRANCH -1975981803, COND 14 ; CBRANCH -1975981803, COND 14
add r9, -1975981803 add r9, -1973884651
test r9, 4177920 test r9, 1069547520
jz randomx_isn_25 jz randomx_isn_25
randomx_isn_52: randomx_isn_52:
; IADD_M r1, L3[1622792] ; IADD_M r1, L3[1622792]
@ -219,8 +219,8 @@ randomx_isn_53:
subpd xmm2, xmm8 subpd xmm2, xmm8
randomx_isn_54: randomx_isn_54:
; CBRANCH 1917049931, COND 12 ; CBRANCH 1917049931, COND 12
add r13, 1917049931 add r13, 1918098507
test r13, 1044480 test r13, 267386880
jz randomx_isn_52 jz randomx_isn_52
randomx_isn_55: randomx_isn_55:
; IXOR_R r2, r3 ; IXOR_R r2, r3
@ -249,7 +249,7 @@ randomx_isn_61:
randomx_isn_62: randomx_isn_62:
; CBRANCH 1111898647, COND 1 ; CBRANCH 1111898647, COND 1
add r14, 1111898647 add r14, 1111898647
test r14, 510 test r14, 130560
jz randomx_isn_55 jz randomx_isn_55
randomx_isn_63: randomx_isn_63:
; IMUL_R r6, r5 ; IMUL_R r6, r5
@ -288,8 +288,8 @@ randomx_isn_73:
mulpd xmm4, xmm8 mulpd xmm4, xmm8
randomx_isn_74: randomx_isn_74:
; CBRANCH -1200328848, COND 4 ; CBRANCH -1200328848, COND 4
add r15, -1200328848 add r15, -1200326800
test r15, 4080 test r15, 1044480
jz randomx_isn_63 jz randomx_isn_63
randomx_isn_75: randomx_isn_75:
; FSQRT_R e0 ; FSQRT_R e0
@ -346,8 +346,8 @@ randomx_isn_88:
imul r9, qword ptr [rsi+rax] imul r9, qword ptr [rsi+rax]
randomx_isn_89: randomx_isn_89:
; CBRANCH -122257389, COND 13 ; CBRANCH -122257389, COND 13
add r8, -122249197 add r8, -123305965
test r8, 2088960 test r8, 534773760
jz randomx_isn_75 jz randomx_isn_75
randomx_isn_90: randomx_isn_90:
; ISTORE L1[r5+228116180], r7 ; ISTORE L1[r5+228116180], r7
@ -481,8 +481,8 @@ randomx_isn_122:
subpd xmm0, xmm9 subpd xmm0, xmm9
randomx_isn_123: randomx_isn_123:
; CBRANCH 269211216, COND 3 ; CBRANCH 269211216, COND 3
add r9, 269211224 add r9, 269212240
test r9, 2040 test r9, 522240
jz randomx_isn_100 jz randomx_isn_100
randomx_isn_124: randomx_isn_124:
; FSUB_M f2, L1[r6-1615966581] ; FSUB_M f2, L1[r6-1615966581]
@ -564,8 +564,8 @@ randomx_isn_142:
addpd xmm1, xmm8 addpd xmm1, xmm8
randomx_isn_143: randomx_isn_143:
; CBRANCH 880467599, COND 5 ; CBRANCH 880467599, COND 5
add r14, 880467631 add r14, 880471695
test r14, 8160 test r14, 2088960
jz randomx_isn_124 jz randomx_isn_124
randomx_isn_144: randomx_isn_144:
; FMUL_R e1, a1 ; FMUL_R e1, a1
@ -585,8 +585,8 @@ randomx_isn_147:
add r9, qword ptr [rsi+rax] add r9, qword ptr [rsi+rax]
randomx_isn_148: randomx_isn_148:
; CBRANCH -1843326985, COND 14 ; CBRANCH -1843326985, COND 14
add r10, -1843310601 add r10, -1841229833
test r10, 4177920 test r10, 1069547520
jz randomx_isn_144 jz randomx_isn_144
randomx_isn_149: randomx_isn_149:
; IADD_RS r4, r3, SHFT 2 ; IADD_RS r4, r3, SHFT 2
@ -655,8 +655,8 @@ randomx_isn_163:
shufpd xmm3, xmm3, 1 shufpd xmm3, xmm3, 1
randomx_isn_164: randomx_isn_164:
; CBRANCH -2107581963, COND 4 ; CBRANCH -2107581963, COND 4
add r11, -2107581963 add r11, -2107584011
test r11, 4080 test r11, 1044480
jz randomx_isn_149 jz randomx_isn_149
randomx_isn_165: randomx_isn_165:
; FSUB_R f1, a2 ; FSUB_R f1, a2
@ -720,8 +720,8 @@ randomx_isn_180:
subpd xmm3, xmm9 subpd xmm3, xmm9
randomx_isn_181: randomx_isn_181:
; CBRANCH 556152230, COND 12 ; CBRANCH 556152230, COND 12
add r12, 556152230 add r12, 557200806
test r12, 1044480 test r12, 267386880
jz randomx_isn_165 jz randomx_isn_165
randomx_isn_182: randomx_isn_182:
; FSQRT_R e2 ; FSQRT_R e2
@ -956,8 +956,8 @@ randomx_isn_246:
imul r15, r10 imul r15, r10
randomx_isn_247: randomx_isn_247:
; CBRANCH -8545330, COND 4 ; CBRANCH -8545330, COND 4
add r8, -8545314 add r8, -8547378
test r8, 4080 test r8, 1044480
jz randomx_isn_213 jz randomx_isn_213
randomx_isn_248: randomx_isn_248:
; ISTORE L1[r0+1951752498], r5 ; ISTORE L1[r0+1951752498], r5

View File

@ -55,7 +55,8 @@ RandomX has several configurable parameters that are listed in Table 1.2.1 with
|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| |`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`|
|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`| |`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`|
|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`| |`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`|
|`RANDOMX_JUMP_BITS`|How many register bits must be zero for the CBRANCH instruction to jump|`8`| |`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`|
|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`|
|`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`| |`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`|
|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`| |`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`|
|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| |`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`|
@ -613,16 +614,28 @@ A register is considered as modified by an instruction in the following cases:
There are 3 rules for the selection of the `creg` register, evaluated in this order: There are 3 rules for the selection of the `creg` register, evaluated in this order:
1. The register with the lowest value of `lastUsed` tag is selected. 1. The register with the lowest value of `lastUsed` tag is selected.
2. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected. 1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected.
3. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.). 1. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.).
Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1. Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1.
The CBRANCH instruction performs the following steps (`|` represents a bitwise OR operation, `&` is a bitwise AND operation): The CBRANCH instruction performs the following steps:
1. A constant value of `imm32 | (1 << mod.cond)` is added to `creg`. 1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`.
2. `conditionMask` is constructed as `RANDOMX_JUMP_BITS` one-bits shifted left by `mod.cond`. 1. A constant `conditionImmediate` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`).
3. If `creg & conditionMask` is zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified). 1. `conditionImmediate` is added to `creg`.
1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of `creg` are zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified).
Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this:
```
conditionImmediate = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM
creg = ..............................................XXXXXXXX..........
```
`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. Bit 9 (`b-1`) is set to 0 and bit 10 (`b`) is set to 1. This value would be added to `creg`.
The second line uses `X` to mark bits of `creg` that would be checked by the condition. If all these bits are 0 after adding `conditionImmediate`, the jump is executed.
The construction of the CBRANCH instruction ensures that no infinite loops are possible in the program. The construction of the CBRANCH instruction ensures that no infinite loops are possible in the program.

View File

@ -532,8 +532,11 @@ namespace randomx {
int reg = getConditionRegister(registerUsage); int reg = getConditionRegister(registerUsage);
int target = registerUsage[reg].lastUsed + 1; int target = registerUsage[reg].lastUsed + 1;
registerUsage[reg].count++; registerUsage[reg].count++;
int shift = instr.getModCond(); int shift = instr.getModCond() + ConditionOffset;
asmCode << "\tadd " << regR[reg] << ", " << (int32_t)(instr.getImm32() | (1 << shift)) << std::endl; int32_t imm = instr.getImm32() | (1L << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1L << (shift - 1));
asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
asmCode << "\tjz randomx_isn_" << target << std::endl; asmCode << "\tjz randomx_isn_" << target << std::endl;
//mark all registers as used //mark all registers as used

View File

@ -41,7 +41,9 @@ namespace randomx {
static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
static_assert(RANDOMX_JUMP_BITS >= 1 && RANDOMX_JUMP_BITS <= 16, "RANDOMX_JUMP_BITS must be an integer in the range 1-16."); static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
@ -62,6 +64,7 @@ namespace randomx {
constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
constexpr int StoreL3Condition = 14; constexpr int StoreL3Condition = 14;
#ifdef TRACE #ifdef TRACE

View File

@ -64,9 +64,12 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024) #define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
//How many register bits must be zero for CBRANCH instruction to jump. Must be an integer in the range 1-16. //Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8 #define RANDOMX_JUMP_BITS 8
//Jump condition mask offset in bits.
#define RANDOMX_JUMP_OFFSET 8
/* /*
Instruction frequencies (per 256 opcodes) Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256 Total sum of frequencies must be 256

View File

@ -775,10 +775,13 @@ namespace randomx {
int reg = getConditionRegister(registerUsage); int reg = getConditionRegister(registerUsage);
int target = registerUsage[reg].lastUsed + 1; int target = registerUsage[reg].lastUsed + 1;
registerUsage[reg].count++; registerUsage[reg].count++;
int shift = instr.getModCond();
emit(REX_ADD_I); emit(REX_ADD_I);
emitByte(0xc0 + reg); emitByte(0xc0 + reg);
emit32(instr.getImm32() | (1 << shift)); int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST); emit(REX_TEST);
emitByte(0xc0 + reg); emitByte(0xc0 + reg);
emit32(ConditionMask << shift); emit32(ConditionMask << shift);

View File

@ -229,7 +229,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0) if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: a15448785857f9a78703eb5da235dfe73d0d5fc4c8effaebe73869904f5af47d" << std::endl; std::cout << "Reference result: 47452f6064db799ae580dd71fe0ebe221579cedf837fac7095f1c5edc07cf345" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
} }

View File

@ -615,9 +615,11 @@ namespace randomx {
ibc.isrc = &r[reg]; ibc.isrc = &r[reg];
ibc.target = registerUsage[reg].lastUsed; ibc.target = registerUsage[reg].lastUsed;
registerUsage[reg].count++; registerUsage[reg].count++;
int shift = instr.getModCond(); int shift = instr.getModCond() + ConditionOffset;
const uint64_t conditionMask = ConditionMask << instr.getModCond(); const uint64_t conditionMask = ConditionMask << shift;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = ConditionMask << shift; ibc.memMask = ConditionMask << shift;
//mark all registers as used //mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) { for (unsigned j = 0; j < RegistersCount; ++j) {