mirror of
https://git.wownero.com/wownero/wownero.git
synced 2025-01-23 15:38:36 +00:00
commit
99bf440290
@ -215,30 +215,43 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
|
|||||||
lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
|
lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define V4_REG_LOAD(dst, src) \
|
||||||
|
do { \
|
||||||
|
memcpy((dst), (src), sizeof(v4_reg)); \
|
||||||
|
if (sizeof(v4_reg) == sizeof(uint32_t)) \
|
||||||
|
*(dst) = SWAP32LE(*(dst)); \
|
||||||
|
else \
|
||||||
|
*(dst) = SWAP64LE(*(dst)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define VARIANT4_RANDOM_MATH_INIT() \
|
#define VARIANT4_RANDOM_MATH_INIT() \
|
||||||
v4_reg r[8]; \
|
v4_reg r[8]; \
|
||||||
struct V4_Instruction code[TOTAL_LATENCY * ALU_COUNT + 1]; \
|
struct V4_Instruction code[TOTAL_LATENCY * ALU_COUNT + 1]; \
|
||||||
do if (variant >= 4) \
|
do if (variant >= 4) \
|
||||||
{ \
|
{ \
|
||||||
v4_reg* data = (v4_reg*)(state.hs.w + 12); \
|
for (int i = 0; i < 4; ++i) \
|
||||||
r[0] = data[0]; \
|
V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \
|
||||||
r[1] = data[1]; \
|
|
||||||
r[2] = data[2]; \
|
|
||||||
r[3] = data[3]; \
|
|
||||||
v4_random_math_init(code, height); \
|
v4_random_math_init(code, height); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
|
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
|
||||||
do if (variant >= 4) \
|
do if (variant >= 4) \
|
||||||
{ \
|
{ \
|
||||||
|
uint64_t t; \
|
||||||
|
memcpy(&t, b, sizeof(uint64_t)); \
|
||||||
|
\
|
||||||
if (sizeof(v4_reg) == sizeof(uint32_t)) \
|
if (sizeof(v4_reg) == sizeof(uint32_t)) \
|
||||||
U64(b)[0] ^= (r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32); \
|
t ^= SWAP64LE((r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32)); \
|
||||||
else \
|
else \
|
||||||
U64(b)[0] ^= (r[0] + r[1]) ^ (r[2] + r[3]); \
|
t ^= SWAP64LE((r[0] + r[1]) ^ (r[2] + r[3])); \
|
||||||
r[4] = ((v4_reg*)(a))[0]; \
|
\
|
||||||
r[5] = ((v4_reg*)(a))[sizeof(uint64_t) / sizeof(v4_reg)]; \
|
memcpy(b, &t, sizeof(uint64_t)); \
|
||||||
r[6] = ((v4_reg*)(_b))[0]; \
|
\
|
||||||
r[7] = ((v4_reg*)(_b1))[0]; \
|
V4_REG_LOAD(r + 4, a); \
|
||||||
|
V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \
|
||||||
|
V4_REG_LOAD(r + 6, _b); \
|
||||||
|
V4_REG_LOAD(r + 7, _b1); \
|
||||||
|
\
|
||||||
v4_random_math(code, r); \
|
v4_random_math(code, r); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -106,13 +106,13 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg
|
|||||||
case ROR: \
|
case ROR: \
|
||||||
{ \
|
{ \
|
||||||
const uint32_t shift = src % REG_BITS; \
|
const uint32_t shift = src % REG_BITS; \
|
||||||
*dst = (*dst >> shift) | (*dst << (REG_BITS - shift)); \
|
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
|
||||||
} \
|
} \
|
||||||
break; \
|
break; \
|
||||||
case ROL: \
|
case ROL: \
|
||||||
{ \
|
{ \
|
||||||
const uint32_t shift = src % REG_BITS; \
|
const uint32_t shift = src % REG_BITS; \
|
||||||
*dst = (*dst << shift) | (*dst >> (REG_BITS - shift)); \
|
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
|
||||||
} \
|
} \
|
||||||
break; \
|
break; \
|
||||||
case XOR: \
|
case XOR: \
|
||||||
@ -166,11 +166,11 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If we don't have enough data available, generate more
|
// If we don't have enough data available, generate more
|
||||||
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, char* data, const size_t data_size)
|
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
|
||||||
{
|
{
|
||||||
if (*data_index + bytes_needed > data_size)
|
if (*data_index + bytes_needed > data_size)
|
||||||
{
|
{
|
||||||
hash_extra_blake(data, data_size, data);
|
hash_extra_blake(data, data_size, (char*) data);
|
||||||
*data_index = 0;
|
*data_index = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -193,10 +193,14 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
|
|||||||
// Available ALUs for each instruction
|
// Available ALUs for each instruction
|
||||||
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
|
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
|
||||||
|
|
||||||
char data[32];
|
int8_t data[32];
|
||||||
memset(data, 0, sizeof(data));
|
memset(data, 0, sizeof(data));
|
||||||
*((uint64_t*)data) = height;
|
uint64_t tmp = SWAP64LE(height);
|
||||||
|
memcpy(data, &tmp, sizeof(uint64_t));
|
||||||
|
|
||||||
|
// Set data_index past the last byte in data
|
||||||
|
// to trigger full data update with blake hash
|
||||||
|
// before we start using it
|
||||||
size_t data_index = sizeof(data);
|
size_t data_index = sizeof(data);
|
||||||
|
|
||||||
int code_size;
|
int code_size;
|
||||||
@ -209,7 +213,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
|
|||||||
// byte 1: instruction opcode
|
// byte 1: instruction opcode
|
||||||
// byte 2: current value of the source register
|
// byte 2: current value of the source register
|
||||||
//
|
//
|
||||||
// Registers R4-R7 are constant and are threatened as having the same value because when we do
|
// Registers R4-R7 are constant and are treated as having the same value because when we do
|
||||||
// the same operation twice with two constant source registers, it can be optimized into a single operation
|
// the same operation twice with two constant source registers, it can be optimized into a single operation
|
||||||
int inst_data[8] = { 0, 1, 2, 3, -1, -1, -1, -1 };
|
int inst_data[8] = { 0, 1, 2, 3, -1, -1, -1, -1 };
|
||||||
|
|
||||||
@ -355,7 +359,9 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
|
|||||||
|
|
||||||
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
|
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
|
||||||
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
|
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
|
||||||
code[code_size].C = *((uint32_t*)&data[data_index]);
|
uint32_t t;
|
||||||
|
memcpy(&t, data + data_index, sizeof(uint32_t));
|
||||||
|
code[code_size].C = SWAP32LE(t);
|
||||||
data_index += sizeof(uint32_t);
|
data_index += sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user