diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c
index 8da79dee0..5a8be4a61 100644
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@@ -215,30 +215,43 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
     lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
   } while (0)
 
+#define V4_REG_LOAD(dst, src) \
+  do { \
+    memcpy((dst), (src), sizeof(v4_reg)); \
+    if (sizeof(v4_reg) == sizeof(uint32_t)) \
+      *(dst) = SWAP32LE(*(dst)); \
+    else \
+      *(dst) = SWAP64LE(*(dst)); \
+  } while (0)
+
 #define VARIANT4_RANDOM_MATH_INIT() \
   v4_reg r[8]; \
   struct V4_Instruction code[TOTAL_LATENCY * ALU_COUNT + 1]; \
   do if (variant >= 4) \
   { \
-    v4_reg* data = (v4_reg*)(state.hs.w + 12); \
-    r[0] = data[0]; \
-    r[1] = data[1]; \
-    r[2] = data[2]; \
-    r[3] = data[3]; \
+    for (int i = 0; i < 4; ++i) \
+      V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \
     v4_random_math_init(code, height); \
   } while (0)
 
 #define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
   do if (variant >= 4) \
   { \
+    uint64_t t; \
+    memcpy(&t, b, sizeof(uint64_t)); \
+    \
     if (sizeof(v4_reg) == sizeof(uint32_t)) \
-      U64(b)[0] ^= (r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32); \
+      t ^= SWAP64LE((r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32)); \
     else \
-      U64(b)[0] ^= (r[0] + r[1]) ^ (r[2] + r[3]); \
-    r[4] = ((v4_reg*)(a))[0]; \
-    r[5] = ((v4_reg*)(a))[sizeof(uint64_t) / sizeof(v4_reg)]; \
-    r[6] = ((v4_reg*)(_b))[0]; \
-    r[7] = ((v4_reg*)(_b1))[0]; \
+      t ^= SWAP64LE((r[0] + r[1]) ^ (r[2] + r[3])); \
+    \
+    memcpy(b, &t, sizeof(uint64_t)); \
+    \
+    V4_REG_LOAD(r + 4, a); \
+    V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \
+    V4_REG_LOAD(r + 6, _b); \
+    V4_REG_LOAD(r + 7, _b1); \
+    \
     v4_random_math(code, r); \
   } while (0)
 
diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h
index fc16ef4d8..8724c58c9 100644
--- a/src/crypto/variant4_random_math.h
+++ b/src/crypto/variant4_random_math.h
@@ -106,13 +106,13 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg
 		case ROR: \
 		{ \
 			const uint32_t shift = src % REG_BITS; \
-			*dst = (*dst >> shift) | (*dst << (REG_BITS - shift)); \
+			*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
 		} \
 		break; \
 		case ROL: \
 		{ \
 			const uint32_t shift = src % REG_BITS; \
-			*dst = (*dst << shift) | (*dst >> (REG_BITS - shift)); \
+			*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
 		} \
 		break; \
 		case XOR: \
@@ -166,11 +166,11 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg
 }
 
 // If we don't have enough data available, generate more
-static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, char* data, const size_t data_size)
+static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
 {
 	if (*data_index + bytes_needed > data_size)
 	{
-		hash_extra_blake(data, data_size, data);
+		hash_extra_blake(data, data_size, (char*) data);
 		*data_index = 0;
 	}
 }
@@ -193,10 +193,14 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
 	// Available ALUs for each instruction
 	const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
 
-	char data[32];
+	int8_t data[32];
 	memset(data, 0, sizeof(data));
-	*((uint64_t*)data) = height;
+	uint64_t tmp = SWAP64LE(height);
+	memcpy(data, &tmp, sizeof(uint64_t));
 
+	// Set data_index past the last byte in data
+	// to trigger full data update with blake hash
+	// before we start using it
 	size_t data_index = sizeof(data);
 
 	int code_size;
@@ -209,7 +213,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
 	// byte 1: instruction opcode
 	// byte 2: current value of the source register
 	//
-	// Registers R4-R7 are constant and are threatened as having the same value because when we do
+	// Registers R4-R7 are constant and are treated as having the same value because when we do
 	// the same operation twice with two constant source registers, it can be optimized into a single operation
 	int inst_data[8] = { 0, 1, 2, 3, -1, -1, -1, -1 };
 
@@ -355,7 +359,9 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
 
 			// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
 			check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
-			code[code_size].C = *((uint32_t*)&data[data_index]);
+			uint32_t t;
+			memcpy(&t, data + data_index, sizeof(uint32_t));
+			code[code_size].C = SWAP32LE(t);
 			data_index += sizeof(uint32_t);
 		}
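
Side note, not part of the patch: both files apply the same two portability patterns. Loads go through memcpy plus an explicit little-endian byte swap, so the result no longer depends on host byte order or alignment, and the complementary rotate shift is reduced modulo REG_BITS, so a shift count of 0 never becomes an undefined shift by the full register width. A minimal standalone sketch of the two ideas follows; load_le32 and ror32 are hypothetical helpers standing in for the V4_REG_LOAD/SWAP32LE pattern and the fixed ROR case, not code from this repository.

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for V4_REG_LOAD + SWAP32LE: read 4 bytes with
 * memcpy (no alignment assumptions) and assemble them as little-endian,
 * so the value is identical on little- and big-endian hosts.
 * Example: bytes {0x01, 0x02, 0x03, 0x04} yield 0x04030201. */
static uint32_t load_le32(const uint8_t *src)
{
    uint8_t b[4];
    memcpy(b, src, sizeof(b));
    return (uint32_t)b[0]
         | ((uint32_t)b[1] << 8)
         | ((uint32_t)b[2] << 16)
         | ((uint32_t)b[3] << 24);
}

/* Rotate-right with the same guard the ROR/ROL hunks add: when shift is 0,
 * ((32 - shift) % 32) is also 0, so a uint32_t is never shifted by 32 bits,
 * which C leaves undefined.
 * Example: ror32(0x80000001u, 1) == 0xC0000000u; ror32(x, 0) == x. */
static uint32_t ror32(uint32_t x, uint32_t shift)
{
    shift %= 32;
    return (x >> shift) | (x << ((32 - shift) % 32));
}

With the old formulation, a shift of 0 produced (x >> 0) | (x << 32), an undefined 32-bit shift; the masked form keeps the result equal to x, which is what a hardware rotate by zero does.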