diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp
index a2d1b32..9f03da1 100644
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@@ -20,7 +20,6 @@ along with RandomX. If not, see.
#define MAGIC_DIVISION
#include "AssemblyGeneratorX86.hpp"
#include "common.hpp"
-#include "instructions.hpp"
#ifdef MAGIC_DIVISION
#include "divideByConstantCodegen.h"
#endif
diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp
index b3b5db8..8cfc364 100644
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@@ -19,7 +19,6 @@ along with RandomX. If not, see.
#include "CompiledVirtualMachine.hpp"
#include "common.hpp"
-#include "instructions.hpp"
#include
namespace RandomX {
diff --git a/src/Instruction.cpp b/src/Instruction.cpp
index 35cc737..2fefcf3 100644
--- a/src/Instruction.cpp
+++ b/src/Instruction.cpp
@@ -45,7 +45,7 @@ namespace RandomX {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
}
else {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
}
@@ -63,7 +63,7 @@ namespace RandomX {
}
void Instruction::h_IADD_RC(std::ostream& os) const {
- os << "r" << (int)dst << ", r" << (int)src << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", r" << (int)src << ", " << (int32_t)imm32 << std::endl;
}
//1 uOP
@@ -72,7 +72,7 @@ namespace RandomX {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
}
else {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
}
@@ -90,7 +90,7 @@ namespace RandomX {
}
void Instruction::h_IMUL_9C(std::ostream& os) const {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
void Instruction::h_IMUL_R(std::ostream& os) const {
@@ -98,7 +98,7 @@ namespace RandomX {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
}
else {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
}
@@ -158,7 +158,7 @@ namespace RandomX {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
}
else {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
}
@@ -194,11 +194,11 @@ namespace RandomX {
}
void Instruction::h_IDIV_C(std::ostream& os) const {
- os << "r" << (int)dst << ", " << (uint32_t)imm32 << std::endl;
+ os << "r" << (int)dst << ", " << imm32 << std::endl;
}
void Instruction::h_ISDIV_C(std::ostream& os) const {
- os << "r" << (int)dst << ", " << imm32 << std::endl;
+ os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl;
}
void Instruction::h_ISWAP_R(std::ostream& os) const {
@@ -300,13 +300,13 @@ namespace RandomX {
}
void Instruction::h_COND_R(std::ostream& os) const {
- os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << imm32 << ")" << std::endl;
+ os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)imm32 << ")" << std::endl;
}
void Instruction::h_COND_M(std::ostream& os) const {
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(";
genAddressReg(os);
- os << ", " << imm32 << ")" << std::endl;
+ os << ", " << (int32_t)imm32 << ")" << std::endl;
}
void Instruction::h_ISTORE(std::ostream& os) const {
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index 9e0d5e2..0757f43 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -19,7 +19,6 @@ along with RandomX. If not, see.
//#define TRACE
//#define FPUCHECK
#include "InterpretedVirtualMachine.hpp"
-#include "instructions.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
#include "LightClientAsyncWorker.hpp"
diff --git a/src/main.cpp b/src/main.cpp
index b16b13b..0a10d8f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -29,7 +29,6 @@ along with RandomX. If not, see.
#include
#include "Program.hpp"
#include
-#include "instructions.hpp"
#include
#include
#include "dataset.hpp"
diff --git a/src/program.inc b/src/program.inc
index 5de4504..ac8957b 100644
--- a/src/program.inc
+++ b/src/program.inc
@@ -1,736 +1,740 @@
- ; FMUL_R e0, a2
- mulpd xmm4, xmm10
- ; IADD_RC r2, r5, 2673743102
- lea r10, [r10+r13-1621224194]
- ; ISTORE L2[r2], r7
- mov eax, r10d
- and eax, 262136
- mov qword ptr [rsi+rax], r15
- ; FNEG_R f2
- xorps xmm2, xmm15
- ; IMUL_9C r6, 3291464084
- lea r14, [r14+r14*8-1003503212]
- ; FSUB_R f1, a0
- subpd xmm1, xmm8
- ; IXOR_M r5, L2[r3]
+ ; COND_M r1, sg(L1[r3], -2004237569)
+ xor ecx, ecx
mov eax, r11d
+ and eax, 16376
+ cmp dword ptr [rsi+rax], -2004237569
+ sets cl
+ add r9, rcx
+ ; IXOR_R r7, -1379425991
+ xor r15, -1379425991
+ ; IXOR_R r2, r6
+ xor r10, r14
+ ; FSWAP_R f3
+ shufpd xmm3, xmm3, 1
+ ; FADD_R f1, a1
+ addpd xmm1, xmm9
+ ; IMUL_R r0, r5
+ imul r8, r13
+ ; FMUL_R e1, a3
+ mulpd xmm5, xmm11
+ ; IADD_R r3, r2
+ add r11, r10
+ ; COND_M r1, ab(L2[r6], -724006934)
+ xor ecx, ecx
+ mov eax, r14d
and eax, 262136
- xor r13, qword ptr [rsi+rax]
- ; FNEG_R f2
- xorps xmm2, xmm15
- ; FSUB_R f3, a0
- subpd xmm3, xmm8
- ; ISDIV_C r0, 1400272688
- mov rax, 7072565507528518045
+ cmp dword ptr [rsi+rax], -724006934
+ seta cl
+ add r9, rcx
+ ; IADD_RC r2, r7, -854121467
+ lea r10, [r10+r15-854121467]
+ ; IADD_RC r5, r6, 1291744030
+ lea r13, [r13+r14+1291744030]
+ ; ISTORE L2[r6], r4
+ mov eax, r14d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r12
+ ; IMUL_R r6, r7
+ imul r14, r15
+ ; FSUB_R f0, a3
+ subpd xmm0, xmm11
+ ; IADD_M r3, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ add r11, qword ptr [rsi+rax]
+ ; ISDIV_C r4, -692911499
+ mov rax, -893288710803585809
+ imul r12
+ xor eax, eax
+ sar rdx, 25
+ sets al
+ add rdx, rax
+ add r12, rdx
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+ ; FDIV_M e1, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ andps xmm12, xmm14
+ divpd xmm5, xmm12
+ maxpd xmm5, xmm13
+ ; FMUL_R e0, a1
+ mulpd xmm4, xmm9
+ ; COND_M r0, no(L1[r1], -540292380)
+ xor ecx, ecx
+ mov eax, r9d
+ and eax, 16376
+ cmp dword ptr [rsi+rax], -540292380
+ setno cl
+ add r8, rcx
+ ; FSUB_R f1, a1
+ subpd xmm1, xmm9
+ ; IADD_RC r0, r2, 310371682
+ lea r8, [r8+r10+310371682]
+ ; COND_R r3, lt(r0, -1067603143)
+ xor ecx, ecx
+ cmp r8d, -1067603143
+ setl cl
+ add r11, rcx
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+ ; FADD_R f0, a3
+ addpd xmm0, xmm11
+ ; COND_R r4, sg(r3, -389806289)
+ xor ecx, ecx
+ cmp r11d, -389806289
+ sets cl
+ add r12, rcx
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+ ; ISTORE L2[r7], r4
+ mov eax, r15d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r12
+ ; IADD_RC r4, r2, 1888908452
+ lea r12, [r12+r10+1888908452]
+ ; IADD_R r1, r2
+ add r9, r10
+ ; IXOR_R r6, r5
+ xor r14, r13
+ ; IADD_M r7, L1[r0]
+ mov eax, r8d
+ and eax, 16376
+ add r15, qword ptr [rsi+rax]
+ ; IADD_R r5, r6
+ add r13, r14
+ ; FSUB_R f0, a1
+ subpd xmm0, xmm9
+ ; IMULH_R r5, r4
+ mov rax, r13
+ mul r12
+ mov r13, rdx
+ ; IMUL_9C r7, 753606235
+ lea r15, [r15+r15*8+753606235]
+ ; FSWAP_R e2
+ shufpd xmm6, xmm6, 1
+ ; IMUL_M r7, L1[r1]
+ mov eax, r9d
+ and eax, 16376
+ imul r15, qword ptr [rsi+rax]
+ ; IMUL_R r5, 1431156245
+ imul r13, 1431156245
+ ; IADD_RC r4, r2, 1268508410
+ lea r12, [r12+r10+1268508410]
+ ; FSWAP_R f2
+ shufpd xmm2, xmm2, 1
+ ; ISDIV_C r0, -845194077
+ mov rax, -5858725577819591251
imul r8
xor eax, eax
- sar rdx, 29
+ sar rdx, 28
sets al
add rdx, rax
add r8, rdx
- ; IMUL_M r3, L1[r7]
- mov eax, r15d
- and eax, 16376
- imul r11, qword ptr [rsi+rax]
- ; ISWAP_R r2, r3
- xchg r10, r11
- ; IMULH_R r6, r0
- mov rax, r14
- mul r8
- mov r14, rdx
- ; FMUL_R e0, a2
- mulpd xmm4, xmm10
- ; IADD_RC r3, r4, 4242706868
- lea r11, [r11+r12-52260428]
- ; IADD_R r7, 3156349536
- add r15, -1138617760
- ; IXOR_M r2, L1[r6]
- mov eax, r14d
- and eax, 16376
- xor r10, qword ptr [rsi+rax]
- ; FSUB_M f2, L1[r5]
- mov eax, r13d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm2, xmm12
- ; IXOR_R r7, r1
- xor r15, r9
- ; COND_R r2, lt(r7, 4253348488)
+ ; COND_R r0, ab(r5, 1644043355)
xor ecx, ecx
- cmp r15d, -41618808
+ cmp r13d, 1644043355
+ seta cl
+ add r8, rcx
+ ; COND_R r5, lt(r0, 1216385844)
+ xor ecx, ecx
+ cmp r8d, 1216385844
setl cl
- add r10, rcx
- ; FMUL_R e3, a0
- mulpd xmm7, xmm8
- ; COND_R r4, sg(r1, 3333776931)
- xor ecx, ecx
- cmp r9d, -961190365
- sets cl
- add r12, rcx
- ; FADD_R f2, a1
- addpd xmm2, xmm9
- ; FSUB_R f0, a3
- subpd xmm0, xmm11
- ; ISTORE L1[r6], r2
- mov eax, r14d
- and eax, 16376
- mov qword ptr [rsi+rax], r10
- ; ISUB_R r6, r5
- sub r14, r13
- ; IADD_M r0, L1[r4]
+ add r13, rcx
+ ; IMUL_R r5, r2
+ imul r13, r10
+ ; ISTORE L1[r4], r6
mov eax, r12d
and eax, 16376
- add r8, qword ptr [rsi+rax]
- ; ISTORE L1[r4], r3
- mov eax, r12d
- and eax, 16376
- mov qword ptr [rsi+rax], r11
- ; COND_M r6, sg(L1[r6], 1048782623)
- xor ecx, ecx
- mov eax, r14d
- and eax, 16376
- cmp dword ptr [rsi+rax], 1048782623
- sets cl
- add r14, rcx
- ; FSQRT_R e0
- sqrtpd xmm4, xmm4
- ; INEG_R r2
- neg r10
+ mov qword ptr [rsi+rax], r14
+ ; IXOR_R r4, r3
+ xor r12, r11
+ ; IXOR_R r6, r2
+ xor r14, r10
; FSQRT_R e1
sqrtpd xmm5, xmm5
- ; FMUL_R e1, a3
- mulpd xmm5, xmm11
- ; IMUL_R r7, r6
- imul r15, r14
- ; IMULH_R r0, r4
- mov rax, r8
- mul r12
- mov r8, rdx
- ; IMUL_R r5, r3
- imul r13, r11
- ; FSQRT_R e2
- sqrtpd xmm6, xmm6
- ; FADD_R f3, a0
- addpd xmm3, xmm8
- ; IADD_R r3, r2
- add r11, r10
- ; FADD_R f1, a0
- addpd xmm1, xmm8
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; FADD_M f0, L2[r5]
- mov eax, r13d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm0, xmm12
- ; IMUL_R r5, r6
- imul r13, r14
- ; IADD_RC r1, r2, 3031682053
- lea r9, [r9+r10-1263285243]
- ; ISUB_M r4, L1[r6]
- mov eax, r14d
- and eax, 16376
- sub r12, qword ptr [rsi+rax]
- ; FSWAP_R e3
- shufpd xmm7, xmm7, 1
- ; IMUL_R r0, r7
- imul r8, r15
- ; IXOR_R r1, r6
- xor r9, r14
- ; IXOR_M r2, L1[r4]
- mov eax, r12d
- and eax, 16376
- xor r10, qword ptr [rsi+rax]
- ; FSUB_R f3, a1
- subpd xmm3, xmm9
- ; ISTORE L1[r0], r5
- mov eax, r8d
- and eax, 16376
- mov qword ptr [rsi+rax], r13
- ; FDIV_M e2, L2[r3]
+ ; COND_R r5, be(r1, 1781435695)
+ xor ecx, ecx
+ cmp r9d, 1781435695
+ setbe cl
+ add r13, rcx
+ ; ISDIV_C r0, 1367038890
+ mov rax, 1811126293978922977
+ imul r8
+ xor eax, eax
+ sar rdx, 27
+ sets al
+ add rdx, rax
+ add r8, rdx
+ ; FDIV_M e1, L1[r3]
mov eax, r11d
- and eax, 262136
+ and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
andps xmm12, xmm14
- divpd xmm6, xmm12
- maxpd xmm6, xmm13
- ; ISWAP_R r2, r0
- xchg r10, r8
- ; IADD_R r7, r5
- add r15, r13
- ; FDIV_M e0, L1[r4]
- mov eax, r12d
+ divpd xmm5, xmm12
+ maxpd xmm5, xmm13
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+ ; ISTORE L1[r5], r4
+ mov eax, r13d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r12
+ ; IXOR_R r0, r4
+ xor r8, r12
+ ; IMUL_R r5, r1
+ imul r13, r9
+ ; FDIV_M e0, L1[r2]
+ mov eax, r10d
and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
andps xmm12, xmm14
divpd xmm4, xmm12
maxpd xmm4, xmm13
- ; FADD_R f3, a1
- addpd xmm3, xmm9
- ; FADD_R f0, a3
- addpd xmm0, xmm11
- ; IADD_R r2, r0
- add r10, r8
- ; ISTORE L1[r3], r6
- mov eax, r11d
+ ; IMUL_R r6, r1
+ imul r14, r9
+ ; FSUB_M f1, L1[r0]
+ mov eax, r8d
and eax, 16376
- mov qword ptr [rsi+rax], r14
- ; IXOR_R r1, r7
- xor r9, r15
- ; ISUB_M r5, L2[r7]
- mov eax, r15d
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+ ; COND_R r2, ns(r1, 392878356)
+ xor ecx, ecx
+ cmp r9d, 392878356
+ setns cl
+ add r10, rcx
+ ; IADD_R r6, r5
+ add r14, r13
+ ; FMUL_R e2, a0
+ mulpd xmm6, xmm8
+ ; ISTORE L1[r0], r3
+ mov eax, r8d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+ ; IMUL_R r1, r3
+ imul r9, r11
+ ; IMUL_R r5, r2
+ imul r13, r10
+ ; FADD_R f0, a0
+ addpd xmm0, xmm8
+ ; FADD_R f0, a1
+ addpd xmm0, xmm9
+ ; FSUB_R f0, a0
+ subpd xmm0, xmm8
+ ; IMUL_R r3, r5
+ imul r11, r13
+ ; IADD_R r1, r5
+ add r9, r13
+ ; IXOR_M r0, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ xor r8, qword ptr [rsi+rax]
+ ; FNEG_R f2
+ xorps xmm2, xmm15
+ ; IDIV_C r5, 2577129788
+ mov rax, 15371395512010654233
+ mul r13
+ shr rdx, 31
+ add r13, rdx
+ ; COND_R r5, be(r5, -999219370)
+ xor ecx, ecx
+ cmp r13d, -999219370
+ setbe cl
+ add r13, rcx
+ ; ISTORE L2[r0], r2
+ mov eax, r8d
and eax, 262136
- sub r13, qword ptr [rsi+rax]
- ; ISDIV_C r7, 266992378
- mov rax, -9173520256920442565
- imul r15
+ mov qword ptr [rsi+rax], r10
+ ; FSUB_R f3, a3
+ subpd xmm3, xmm11
+ ; IROR_R r7, r6
+ mov ecx, r14d
+ ror r15, cl
+ ; COND_R r6, ab(r4, 1309137534)
+ xor ecx, ecx
+ cmp r12d, 1309137534
+ seta cl
+ add r14, rcx
+ ; FMUL_R e3, a0
+ mulpd xmm7, xmm8
+ ; COND_M r3, no(L2[r5], 483660199)
+ xor ecx, ecx
+ mov eax, r13d
+ and eax, 262136
+ cmp dword ptr [rsi+rax], 483660199
+ setno cl
+ add r11, rcx
+ ; IMUL_R r1, r6
+ imul r9, r14
+ ; IADD_RC r7, r2, -1340630490
+ lea r15, [r15+r10-1340630490]
+ ; IADD_M r0, L3[1554088]
+ add r8, qword ptr [rsi+1554088]
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+ ; IDIV_C r0, 1566192452
+ mov rax, 12646619898641986559
+ mul r8
+ shr rdx, 30
+ add r8, rdx
+ ; FADD_R f0, a1
+ addpd xmm0, xmm9
+ ; ISWAP_R r6, r0
+ xchg r14, r8
+ ; IMUL_9C r4, 1340891034
+ lea r12, [r12+r12*8+1340891034]
+ ; IROR_R r7, r2
+ mov ecx, r10d
+ ror r15, cl
+ ; FSQRT_R e2
+ sqrtpd xmm6, xmm6
+ ; FADD_R f2, a1
+ addpd xmm2, xmm9
+ ; IMUL_R r4, r3
+ imul r12, r11
+ ; IADD_RC r6, r3, -1584624397
+ lea r14, [r14+r11-1584624397]
+ ; IROR_R r1, r7
+ mov ecx, r15d
+ ror r9, cl
+ ; IXOR_R r4, r7
+ xor r12, r15
+ ; FSWAP_R f0
+ shufpd xmm0, xmm0, 1
+ ; FSWAP_R f3
+ shufpd xmm3, xmm3, 1
+ ; IROR_R r5, 3
+ ror r13, 3
+ ; FADD_R f3, a0
+ addpd xmm3, xmm8
+ ; FMUL_R e0, a0
+ mulpd xmm4, xmm8
+ ; IADD_R r4, r1
+ add r12, r9
+ ; COND_M r4, ge(L1[r6], -1612023931)
+ xor ecx, ecx
+ mov eax, r14d
+ and eax, 16376
+ cmp dword ptr [rsi+rax], -1612023931
+ setge cl
+ add r12, rcx
+ ; FSWAP_R e2
+ shufpd xmm6, xmm6, 1
+ ; IADD_R r3, r7
+ add r11, r15
+ ; COND_R r5, be(r2, -1083018923)
+ xor ecx, ecx
+ cmp r10d, -1083018923
+ setbe cl
+ add r13, rcx
+ ; IADD_R r3, r7
+ add r11, r15
+ ; ISTORE L2[r6], r0
+ mov eax, r14d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r8
+ ; IXOR_R r2, r3
+ xor r10, r11
+ ; FMUL_R e2, a3
+ mulpd xmm6, xmm11
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; FADD_R f0, a2
+ addpd xmm0, xmm10
+ ; ISTORE L1[r5], r1
+ mov eax, r13d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r9
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; ISWAP_R r1, r2
+ xchg r9, r10
+ ; FSWAP_R e0
+ shufpd xmm4, xmm4, 1
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+ ; FSUB_R f0, a0
+ subpd xmm0, xmm8
+ ; IROR_R r7, r0
+ mov ecx, r8d
+ ror r15, cl
+ ; IADD_RC r5, r4, 283260945
+ lea r13, [r13+r12+283260945]
+ ; ISDIV_C r6, -340125851
+ mov rax, -3639652898025032137
+ imul r14
xor eax, eax
- add rdx, r15
- sar rdx, 27
+ sar rdx, 26
sets al
add rdx, rax
- add r15, rdx
- ; FDIV_M e3, L1[r4]
+ add r14, rdx
+ ; ISTORE L2[r2], r3
+ mov eax, r10d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r11
+ ; IADD_RC r6, r6, -935765909
+ lea r14, [r14+r14-935765909]
+ ; ISDIV_C r3, -701703430
+ mov rax, -7056770631919985199
+ imul r11
+ xor eax, eax
+ sar rdx, 28
+ sets al
+ add rdx, rax
+ add r11, rdx
+ ; IXOR_M r3, L2[r1]
+ mov eax, r9d
+ and eax, 262136
+ xor r11, qword ptr [rsi+rax]
+ ; FADD_R f2, a1
+ addpd xmm2, xmm9
+ ; ISTORE L1[r5], r7
+ mov eax, r13d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r15
+ ; FSUB_R f2, a0
+ subpd xmm2, xmm8
+ ; FMUL_R e3, a2
+ mulpd xmm7, xmm10
+ ; IADD_R r2, r5
+ add r10, r13
+ ; IADD_RC r2, r5, -1056770544
+ lea r10, [r10+r13-1056770544]
+ ; ISTORE L2[r2], r3
+ mov eax, r10d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r11
+ ; ISMULH_R r7, r1
+ mov rax, r15
+ imul r9
+ mov r15, rdx
+ ; IXOR_R r0, r5
+ xor r8, r13
+ ; ISTORE L1[r4], r0
mov eax, r12d
and eax, 16376
+ mov qword ptr [rsi+rax], r8
+ ; INEG_R r5
+ neg r13
+ ; FSUB_R f0, a1
+ subpd xmm0, xmm9
+ ; IMUL_R r6, -244261682
+ imul r14, -244261682
+ ; IMUL_R r1, r0
+ imul r9, r8
+ ; IMUL_9C r3, -985744277
+ lea r11, [r11+r11*8-985744277]
+ ; IROR_R r2, r1
+ mov ecx, r9d
+ ror r10, cl
+ ; ISUB_R r4, -1079131550
+ sub r12, -1079131550
+ ; FNEG_R f3
+ xorps xmm3, xmm15
+ ; COND_R r4, ns(r5, -362284631)
+ xor ecx, ecx
+ cmp r13d, -362284631
+ setns cl
+ add r12, rcx
+ ; FSUB_R f2, a0
+ subpd xmm2, xmm8
+ ; IXOR_R r4, r5
+ xor r12, r13
+ ; FNEG_R f1
+ xorps xmm1, xmm15
+ ; FADD_R f0, a0
+ addpd xmm0, xmm8
+ ; IADD_RC r3, r3, -173615832
+ lea r11, [r11+r11-173615832]
+ ; IMUL_R r0, 928402279
+ imul r8, 928402279
+ ; ISUB_R r2, r0
+ sub r10, r8
+ ; IXOR_R r6, r3
+ xor r14, r11
+ ; ISUB_R r2, 2106401471
+ sub r10, 2106401471
+ ; FADD_R f0, a2
+ addpd xmm0, xmm10
+ ; IMUL_R r4, r6
+ imul r12, r14
+ ; IADD_RC r4, r0, -373491513
+ lea r12, [r12+r8-373491513]
+ ; ISDIV_C r0, -1739042721
+ mov rax, 7057121271817449967
+ imul r8
+ xor eax, eax
+ sub rdx, r8
+ sar rdx, 30
+ sets al
+ add rdx, rax
+ add r8, rdx
+ ; IADD_R r3, r1
+ add r11, r9
+ ; ISUB_M r7, L1[r5]
+ mov eax, r13d
+ and eax, 16376
+ sub r15, qword ptr [rsi+rax]
+ ; IMUL_R r1, r2
+ imul r9, r10
+ ; ISUB_R r0, 722465116
+ sub r8, 722465116
+ ; IADD_RC r0, r0, -1919541169
+ lea r8, [r8+r8-1919541169]
+ ; ISUB_M r2, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ sub r10, qword ptr [rsi+rax]
+ ; IADD_R r7, -1183581468
+ add r15, -1183581468
+ ; FMUL_R e1, a3
+ mulpd xmm5, xmm11
+ ; FSUB_R f0, a0
+ subpd xmm0, xmm8
+ ; FADD_R f0, a3
+ addpd xmm0, xmm11
+ ; IMUL_9C r6, 1241113238
+ lea r14, [r14+r14*8+1241113238]
+ ; FSUB_R f3, a3
+ subpd xmm3, xmm11
+ ; IADD_M r0, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ add r8, qword ptr [rsi+rax]
+ ; IROR_R r3, r7
+ mov ecx, r15d
+ ror r11, cl
+ ; FADD_R f2, a1
+ addpd xmm2, xmm9
+ ; IMUL_M r3, L1[r2]
+ mov eax, r10d
+ and eax, 16376
+ imul r11, qword ptr [rsi+rax]
+ ; IMUL_9C r7, -2080412544
+ lea r15, [r15+r15*8-2080412544]
+ ; IMUL_R r0, r3
+ imul r8, r11
+ ; FADD_R f1, a1
+ addpd xmm1, xmm9
+ ; IROR_R r6, 21
+ ror r14, 21
+ ; FDIV_M e3, L1[r1]
+ mov eax, r9d
+ and eax, 16376
cvtdq2pd xmm12, qword ptr [rsi+rax]
andps xmm12, xmm14
divpd xmm7, xmm12
maxpd xmm7, xmm13
- ; IMUL_R r2, r0
- imul r10, r8
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; IMUL_R r0, r6
- imul r8, r14
- ; ISTORE L1[r0], r7
- mov eax, r8d
- and eax, 16376
- mov qword ptr [rsi+rax], r15
- ; FSUB_M f0, L2[r1]
- mov eax, r9d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm0, xmm12
- ; FADD_R f3, a1
- addpd xmm3, xmm9
- ; IXOR_R r5, r4
- xor r13, r12
- ; ISTORE L2[r7], r2
- mov eax, r15d
- and eax, 262136
- mov qword ptr [rsi+rax], r10
- ; FSWAP_R e2
- shufpd xmm6, xmm6, 1
- ; FADD_R f3, a2
- addpd xmm3, xmm10
- ; ISMULH_R r5, r0
- mov rax, r13
- imul r8
- mov r13, rdx
- ; IADD_M r0, L1[r4]
- mov eax, r12d
- and eax, 16376
- add r8, qword ptr [rsi+rax]
- ; COND_R r7, ge(r6, 2322068811)
- xor ecx, ecx
- cmp r14d, -1972898485
- setge cl
- add r15, rcx
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; IROR_R r7, r6
- mov ecx, r14d
- ror r15, cl
- ; IADD_RC r2, r4, 4177509323
- lea r10, [r10+r12-117457973]
- ; IMUL_R r0, 2794074228
- imul r8, -1500893068
- ; IADD_R r2, r3
- add r10, r11
- ; FSQRT_R e2
- sqrtpd xmm6, xmm6
- ; IROR_R r7, r4
- mov ecx, r12d
- ror r15, cl
- ; IMUL_9C r4, 381194890
- lea r12, [r12+r12*8+381194890]
- ; IADD_RC r3, r7, 1050899263
- lea r11, [r11+r15+1050899263]
- ; IADD_R r2, r7
- add r10, r15
- ; FMUL_R e3, a0
- mulpd xmm7, xmm8
- ; IADD_RC r6, r6, 540663146
- lea r14, [r14+r14+540663146]
- ; IROR_R r5, 58
- ror r13, 58
- ; FADD_R f2, a1
- addpd xmm2, xmm9
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; FMUL_R e1, a2
- mulpd xmm5, xmm10
+ ; FSUB_R f0, a1
+ subpd xmm0, xmm9
; FSWAP_R e1
shufpd xmm5, xmm5, 1
- ; IADD_R r5, r3
- add r13, r11
- ; IADD_R r7, 2514699120
- add r15, -1780268176
- ; IADD_RC r7, r0, 2797210442
- lea r15, [r15+r8-1497756854]
- ; ISTORE L2[r0], r7
- mov eax, r8d
- and eax, 262136
- mov qword ptr [rsi+rax], r15
- ; ISMULH_R r2, r4
- mov rax, r10
- imul r12
- mov r10, rdx
- ; FSUB_M f0, L1[r2]
- mov eax, r10d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm0, xmm12
- ; ISMULH_R r2, r3
- mov rax, r10
- imul r11
- mov r10, rdx
- ; IADD_R r0, r3
- add r8, r11
- ; ISUB_R r7, r2
- sub r15, r10
- ; FADD_R f2, a0
- addpd xmm2, xmm8
- ; FMUL_R e0, a2
- mulpd xmm4, xmm10
- ; FADD_M f2, L1[r3]
- mov eax, r11d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm2, xmm12
- ; IMUL_R r1, r2
- imul r9, r10
- ; IMUL_M r7, L1[r5]
+ ; COND_M r0, no(L1[r5], -1627153829)
+ xor ecx, ecx
mov eax, r13d
and eax, 16376
- imul r15, qword ptr [rsi+rax]
+ cmp dword ptr [rsi+rax], -1627153829
+ setno cl
+ add r8, rcx
+ ; FADD_R f2, a3
+ addpd xmm2, xmm11
+ ; FSUB_R f1, a2
+ subpd xmm1, xmm10
+ ; FSUB_M f1, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+ ; ISTORE L1[r5], r1
+ mov eax, r13d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r9
+ ; ISUB_M r2, L2[r7]
+ mov eax, r15d
+ and eax, 262136
+ sub r10, qword ptr [rsi+rax]
+ ; ISTORE L1[r2], r3
+ mov eax, r10d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r11
+ ; FADD_R f0, a3
+ addpd xmm0, xmm11
+ ; ISUB_M r1, L1[r7]
+ mov eax, r15d
+ and eax, 16376
+ sub r9, qword ptr [rsi+rax]
+ ; IDIV_C r5, 624165039
+ mov rax, 15866829597104432181
+ mul r13
+ shr rdx, 29
+ add r13, rdx
+ ; FMUL_R e3, a0
+ mulpd xmm7, xmm8
+ ; IMUL_R r5, r4
+ imul r13, r12
+ ; FMUL_R e3, a1
+ mulpd xmm7, xmm9
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; IXOR_R r0, -2064879200
+ xor r8, -2064879200
+ ; FADD_R f1, a3
+ addpd xmm1, xmm11
+ ; IADD_M r0, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ add r8, qword ptr [rsi+rax]
+ ; ISMULH_R r7, r3
+ mov rax, r15
+ imul r11
+ mov r15, rdx
+ ; IMUL_R r5, -1645503310
+ imul r13, -1645503310
+ ; IMUL_R r7, r3
+ imul r15, r11
+ ; FMUL_R e2, a2
+ mulpd xmm6, xmm10
+ ; IADD_R r6, 1769041191
+ add r14, 1769041191
+ ; FSUB_M f1, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm1, xmm12
+ ; ISTORE L2[r1], r0
+ mov eax, r9d
+ and eax, 262136
+ mov qword ptr [rsi+rax], r8
+ ; FNEG_R f0
+ xorps xmm0, xmm15
+ ; FMUL_R e0, a3
+ mulpd xmm4, xmm11
+ ; IMUL_R r2, r7
+ imul r10, r15
+ ; IADD_R r5, r1
+ add r13, r9
+ ; IROR_R r3, r6
+ mov ecx, r14d
+ ror r11, cl
+ ; FADD_R f0, a0
+ addpd xmm0, xmm8
+ ; FMUL_R e1, a2
+ mulpd xmm5, xmm10
+ ; FNEG_R f3
+ xorps xmm3, xmm15
+ ; FADD_R f1, a1
+ addpd xmm1, xmm9
+ ; IMULH_R r2, r5
+ mov rax, r10
+ mul r13
+ mov r10, rdx
+ ; ISTORE L1[r4], r0
+ mov eax, r12d
+ and eax, 16376
+ mov qword ptr [rsi+rax], r8
+ ; ISWAP_R r7, r0
+ xchg r15, r8
+ ; FSWAP_R f0
+ shufpd xmm0, xmm0, 1
+ ; ISUB_R r2, r0
+ sub r10, r8
+ ; FSUB_R f1, a3
+ subpd xmm1, xmm11
+ ; ISUB_M r5, L1[r3]
+ mov eax, r11d
+ and eax, 16376
+ sub r13, qword ptr [rsi+rax]
+ ; IXOR_R r7, r0
+ xor r15, r8
+ ; IMUL_R r4, r1
+ imul r12, r9
+ ; IADD_RC r0, r2, -1102648763
+ lea r8, [r8+r10-1102648763]
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; IXOR_R r4, r1
+ xor r12, r9
+ ; IXOR_R r6, r0
+ xor r14, r8
+ ; FSQRT_R e1
+ sqrtpd xmm5, xmm5
+ ; IMUL_M r6, L2[r1]
+ mov eax, r9d
+ and eax, 262136
+ imul r14, qword ptr [rsi+rax]
+ ; ISMULH_M r5, L3[353552]
+ mov rax, r13
+ imul qword ptr [rsi+353552]
+ mov r13, rdx
+ ; ISUB_M r1, L1[r6]
+ mov eax, r14d
+ and eax, 16376
+ sub r9, qword ptr [rsi+rax]
+ ; FADD_R f0, a3
+ addpd xmm0, xmm11
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; FSUB_M f3, L2[r7]
+ mov eax, r15d
+ and eax, 262136
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ subpd xmm3, xmm12
+ ; IMUL_R r0, r2
+ imul r8, r10
+ ; FMUL_R e1, a0
+ mulpd xmm5, xmm8
+ ; COND_R r5, sg(r3, -1392293091)
+ xor ecx, ecx
+ cmp r11d, -1392293091
+ sets cl
+ add r13, rcx
+ ; FSWAP_R e3
+ shufpd xmm7, xmm7, 1
+ ; IMUL_R r7, r4
+ imul r15, r12
+ ; IXOR_R r7, r5
+ xor r15, r13
+ ; FMUL_R e3, a3
+ mulpd xmm7, xmm11
+ ; IMUL_R r4, r3
+ imul r12, r11
+ ; FADD_M f1, L1[r1]
+ mov eax, r9d
+ and eax, 16376
+ cvtdq2pd xmm12, qword ptr [rsi+rax]
+ addpd xmm1, xmm12
+ ; IMUL_R r5, r0
+ imul r13, r8
+ ; ISUB_R r7, r0
+ sub r15, r8
+ ; IADD_M r5, L1[r4]
+ mov eax, r12d
+ and eax, 16376
+ add r13, qword ptr [rsi+rax]
+ ; IADD_R r6, r2
+ add r14, r10
+ ; FMUL_R e1, a1
+ mulpd xmm5, xmm9
+ ; IADD_M r2, L3[1073640]
+ add r10, qword ptr [rsi+1073640]
; IMUL_R r3, r2
imul r11, r10
; IXOR_R r1, r0
xor r9, r8
- ; FNEG_R f0
- xorps xmm0, xmm15
- ; IADD_RC r4, r4, 1456841848
- lea r12, [r12+r12+1456841848]
- ; IXOR_R r3, r2
- xor r11, r10
- ; COND_R r0, of(r4, 1678513610)
- xor ecx, ecx
- cmp r12d, 1678513610
- seto cl
- add r8, rcx
- ; ISMULH_R r4, r4
- mov rax, r12
- imul r12
- mov r12, rdx
- ; IMUL_R r4, r1
- imul r12, r9
- ; FADD_R f1, a2
- addpd xmm1, xmm10
- ; FSUB_R f2, a0
- subpd xmm2, xmm8
- ; FMUL_R e1, a2
- mulpd xmm5, xmm10
- ; FSUB_R f0, a3
- subpd xmm0, xmm11
- ; IXOR_R r0, r7
- xor r8, r15
- ; ISTORE L2[r1], r4
- mov eax, r9d
- and eax, 262136
- mov qword ptr [rsi+rax], r12
- ; IXOR_M r7, L1[r6]
- mov eax, r14d
- and eax, 16376
- xor r15, qword ptr [rsi+rax]
- ; ISUB_R r2, r4
- sub r10, r12
- ; ISUB_M r4, L1[r6]
- mov eax, r14d
- and eax, 16376
- sub r12, qword ptr [rsi+rax]
- ; FADD_R f2, a2
- addpd xmm2, xmm10
- ; FSUB_M f3, L2[r4]
+ ; IROR_R r7, r4
+ mov ecx, r12d
+ ror r15, cl
+ ; FSUB_R f1, a1
+ subpd xmm1, xmm9
+ ; IMUL_R r7, r5
+ imul r15, r13
+ ; ISUB_R r1, 866191482
+ sub r9, 866191482
+ ; IMUL_M r7, L1[r4]
mov eax, r12d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm3, xmm12
- ; IXOR_R r7, r2
- xor r15, r10
- ; IXOR_R r0, r5
- xor r8, r13
- ; FADD_R f1, a2
- addpd xmm1, xmm10
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; FSWAP_R e3
- shufpd xmm7, xmm7, 1
- ; FSWAP_R f1
- shufpd xmm1, xmm1, 1
- ; COND_R r2, ge(r2, 4068636356)
- xor ecx, ecx
- cmp r10d, -226330940
- setge cl
- add r10, rcx
- ; FMUL_R e2, a3
- mulpd xmm6, xmm11
- ; FSUB_M f2, L2[r1]
- mov eax, r9d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm2, xmm12
- ; FADD_R f1, a0
- addpd xmm1, xmm8
- ; ISUB_R r7, r5
- sub r15, r13
- ; ISUB_M r0, L1[r1]
- mov eax, r9d
and eax, 16376
- sub r8, qword ptr [rsi+rax]
- ; FSUB_R f3, a1
- subpd xmm3, xmm9
- ; ISWAP_R r3, r5
- xchg r11, r13
- ; IADD_RC r5, r2, 795784298
- lea r13, [r13+r10+795784298]
- ; IADD_RC r0, r4, 2244788743
- lea r8, [r8+r12-2050178553]
- ; IMUL_9C r5, 1062534001
- lea r13, [r13+r13*8+1062534001]
- ; FADD_R f0, a2
- addpd xmm0, xmm10
- ; FMUL_R e3, a1
- mulpd xmm7, xmm9
- ; IDIV_C r3, 1662492575
- mov rax, 11914062610815620875
- mul r11
- shr rdx, 30
- add r11, rdx
- ; IMUL_M r5, L1[r0]
- mov eax, r8d
- and eax, 16376
- imul r13, qword ptr [rsi+rax]
- ; IDIV_C r4, 1963597892
- mov rax, r12
- shr rax, 2
- mov rcx, 1260889558222626443
- mul rcx
- shr rdx, 25
- add r12, rdx
- ; IMUL_9C r7, 1820045218
- lea r15, [r15+r15*8+1820045218]
- ; IMUL_M r0, L1[r3]
- mov eax, r11d
- and eax, 16376
- imul r8, qword ptr [rsi+rax]
- ; IXOR_R r3, r7
- xor r11, r15
- ; ISMULH_R r4, r2
- mov rax, r12
- imul r10
- mov r12, rdx
- ; ISWAP_R r3, r0
- xchg r11, r8
- ; IXOR_R r2, r0
- xor r10, r8
- ; IXOR_M r0, L2[r1]
- mov eax, r9d
- and eax, 262136
- xor r8, qword ptr [rsi+rax]
- ; ISDIV_C r7, 3359520316
- mov rax, 7859804860668271393
- imul r15
- xor eax, eax
- sub rdx, r15
- sar rdx, 29
- sets al
- add rdx, rax
- add r15, rdx
- ; IMUL_M r6, L1[r2]
- mov eax, r10d
- and eax, 16376
- imul r14, qword ptr [rsi+rax]
- ; FNEG_R f3
- xorps xmm3, xmm15
- ; IADD_RC r4, r2, 1704868083
- lea r12, [r12+r10+1704868083]
+ imul r15, qword ptr [rsi+rax]
; FADD_R f2, a0
addpd xmm2, xmm8
- ; ISTORE L1[r0], r0
- mov eax, r8d
- and eax, 16376
- mov qword ptr [rsi+rax], r8
- ; FADD_M f0, L1[r7]
- mov eax, r15d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm0, xmm12
- ; FMUL_R e0, a3
- mulpd xmm4, xmm11
- ; FSUB_R f3, a2
- subpd xmm3, xmm10
- ; IADD_RC r7, r7, 1302457878
- lea r15, [r15+r15+1302457878]
- ; ISUB_R r1, 1330165941
- sub r9, 1330165941
- ; FNEG_R f1
- xorps xmm1, xmm15
- ; IROR_R r0, r4
- mov ecx, r12d
- ror r8, cl
- ; FSUB_R f1, a0
- subpd xmm1, xmm8
- ; IROR_R r5, r6
- mov ecx, r14d
- ror r13, cl
- ; COND_R r0, ab(r1, 3984033425)
- xor ecx, ecx
- cmp r9d, -310933871
- seta cl
- add r8, rcx
- ; COND_R r4, ab(r7, 757929676)
- xor ecx, ecx
- cmp r15d, 757929676
- seta cl
- add r12, rcx
- ; FMUL_R e0, a1
- mulpd xmm4, xmm9
- ; IMUL_R r1, r3
- imul r9, r11
- ; ISUB_R r3, r2
- sub r11, r10
- ; FSUB_R f3, a2
- subpd xmm3, xmm10
- ; FDIV_M e1, L1[r4]
- mov eax, r12d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm14
- divpd xmm5, xmm12
- maxpd xmm5, xmm13
- ; FSWAP_R f1
- shufpd xmm1, xmm1, 1
- ; IADD_R r7, 2873779272
- add r15, -1421188024
- ; FSUB_M f3, L2[r2]
- mov eax, r10d
- and eax, 262136
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm3, xmm12
- ; FSUB_R f2, a3
- subpd xmm2, xmm11
- ; FSUB_R f3, a1
- subpd xmm3, xmm9
- ; FMUL_R e1, a3
- mulpd xmm5, xmm11
- ; IADD_RC r2, r4, 3977135268
- lea r10, [r10+r12-317832028]
- ; IMUL_M r4, L1[r5]
- mov eax, r13d
- and eax, 16376
- imul r12, qword ptr [rsi+rax]
- ; FDIV_M e1, L1[r7]
- mov eax, r15d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- andps xmm12, xmm14
- divpd xmm5, xmm12
- maxpd xmm5, xmm13
- ; IADD_R r5, r2
- add r13, r10
- ; ISUB_R r4, 401020510
- sub r12, 401020510
- ; IROR_R r3, r0
- mov ecx, r8d
- ror r11, cl
- ; ISTORE L1[r7], r0
- mov eax, r15d
- and eax, 16376
- mov qword ptr [rsi+rax], r8
- ; FSUB_R f2, a1
- subpd xmm2, xmm9
- ; FMUL_R e3, a1
- mulpd xmm7, xmm9
- ; IMUL_9C r3, 720965215
- lea r11, [r11+r11*8+720965215]
- ; IMUL_9C r6, 74948046
- lea r14, [r14+r14*8+74948046]
- ; ISTORE L1[r7], r3
- mov eax, r15d
- and eax, 16376
- mov qword ptr [rsi+rax], r11
- ; IXOR_R r2, r6
- xor r10, r14
- ; FMUL_R e3, a1
- mulpd xmm7, xmm9
- ; ISUB_R r4, r1
- sub r12, r9
- ; ISUB_R r3, r0
- sub r11, r8
- ; ISWAP_R r7, r5
- xchg r15, r13
- ; IMUL_R r2, r6
- imul r10, r14
- ; COND_R r2, ge(r2, 2402809790)
- xor ecx, ecx
- cmp r10d, -1892157506
- setge cl
- add r10, rcx
- ; FADD_R f1, a3
- addpd xmm1, xmm11
- ; IADD_R r7, r0
- add r15, r8
- ; IDIV_C r1, 624867857
- mov rax, 15848983434401622933
- mul r9
- shr rdx, 29
- add r9, rdx
- ; FADD_R f0, a1
- addpd xmm0, xmm9
- ; IADD_RC r5, r7, 3817376178
- lea r13, [r13+r15-477591118]
- ; FSUB_R f0, a3
- subpd xmm0, xmm11
- ; ISUB_M r6, L1[r2]
- mov eax, r10d
- and eax, 16376
- sub r14, qword ptr [rsi+rax]
- ; FMUL_R e3, a1
- mulpd xmm7, xmm9
- ; IADD_R r0, r4
- add r8, r12
- ; FSUB_R f3, a1
- subpd xmm3, xmm9
- ; FSUB_M f2, L1[r4]
- mov eax, r12d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- subpd xmm2, xmm12
- ; ISDIV_C r2, 3898255608
- mov rax, 5964731804029407733
- imul r10
- xor eax, eax
- sub rdx, r10
- sar rdx, 28
- sets al
- add rdx, rax
- add r10, rdx
- ; FNEG_R f2
- xorps xmm2, xmm15
- ; FSUB_R f3, a2
- subpd xmm3, xmm10
- ; FADD_R f1, a3
- addpd xmm1, xmm11
- ; IMUL_R r3, r2
- imul r11, r10
- ; FADD_M f0, L1[r3]
- mov eax, r11d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm0, xmm12
- ; ISMULH_R r5, r2
- mov rax, r13
- imul r10
- mov r13, rdx
- ; IMULH_R r6, r2
- mov rax, r14
- mul r10
- mov r14, rdx
- ; FADD_M f3, L1[r3]
- mov eax, r11d
- and eax, 16376
- cvtdq2pd xmm12, qword ptr [rsi+rax]
- addpd xmm3, xmm12
- ; IMUL_R r6, r7
- imul r14, r15
- ; FSUB_R f0, a0
- subpd xmm0, xmm8
- ; FNEG_R f2
- xorps xmm2, xmm15
- ; ISUB_R r6, r4
- sub r14, r12
- ; FADD_R f1, a1
- addpd xmm1, xmm9
- ; IXOR_R r0, r5
- xor r8, r13
- ; FADD_R f2, a1
- addpd xmm2, xmm9
- ; ISWAP_R r7, r5
- xchg r15, r13
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; IADD_RC r3, r6, 2977336568
- lea r11, [r11+r14-1317630728]
- ; IMUL_R r2, r3
- imul r10, r11
- ; IADD_RC r1, r4, 894105694
- lea r9, [r9+r12+894105694]
- ; IMUL_9C r7, 504293473
- lea r15, [r15+r15*8+504293473]
- ; FSUB_R f1, a0
- subpd xmm1, xmm8
- ; IMUL_R r7, r1
- imul r15, r9
- ; IXOR_R r2, r4
- xor r10, r12
- ; IADD_RC r0, r1, 392362094
- lea r8, [r8+r9+392362094]
- ; IDIV_C r4, 1645771433
- mov rax, 376097195048767223
- mul r12
- shr rdx, 25
- add r12, rdx
- ; ISUB_R r4, r3
- sub r12, r11
- ; ISUB_M r7, L1[r4]
- mov eax, r12d
- and eax, 16376
- sub r15, qword ptr [rsi+rax]
- ; IMUL_M r5, L1[r7]
- mov eax, r15d
- and eax, 16376
- imul r13, qword ptr [rsi+rax]
- ; IROR_R r1, r7
- mov ecx, r15d
- ror r9, cl
- ; INEG_R r4
- neg r12
- ; IMUL_R r3, 1863959234
- imul r11, 1863959234
- ; IROR_R r4, 59
- ror r12, 59
- ; IMUL_M r1, L3[363256]
- imul r9, qword ptr [rsi+363256]
- ; ISTORE L2[r6], r7
- mov eax, r14d
- and eax, 262136
- mov qword ptr [rsi+rax], r15
- ; ISTORE L1[r1], r5
- mov eax, r9d
- and eax, 16376
- mov qword ptr [rsi+rax], r13
- ; FNEG_R f0
- xorps xmm0, xmm15
- ; FSQRT_R e2
- sqrtpd xmm6, xmm6
- ; FMUL_R e0, a3
- mulpd xmm4, xmm11
- ; FMUL_R e3, a2
- mulpd xmm7, xmm10
- ; IROR_R r5, r2
- mov ecx, r10d
- ror r13, cl
- ; IADD_R r0, r4
- add r8, r12
+ ; IADD_R r2, r1
+ add r10, r9