From 2798d78717ee61f7a5f92f2085ac4ae7f8507671 Mon Sep 17 00:00:00 2001 From: tevador Date: Sat, 9 Feb 2019 16:19:15 +0100 Subject: [PATCH] Render imm32 as signed in RandomX code --- src/AssemblyGeneratorX86.cpp | 1 - src/CompiledVirtualMachine.cpp | 1 - src/Instruction.cpp | 20 +- src/InterpretedVirtualMachine.cpp | 1 - src/main.cpp | 1 - src/program.inc | 1388 +++++++++++++++-------------- 6 files changed, 706 insertions(+), 706 deletions(-) diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index a2d1b32..9f03da1 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -20,7 +20,6 @@ along with RandomX. If not, see. #define MAGIC_DIVISION #include "AssemblyGeneratorX86.hpp" #include "common.hpp" -#include "instructions.hpp" #ifdef MAGIC_DIVISION #include "divideByConstantCodegen.h" #endif diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index b3b5db8..8cfc364 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -19,7 +19,6 @@ along with RandomX. If not, see. #include "CompiledVirtualMachine.hpp" #include "common.hpp" -#include "instructions.hpp" #include namespace RandomX { diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 35cc737..2fefcf3 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -45,7 +45,7 @@ namespace RandomX { os << "r" << (int)dst << ", r" << (int)src << std::endl; } else { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } } @@ -63,7 +63,7 @@ namespace RandomX { } void Instruction::h_IADD_RC(std::ostream& os) const { - os << "r" << (int)dst << ", r" << (int)src << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", r" << (int)src << ", " << (int32_t)imm32 << std::endl; } //1 uOP @@ -72,7 +72,7 @@ namespace RandomX { os << "r" << (int)dst << ", r" << (int)src << std::endl; } else { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } } @@ -90,7 +90,7 @@ namespace RandomX { } void Instruction::h_IMUL_9C(std::ostream& os) const { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } void Instruction::h_IMUL_R(std::ostream& os) const { @@ -98,7 +98,7 @@ namespace RandomX { os << "r" << (int)dst << ", r" << (int)src << std::endl; } else { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } } @@ -158,7 +158,7 @@ namespace RandomX { os << "r" << (int)dst << ", r" << (int)src << std::endl; } else { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } } @@ -194,11 +194,11 @@ namespace RandomX { } void Instruction::h_IDIV_C(std::ostream& os) const { - os << "r" << (int)dst << ", " << (uint32_t)imm32 << std::endl; + os << "r" << (int)dst << ", " << imm32 << std::endl; } void Instruction::h_ISDIV_C(std::ostream& os) const { - os << "r" << (int)dst << ", " << imm32 << std::endl; + os << "r" << (int)dst << ", " << (int32_t)imm32 << std::endl; } void Instruction::h_ISWAP_R(std::ostream& os) const { @@ -300,13 +300,13 @@ namespace RandomX { } void Instruction::h_COND_R(std::ostream& os) const { - os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << imm32 << ")" << std::endl; + os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)imm32 << ")" << std::endl; } void Instruction::h_COND_M(std::ostream& os) const { os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "("; genAddressReg(os); - os << ", " << imm32 << ")" << std::endl; + os << ", " << (int32_t)imm32 << ")" << std::endl; } void Instruction::h_ISTORE(std::ostream& os) const { diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 9e0d5e2..0757f43 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -19,7 +19,6 @@ along with RandomX. If not, see. //#define TRACE //#define FPUCHECK #include "InterpretedVirtualMachine.hpp" -#include "instructions.hpp" #include "dataset.hpp" #include "Cache.hpp" #include "LightClientAsyncWorker.hpp" diff --git a/src/main.cpp b/src/main.cpp index b16b13b..0a10d8f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -29,7 +29,6 @@ along with RandomX. If not, see. #include #include "Program.hpp" #include -#include "instructions.hpp" #include #include #include "dataset.hpp" diff --git a/src/program.inc b/src/program.inc index 5de4504..ac8957b 100644 --- a/src/program.inc +++ b/src/program.inc @@ -1,736 +1,740 @@ - ; FMUL_R e0, a2 - mulpd xmm4, xmm10 - ; IADD_RC r2, r5, 2673743102 - lea r10, [r10+r13-1621224194] - ; ISTORE L2[r2], r7 - mov eax, r10d - and eax, 262136 - mov qword ptr [rsi+rax], r15 - ; FNEG_R f2 - xorps xmm2, xmm15 - ; IMUL_9C r6, 3291464084 - lea r14, [r14+r14*8-1003503212] - ; FSUB_R f1, a0 - subpd xmm1, xmm8 - ; IXOR_M r5, L2[r3] + ; COND_M r1, sg(L1[r3], -2004237569) + xor ecx, ecx mov eax, r11d + and eax, 16376 + cmp dword ptr [rsi+rax], -2004237569 + sets cl + add r9, rcx + ; IXOR_R r7, -1379425991 + xor r15, -1379425991 + ; IXOR_R r2, r6 + xor r10, r14 + ; FSWAP_R f3 + shufpd xmm3, xmm3, 1 + ; FADD_R f1, a1 + addpd xmm1, xmm9 + ; IMUL_R r0, r5 + imul r8, r13 + ; FMUL_R e1, a3 + mulpd xmm5, xmm11 + ; IADD_R r3, r2 + add r11, r10 + ; COND_M r1, ab(L2[r6], -724006934) + xor ecx, ecx + mov eax, r14d and eax, 262136 - xor r13, qword ptr [rsi+rax] - ; FNEG_R f2 - xorps xmm2, xmm15 - ; FSUB_R f3, a0 - subpd xmm3, xmm8 - ; ISDIV_C r0, 1400272688 - mov rax, 7072565507528518045 + cmp dword ptr [rsi+rax], -724006934 + seta cl + add r9, rcx + ; IADD_RC r2, r7, -854121467 + lea r10, [r10+r15-854121467] + ; IADD_RC r5, r6, 1291744030 + lea r13, [r13+r14+1291744030] + ; ISTORE L2[r6], r4 + mov eax, r14d + and eax, 262136 + mov qword ptr [rsi+rax], r12 + ; IMUL_R r6, r7 + imul r14, r15 + ; FSUB_R f0, a3 + subpd xmm0, xmm11 + ; IADD_M r3, L1[r0] + mov eax, r8d + and eax, 16376 + add r11, qword ptr [rsi+rax] + ; ISDIV_C r4, -692911499 + mov rax, -893288710803585809 + imul r12 + xor eax, eax + sar rdx, 25 + sets al + add rdx, rax + add r12, rdx + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 + ; FDIV_M e1, L1[r0] + mov eax, r8d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm14 + divpd xmm5, xmm12 + maxpd xmm5, xmm13 + ; FMUL_R e0, a1 + mulpd xmm4, xmm9 + ; COND_M r0, no(L1[r1], -540292380) + xor ecx, ecx + mov eax, r9d + and eax, 16376 + cmp dword ptr [rsi+rax], -540292380 + setno cl + add r8, rcx + ; FSUB_R f1, a1 + subpd xmm1, xmm9 + ; IADD_RC r0, r2, 310371682 + lea r8, [r8+r10+310371682] + ; COND_R r3, lt(r0, -1067603143) + xor ecx, ecx + cmp r8d, -1067603143 + setl cl + add r11, rcx + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 + ; FADD_R f0, a3 + addpd xmm0, xmm11 + ; COND_R r4, sg(r3, -389806289) + xor ecx, ecx + cmp r11d, -389806289 + sets cl + add r12, rcx + ; FMUL_R e0, a3 + mulpd xmm4, xmm11 + ; ISTORE L2[r7], r4 + mov eax, r15d + and eax, 262136 + mov qword ptr [rsi+rax], r12 + ; IADD_RC r4, r2, 1888908452 + lea r12, [r12+r10+1888908452] + ; IADD_R r1, r2 + add r9, r10 + ; IXOR_R r6, r5 + xor r14, r13 + ; IADD_M r7, L1[r0] + mov eax, r8d + and eax, 16376 + add r15, qword ptr [rsi+rax] + ; IADD_R r5, r6 + add r13, r14 + ; FSUB_R f0, a1 + subpd xmm0, xmm9 + ; IMULH_R r5, r4 + mov rax, r13 + mul r12 + mov r13, rdx + ; IMUL_9C r7, 753606235 + lea r15, [r15+r15*8+753606235] + ; FSWAP_R e2 + shufpd xmm6, xmm6, 1 + ; IMUL_M r7, L1[r1] + mov eax, r9d + and eax, 16376 + imul r15, qword ptr [rsi+rax] + ; IMUL_R r5, 1431156245 + imul r13, 1431156245 + ; IADD_RC r4, r2, 1268508410 + lea r12, [r12+r10+1268508410] + ; FSWAP_R f2 + shufpd xmm2, xmm2, 1 + ; ISDIV_C r0, -845194077 + mov rax, -5858725577819591251 imul r8 xor eax, eax - sar rdx, 29 + sar rdx, 28 sets al add rdx, rax add r8, rdx - ; IMUL_M r3, L1[r7] - mov eax, r15d - and eax, 16376 - imul r11, qword ptr [rsi+rax] - ; ISWAP_R r2, r3 - xchg r10, r11 - ; IMULH_R r6, r0 - mov rax, r14 - mul r8 - mov r14, rdx - ; FMUL_R e0, a2 - mulpd xmm4, xmm10 - ; IADD_RC r3, r4, 4242706868 - lea r11, [r11+r12-52260428] - ; IADD_R r7, 3156349536 - add r15, -1138617760 - ; IXOR_M r2, L1[r6] - mov eax, r14d - and eax, 16376 - xor r10, qword ptr [rsi+rax] - ; FSUB_M f2, L1[r5] - mov eax, r13d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm2, xmm12 - ; IXOR_R r7, r1 - xor r15, r9 - ; COND_R r2, lt(r7, 4253348488) + ; COND_R r0, ab(r5, 1644043355) xor ecx, ecx - cmp r15d, -41618808 + cmp r13d, 1644043355 + seta cl + add r8, rcx + ; COND_R r5, lt(r0, 1216385844) + xor ecx, ecx + cmp r8d, 1216385844 setl cl - add r10, rcx - ; FMUL_R e3, a0 - mulpd xmm7, xmm8 - ; COND_R r4, sg(r1, 3333776931) - xor ecx, ecx - cmp r9d, -961190365 - sets cl - add r12, rcx - ; FADD_R f2, a1 - addpd xmm2, xmm9 - ; FSUB_R f0, a3 - subpd xmm0, xmm11 - ; ISTORE L1[r6], r2 - mov eax, r14d - and eax, 16376 - mov qword ptr [rsi+rax], r10 - ; ISUB_R r6, r5 - sub r14, r13 - ; IADD_M r0, L1[r4] + add r13, rcx + ; IMUL_R r5, r2 + imul r13, r10 + ; ISTORE L1[r4], r6 mov eax, r12d and eax, 16376 - add r8, qword ptr [rsi+rax] - ; ISTORE L1[r4], r3 - mov eax, r12d - and eax, 16376 - mov qword ptr [rsi+rax], r11 - ; COND_M r6, sg(L1[r6], 1048782623) - xor ecx, ecx - mov eax, r14d - and eax, 16376 - cmp dword ptr [rsi+rax], 1048782623 - sets cl - add r14, rcx - ; FSQRT_R e0 - sqrtpd xmm4, xmm4 - ; INEG_R r2 - neg r10 + mov qword ptr [rsi+rax], r14 + ; IXOR_R r4, r3 + xor r12, r11 + ; IXOR_R r6, r2 + xor r14, r10 ; FSQRT_R e1 sqrtpd xmm5, xmm5 - ; FMUL_R e1, a3 - mulpd xmm5, xmm11 - ; IMUL_R r7, r6 - imul r15, r14 - ; IMULH_R r0, r4 - mov rax, r8 - mul r12 - mov r8, rdx - ; IMUL_R r5, r3 - imul r13, r11 - ; FSQRT_R e2 - sqrtpd xmm6, xmm6 - ; FADD_R f3, a0 - addpd xmm3, xmm8 - ; IADD_R r3, r2 - add r11, r10 - ; FADD_R f1, a0 - addpd xmm1, xmm8 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; FADD_M f0, L2[r5] - mov eax, r13d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm0, xmm12 - ; IMUL_R r5, r6 - imul r13, r14 - ; IADD_RC r1, r2, 3031682053 - lea r9, [r9+r10-1263285243] - ; ISUB_M r4, L1[r6] - mov eax, r14d - and eax, 16376 - sub r12, qword ptr [rsi+rax] - ; FSWAP_R e3 - shufpd xmm7, xmm7, 1 - ; IMUL_R r0, r7 - imul r8, r15 - ; IXOR_R r1, r6 - xor r9, r14 - ; IXOR_M r2, L1[r4] - mov eax, r12d - and eax, 16376 - xor r10, qword ptr [rsi+rax] - ; FSUB_R f3, a1 - subpd xmm3, xmm9 - ; ISTORE L1[r0], r5 - mov eax, r8d - and eax, 16376 - mov qword ptr [rsi+rax], r13 - ; FDIV_M e2, L2[r3] + ; COND_R r5, be(r1, 1781435695) + xor ecx, ecx + cmp r9d, 1781435695 + setbe cl + add r13, rcx + ; ISDIV_C r0, 1367038890 + mov rax, 1811126293978922977 + imul r8 + xor eax, eax + sar rdx, 27 + sets al + add rdx, rax + add r8, rdx + ; FDIV_M e1, L1[r3] mov eax, r11d - and eax, 262136 + and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] andps xmm12, xmm14 - divpd xmm6, xmm12 - maxpd xmm6, xmm13 - ; ISWAP_R r2, r0 - xchg r10, r8 - ; IADD_R r7, r5 - add r15, r13 - ; FDIV_M e0, L1[r4] - mov eax, r12d + divpd xmm5, xmm12 + maxpd xmm5, xmm13 + ; FMUL_R e2, a0 + mulpd xmm6, xmm8 + ; ISTORE L1[r5], r4 + mov eax, r13d + and eax, 16376 + mov qword ptr [rsi+rax], r12 + ; IXOR_R r0, r4 + xor r8, r12 + ; IMUL_R r5, r1 + imul r13, r9 + ; FDIV_M e0, L1[r2] + mov eax, r10d and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] andps xmm12, xmm14 divpd xmm4, xmm12 maxpd xmm4, xmm13 - ; FADD_R f3, a1 - addpd xmm3, xmm9 - ; FADD_R f0, a3 - addpd xmm0, xmm11 - ; IADD_R r2, r0 - add r10, r8 - ; ISTORE L1[r3], r6 - mov eax, r11d + ; IMUL_R r6, r1 + imul r14, r9 + ; FSUB_M f1, L1[r0] + mov eax, r8d and eax, 16376 - mov qword ptr [rsi+rax], r14 - ; IXOR_R r1, r7 - xor r9, r15 - ; ISUB_M r5, L2[r7] - mov eax, r15d + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm1, xmm12 + ; COND_R r2, ns(r1, 392878356) + xor ecx, ecx + cmp r9d, 392878356 + setns cl + add r10, rcx + ; IADD_R r6, r5 + add r14, r13 + ; FMUL_R e2, a0 + mulpd xmm6, xmm8 + ; ISTORE L1[r0], r3 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r11 + ; IMUL_R r1, r3 + imul r9, r11 + ; IMUL_R r5, r2 + imul r13, r10 + ; FADD_R f0, a0 + addpd xmm0, xmm8 + ; FADD_R f0, a1 + addpd xmm0, xmm9 + ; FSUB_R f0, a0 + subpd xmm0, xmm8 + ; IMUL_R r3, r5 + imul r11, r13 + ; IADD_R r1, r5 + add r9, r13 + ; IXOR_M r0, L1[r5] + mov eax, r13d + and eax, 16376 + xor r8, qword ptr [rsi+rax] + ; FNEG_R f2 + xorps xmm2, xmm15 + ; IDIV_C r5, 2577129788 + mov rax, 15371395512010654233 + mul r13 + shr rdx, 31 + add r13, rdx + ; COND_R r5, be(r5, -999219370) + xor ecx, ecx + cmp r13d, -999219370 + setbe cl + add r13, rcx + ; ISTORE L2[r0], r2 + mov eax, r8d and eax, 262136 - sub r13, qword ptr [rsi+rax] - ; ISDIV_C r7, 266992378 - mov rax, -9173520256920442565 - imul r15 + mov qword ptr [rsi+rax], r10 + ; FSUB_R f3, a3 + subpd xmm3, xmm11 + ; IROR_R r7, r6 + mov ecx, r14d + ror r15, cl + ; COND_R r6, ab(r4, 1309137534) + xor ecx, ecx + cmp r12d, 1309137534 + seta cl + add r14, rcx + ; FMUL_R e3, a0 + mulpd xmm7, xmm8 + ; COND_M r3, no(L2[r5], 483660199) + xor ecx, ecx + mov eax, r13d + and eax, 262136 + cmp dword ptr [rsi+rax], 483660199 + setno cl + add r11, rcx + ; IMUL_R r1, r6 + imul r9, r14 + ; IADD_RC r7, r2, -1340630490 + lea r15, [r15+r10-1340630490] + ; IADD_M r0, L3[1554088] + add r8, qword ptr [rsi+1554088] + ; FMUL_R e2, a3 + mulpd xmm6, xmm11 + ; IDIV_C r0, 1566192452 + mov rax, 12646619898641986559 + mul r8 + shr rdx, 30 + add r8, rdx + ; FADD_R f0, a1 + addpd xmm0, xmm9 + ; ISWAP_R r6, r0 + xchg r14, r8 + ; IMUL_9C r4, 1340891034 + lea r12, [r12+r12*8+1340891034] + ; IROR_R r7, r2 + mov ecx, r10d + ror r15, cl + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 + ; FADD_R f2, a1 + addpd xmm2, xmm9 + ; IMUL_R r4, r3 + imul r12, r11 + ; IADD_RC r6, r3, -1584624397 + lea r14, [r14+r11-1584624397] + ; IROR_R r1, r7 + mov ecx, r15d + ror r9, cl + ; IXOR_R r4, r7 + xor r12, r15 + ; FSWAP_R f0 + shufpd xmm0, xmm0, 1 + ; FSWAP_R f3 + shufpd xmm3, xmm3, 1 + ; IROR_R r5, 3 + ror r13, 3 + ; FADD_R f3, a0 + addpd xmm3, xmm8 + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 + ; IADD_R r4, r1 + add r12, r9 + ; COND_M r4, ge(L1[r6], -1612023931) + xor ecx, ecx + mov eax, r14d + and eax, 16376 + cmp dword ptr [rsi+rax], -1612023931 + setge cl + add r12, rcx + ; FSWAP_R e2 + shufpd xmm6, xmm6, 1 + ; IADD_R r3, r7 + add r11, r15 + ; COND_R r5, be(r2, -1083018923) + xor ecx, ecx + cmp r10d, -1083018923 + setbe cl + add r13, rcx + ; IADD_R r3, r7 + add r11, r15 + ; ISTORE L2[r6], r0 + mov eax, r14d + and eax, 262136 + mov qword ptr [rsi+rax], r8 + ; IXOR_R r2, r3 + xor r10, r11 + ; FMUL_R e2, a3 + mulpd xmm6, xmm11 + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; FADD_R f0, a2 + addpd xmm0, xmm10 + ; ISTORE L1[r5], r1 + mov eax, r13d + and eax, 16376 + mov qword ptr [rsi+rax], r9 + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; ISWAP_R r1, r2 + xchg r9, r10 + ; FSWAP_R e0 + shufpd xmm4, xmm4, 1 + ; FSUB_R f1, a2 + subpd xmm1, xmm10 + ; FSUB_R f0, a0 + subpd xmm0, xmm8 + ; IROR_R r7, r0 + mov ecx, r8d + ror r15, cl + ; IADD_RC r5, r4, 283260945 + lea r13, [r13+r12+283260945] + ; ISDIV_C r6, -340125851 + mov rax, -3639652898025032137 + imul r14 xor eax, eax - add rdx, r15 - sar rdx, 27 + sar rdx, 26 sets al add rdx, rax - add r15, rdx - ; FDIV_M e3, L1[r4] + add r14, rdx + ; ISTORE L2[r2], r3 + mov eax, r10d + and eax, 262136 + mov qword ptr [rsi+rax], r11 + ; IADD_RC r6, r6, -935765909 + lea r14, [r14+r14-935765909] + ; ISDIV_C r3, -701703430 + mov rax, -7056770631919985199 + imul r11 + xor eax, eax + sar rdx, 28 + sets al + add rdx, rax + add r11, rdx + ; IXOR_M r3, L2[r1] + mov eax, r9d + and eax, 262136 + xor r11, qword ptr [rsi+rax] + ; FADD_R f2, a1 + addpd xmm2, xmm9 + ; ISTORE L1[r5], r7 + mov eax, r13d + and eax, 16376 + mov qword ptr [rsi+rax], r15 + ; FSUB_R f2, a0 + subpd xmm2, xmm8 + ; FMUL_R e3, a2 + mulpd xmm7, xmm10 + ; IADD_R r2, r5 + add r10, r13 + ; IADD_RC r2, r5, -1056770544 + lea r10, [r10+r13-1056770544] + ; ISTORE L2[r2], r3 + mov eax, r10d + and eax, 262136 + mov qword ptr [rsi+rax], r11 + ; ISMULH_R r7, r1 + mov rax, r15 + imul r9 + mov r15, rdx + ; IXOR_R r0, r5 + xor r8, r13 + ; ISTORE L1[r4], r0 mov eax, r12d and eax, 16376 + mov qword ptr [rsi+rax], r8 + ; INEG_R r5 + neg r13 + ; FSUB_R f0, a1 + subpd xmm0, xmm9 + ; IMUL_R r6, -244261682 + imul r14, -244261682 + ; IMUL_R r1, r0 + imul r9, r8 + ; IMUL_9C r3, -985744277 + lea r11, [r11+r11*8-985744277] + ; IROR_R r2, r1 + mov ecx, r9d + ror r10, cl + ; ISUB_R r4, -1079131550 + sub r12, -1079131550 + ; FNEG_R f3 + xorps xmm3, xmm15 + ; COND_R r4, ns(r5, -362284631) + xor ecx, ecx + cmp r13d, -362284631 + setns cl + add r12, rcx + ; FSUB_R f2, a0 + subpd xmm2, xmm8 + ; IXOR_R r4, r5 + xor r12, r13 + ; FNEG_R f1 + xorps xmm1, xmm15 + ; FADD_R f0, a0 + addpd xmm0, xmm8 + ; IADD_RC r3, r3, -173615832 + lea r11, [r11+r11-173615832] + ; IMUL_R r0, 928402279 + imul r8, 928402279 + ; ISUB_R r2, r0 + sub r10, r8 + ; IXOR_R r6, r3 + xor r14, r11 + ; ISUB_R r2, 2106401471 + sub r10, 2106401471 + ; FADD_R f0, a2 + addpd xmm0, xmm10 + ; IMUL_R r4, r6 + imul r12, r14 + ; IADD_RC r4, r0, -373491513 + lea r12, [r12+r8-373491513] + ; ISDIV_C r0, -1739042721 + mov rax, 7057121271817449967 + imul r8 + xor eax, eax + sub rdx, r8 + sar rdx, 30 + sets al + add rdx, rax + add r8, rdx + ; IADD_R r3, r1 + add r11, r9 + ; ISUB_M r7, L1[r5] + mov eax, r13d + and eax, 16376 + sub r15, qword ptr [rsi+rax] + ; IMUL_R r1, r2 + imul r9, r10 + ; ISUB_R r0, 722465116 + sub r8, 722465116 + ; IADD_RC r0, r0, -1919541169 + lea r8, [r8+r8-1919541169] + ; ISUB_M r2, L1[r3] + mov eax, r11d + and eax, 16376 + sub r10, qword ptr [rsi+rax] + ; IADD_R r7, -1183581468 + add r15, -1183581468 + ; FMUL_R e1, a3 + mulpd xmm5, xmm11 + ; FSUB_R f0, a0 + subpd xmm0, xmm8 + ; FADD_R f0, a3 + addpd xmm0, xmm11 + ; IMUL_9C r6, 1241113238 + lea r14, [r14+r14*8+1241113238] + ; FSUB_R f3, a3 + subpd xmm3, xmm11 + ; IADD_M r0, L1[r3] + mov eax, r11d + and eax, 16376 + add r8, qword ptr [rsi+rax] + ; IROR_R r3, r7 + mov ecx, r15d + ror r11, cl + ; FADD_R f2, a1 + addpd xmm2, xmm9 + ; IMUL_M r3, L1[r2] + mov eax, r10d + and eax, 16376 + imul r11, qword ptr [rsi+rax] + ; IMUL_9C r7, -2080412544 + lea r15, [r15+r15*8-2080412544] + ; IMUL_R r0, r3 + imul r8, r11 + ; FADD_R f1, a1 + addpd xmm1, xmm9 + ; IROR_R r6, 21 + ror r14, 21 + ; FDIV_M e3, L1[r1] + mov eax, r9d + and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] andps xmm12, xmm14 divpd xmm7, xmm12 maxpd xmm7, xmm13 - ; IMUL_R r2, r0 - imul r10, r8 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; IMUL_R r0, r6 - imul r8, r14 - ; ISTORE L1[r0], r7 - mov eax, r8d - and eax, 16376 - mov qword ptr [rsi+rax], r15 - ; FSUB_M f0, L2[r1] - mov eax, r9d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm0, xmm12 - ; FADD_R f3, a1 - addpd xmm3, xmm9 - ; IXOR_R r5, r4 - xor r13, r12 - ; ISTORE L2[r7], r2 - mov eax, r15d - and eax, 262136 - mov qword ptr [rsi+rax], r10 - ; FSWAP_R e2 - shufpd xmm6, xmm6, 1 - ; FADD_R f3, a2 - addpd xmm3, xmm10 - ; ISMULH_R r5, r0 - mov rax, r13 - imul r8 - mov r13, rdx - ; IADD_M r0, L1[r4] - mov eax, r12d - and eax, 16376 - add r8, qword ptr [rsi+rax] - ; COND_R r7, ge(r6, 2322068811) - xor ecx, ecx - cmp r14d, -1972898485 - setge cl - add r15, rcx - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; IROR_R r7, r6 - mov ecx, r14d - ror r15, cl - ; IADD_RC r2, r4, 4177509323 - lea r10, [r10+r12-117457973] - ; IMUL_R r0, 2794074228 - imul r8, -1500893068 - ; IADD_R r2, r3 - add r10, r11 - ; FSQRT_R e2 - sqrtpd xmm6, xmm6 - ; IROR_R r7, r4 - mov ecx, r12d - ror r15, cl - ; IMUL_9C r4, 381194890 - lea r12, [r12+r12*8+381194890] - ; IADD_RC r3, r7, 1050899263 - lea r11, [r11+r15+1050899263] - ; IADD_R r2, r7 - add r10, r15 - ; FMUL_R e3, a0 - mulpd xmm7, xmm8 - ; IADD_RC r6, r6, 540663146 - lea r14, [r14+r14+540663146] - ; IROR_R r5, 58 - ror r13, 58 - ; FADD_R f2, a1 - addpd xmm2, xmm9 - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; FMUL_R e1, a2 - mulpd xmm5, xmm10 + ; FSUB_R f0, a1 + subpd xmm0, xmm9 ; FSWAP_R e1 shufpd xmm5, xmm5, 1 - ; IADD_R r5, r3 - add r13, r11 - ; IADD_R r7, 2514699120 - add r15, -1780268176 - ; IADD_RC r7, r0, 2797210442 - lea r15, [r15+r8-1497756854] - ; ISTORE L2[r0], r7 - mov eax, r8d - and eax, 262136 - mov qword ptr [rsi+rax], r15 - ; ISMULH_R r2, r4 - mov rax, r10 - imul r12 - mov r10, rdx - ; FSUB_M f0, L1[r2] - mov eax, r10d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm0, xmm12 - ; ISMULH_R r2, r3 - mov rax, r10 - imul r11 - mov r10, rdx - ; IADD_R r0, r3 - add r8, r11 - ; ISUB_R r7, r2 - sub r15, r10 - ; FADD_R f2, a0 - addpd xmm2, xmm8 - ; FMUL_R e0, a2 - mulpd xmm4, xmm10 - ; FADD_M f2, L1[r3] - mov eax, r11d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm2, xmm12 - ; IMUL_R r1, r2 - imul r9, r10 - ; IMUL_M r7, L1[r5] + ; COND_M r0, no(L1[r5], -1627153829) + xor ecx, ecx mov eax, r13d and eax, 16376 - imul r15, qword ptr [rsi+rax] + cmp dword ptr [rsi+rax], -1627153829 + setno cl + add r8, rcx + ; FADD_R f2, a3 + addpd xmm2, xmm11 + ; FSUB_R f1, a2 + subpd xmm1, xmm10 + ; FSUB_M f1, L1[r4] + mov eax, r12d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm1, xmm12 + ; ISTORE L1[r5], r1 + mov eax, r13d + and eax, 16376 + mov qword ptr [rsi+rax], r9 + ; ISUB_M r2, L2[r7] + mov eax, r15d + and eax, 262136 + sub r10, qword ptr [rsi+rax] + ; ISTORE L1[r2], r3 + mov eax, r10d + and eax, 16376 + mov qword ptr [rsi+rax], r11 + ; FADD_R f0, a3 + addpd xmm0, xmm11 + ; ISUB_M r1, L1[r7] + mov eax, r15d + and eax, 16376 + sub r9, qword ptr [rsi+rax] + ; IDIV_C r5, 624165039 + mov rax, 15866829597104432181 + mul r13 + shr rdx, 29 + add r13, rdx + ; FMUL_R e3, a0 + mulpd xmm7, xmm8 + ; IMUL_R r5, r4 + imul r13, r12 + ; FMUL_R e3, a1 + mulpd xmm7, xmm9 + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; IXOR_R r0, -2064879200 + xor r8, -2064879200 + ; FADD_R f1, a3 + addpd xmm1, xmm11 + ; IADD_M r0, L1[r3] + mov eax, r11d + and eax, 16376 + add r8, qword ptr [rsi+rax] + ; ISMULH_R r7, r3 + mov rax, r15 + imul r11 + mov r15, rdx + ; IMUL_R r5, -1645503310 + imul r13, -1645503310 + ; IMUL_R r7, r3 + imul r15, r11 + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 + ; IADD_R r6, 1769041191 + add r14, 1769041191 + ; FSUB_M f1, L1[r4] + mov eax, r12d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm1, xmm12 + ; ISTORE L2[r1], r0 + mov eax, r9d + and eax, 262136 + mov qword ptr [rsi+rax], r8 + ; FNEG_R f0 + xorps xmm0, xmm15 + ; FMUL_R e0, a3 + mulpd xmm4, xmm11 + ; IMUL_R r2, r7 + imul r10, r15 + ; IADD_R r5, r1 + add r13, r9 + ; IROR_R r3, r6 + mov ecx, r14d + ror r11, cl + ; FADD_R f0, a0 + addpd xmm0, xmm8 + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 + ; FNEG_R f3 + xorps xmm3, xmm15 + ; FADD_R f1, a1 + addpd xmm1, xmm9 + ; IMULH_R r2, r5 + mov rax, r10 + mul r13 + mov r10, rdx + ; ISTORE L1[r4], r0 + mov eax, r12d + and eax, 16376 + mov qword ptr [rsi+rax], r8 + ; ISWAP_R r7, r0 + xchg r15, r8 + ; FSWAP_R f0 + shufpd xmm0, xmm0, 1 + ; ISUB_R r2, r0 + sub r10, r8 + ; FSUB_R f1, a3 + subpd xmm1, xmm11 + ; ISUB_M r5, L1[r3] + mov eax, r11d + and eax, 16376 + sub r13, qword ptr [rsi+rax] + ; IXOR_R r7, r0 + xor r15, r8 + ; IMUL_R r4, r1 + imul r12, r9 + ; IADD_RC r0, r2, -1102648763 + lea r8, [r8+r10-1102648763] + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; IXOR_R r4, r1 + xor r12, r9 + ; IXOR_R r6, r0 + xor r14, r8 + ; FSQRT_R e1 + sqrtpd xmm5, xmm5 + ; IMUL_M r6, L2[r1] + mov eax, r9d + and eax, 262136 + imul r14, qword ptr [rsi+rax] + ; ISMULH_M r5, L3[353552] + mov rax, r13 + imul qword ptr [rsi+353552] + mov r13, rdx + ; ISUB_M r1, L1[r6] + mov eax, r14d + and eax, 16376 + sub r9, qword ptr [rsi+rax] + ; FADD_R f0, a3 + addpd xmm0, xmm11 + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; FSUB_M f3, L2[r7] + mov eax, r15d + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm3, xmm12 + ; IMUL_R r0, r2 + imul r8, r10 + ; FMUL_R e1, a0 + mulpd xmm5, xmm8 + ; COND_R r5, sg(r3, -1392293091) + xor ecx, ecx + cmp r11d, -1392293091 + sets cl + add r13, rcx + ; FSWAP_R e3 + shufpd xmm7, xmm7, 1 + ; IMUL_R r7, r4 + imul r15, r12 + ; IXOR_R r7, r5 + xor r15, r13 + ; FMUL_R e3, a3 + mulpd xmm7, xmm11 + ; IMUL_R r4, r3 + imul r12, r11 + ; FADD_M f1, L1[r1] + mov eax, r9d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm1, xmm12 + ; IMUL_R r5, r0 + imul r13, r8 + ; ISUB_R r7, r0 + sub r15, r8 + ; IADD_M r5, L1[r4] + mov eax, r12d + and eax, 16376 + add r13, qword ptr [rsi+rax] + ; IADD_R r6, r2 + add r14, r10 + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 + ; IADD_M r2, L3[1073640] + add r10, qword ptr [rsi+1073640] ; IMUL_R r3, r2 imul r11, r10 ; IXOR_R r1, r0 xor r9, r8 - ; FNEG_R f0 - xorps xmm0, xmm15 - ; IADD_RC r4, r4, 1456841848 - lea r12, [r12+r12+1456841848] - ; IXOR_R r3, r2 - xor r11, r10 - ; COND_R r0, of(r4, 1678513610) - xor ecx, ecx - cmp r12d, 1678513610 - seto cl - add r8, rcx - ; ISMULH_R r4, r4 - mov rax, r12 - imul r12 - mov r12, rdx - ; IMUL_R r4, r1 - imul r12, r9 - ; FADD_R f1, a2 - addpd xmm1, xmm10 - ; FSUB_R f2, a0 - subpd xmm2, xmm8 - ; FMUL_R e1, a2 - mulpd xmm5, xmm10 - ; FSUB_R f0, a3 - subpd xmm0, xmm11 - ; IXOR_R r0, r7 - xor r8, r15 - ; ISTORE L2[r1], r4 - mov eax, r9d - and eax, 262136 - mov qword ptr [rsi+rax], r12 - ; IXOR_M r7, L1[r6] - mov eax, r14d - and eax, 16376 - xor r15, qword ptr [rsi+rax] - ; ISUB_R r2, r4 - sub r10, r12 - ; ISUB_M r4, L1[r6] - mov eax, r14d - and eax, 16376 - sub r12, qword ptr [rsi+rax] - ; FADD_R f2, a2 - addpd xmm2, xmm10 - ; FSUB_M f3, L2[r4] + ; IROR_R r7, r4 + mov ecx, r12d + ror r15, cl + ; FSUB_R f1, a1 + subpd xmm1, xmm9 + ; IMUL_R r7, r5 + imul r15, r13 + ; ISUB_R r1, 866191482 + sub r9, 866191482 + ; IMUL_M r7, L1[r4] mov eax, r12d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm3, xmm12 - ; IXOR_R r7, r2 - xor r15, r10 - ; IXOR_R r0, r5 - xor r8, r13 - ; FADD_R f1, a2 - addpd xmm1, xmm10 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; FSWAP_R e3 - shufpd xmm7, xmm7, 1 - ; FSWAP_R f1 - shufpd xmm1, xmm1, 1 - ; COND_R r2, ge(r2, 4068636356) - xor ecx, ecx - cmp r10d, -226330940 - setge cl - add r10, rcx - ; FMUL_R e2, a3 - mulpd xmm6, xmm11 - ; FSUB_M f2, L2[r1] - mov eax, r9d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm2, xmm12 - ; FADD_R f1, a0 - addpd xmm1, xmm8 - ; ISUB_R r7, r5 - sub r15, r13 - ; ISUB_M r0, L1[r1] - mov eax, r9d and eax, 16376 - sub r8, qword ptr [rsi+rax] - ; FSUB_R f3, a1 - subpd xmm3, xmm9 - ; ISWAP_R r3, r5 - xchg r11, r13 - ; IADD_RC r5, r2, 795784298 - lea r13, [r13+r10+795784298] - ; IADD_RC r0, r4, 2244788743 - lea r8, [r8+r12-2050178553] - ; IMUL_9C r5, 1062534001 - lea r13, [r13+r13*8+1062534001] - ; FADD_R f0, a2 - addpd xmm0, xmm10 - ; FMUL_R e3, a1 - mulpd xmm7, xmm9 - ; IDIV_C r3, 1662492575 - mov rax, 11914062610815620875 - mul r11 - shr rdx, 30 - add r11, rdx - ; IMUL_M r5, L1[r0] - mov eax, r8d - and eax, 16376 - imul r13, qword ptr [rsi+rax] - ; IDIV_C r4, 1963597892 - mov rax, r12 - shr rax, 2 - mov rcx, 1260889558222626443 - mul rcx - shr rdx, 25 - add r12, rdx - ; IMUL_9C r7, 1820045218 - lea r15, [r15+r15*8+1820045218] - ; IMUL_M r0, L1[r3] - mov eax, r11d - and eax, 16376 - imul r8, qword ptr [rsi+rax] - ; IXOR_R r3, r7 - xor r11, r15 - ; ISMULH_R r4, r2 - mov rax, r12 - imul r10 - mov r12, rdx - ; ISWAP_R r3, r0 - xchg r11, r8 - ; IXOR_R r2, r0 - xor r10, r8 - ; IXOR_M r0, L2[r1] - mov eax, r9d - and eax, 262136 - xor r8, qword ptr [rsi+rax] - ; ISDIV_C r7, 3359520316 - mov rax, 7859804860668271393 - imul r15 - xor eax, eax - sub rdx, r15 - sar rdx, 29 - sets al - add rdx, rax - add r15, rdx - ; IMUL_M r6, L1[r2] - mov eax, r10d - and eax, 16376 - imul r14, qword ptr [rsi+rax] - ; FNEG_R f3 - xorps xmm3, xmm15 - ; IADD_RC r4, r2, 1704868083 - lea r12, [r12+r10+1704868083] + imul r15, qword ptr [rsi+rax] ; FADD_R f2, a0 addpd xmm2, xmm8 - ; ISTORE L1[r0], r0 - mov eax, r8d - and eax, 16376 - mov qword ptr [rsi+rax], r8 - ; FADD_M f0, L1[r7] - mov eax, r15d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm0, xmm12 - ; FMUL_R e0, a3 - mulpd xmm4, xmm11 - ; FSUB_R f3, a2 - subpd xmm3, xmm10 - ; IADD_RC r7, r7, 1302457878 - lea r15, [r15+r15+1302457878] - ; ISUB_R r1, 1330165941 - sub r9, 1330165941 - ; FNEG_R f1 - xorps xmm1, xmm15 - ; IROR_R r0, r4 - mov ecx, r12d - ror r8, cl - ; FSUB_R f1, a0 - subpd xmm1, xmm8 - ; IROR_R r5, r6 - mov ecx, r14d - ror r13, cl - ; COND_R r0, ab(r1, 3984033425) - xor ecx, ecx - cmp r9d, -310933871 - seta cl - add r8, rcx - ; COND_R r4, ab(r7, 757929676) - xor ecx, ecx - cmp r15d, 757929676 - seta cl - add r12, rcx - ; FMUL_R e0, a1 - mulpd xmm4, xmm9 - ; IMUL_R r1, r3 - imul r9, r11 - ; ISUB_R r3, r2 - sub r11, r10 - ; FSUB_R f3, a2 - subpd xmm3, xmm10 - ; FDIV_M e1, L1[r4] - mov eax, r12d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - andps xmm12, xmm14 - divpd xmm5, xmm12 - maxpd xmm5, xmm13 - ; FSWAP_R f1 - shufpd xmm1, xmm1, 1 - ; IADD_R r7, 2873779272 - add r15, -1421188024 - ; FSUB_M f3, L2[r2] - mov eax, r10d - and eax, 262136 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm3, xmm12 - ; FSUB_R f2, a3 - subpd xmm2, xmm11 - ; FSUB_R f3, a1 - subpd xmm3, xmm9 - ; FMUL_R e1, a3 - mulpd xmm5, xmm11 - ; IADD_RC r2, r4, 3977135268 - lea r10, [r10+r12-317832028] - ; IMUL_M r4, L1[r5] - mov eax, r13d - and eax, 16376 - imul r12, qword ptr [rsi+rax] - ; FDIV_M e1, L1[r7] - mov eax, r15d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - andps xmm12, xmm14 - divpd xmm5, xmm12 - maxpd xmm5, xmm13 - ; IADD_R r5, r2 - add r13, r10 - ; ISUB_R r4, 401020510 - sub r12, 401020510 - ; IROR_R r3, r0 - mov ecx, r8d - ror r11, cl - ; ISTORE L1[r7], r0 - mov eax, r15d - and eax, 16376 - mov qword ptr [rsi+rax], r8 - ; FSUB_R f2, a1 - subpd xmm2, xmm9 - ; FMUL_R e3, a1 - mulpd xmm7, xmm9 - ; IMUL_9C r3, 720965215 - lea r11, [r11+r11*8+720965215] - ; IMUL_9C r6, 74948046 - lea r14, [r14+r14*8+74948046] - ; ISTORE L1[r7], r3 - mov eax, r15d - and eax, 16376 - mov qword ptr [rsi+rax], r11 - ; IXOR_R r2, r6 - xor r10, r14 - ; FMUL_R e3, a1 - mulpd xmm7, xmm9 - ; ISUB_R r4, r1 - sub r12, r9 - ; ISUB_R r3, r0 - sub r11, r8 - ; ISWAP_R r7, r5 - xchg r15, r13 - ; IMUL_R r2, r6 - imul r10, r14 - ; COND_R r2, ge(r2, 2402809790) - xor ecx, ecx - cmp r10d, -1892157506 - setge cl - add r10, rcx - ; FADD_R f1, a3 - addpd xmm1, xmm11 - ; IADD_R r7, r0 - add r15, r8 - ; IDIV_C r1, 624867857 - mov rax, 15848983434401622933 - mul r9 - shr rdx, 29 - add r9, rdx - ; FADD_R f0, a1 - addpd xmm0, xmm9 - ; IADD_RC r5, r7, 3817376178 - lea r13, [r13+r15-477591118] - ; FSUB_R f0, a3 - subpd xmm0, xmm11 - ; ISUB_M r6, L1[r2] - mov eax, r10d - and eax, 16376 - sub r14, qword ptr [rsi+rax] - ; FMUL_R e3, a1 - mulpd xmm7, xmm9 - ; IADD_R r0, r4 - add r8, r12 - ; FSUB_R f3, a1 - subpd xmm3, xmm9 - ; FSUB_M f2, L1[r4] - mov eax, r12d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm2, xmm12 - ; ISDIV_C r2, 3898255608 - mov rax, 5964731804029407733 - imul r10 - xor eax, eax - sub rdx, r10 - sar rdx, 28 - sets al - add rdx, rax - add r10, rdx - ; FNEG_R f2 - xorps xmm2, xmm15 - ; FSUB_R f3, a2 - subpd xmm3, xmm10 - ; FADD_R f1, a3 - addpd xmm1, xmm11 - ; IMUL_R r3, r2 - imul r11, r10 - ; FADD_M f0, L1[r3] - mov eax, r11d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm0, xmm12 - ; ISMULH_R r5, r2 - mov rax, r13 - imul r10 - mov r13, rdx - ; IMULH_R r6, r2 - mov rax, r14 - mul r10 - mov r14, rdx - ; FADD_M f3, L1[r3] - mov eax, r11d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm3, xmm12 - ; IMUL_R r6, r7 - imul r14, r15 - ; FSUB_R f0, a0 - subpd xmm0, xmm8 - ; FNEG_R f2 - xorps xmm2, xmm15 - ; ISUB_R r6, r4 - sub r14, r12 - ; FADD_R f1, a1 - addpd xmm1, xmm9 - ; IXOR_R r0, r5 - xor r8, r13 - ; FADD_R f2, a1 - addpd xmm2, xmm9 - ; ISWAP_R r7, r5 - xchg r15, r13 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; IADD_RC r3, r6, 2977336568 - lea r11, [r11+r14-1317630728] - ; IMUL_R r2, r3 - imul r10, r11 - ; IADD_RC r1, r4, 894105694 - lea r9, [r9+r12+894105694] - ; IMUL_9C r7, 504293473 - lea r15, [r15+r15*8+504293473] - ; FSUB_R f1, a0 - subpd xmm1, xmm8 - ; IMUL_R r7, r1 - imul r15, r9 - ; IXOR_R r2, r4 - xor r10, r12 - ; IADD_RC r0, r1, 392362094 - lea r8, [r8+r9+392362094] - ; IDIV_C r4, 1645771433 - mov rax, 376097195048767223 - mul r12 - shr rdx, 25 - add r12, rdx - ; ISUB_R r4, r3 - sub r12, r11 - ; ISUB_M r7, L1[r4] - mov eax, r12d - and eax, 16376 - sub r15, qword ptr [rsi+rax] - ; IMUL_M r5, L1[r7] - mov eax, r15d - and eax, 16376 - imul r13, qword ptr [rsi+rax] - ; IROR_R r1, r7 - mov ecx, r15d - ror r9, cl - ; INEG_R r4 - neg r12 - ; IMUL_R r3, 1863959234 - imul r11, 1863959234 - ; IROR_R r4, 59 - ror r12, 59 - ; IMUL_M r1, L3[363256] - imul r9, qword ptr [rsi+363256] - ; ISTORE L2[r6], r7 - mov eax, r14d - and eax, 262136 - mov qword ptr [rsi+rax], r15 - ; ISTORE L1[r1], r5 - mov eax, r9d - and eax, 16376 - mov qword ptr [rsi+rax], r13 - ; FNEG_R f0 - xorps xmm0, xmm15 - ; FSQRT_R e2 - sqrtpd xmm6, xmm6 - ; FMUL_R e0, a3 - mulpd xmm4, xmm11 - ; FMUL_R e3, a2 - mulpd xmm7, xmm10 - ; IROR_R r5, r2 - mov ecx, r10d - ror r13, cl - ; IADD_R r0, r4 - add r8, r12 + ; IADD_R r2, r1 + add r10, r9