From 7c049cce8dbca9b4a8825eb197fc3638d1401ec8 Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Thu, 24 Jan 2019 21:49:39 +0100 Subject: [PATCH] Added store instructions --- tests/small-prog.asm | 1424 +++++++++++++++++++++--------------------- 1 file changed, 723 insertions(+), 701 deletions(-) diff --git a/tests/small-prog.asm b/tests/small-prog.asm index a91240e..e0464db 100644 --- a/tests/small-prog.asm +++ b/tests/small-prog.asm @@ -1,745 +1,767 @@ - ; ISUB_R r0, r4 - sub r8, r12 - ; IROR_R r5, 15 - ror r13, 15 - ; ISUB_M r6, L1[r5] + ; ISTORE L1[r6], r4 + mov eax, r14d + and eax, 16376 + mov qword ptr [rsi+rax], r12 + ; IROR_R r1, r2 + mov ecx, r10d + ror r9, cl + ; FPNEG_R f2 + xorps xmm2, xmm15 + ; FPSUB_R f1, a1 + subpd xmm1, xmm9 + ; FPMUL_M e3, L2[r5] + mov eax, r13d + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + mulpd xmm7, xmm12 + maxpd xmm7, xmm13 + ; IMUL_R r7, r1 + imul r15, r9 + ; IMUL_R r3, r6 + imul r11, r14 + ; IMUL_R r5, r4 + imul r13, r12 + ; IADD_R r2, r3 + add r10, r11 + ; IMUL_R r3, r4 + imul r11, r12 + ; ISDIV_C r7, 1531724965 + mov rax, 3232799797802813183 + imul r15 + xor eax, eax + sar rdx, 28 + sets al + add rdx, rax + add r15, rdx + ; IADD_R r7, r1 + add r15, r9 + ; IMUL_R r7, r3 + imul r15, r11 + ; ISUB_R r6, r7 + sub r14, r15 + ; ISTORE L1[r4], r6 + mov eax, r12d + and eax, 16376 + mov qword ptr [rsi+rax], r14 + ; IADD_M r4, L1[r6] + mov eax, r14d + and eax, 16376 + add r12, qword ptr [rsi+rax] + ; FPSWAP_R f0 + shufpd xmm0, xmm0, 1 + ; FPSUB_R f1, a0 + subpd xmm1, xmm8 + ; ISUB_R r2, r4 + sub r10, r12 + ; FPNEG_R f2 + xorps xmm2, xmm15 + ; IMUL_R r3, 1367232543 + imul r11, 1367232543 + ; IROL_R r1, r7 + mov ecx, r15d + rol r9, cl + ; IMUL_9C r2, 1164637590 + lea r10, [r10+r10*8+1164637590] + ; FPADD_M f0, L2[r3] + mov eax, r11d + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm0, xmm12 + ; FPMUL_R e1, a2 + mulpd xmm5, xmm10 + ; IMUL_M r0, L1[8240] + imul r8, qword ptr [rsi+8240] + ; IMULH_M r4, L2[186792] + mov rax, r12 + mul qword ptr [rsi+186792] + mov r12, rdx + ; IDIV_C r3, 973927932 + mov rax, r11 + shr rax, 2 + mov rcx, 5084318864305373573 + mul rcx + shr rdx, 26 + add r11, rdx + ; IROL_R r2, r5 + mov ecx, r13d + rol r10, cl + ; ISMULH_R r3, r0 + mov rax, r11 + imul r8 + mov r11, rdx + ; ISTORE L2[r3], r6 + mov eax, r11d + and eax, 262136 + mov qword ptr [rsi+rax], r14 + ; FPSQRT_R e1 + sqrtpd xmm5, xmm5 + ; ISTORE L2[r0], r3 + mov eax, r8d + and eax, 262136 + mov qword ptr [rsi+rax], r11 + ; FPDIV_R e2, a0 + divpd xmm6, xmm8 + maxpd xmm6, xmm13 + ; COND_R r2, lt(r2, 809935569) + xor ecx, ecx + cmp r10d, 809935569 + setl cl + add r10, rcx + ; IDIV_C r3, 3449361310 + mov rax, r11 + shr rax, 1 + mov rcx, 11484468484727153387 + mul rcx + shr rdx, 30 + add r11, rdx + ; FPSWAP_R f2 + shufpd xmm2, xmm2, 1 + ; IROL_R r6, 42 + rol r14, 42 + ; IMULH_M r1, L2[r2] + mov ecx, r10d + and ecx, 262136 + mov rax, r9 + mul qword ptr [rsi+rcx] + mov r9, rdx + ; FPADD_R f2, a2 + addpd xmm2, xmm10 + ; IROR_R r4, r7 + mov ecx, r15d + ror r12, cl + ; FPMUL_R e3, a0 + mulpd xmm7, xmm8 + ; ISTORE L2[r7], r4 + mov eax, r15d + and eax, 262136 + mov qword ptr [rsi+rax], r12 + ; IMUL_9C r7, -7511892 + lea r15, [r15+r15*8-7511892] + ; IROL_R r7, r5 + mov ecx, r13d + rol r15, cl + ; FPMUL_R e1, a2 + mulpd xmm5, xmm10 + ; FPSUB_R f1, a1 + subpd xmm1, xmm9 + ; FPSUB_R f3, a2 + subpd xmm3, xmm10 + ; IADD_RC r0, r0, 636102408 + lea r8, [r8+r8+636102408] + ; ISUB_R r4, r0 + sub r12, r8 + ; IADD_M r3, L1[r0] + mov eax, r8d + and eax, 16376 + add r11, qword ptr [rsi+rax] + ; FPADD_R f3, a3 + addpd xmm3, xmm11 + ; IADD_R r0, r2 + add r8, r10 + ; FPSUB_R f0, a3 + subpd xmm0, xmm11 + ; IADD_R r2, r1 + add r10, r9 + ; COND_R r4, ge(r7, 295314673) + xor ecx, ecx + cmp r15d, 295314673 + setge cl + add r12, rcx + ; IMUL_R r2, r5 + imul r10, r13 + ; ISMULH_R r4, -160066964 + mov rax, -160066964 + imul r12 + add r12, rdx + ; IMUL_R r2, r6 + imul r10, r14 + ; IMUL_9C r1, 526734458 + lea r9, [r9+r9*8+526734458] + ; IMUL_R r3, r5 + imul r11, r13 + ; FPSUB_M f0, L1[r6] + mov eax, r14d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm0, xmm12 + ; FPMUL_R e3, a2 + mulpd xmm7, xmm10 + ; IXOR_R r3, r1 + xor r11, r9 + ; ISTORE L2[r0], r0 + mov eax, r8d + and eax, 262136 + mov qword ptr [rsi+rax], r8 + ; FPSWAP_R f3 + shufpd xmm3, xmm3, 1 + ; ISUB_M r2, L1[14864] + sub r10, qword ptr [rsi+14864] + ; IDIV_C r6, 4274620060 + mov rax, r14 + shr rax, 2 + mov rcx, 4633637691899589411 + mul rcx + shr rdx, 28 + add r14, rdx + ; ISUB_R r7, r6 + sub r15, r14 + ; ISMULH_R r3, r0 + mov rax, r11 + imul r8 + mov r11, rdx + ; IADD_R r5, r3 + add r13, r11 + ; IROR_R r5, r3 + mov ecx, r11d + ror r13, cl + ; ISUB_R r2, r5 + sub r10, r13 + ; COND_R r7, sg(r5, 1569330334) + xor ecx, ecx + cmp r13d, 1569330334 + sets cl + add r15, rcx + ; IROR_R r1, r6 + mov ecx, r14d + ror r9, cl + ; FPADD_R f0, a1 + addpd xmm0, xmm9 + ; FPSQRT_R e3 + sqrtpd xmm7, xmm7 + ; FPSUB_R f2, a3 + subpd xmm2, xmm11 + ; ISMULH_M r2, L1[r3] + mov ecx, r11d + and ecx, 16376 + mov rax, r10 + imul qword ptr [rsi+rcx] + mov r10, rdx + ; IMUL_9C r5, -1590168006 + lea r13, [r13+r13*8-1590168006] + ; IMUL_9C r4, 1994845080 + lea r12, [r12+r12*8+1994845080] + ; IADD_R r1, r3 + add r9, r11 + ; IROL_R r4, r6 + mov ecx, r14d + rol r12, cl + ; IMUL_R r1, -1333414368 + imul r9, -1333414368 + ; ISTORE L1[r5], r0 mov eax, r13d and eax, 16376 - sub r14, qword ptr [rsi+rax] - ; IMUL_R r7, r6 - imul r15, r14 - ; FPADD_R f3, a1 - addpd xmm3, xmm9 - ; FPMUL_R e1, a3 - mulpd xmm5, xmm11 - ; IMUL_R r2, r4 - imul r10, r12 - ; IADD_RC r4, r5, 1789610138 - lea r12, [r12+r13+1789610138] - ; IADD_R r1, r4 - add r9, r12 - ; IADD_R r6, r0 - add r14, r8 - ; IXOR_R r7, r2 - xor r15, r10 - ; ISMULH_M r6, L1[6816] - mov rax, r14 - imul qword ptr [rsi+6816] - mov r14, rdx - ; ISUB_R r0, r4 - sub r8, r12 - ; IXOR_R r7, r2 - xor r15, r10 - ; INEG_R r4 - neg r12 + mov qword ptr [rsi+rax], r8 + ; IROL_R r5, r7 + mov ecx, r15d + rol r13, cl + ; ISUB_M r0, L1[r5] + mov eax, r13d + and eax, 16376 + sub r8, qword ptr [rsi+rax] + ; COND_R r6, sg(r5, 43404748) + xor ecx, ecx + cmp r13d, 43404748 + sets cl + add r14, rcx + ; IMULH_R r4, r5 + mov rax, r12 + mul r13 + mov r12, rdx + ; FPDIV_R e3, a3 + divpd xmm7, xmm11 + maxpd xmm7, xmm13 + ; INEG_R r6 + neg r14 + ; IROL_R r1, r2 + mov ecx, r10d + rol r9, cl + ; ISUB_M r3, L1[r7] + mov eax, r15d + and eax, 16376 + sub r11, qword ptr [rsi+rax] + ; FPSUB_R f2, a0 + subpd xmm2, xmm8 + ; IMUL_R r7, 1436360085 + imul r15, 1436360085 + ; ISMULH_M r7, L2[r3] + mov ecx, r11d + and ecx, 262136 + mov rax, r15 + imul qword ptr [rsi+rcx] + mov r15, rdx + ; IXOR_R r6, r1 + xor r14, r9 + ; FPSUB_M f0, L1[r2] + mov eax, r10d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm0, xmm12 + ; ISMULH_R r1, -1626920972 + mov rax, -1626920972 + imul r9 + add r9, rdx + ; IMUL_R r1, -1587440282 + imul r9, -1587440282 + ; FPADD_R f3, a3 + addpd xmm3, xmm11 + ; FPDIV_R e3, a0 + divpd xmm7, xmm8 + maxpd xmm7, xmm13 + ; FPSUB_R f2, a1 + subpd xmm2, xmm9 + ; FPMUL_R e2, a0 + mulpd xmm6, xmm8 + ; FPSQRT_R e2 + sqrtpd xmm6, xmm6 + ; FPMUL_R e3, a2 + mulpd xmm7, xmm10 + ; FPMUL_R e0, a3 + mulpd xmm4, xmm11 + ; FPMUL_R e1, a2 + mulpd xmm5, xmm10 + ; FPSUB_R f0, a2 + subpd xmm0, xmm10 + ; IMUL_R r6, r4 + imul r14, r12 + ; IMUL_R r6, r3 + imul r14, r11 + ; FPMUL_R e0, a1 + mulpd xmm4, xmm9 + ; IMUL_R r2, r6 + imul r10, r14 + ; IXOR_R r1, r0 + xor r9, r8 + ; IMUL_M r4, L1[r7] + mov eax, r15d + and eax, 16376 + imul r12, qword ptr [rsi+rax] + ; FPSUB_R f2, a0 + subpd xmm2, xmm8 ; IROL_R r3, r0 mov ecx, r8d rol r11, cl - ; IADD_RC r2, r5, -1667142135 - lea r10, [r10+r13-1667142135] - ; ISUB_R r6, r2 - sub r14, r10 - ; IDIV_C r3, 2650709570 - mov rax, 3736177069856446853 - mul r11 - shr rdx, 29 + ; IROR_R r2, 61 + ror r10, 61 + ; FPADD_R f2, a2 + addpd xmm2, xmm10 + ; COND_R r5, sg(r1, -1184956925) + xor ecx, ecx + cmp r9d, -1184956925 + sets cl + add r13, rcx + ; ISTORE L1[r2], r5 + mov eax, r10d + and eax, 16376 + mov qword ptr [rsi+rax], r13 + ; FPSWAP_R e1 + shufpd xmm5, xmm5, 1 + ; IADD_R r4, r5 + add r12, r13 + ; IADD_R r4, r3 + add r12, r11 + ; FPDIV_R e3, a1 + divpd xmm7, xmm9 + maxpd xmm7, xmm13 + ; IADD_RC r6, r5, 1890583833 + lea r14, [r14+r13+1890583833] + ; ISTORE L1[r3], r1 + mov eax, r11d + and eax, 16376 + mov qword ptr [rsi+rax], r9 + ; IADD_RC r3, r2, 1329347581 + lea r11, [r11+r10+1329347581] + ; FPMUL_R e3, a1 + mulpd xmm7, xmm9 + ; IMUL_9C r3, -676169110 + lea r11, [r11+r11*8-676169110] + ; COND_R r5, ns(r3, 531330698) + xor ecx, ecx + cmp r11d, 531330698 + setns cl + add r13, rcx + ; ISUB_R r7, r2 + sub r15, r10 + ; IADD_RC r7, r0, 974749728 + lea r15, [r15+r8+974749728] + ; IMULH_R r6, 1083698437 + mov eax, 1083698437 + mul r14 + add r14, rdx + ; FPNEG_R f2 + xorps xmm2, xmm15 + ; IMUL_R r6, r7 + imul r14, r15 + ; FPSQRT_R e3 + sqrtpd xmm7, xmm7 + ; IADD_RC r2, r2, 1044563079 + lea r10, [r10+r10+1044563079] + ; IADD_RC r4, r1, -2016187742 + lea r12, [r12+r9-2016187742] + ; FPDIV_R e0, a1 + divpd xmm4, xmm9 + maxpd xmm4, xmm13 + ; IMUL_R r3, r4 + imul r11, r12 + ; ISDIV_C r3, 1398200496 + mov rax, -4280649378971233915 + imul r11 + xor eax, eax + add rdx, r11 + sar rdx, 30 + sets al + add rdx, rax add r11, rdx - ; IMULH_R r3, r0 - mov rax, r11 - mul r8 - mov r11, rdx - ; FPSUB_R f0, a2 - subpd xmm0, xmm10 - ; FPADD_M f3, L2[r4] - mov eax, r12d - and eax, 262136 + ; COND_R r5, ge(r7, -596284511) + xor ecx, ecx + cmp r15d, -596284511 + setge cl + add r13, rcx + ; ISTORE L1[r0], r0 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r8 + ; ISMULH_M r6, L2[r7] + mov ecx, r15d + and ecx, 262136 + mov rax, r14 + imul qword ptr [rsi+rcx] + mov r14, rdx + ; IMUL_R r5, r4 + imul r13, r12 + ; IROR_R r7, r4 + mov ecx, r12d + ror r15, cl + ; FPADD_M f3, L1[r0] + mov eax, r8d + and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] addpd xmm3, xmm12 - ; FPMUL_M e1, L1[r5] - mov eax, r13d + ; IMUL_R r2, r5 + imul r10, r13 + ; IXOR_R r7, r1 + xor r15, r9 + ; ISTORE L1[r4], r3 + mov eax, r12d + and eax, 16376 + mov qword ptr [rsi+rax], r11 + ; INEG_R r5 + neg r13 + ; IADD_R r4, r7 + add r12, r15 + ; IMUL_R r6, r4 + imul r14, r12 + ; IMUL_M r5, L1[r2] + mov eax, r10d + and eax, 16376 + imul r13, qword ptr [rsi+rax] + ; FPMUL_M e1, L1[r7] + mov eax, r15d and eax, 16376 cvtdq2pd xmm12, qword ptr [rsi+rax] mulpd xmm5, xmm12 maxpd xmm5, xmm13 - ; IMUL_9C r7, -778247271 - lea r15, [r15+r15*8-778247271] - ; IXOR_R r4, 1846379510 - xor r12, 1846379510 - ; COND_M r6, of(L1[r1], -397786451) - xor ecx, ecx - mov eax, r9d - and eax, 16376 - cmp dword ptr [rsi+rax], -397786451 - seto cl - add r14, rcx - ; COND_R r6, of(r3, -1033710571) - xor ecx, ecx - cmp r11d, -1033710571 - seto cl - add r14, rcx - ; COND_M r6, sg(L1[r6], 1413230028) - xor ecx, ecx - mov eax, r14d - and eax, 16376 - cmp dword ptr [rsi+rax], 1413230028 - sets cl - add r14, rcx - ; IDIV_C r0, 2791108943 - mov rax, 1774119268816201525 - mul r8 - shr rdx, 28 - add r8, rdx - ; FPSUB_M f1, L1[r6] - mov eax, r14d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm1, xmm12 - ; FPSWAP_R f0 - shufpd xmm0, xmm0, 1 - ; IADD_RC r6, r5, -640194892 - lea r14, [r14+r13-640194892] - ; FPADD_M f0, L1[r2] - mov eax, r10d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - addpd xmm0, xmm12 - ; IMUL_R r6, r5 - imul r14, r13 - ; IROL_R r4, r1 - mov ecx, r9d - rol r12, cl - ; FPDIV_R e2, a0 - divpd xmm6, xmm8 - maxpd xmm6, xmm13 - ; IADD_RC r0, r2, -487084195 - lea r8, [r8+r10-487084195] - ; FPADD_R f0, a0 - addpd xmm0, xmm8 - ; IXOR_R r5, r3 - xor r13, r11 - ; IMUL_R r2, r4 - imul r10, r12 - ; FPMUL_R e0, a0 - mulpd xmm4, xmm8 - ; FPSUB_R f3, a3 - subpd xmm3, xmm11 - ; IMUL_M r4, L1[4856] - imul r12, qword ptr [rsi+4856] - ; IMUL_9C r2, 7951348 - lea r10, [r10+r10*8+7951348] - ; COND_R r3, ab(r7, 984532162) - xor ecx, ecx - cmp r15d, 984532162 - seta cl - add r11, rcx - ; IXOR_M r7, L1[r4] - mov eax, r12d - and eax, 16376 - xor r15, qword ptr [rsi+rax] - ; IMUL_R r4, 248971329 - imul r12, 248971329 - ; IXOR_R r3, r1 - xor r11, r9 - ; IMUL_R r3, 2098482639 - imul r11, 2098482639 - ; IXOR_R r6, r3 - xor r14, r11 - ; IXOR_R r5, r4 - xor r13, r12 - ; IADD_R r5, r4 - add r13, r12 - ; IMUL_9C r7, 66530302 - lea r15, [r15+r15*8+66530302] - ; IMULH_R r0, r5 - mov rax, r8 - mul r13 - mov r8, rdx - ; IMUL_R r2, r7 - imul r10, r15 - ; IMUL_R r1, 770985098 - imul r9, 770985098 - ; COND_R r7, be(r5, 58538265) - xor ecx, ecx - cmp r13d, 58538265 - setbe cl - add r15, rcx - ; IMUL_9C r3, 245704334 - lea r11, [r11+r11*8+245704334] - ; ISMULH_R r2, r4 - mov rax, r10 - imul r12 - mov r10, rdx - ; FPDIV_R e3, a3 - divpd xmm7, xmm11 - maxpd xmm7, xmm13 - ; IMULH_R r5, r2 - mov rax, r13 - mul r10 - mov r13, rdx - ; ISUB_M r7, L1[r5] - mov eax, r13d - and eax, 16376 - sub r15, qword ptr [rsi+rax] - ; FPMUL_R e3, a3 - mulpd xmm7, xmm11 - ; IMUL_R r3, r4 - imul r11, r12 - ; FPSWAP_R f1 - shufpd xmm1, xmm1, 1 - ; IMULH_R r1, 633797287 - mov eax, 633797287 - mul r9 - add r9, rdx - ; IADD_R r4, r3 - add r12, r11 ; IROR_R r2, r7 mov ecx, r15d ror r10, cl - ; FPSUB_R f0, a2 - subpd xmm0, xmm10 - ; FPSUB_R f2, a2 - subpd xmm2, xmm10 - ; FPMUL_R e0, a2 - mulpd xmm4, xmm10 - ; IMUL_M r4, L1[r3] + ; COND_M r5, ab(L1[r3], -1085209087) + xor ecx, ecx mov eax, r11d and eax, 16376 - imul r12, qword ptr [rsi+rax] - ; IMUL_9C r1, -1901091890 - lea r9, [r9+r9*8-1901091890] - ; IROR_R r2, r6 - mov ecx, r14d - ror r10, cl - ; IMULH_R r5, r3 - mov rax, r13 - mul r11 - mov r13, rdx - ; FPSUB_M f1, L1[r7] - mov eax, r15d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm1, xmm12 - ; IMUL_M r2, L1[r1] - mov eax, r9d - and eax, 16376 - imul r10, qword ptr [rsi+rax] - ; IMUL_R r6, r0 - imul r14, r8 - ; IADD_R r7, r6 - add r15, r14 - ; FPSUB_R f2, a3 - subpd xmm2, xmm11 - ; COND_R r5, no(r2, -1589295370) - xor ecx, ecx - cmp r10d, -1589295370 - setno cl + cmp dword ptr [rsi+rax], -1085209087 + seta cl add r13, rcx - ; IMUL_9C r7, 420978486 - lea r15, [r15+r15*8+420978486] - ; IROL_R r4, r2 - mov ecx, r10d - rol r12, cl - ; IMUL_9C r0, -1084530831 - lea r8, [r8+r8*8-1084530831] - ; FPNEG_R f3 - xorps xmm3, xmm15 - ; IROR_R r6, r4 + ; FPMUL_R e3, a3 + mulpd xmm7, xmm11 + ; IMUL_9C r6, 898607426 + lea r14, [r14+r14*8+898607426] + ; IMUL_9C r1, -1816383392 + lea r9, [r9+r9*8-1816383392] + ; IROR_R r3, r4 mov ecx, r12d - ror r14, cl - ; IROL_R r4, r5 - mov ecx, r13d - rol r12, cl + ror r11, cl ; FPSUB_R f2, a3 subpd xmm2, xmm11 - ; FPMUL_R e2, a2 - mulpd xmm6, xmm10 - ; ISMULH_M r6, L2[98600] - mov rax, r14 - imul qword ptr [rsi+98600] - mov r14, rdx - ; IXOR_R r0, r6 - xor r8, r14 - ; FPSWAP_R f1 - shufpd xmm1, xmm1, 1 - ; FPADD_R f0, a1 - addpd xmm0, xmm9 - ; COND_R r1, ab(r3, -991705199) - xor ecx, ecx - cmp r11d, -991705199 - seta cl - add r9, rcx - ; IMULH_M r4, L2[r2] - mov ecx, r10d - and ecx, 262136 - mov rax, r12 - mul qword ptr [rsi+rcx] - mov r12, rdx - ; IROR_R r2, r6 - mov ecx, r14d - ror r10, cl - ; FPDIV_R e0, a1 - divpd xmm4, xmm9 - maxpd xmm4, xmm13 - ; IMUL_R r1, r7 - imul r9, r15 - ; COND_R r6, ns(r2, 939392855) - xor ecx, ecx - cmp r10d, 939392855 - setns cl - add r14, rcx - ; FPMUL_R e3, a1 - mulpd xmm7, xmm9 - ; COND_R r2, ab(r2, -499266314) - xor ecx, ecx - cmp r10d, -499266314 - seta cl - add r10, rcx - ; COND_M r7, lt(L1[r1], -1624420482) - xor ecx, ecx - mov eax, r9d - and eax, 16376 - cmp dword ptr [rsi+rax], -1624420482 - setl cl - add r15, rcx - ; COND_R r1, lt(r1, 1525413977) - xor ecx, ecx - cmp r9d, 1525413977 - setl cl - add r9, rcx - ; IMUL_R r4, r5 - imul r12, r13 - ; IMUL_R r4, r2 - imul r12, r10 - ; FPSQRT_R e1 - sqrtpd xmm5, xmm5 - ; ISUB_R r2, r6 - sub r10, r14 - ; FPDIV_R e1, a0 - divpd xmm5, xmm8 - maxpd xmm5, xmm13 - ; FPMUL_R e2, a3 - mulpd xmm6, xmm11 - ; IADD_R r6, 671627590 - add r14, 671627590 - ; COND_M r6, sg(L1[r4], -780452820) - xor ecx, ecx - mov eax, r12d - and eax, 16376 - cmp dword ptr [rsi+rax], -780452820 - sets cl - add r14, rcx - ; IMULH_R r4, r7 - mov rax, r12 - mul r15 - mov r12, rdx - ; FPMUL_R e3, a1 - mulpd xmm7, xmm9 - ; FPADD_R f0, a0 - addpd xmm0, xmm8 - ; FPMUL_R e0, a1 - mulpd xmm4, xmm9 - ; IMUL_R r7, r3 - imul r15, r11 - ; IROL_R r0, r7 - mov ecx, r15d - rol r8, cl - ; IMUL_R r1, r7 - imul r9, r15 - ; COND_R r0, no(r7, 449007464) - xor ecx, ecx - cmp r15d, 449007464 - setno cl - add r8, rcx - ; ISMULH_M r6, L2[134288] - mov rax, r14 - imul qword ptr [rsi+134288] - mov r14, rdx - ; IMULH_R r5, r2 - mov rax, r13 - mul r10 - mov r13, rdx - ; IMULH_R r7, r4 - mov rax, r15 - mul r12 - mov r15, rdx + ; FPSQRT_R e3 + sqrtpd xmm7, xmm7 ; FPDIV_R e3, a0 divpd xmm7, xmm8 maxpd xmm7, xmm13 - ; IXOR_R r3, r4 - xor r11, r12 - ; IDIV_C r1, 72349044 - mov rax, 8555331009525020641 - mul r9 - shr rdx, 25 - add r9, rdx - ; IADD_R r5, r4 - add r13, r12 - ; IROR_R r2, r4 - mov ecx, r12d - ror r10, cl - ; FPSUB_M f1, L1[r2] + ; FPMUL_M e3, L2[r2] mov eax, r10d - and eax, 16376 + and eax, 262136 cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm1, xmm12 - ; FPMUL_R e2, a3 - mulpd xmm6, xmm11 - ; IADD_R r5, r6 - add r13, r14 - ; IXOR_M r1, L1[r4] - mov eax, r12d - and eax, 16376 - xor r9, qword ptr [rsi+rax] - ; ISUB_R r2, -1544880589 - sub r10, -1544880589 - ; FPNEG_R f0 - xorps xmm0, xmm15 - ; IROR_R r1, r6 - mov ecx, r14d - ror r9, cl - ; IMUL_R r6, r4 - imul r14, r12 - ; IMULH_M r4, L2[r1] - mov ecx, r9d - and ecx, 262136 - mov rax, r12 - mul qword ptr [rsi+rcx] - mov r12, rdx - ; IXOR_R r3, r0 - xor r11, r8 - ; FPSWAP_R f0 - shufpd xmm0, xmm0, 1 - ; FPSWAP_R f0 - shufpd xmm0, xmm0, 1 - ; COND_R r0, ns(r2, -308295242) - xor ecx, ecx - cmp r10d, -308295242 - setns cl - add r8, rcx - ; IMUL_9C r1, 591587965 - lea r9, [r9+r9*8+591587965] - ; FPADD_R f3, a1 - addpd xmm3, xmm9 - ; IMUL_R r5, r4 - imul r13, r12 - ; IMUL_M r7, L1[r0] - mov eax, r8d - and eax, 16376 - imul r15, qword ptr [rsi+rax] - ; COND_R r6, sg(r5, -1119525789) - xor ecx, ecx - cmp r13d, -1119525789 - sets cl - add r14, rcx - ; IMUL_M r0, L1[r1] - mov eax, r9d - and eax, 16376 - imul r8, qword ptr [rsi+rax] - ; IADD_M r3, L2[r7] - mov eax, r15d - and eax, 262136 - add r11, qword ptr [rsi+rax] - ; IADD_R r0, r1 - add r8, r9 - ; FPSUB_R f2, a1 - subpd xmm2, xmm9 - ; IXOR_M r0, L2[r7] - mov eax, r15d - and eax, 262136 - xor r8, qword ptr [rsi+rax] - ; COND_R r6, be(r6, 1481939391) - xor ecx, ecx - cmp r14d, 1481939391 - setbe cl - add r14, rcx - ; FPADD_R f0, a1 - addpd xmm0, xmm9 - ; IXOR_R r3, r2 - xor r11, r10 - ; FPSUB_R f0, a1 - subpd xmm0, xmm9 - ; IXOR_R r7, r3 - xor r15, r11 - ; IXOR_M r6, L1[r4] - mov eax, r12d - and eax, 16376 - xor r14, qword ptr [rsi+rax] - ; IMULH_R r2, r7 - mov rax, r10 - mul r15 - mov r10, rdx - ; ISUB_R r5, r1 - sub r13, r9 - ; FPMUL_R e1, a3 - mulpd xmm5, xmm11 - ; FPADD_R f3, a2 - addpd xmm3, xmm10 - ; FPSWAP_R f1 - shufpd xmm1, xmm1, 1 - ; FPSUB_R f1, a3 - subpd xmm1, xmm11 - ; FPSUB_M f0, L1[r4] - mov eax, r12d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm0, xmm12 - ; FPMUL_R e1, a2 - mulpd xmm5, xmm10 - ; FPADD_R f3, a0 - addpd xmm3, xmm8 - ; IROL_R r2, r4 - mov ecx, r12d - rol r10, cl - ; COND_M r7, ab(L2[r7], -2012390318) - xor ecx, ecx - mov eax, r15d - and eax, 262136 - cmp dword ptr [rsi+rax], -2012390318 - seta cl - add r15, rcx - ; IMUL_9C r4, -38079585 - lea r12, [r12+r12*8-38079585] - ; IXOR_R r0, r1 - xor r8, r9 - ; FPMUL_R e1, a3 - mulpd xmm5, xmm11 - ; FPMUL_R e1, a1 - mulpd xmm5, xmm9 - ; FPSUB_R f1, a2 - subpd xmm1, xmm10 - ; IMUL_9C r4, -847745598 - lea r12, [r12+r12*8-847745598] - ; FPSQRT_R e1 - sqrtpd xmm5, xmm5 - ; IADD_R r7, r6 - add r15, r14 - ; FPSUB_R f3, a0 - subpd xmm3, xmm8 - ; FPSUB_R f1, a1 - subpd xmm1, xmm9 - ; IADD_R r7, r6 - add r15, r14 - ; IROL_R r2, r5 - mov ecx, r13d - rol r10, cl - ; IADD_RC r4, r2, 1338806320 - lea r12, [r12+r10+1338806320] - ; FPSQRT_R e3 - sqrtpd xmm7, xmm7 - ; IMUL_R r5, r0 - imul r13, r8 + mulpd xmm7, xmm12 + maxpd xmm7, xmm13 ; FPADD_R f2, a1 addpd xmm2, xmm9 - ; INEG_R r6 - neg r14 - ; IXOR_M r6, L1[r2] + ; IROL_R r3, r6 + mov ecx, r14d + rol r11, cl + ; FPSUB_R f3, a0 + subpd xmm3, xmm8 + ; IMUL_M r5, L1[r2] mov eax, r10d and eax, 16376 - xor r14, qword ptr [rsi+rax] - ; FPSUB_R f2, a2 - subpd xmm2, xmm10 - ; FPADD_R f2, a2 - addpd xmm2, xmm10 - ; FPADD_R f1, a2 - addpd xmm1, xmm10 - ; COND_R r3, be(r4, 174667458) + imul r13, qword ptr [rsi+rax] + ; COND_R r7, ge(r4, 1959614002) xor ecx, ecx - cmp r12d, 174667458 - setbe cl - add r11, rcx - ; INEG_R r6 - neg r14 - ; IXOR_R r6, r3 - xor r14, r11 - ; COND_M r5, sg(L1[r0], -864345921) - xor ecx, ecx - mov eax, r8d - and eax, 16376 - cmp dword ptr [rsi+rax], -864345921 - sets cl - add r13, rcx - ; IROL_R r7, r3 - mov ecx, r11d - rol r15, cl - ; FPSUB_R f1, a2 - subpd xmm1, xmm10 - ; IADD_M r1, L1[r0] - mov eax, r8d - and eax, 16376 - add r9, qword ptr [rsi+rax] - ; IMULH_R r1, r3 - mov rax, r9 - mul r11 - mov r9, rdx - ; IMUL_R r0, -1489192296 - imul r8, -1489192296 - ; FPMUL_R e0, a2 - mulpd xmm4, xmm10 - ; COND_R r1, ge(r1, -1358904097) - xor ecx, ecx - cmp r9d, -1358904097 + cmp r12d, 1959614002 setge cl - add r9, rcx - ; FPSUB_R f1, a1 - subpd xmm1, xmm9 - ; FPADD_R f2, a3 - addpd xmm2, xmm11 - ; IROR_R r4, r7 - mov ecx, r15d - ror r12, cl - ; ISDIV_C r1, -1368098113 - mov rax, -7238896260565957085 - imul r9 - xor eax, eax - sar rdx, 29 - sets al - add rdx, rax - add r9, rdx - ; IADD_M r4, L1[r1] - mov eax, r9d - and eax, 16376 - add r12, qword ptr [rsi+rax] - ; IMUL_R r0, -1011605520 - imul r8, -1011605520 - ; FPSUB_R f3, a1 - subpd xmm3, xmm9 - ; IADD_RC r1, r4, 272540736 - lea r9, [r9+r12+272540736] - ; FPSWAP_R f2 - shufpd xmm2, xmm2, 1 + add r15, rcx + ; IADD_RC r5, r2, 1887914017 + lea r13, [r13+r10+1887914017] + ; FSTORE L2[r4], f0 + mov eax, r12d + and eax, 262128 + movapd xmmword ptr [rsi+rax], xmm0 + ; IMUL_R r6, r4 + imul r14, r12 + ; IMULH_M r2, L1[r6] + mov ecx, r14d + and ecx, 16376 + mov rax, r10 + mul qword ptr [rsi+rcx] + mov r10, rdx + ; IADD_RC r0, r6, 723017482 + lea r8, [r8+r14+723017482] + ; ISUB_R r6, r5 + sub r14, r13 ; IROR_R r3, r2 mov ecx, r10d ror r11, cl - ; IMUL_R r3, 2085105439 - imul r11, 2085105439 - ; FPMUL_R e0, a0 - mulpd xmm4, xmm8 - ; IMUL_9C r6, -483723153 - lea r14, [r14+r14*8-483723153] - ; FPSUB_M f3, L1[r7] - mov eax, r15d - and eax, 16376 - cvtdq2pd xmm12, qword ptr [rsi+rax] - subpd xmm3, xmm12 - ; IMUL_R r3, r2 - imul r11, r10 - ; ISMULH_R r7, r1 - mov rax, r15 - imul r9 - mov r15, rdx - ; COND_R r1, of(r7, 778804236) + ; IADD_R r6, r2 + add r14, r10 + ; FPSQRT_R e2 + sqrtpd xmm6, xmm6 + ; ISDIV_C r3, -1434854386 + mov rax, -3451054131664006427 + imul r11 + xor eax, eax + sar rdx, 28 + sets al + add rdx, rax + add r11, rdx + ; IROL_R r7, r5 + mov ecx, r13d + rol r15, cl + ; FPSQRT_R e3 + sqrtpd xmm7, xmm7 + ; IMUL_R r4, r7 + imul r12, r15 + ; FPSUB_R f2, a2 + subpd xmm2, xmm10 + ; FPSUB_R f2, a1 + subpd xmm2, xmm9 + ; IMUL_9C r0, -43443857 + lea r8, [r8+r8*8-43443857] + ; COND_M r2, of(L2[r7], -1059200178) xor ecx, ecx - cmp r15d, 778804236 + mov eax, r15d + and eax, 262136 + cmp dword ptr [rsi+rax], -1059200178 seto cl - add r9, rcx - ; FPSUB_R f3, a2 - subpd xmm3, xmm10 - ; IROL_R r5, r7 - mov ecx, r15d - rol r13, cl - ; FPADD_R f1, a0 - addpd xmm1, xmm8 - ; FPADD_R f2, a3 - addpd xmm2, xmm11 - ; IMUL_R r6, r0 - imul r14, r8 - ; ISUB_M r2, L2[r4] - mov eax, r12d - and eax, 262136 - sub r10, qword ptr [rsi+rax] - ; IXOR_R r0, r6 - xor r8, r14 - ; INEG_R r6 - neg r14 - ; FPMUL_R e2, a3 - mulpd xmm6, xmm11 - ; IADD_RC r4, r6, -1312075035 - lea r12, [r12+r14-1312075035] - ; IMUL_R r1, r5 - imul r9, r13 - ; IXOR_M r7, L2[r6] - mov eax, r14d - and eax, 262136 - xor r15, qword ptr [rsi+rax] - ; IROR_R r2, 23 - ror r10, 23 + add r10, rcx + ; IMUL_M r5, L1[r2] + mov eax, r10d + and eax, 16376 + imul r13, qword ptr [rsi+rax] ; FPMUL_R e0, a2 mulpd xmm4, xmm10 - ; ISMULH_M r5, L1[r2] + ; IXOR_R r1, r3 + xor r9, r11 + ; ISUB_R r5, r1 + sub r13, r9 + ; FPSQRT_R e1 + sqrtpd xmm5, xmm5 + ; IDIV_C r3, 531478046 + mov rax, 2329240217168594533 + mul r11 + shr rdx, 26 + add r11, rdx + ; IMULH_M r7, L2[r2] mov ecx, r10d - and ecx, 16376 - mov rax, r13 - imul qword ptr [rsi+rcx] - mov r13, rdx - ; ISUB_M r7, L1[r4] - mov eax, r12d - and eax, 16376 - sub r15, qword ptr [rsi+rax] - ; COND_R r0, sg(r2, 1538841628) - xor ecx, ecx - cmp r10d, 1538841628 - sets cl - add r8, rcx - ; IMUL_R r6, r2 - imul r14, r10 - ; ISUB_R r0, r1 - sub r8, r9 - ; IMUL_R r5, r7 - imul r13, r15 - ; IADD_RC r1, r0, 516706834 - lea r9, [r9+r8+516706834] - ; INEG_R r5 - neg r13 - ; FPSQRT_R e3 - sqrtpd xmm7, xmm7 - ; IADD_RC r5, r4, -1679394922 - lea r13, [r13+r12-1679394922] - ; FPSUB_R f1, a1 - subpd xmm1, xmm9 - ; IMUL_R r0, r2 - imul r8, r10 - ; ISUB_R r3, r2 - sub r11, r10 - ; FPDIV_R e0, a3 - divpd xmm4, xmm11 - maxpd xmm4, xmm13 - ; ISUB_R r1, r5 - sub r9, r13 - ; COND_M r2, be(L2[r2], 1840094725) - xor ecx, ecx - mov eax, r10d - and eax, 262136 - cmp dword ptr [rsi+rax], 1840094725 - setbe cl - add r10, rcx - ; IMUL_M r6, L1[r7] + and ecx, 262136 + mov rax, r15 + mul qword ptr [rsi+rcx] + mov r15, rdx + ; IMUL_9C r1, -1546338561 + lea r9, [r9+r9*8-1546338561] + ; FSTORE L2[r6], e2 + mov eax, r14d + and eax, 262128 + movapd xmmword ptr [rsi+rax], xmm6 + ; IROR_R r6, r7 + mov ecx, r15d + ror r14, cl + ; FPMUL_R e1, a1 + mulpd xmm5, xmm9 + ; FPMUL_R e3, a3 + mulpd xmm7, xmm11 + ; IROR_R r2, r3 + mov ecx, r11d + ror r10, cl + ; FPADD_R f1, a1 + addpd xmm1, xmm9 + ; IXOR_R r2, r4 + xor r10, r12 + ; FPNEG_R f0 + xorps xmm0, xmm15 + ; FPDIV_R e2, a2 + divpd xmm6, xmm10 + maxpd xmm6, xmm13 + ; IXOR_M r6, L1[r7] mov eax, r15d and eax, 16376 - imul r14, qword ptr [rsi+rax] - ; IMULH_M r6, L1[r5] - mov ecx, r13d - and ecx, 16376 - mov rax, r14 - mul qword ptr [rsi+rcx] - mov r14, rdx - ; IMUL_9C r7, -1048659408 - lea r15, [r15+r15*8-1048659408] - ; IMUL_R r6, r3 - imul r14, r11 - ; FPADD_R f3, a0 - addpd xmm3, xmm8 - ; IMULH_R r0, r3 - mov rax, r8 + xor r14, qword ptr [rsi+rax] + ; FPSWAP_R f1 + shufpd xmm1, xmm1, 1 + ; FSTORE L1[r7], e0 + mov eax, r15d + and eax, 16368 + movapd xmmword ptr [rsi+rax], xmm4 + ; FPADD_M f1, L1[r2] + mov eax, r10d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm1, xmm12 + ; IXOR_R r2, r0 + xor r10, r8 + ; COND_M r7, no(L1[r7], 995954598) + xor ecx, ecx + mov eax, r15d + and eax, 16376 + cmp dword ptr [rsi+rax], 995954598 + setno cl + add r15, rcx + ; ISTORE L1[r0], r1 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r9 + ; IADD_R r4, r0 + add r12, r8 + ; FPSUB_R f2, a3 + subpd xmm2, xmm11 + ; FPNEG_R f1 + xorps xmm1, xmm15 + ; FPSUB_R f0, a0 + subpd xmm0, xmm8 + ; FPADD_R f2, a0 + addpd xmm2, xmm8 + ; COND_R r7, ns(r2, -772621280) + xor ecx, ecx + cmp r10d, -772621280 + setns cl + add r15, rcx + ; IMULH_R r3, -531436276 + mov eax, -531436276 mul r11 - mov r8, rdx - ; FPSWAP_R f0 - shufpd xmm0, xmm0, 1 - ; FPSQRT_R e3 - sqrtpd xmm7, xmm7 - ; IMULH_R r2, r0 - mov rax, r10 - mul r8 - mov r10, rdx - ; FPDIV_R e1, a1 - divpd xmm5, xmm9 + add r11, rdx + ; COND_R r0, lt(r4, -1228919974) + xor ecx, ecx + cmp r12d, -1228919974 + setl cl + add r8, rcx + ; ISTORE L1[r0], r0 + mov eax, r8d + and eax, 16376 + mov qword ptr [rsi+rax], r8 + ; IROR_R r4, 25 + ror r12, 25 + ; FPMUL_M e3, L1[r0] + mov eax, r8d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + mulpd xmm7, xmm12 + maxpd xmm7, xmm13 + ; FPSUB_R f3, a2 + subpd xmm3, xmm10 + ; IXOR_R r7, r0 + xor r15, r8 + ; IROL_R r6, r2 + mov ecx, r10d + rol r14, cl + ; FPMUL_R e1, a2 + mulpd xmm5, xmm10 + ; FPSQRT_R e0 + sqrtpd xmm4, xmm4 + ; IMUL_M r3, L1[r4] + mov eax, r12d + and eax, 16376 + imul r11, qword ptr [rsi+rax] + ; FPADD_R f2, a2 + addpd xmm2, xmm10 + ; ISMULH_R r6, r2 + mov rax, r14 + imul r10 + mov r14, rdx + ; FPMUL_M e3, L1[r5] + mov eax, r13d + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + mulpd xmm7, xmm12 + maxpd xmm7, xmm13 + ; ISUB_R r3, r5 + sub r11, r13 + ; IADD_R r2, r3 + add r10, r11 + ; COND_R r3, of(r6, -566248014) + xor ecx, ecx + cmp r14d, -566248014 + seto cl + add r11, rcx + ; ISTORE L1[r3], r2 + mov eax, r11d + and eax, 16376 + mov qword ptr [rsi+rax], r10 + ; FPADD_R f1, a0 + addpd xmm1, xmm8 + ; IXOR_M r2, L2[r1] + mov eax, r9d + and eax, 262136 + xor r10, qword ptr [rsi+rax] + ; FPSUB_R f2, a2 + subpd xmm2, xmm10 + ; FPADD_R f1, a1 + addpd xmm1, xmm9 + ; IMUL_M r5, L1[6488] + imul r13, qword ptr [rsi+6488] + ; IROR_R r0, 50 + ror r8, 50 + ; IMUL_9C r7, 1313192705 + lea r15, [r15+r15*8+1313192705] + ; FPADD_R f0, a1 + addpd xmm0, xmm9 + ; IMUL_9C r0, 611229050 + lea r8, [r8+r8*8+611229050] + ; FPADD_R f0, a0 + addpd xmm0, xmm8 + ; FPMUL_R e2, a2 + mulpd xmm6, xmm10 + ; IMUL_R r6, r5 + imul r14, r13 + ; IADD_M r3, L1[r5] + mov eax, r13d + and eax, 16376 + add r11, qword ptr [rsi+rax] + ; COND_M r1, lt(L1[r7], -248613240) + xor ecx, ecx + mov eax, r15d + and eax, 16376 + cmp dword ptr [rsi+rax], -248613240 + setl cl + add r9, rcx + ; ISUB_R r2, r4 + sub r10, r12 + ; FPADD_R f3, a3 + addpd xmm3, xmm11 + ; FSTORE L1[r0], e3 + mov eax, r8d + and eax, 16368 + movapd xmmword ptr [rsi+rax], xmm7 + ; IMUL_R r6, r0 + imul r14, r8 + ; FPADD_R f2, a2 + addpd xmm2, xmm10 + ; FPDIV_R e1, a2 + divpd xmm5, xmm10 maxpd xmm5, xmm13