diff --git a/doc/isa.md b/doc/isa.md
index cedece9..d46b16e 100644
--- a/doc/isa.md
+++ b/doc/isa.md
@@ -83,10 +83,10 @@ The `B.LOC.L` flag determines the B operand. It can be either a register or imme
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|----|--------|----|------|----|---|
-|0|register|register|register|register|register|
+|0|register|`imm8`|`imm32`|register|register|
|1|`imm32`|register|register|register|register|
|2|`imm32`|`imm8`|register|register|register|
-|3|`imm32`|`imm8`|`imm32`|register|register|
+|3|`imm32`|register|register|register|register|
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.
diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp
index 9389634..efa0818 100644
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
//#define TRACE
-//#define MAGIC_DIVISION
+#define MAGIC_DIVISION
#include "AssemblyGeneratorX86.hpp"
#include "Pcg32.hpp"
#include "common.hpp"
@@ -64,108 +64,61 @@ namespace RandomX {
(this->*generator)(instr, i);
}
- void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
+ void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
- switch (instr.loca & 3)
- {
- case 0:
- case 1:
- case 2:
- asmCode << "\tcall rx_read_l1" << std::endl;
- asmCode << "rx_body_" << i << ":" << std::endl;
- if ((instr.loca & 192) == 0)
- asmCode << "\txor " << regMx << ", rcx" << std::endl;
- asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
- break;
- default: //3
- asmCode << "\tcall rx_read_l2" << std::endl;
- asmCode << "rx_body_" << i << ":" << std::endl;
- if ((instr.loca & 192) == 0)
- asmCode << "\txor " << regMx << ", rcx" << std::endl;
- asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
- break;
+ if (instr.loca & 3) {
+ asmCode << "\tcall rx_read_l1" << std::endl;
+ asmCode << "rx_body_" << i << ":" << std::endl;
+ if ((instr.loca & 192) == 0)
+ asmCode << "\txor " << regMx << ", rcx" << std::endl;
+ asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
}
+ else {
+ asmCode << "\tcall rx_read_l2" << std::endl;
+ asmCode << "rx_body_" << i << ":" << std::endl;
+ if ((instr.loca & 192) == 0)
+ asmCode << "\txor " << regMx << ", rcx" << std::endl;
+ asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
+ }
+ }
+
+ void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
+ gena(instr, i);
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
}
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
- asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
- asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
- asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
- asmCode << "\tjnz short rx_body_" << i << std::endl;
- switch (instr.loca & 3)
- {
- case 0:
- case 1:
- case 2:
- asmCode << "\tcall rx_read_l1" << std::endl;
- asmCode << "rx_body_" << i << ":" << std::endl;
- if((instr.loca & 192) == 0)
- asmCode << "\txor " << regMx << ", rcx" << std::endl;
- asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
- break;
- default: //3
- asmCode << "\tcall rx_read_l2" << std::endl;
- asmCode << "rx_body_" << i << ":" << std::endl;
- if ((instr.loca & 192) == 0)
- asmCode << "\txor " << regMx << ", rcx" << std::endl;
- asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
- break;
- }
+ gena(instr, i);
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
}
- void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
- switch (instr.locb & 7)
- {
- case 0:
- case 1:
- case 2:
- case 3:
+ void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
+ if (instr.locb & 1) {
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
- return;
- default:
+ } else {
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
- return;
}
}
- void AssemblyGeneratorX86::genbr1(Instruction& instr) {
- switch (instr.locb & 7)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
+ void AssemblyGeneratorX86::genbia(Instruction& instr) {
+ if (instr.locb & 3) {
asmCode << regR[instr.regb % RegistersCount] << std::endl;
- return;
- default:
+ } else {
asmCode << instr.imm32 << std::endl;;
- return;
}
}
- void AssemblyGeneratorX86::genbr132(Instruction& instr) {
- switch (instr.locb & 7)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
+ void AssemblyGeneratorX86::genbia32(Instruction& instr) {
+ if (instr.locb & 3) {
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
- return;
- default:
+ }
+ else {
asmCode << instr.imm32 << std::endl;;
- return;
}
}
@@ -241,28 +194,28 @@ namespace RandomX {
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tadd rax, ";
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tadd eax, ";
- genbr132(instr);
+ genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tsub rax, ";
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tsub eax, ";
- genbr132(instr);
+ genbia32(instr);
gencr(instr);
}
@@ -272,14 +225,14 @@ namespace RandomX {
if ((instr.locb & 7) >= 6) {
asmCode << "rax, ";
}
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tmov rcx, ";
- genbr1(instr);
+ genbia(instr);
asmCode << "\tmul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr);
@@ -289,7 +242,7 @@ namespace RandomX {
genar(instr, i);
asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, ";
- genbr132(instr);
+ genbia32(instr);
asmCode << "\timul rax, rcx" << std::endl;
gencr(instr);
}
@@ -310,7 +263,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tmov rcx, ";
- genbr1(instr);
+ genbia(instr);
asmCode << "\timul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr);
@@ -318,7 +271,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
genar(instr, i);
- if ((instr.locb & 7) >= 6) {
+ if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
if (instr.imm32 != 0) {
uint32_t divisor = instr.imm32;
@@ -373,8 +326,8 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr, i);
+ if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
- if ((instr.locb & 7) >= 6) {
int64_t divisor = instr.imm32;
asmCode << "\t; magic divide by " << divisor << std::endl;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
@@ -394,9 +347,10 @@ namespace RandomX {
asmCode << "\tadd rax, rcx" << std::endl;
asmCode << "\tsar rax, " << shift << std::endl;
}
- if(negative)
+ if (negative)
asmCode << "\tneg rax" << std::endl;
- } else if(divisor != 0) {
+ }
+ else if (divisor != 0) {
magics_info mi = compute_signed_magic_info(divisor);
if ((divisor >= 0) != (mi.multiplier >= 0))
asmCode << "\tmov rcx, rax" << std::endl;
@@ -422,25 +376,29 @@ namespace RandomX {
asmCode << "\tsets dl" << std::endl;
asmCode << "\tadd rax, rdx" << std::endl;
}
+#else
+ asmCode << "\tmov edx, " << instr.imm32 << std::endl;
+#endif
}
else {
-#endif
- asmCode << "\tmov edx, ";
- genbr132(instr);
- asmCode << "\tcmp edx, -1" << std::endl;
- asmCode << "\tjne short safe_idiv_" << i << std::endl;
- asmCode << "\tneg rax" << std::endl;
- asmCode << "\tjmp short result_idiv_" << i << std::endl;
- asmCode << "safe_idiv_" << i << ":" << std::endl;
- asmCode << "\tmov ecx, 1" << std::endl;
- asmCode << "\ttest edx, edx" << std::endl;
- asmCode << "\tcmovne ecx, edx" << std::endl;
- asmCode << "\tmovsxd rcx, ecx" << std::endl;
- asmCode << "\tcqo" << std::endl;
- asmCode << "\tidiv rcx" << std::endl;
- asmCode << "result_idiv_" << i << ":" << std::endl;
-#ifdef MAGIC_DIVISION
+ asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
+#ifndef MAGIC_DIVISION
}
+#endif
+ asmCode << "\tcmp edx, -1" << std::endl;
+ asmCode << "\tjne short body_idiv_" << i << std::endl;
+ asmCode << "\tneg rax" << std::endl;
+ asmCode << "\tjmp short result_idiv_" << i << std::endl;
+ asmCode << "body_idiv_" << i << ":" << std::endl;
+ asmCode << "\tmov ecx, 1" << std::endl;
+ asmCode << "\ttest edx, edx" << std::endl;
+ asmCode << "\tcmovne ecx, edx" << std::endl;
+ asmCode << "\tmovsxd rcx, ecx" << std::endl;
+ asmCode << "\tcqo" << std::endl;
+ asmCode << "\tidiv rcx" << std::endl;
+ asmCode << "result_idiv_" << i << ":" << std::endl;
+#ifdef MAGIC_DIVISION
+ }
#endif
gencr(instr);
}
@@ -448,72 +406,72 @@ namespace RandomX {
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tand rax, ";
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tand eax, ";
- genbr132(instr);
+ genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tor rax, ";
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tor eax, ";
- genbr132(instr);
+ genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\txor rax, ";
- genbr1(instr);
+ genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\txor eax, ";
- genbr132(instr);
+ genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
genar(instr, i);
- genbr0(instr, "shl");
+ genbiashift(instr, "shl");
gencr(instr);
}
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
genar(instr, i);
- genbr0(instr, "shr");
+ genbiashift(instr, "shr");
gencr(instr);
}
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
genar(instr, i);
- genbr0(instr, "sar");
+ genbiashift(instr, "sar");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
genar(instr, i);
- genbr0(instr, "rol");
+ genbiashift(instr, "rol");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
genar(instr, i);
- genbr0(instr, "ror");
+ genbiashift(instr, "ror");
gencr(instr);
}
diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp
index 2a1be1b..d2e2eb0 100644
--- a/src/AssemblyGeneratorX86.hpp
+++ b/src/AssemblyGeneratorX86.hpp
@@ -38,11 +38,12 @@ namespace RandomX {
static InstructionGenerator engine[256];
std::stringstream asmCode;
+ void gena(Instruction&, int);
void genar(Instruction&, int);
void genaf(Instruction&, int);
- void genbr0(Instruction&, const char*);
- void genbr1(Instruction&);
- void genbr132(Instruction&);
+ void genbiashift(Instruction&, const char*);
+ void genbia(Instruction&);
+ void genbia32(Instruction&);
void genbf(Instruction&, const char*);
void gencr(Instruction&, bool);
void gencf(Instruction&, bool);
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index 1f09cd9..32bad3a 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -17,10 +17,14 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
+//#define MAGIC_DIVISION
#include "JitCompilerX86.hpp"
#include "Pcg32.hpp"
#include
#include
+#ifdef MAGIC_DIVISION
+#include "divideByConstantCodegen.h"
+#endif
#ifdef _WIN32
#include
@@ -152,6 +156,17 @@ namespace RandomX {
instructionOffsets.push_back(codePos);
emit(0x840fcbff); //dec ebx; jz
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
+ auto generator = engine[instr.opcode];
+ (this->*generator)(instr, i);
+ }
+
+ void JitCompilerX86::fixCallOffsets() {
+ for (CallOffset& co : callOffsets) {
+ *reinterpret_cast(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
+ }
+ }
+
+ void JitCompilerX86::gena(Instruction& instr) {
emit(uint16_t(0x8149)); //xor
emitByte(0xf0 + (instr.rega % RegistersCount));
emit(instr.addra);
@@ -169,41 +184,28 @@ namespace RandomX {
emit(uint16_t(0x3348));
emitByte(0xe9); //xor rbp, rcx
}
- auto generator = engine[instr.opcode];
- (this->*generator)(instr, i);
- }
-
- void JitCompilerX86::fixCallOffsets() {
- for (CallOffset& co : callOffsets) {
- *reinterpret_cast(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
+ emit(uint16_t(0xe181)); //and ecx,
+ if (instr.loca & 3) {
+ emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
+ }
+ else {
+ emit(ScratchpadL2 - 1); //whole scratchpad
}
}
void JitCompilerX86::genar(Instruction& instr) {
- emit(uint16_t(0xe181)); //and ecx,
- if (instr.loca & 3) {
- emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
- }
- else {
- emit(ScratchpadL2 - 1); //whole scratchpad
- }
+ gena(instr);
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
}
void JitCompilerX86::genaf(Instruction& instr) {
- emit(uint16_t(0xe181)); //and ecx,
- if (instr.loca & 3) {
- emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
- }
- else {
- emit(ScratchpadL2 - 1); //whole scratchpad
- }
+ gena(instr);
emitByte(0xf3);
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
}
- void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
- if ((instr.locb & 7) <= 3) {
+ void JitCompilerX86::genbiashift(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
+ if (instr.locb & 1) {
emit(uint16_t(0x8b49)); //mov
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
emitByte(0x48); //REX.W
@@ -216,8 +218,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::genbr1(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
- if ((instr.locb & 7) <= 5) {
+ void JitCompilerX86::genbia(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
+ if (instr.locb & 3) {
emit(opcodeReg); // xxx rax, r64
emitByte(0xc0 + (instr.regb % RegistersCount));
}
@@ -227,8 +229,8 @@ namespace RandomX {
}
}
- void JitCompilerX86::genbr132(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
- if ((instr.locb & 7) <= 5) {
+ void JitCompilerX86::genbia32(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
+ if (instr.locb & 3) {
emit(opcodeReg); // xxx eax, r32
emitByte(0xc0 + (instr.regb % RegistersCount));
}
@@ -328,25 +330,25 @@ namespace RandomX {
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
genar(instr);
- genbr1(instr, 0x0349, 0x0548);
+ genbia(instr, 0x0349, 0x0548);
gencr(instr);
}
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
genar(instr);
- genbr132(instr, 0x0341, 0x05);
+ genbia32(instr, 0x0341, 0x05);
gencr(instr);
}
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
genar(instr);
- genbr1(instr, 0x2b49, 0x2d48);
+ genbia(instr, 0x2b49, 0x2d48);
gencr(instr);
}
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
genar(instr);
- genbr132(instr, 0x2b41, 0x2d);
+ genbia32(instr, 0x2b41, 0x2d);
gencr(instr);
}
@@ -435,104 +437,209 @@ namespace RandomX {
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
genar(instr);
- if ((instr.locb & 7) <= 5) {
+ if (instr.locb & 3) {
+#ifdef MAGIC_DIVISION
+ if (instr.imm32 != 0) {
+ uint32_t divisor = instr.imm32;
+ if (divisor & (divisor - 1)) {
+ magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
+ if (mi.pre_shift > 0) {
+ if (mi.pre_shift == 1) {
+ emitByte(0x48);
+ emit(uint16_t(0xe8d1)); //shr rax,1
+ }
+ else {
+ emit(0x00e8c148 | (mi.pre_shift << 24)); //shr rax, pre_shift
+ }
+ }
+ if (mi.increment) {
+ emit(0x00d8834801c08348); //add rax,1; sbb rax,0
+ }
+ emit(uint16_t(0xb948)); //movabs rcx, multiplier
+ emit(mi.multiplier);
+ emit(0x48e1f748); //mul rcx; REX
+ emit(uint16_t(0xc28b)); //mov rax,rdx
+ if (mi.post_shift > 0)
+ emit(0x00e8c148 | (mi.post_shift << 24)); //shr rax, post_shift
+ }
+ else { //divisor is a power of two
+ int shift = 0;
+ while (divisor >>= 1)
+ ++shift;
+ if (shift > 0)
+ emit(0x00e8c148 | (shift << 24)); //shr rax, shift
+ }
+ }
+#else
+ emitByte(0xb9); //mov ecx, imm32
+ emit(instr.imm32 != 0 ? instr.imm32 : 1);
+#endif
+ }
+ else {
emitByte(0xb9); //mov ecx, 1
emit(1);
emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount));
emit(0x450fd285); //test edx, edx; cmovne ecx,edx
emitByte(0xca);
+#ifdef MAGIC_DIVISION
+ emit(0xf748d233); //xor edx,edx; div rcx
+ emitByte(0xf1);
+#endif
}
- else {
- emitByte(0xb9); //mov ecx, imm32
- emit(instr.imm32 != 0 ? instr.imm32 : 1);
- }
+#ifndef MAGIC_DIVISION
emit(0xf748d233); //xor edx,edx; div rcx
emitByte(0xf1);
+#endif
gencr(instr);
}
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr);
- if ((instr.locb & 7) <= 5) {
- emit(uint16_t(0x8b41)); //mov edx, r32
- emitByte(0xd0 + (instr.regb % RegistersCount));
+ if (instr.locb & 3) {
+#ifdef MAGIC_DIVISION
+ int64_t divisor = instr.imm32;
+ if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
+ // +/- power of two
+ bool negative = divisor < 0;
+ if (negative)
+ divisor = -divisor;
+ int shift = 0;
+ uint64_t unsignedDivisor = divisor;
+ while (unsignedDivisor >>= 1)
+ ++shift;
+ if (shift > 0) {
+ emitByte(0x48);
+ emit(uint16_t(0xc88b)); //mov rcx, rax
+ emit(0x3ff9c148); //sar rcx, 63
+ uint32_t mask = (1ULL << shift) - 1;
+ emit(uint16_t(0xe181)); //and ecx, mask
+ emit(mask);
+ emitByte(0x48);
+ emit(uint16_t(0xc103)); //add rax, rcx
+ emit(0x00f8c148 | (shift << 24)); //sar rax, shift
+ }
+ if (negative) {
+ emitByte(0x48);
+ emit(uint16_t(0xd8f7)); //neg rax
+ }
+ }
+ else if (divisor != 0) {
+ magics_info mi = compute_signed_magic_info(divisor);
+ if ((divisor >= 0) != (mi.multiplier >= 0)) {
+ emitByte(0x48);
+ emit(uint16_t(0xc88b)); //mov rcx, rax
+ }
+ emit(uint16_t(0xba48)); //movabs rdx, multiplier
+ emit(mi.multiplier);
+ emit(0xd233c28b48eaf748); //imul rdx; mov rax,rdx; xor edx,edx
+ bool haveSF = false;
+ if (divisor > 0 && mi.multiplier < 0) {
+ emitByte(0x48);
+ emit(uint16_t(0xc103)); //add rax, rcx
+ haveSF = true;
+ }
+ if (divisor < 0 && mi.multiplier > 0) {
+ emitByte(0x48);
+ emit(uint16_t(0xc12b)); //sub rax, rcx
+ haveSF = true;
+ }
+ if (mi.shift > 0) {
+ emit(0x00f8c148 | (mi.shift << 24)); //sar rax, shift
+ haveSF = true;
+ }
+ if (!haveSF) {
+ emitByte(0x48);
+ emit(uint16_t(0xc085)); //test rax, rax (emit is low-byte-first: 85 C0); sets SF for the sets dl below
+ }
+ emit(0x48c2980f); //sets dl; add rax, rdx
+ emit(uint16_t(0xc203));
+ }
+#else
+ emitByte(0xba); // mov edx, imm32
+ emit(instr.imm32);
+#endif
}
else {
- emitByte(0xba); // xxx edx, imm32
- emit(instr.imm32);
+ emit(uint16_t(0x8b41)); //mov edx, r32
+ emitByte(0xd0 + (instr.regb % RegistersCount));
+#ifndef MAGIC_DIVISION
}
+#endif
emit(0xc88b480b75fffa83);
emit(0x1274c9ff48c1d148);
emit(0x0fd28500000001b9);
emit(0x489948c96348ca45);
emit(uint16_t(0xf9f7)); //idiv rcx
+#ifdef MAGIC_DIVISION
+ }
+#endif
gencr(instr);
}
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
genar(instr);
- genbr1(instr, 0x2349, 0x2548);
+ genbia(instr, 0x2349, 0x2548);
gencr(instr);
}
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
genar(instr);
- genbr132(instr, 0x2341, 0x25);
+ genbia32(instr, 0x2341, 0x25);
gencr(instr);
}
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
genar(instr);
- genbr1(instr, 0x0b49, 0x0d48);
+ genbia(instr, 0x0b49, 0x0d48);
gencr(instr);
}
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
genar(instr);
- genbr132(instr, 0x0b41, 0x0d);
+ genbia32(instr, 0x0b41, 0x0d);
gencr(instr);
}
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
genar(instr);
- genbr1(instr, 0x3349, 0x3548);
+ genbia(instr, 0x3349, 0x3548);
gencr(instr);
}
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
genar(instr);
- genbr132(instr, 0x3341, 0x35);
+ genbia32(instr, 0x3341, 0x35);
gencr(instr);
}
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
genar(instr);
- genbr0(instr, 0xe0d3, 0xe0c1);
+ genbiashift(instr, 0xe0d3, 0xe0c1);
gencr(instr);
}
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
genar(instr);
- genbr0(instr, 0xe8d3, 0xe8c1);
+ genbiashift(instr, 0xe8d3, 0xe8c1);
gencr(instr);
}
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
genar(instr);
- genbr0(instr, 0xf8d3, 0xf8c1);
+ genbiashift(instr, 0xf8d3, 0xf8c1);
gencr(instr);
}
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
genar(instr);
- genbr0(instr, 0xc0d3, 0xc0c1);
+ genbiashift(instr, 0xc0d3, 0xc0c1);
gencr(instr);
}
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
genar(instr);
- genbr0(instr, 0xc8d3, 0xc8c1);
+ genbiashift(instr, 0xc8d3, 0xc8c1);
gencr(instr);
}
diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp
index e4277c6..d95cbad 100644
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@@ -58,11 +58,12 @@ namespace RandomX {
std::vector instructionOffsets;
std::vector callOffsets;
+ void gena(Instruction&);
void genar(Instruction&);
void genaf(Instruction&);
- void genbr0(Instruction&, uint16_t, uint16_t);
- void genbr1(Instruction&, uint16_t, uint16_t);
- void genbr132(Instruction&, uint16_t, uint8_t);
+ void genbiashift(Instruction&, uint16_t, uint16_t);
+ void genbia(Instruction&, uint16_t, uint16_t);
+ void genbia32(Instruction&, uint16_t, uint8_t);
void genbf(Instruction&, uint8_t);
void scratchpadStoreR(Instruction&, uint32_t, bool);
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
diff --git a/src/divideByConstantCodegen.c b/src/divideByConstantCodegen.c
index 4b06712..255baf4 100644
--- a/src/divideByConstantCodegen.c
+++ b/src/divideByConstantCodegen.c
@@ -11,10 +11,10 @@
#include "divideByConstantCodegen.h"
-struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
+struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits) {
- //The numerator must fit in a uint
- assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);
+ //The numerator must fit in an unsigned_type
+ assert(num_bits > 0 && num_bits <= sizeof(unsigned_type) * CHAR_BIT);
// D must be larger than zero and not a power of 2
assert(D & (D - 1));
@@ -22,29 +22,29 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// The eventual result
struct magicu_info result;
- // Bits in a uint
- const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;
+ // Bits in an unsigned_type
+ const unsigned UINT_BITS = sizeof(unsigned_type) * CHAR_BIT;
// The extra shift implicit in the difference between UINT_BITS and num_bits
const unsigned extra_shift = UINT_BITS - num_bits;
// The initial power of 2 is one less than the first one that can possibly work
- const uint initial_power_of_2 = (uint)1 << (UINT_BITS - 1);
+ const unsigned_type initial_power_of_2 = (unsigned_type)1 << (UINT_BITS - 1);
// The remainder and quotient of our power of 2 divided by d
- uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
+ unsigned_type quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
// ceil(log_2 D)
unsigned ceil_log_2_D;
// The magic info for the variant "round down" algorithm
- uint down_multiplier = 0;
+ unsigned_type down_multiplier = 0;
unsigned down_exponent = 0;
int has_magic_down = 0;
// Compute ceil(log_2 D)
ceil_log_2_D = 0;
- uint tmp;
+ unsigned_type tmp;
for (tmp = D; tmp > 0; tmp >>= 1)
ceil_log_2_D += 1;
@@ -67,11 +67,11 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// We're done if this exponent works for the round_up algorithm.
// Note that exponent may be larger than the maximum shift supported,
// so the check for >= ceil_log_2_D is critical.
- if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
+ if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((unsigned_type)1 << (exponent + extra_shift)))
break;
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
- if (!has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
+ if (!has_magic_down && remainder <= ((unsigned_type)1 << (exponent + extra_shift))) {
has_magic_down = 1;
down_multiplier = quotient;
down_exponent = exponent;
@@ -96,7 +96,7 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
else {
// Even divisor, so use a prefix-shifted dividend
unsigned pre_shift = 0;
- uint shifted_D = D;
+ unsigned_type shifted_D = D;
while ((shifted_D & 1) == 0) {
shifted_D >>= 1;
pre_shift += 1;
@@ -108,34 +108,34 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
return result;
}
-struct magics_info compute_signed_magic_info(sint D) {
+struct magics_info compute_signed_magic_info(signed_type D) {
// D must not be zero and must not be a power of 2 (or its negative)
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
// Our result
struct magics_info result;
- // Bits in an sint
- const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;
+ // Bits in a signed_type
+ const unsigned SINT_BITS = sizeof(signed_type) * CHAR_BIT;
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
- const uint abs_d = (D < 0 ? -D : D);
+ const unsigned_type abs_d = (D < 0 ? -D : D);
// The initial power of 2 is one less than the first one that can possibly work
// "two31" in Warren
unsigned exponent = SINT_BITS - 1;
- const uint initial_power_of_2 = (uint)1 << exponent;
+ const unsigned_type initial_power_of_2 = (unsigned_type)1 << exponent;
// Compute the absolute value of our "test numerator,"
// which is the largest dividend whose remainder with d is d-1.
// This is called anc in Warren.
- const uint tmp = initial_power_of_2 + (D < 0);
- const uint abs_test_numer = tmp - 1 - tmp % abs_d;
+ const unsigned_type tmp = initial_power_of_2 + (D < 0);
+ const unsigned_type abs_test_numer = tmp - 1 - tmp % abs_d;
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
- uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
- uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
- uint delta;
+ unsigned_type quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
+ unsigned_type quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
+ unsigned_type delta;
// Begin our loop
do {
diff --git a/src/divideByConstantCodegen.h b/src/divideByConstantCodegen.h
index 1ac55e8..800647c 100644
--- a/src/divideByConstantCodegen.h
+++ b/src/divideByConstantCodegen.h
@@ -24,11 +24,11 @@ along with RandomX. If not, see.
extern "C" {
#endif
- typedef uint64_t uint;
- typedef int64_t sint;
+ typedef uint64_t unsigned_type;
+ typedef int64_t signed_type;
/* Computes "magic info" for performing signed division by a fixed integer D.
- The type 'sint' is assumed to be defined as a signed integer type large enough
+ The type 'signed_type' is assumed to be defined as a signed integer type large enough
to hold both the dividend and the divisor.
Here >> is arithmetic (signed) shift, and >>> is logical shift.
@@ -55,17 +55,17 @@ extern "C" {
*/
struct magics_info {
- sint multiplier; // the "magic number" multiplier
+ signed_type multiplier; // the "magic number" multiplier
unsigned shift; // shift for the dividend after multiplying
};
- struct magics_info compute_signed_magic_info(sint D);
+ struct magics_info compute_signed_magic_info(signed_type D);
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
- The type 'uint' is assumed to be defined as an unsigned integer type large enough
+ The type 'unsigned_type' is assumed to be defined as an unsigned integer type large enough
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
- known to be smaller than the largest uint; if this is not known then pass
- (sizeof(uint) * CHAR_BIT) for num_bits.
+ known to be smaller than the largest unsigned_type; if this is not known then pass
+ (sizeof(unsigned_type) * CHAR_BIT) for num_bits.
Assume we have a hardware register of width UINT_BITS, a known constant D which is
not zero and not a power of 2, and a variable n of width num_bits (which may be
@@ -105,12 +105,12 @@ extern "C" {
*/
struct magicu_info {
- uint multiplier; // the "magic number" multiplier
+ unsigned_type multiplier; // the "magic number" multiplier
unsigned pre_shift; // shift for the dividend before multiplying
unsigned post_shift; //shift for the dividend after multiplying
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
};
- struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);
+ struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits);
#if defined(__cplusplus)
}
diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp
index 39f8dec..7771a35 100644
--- a/src/instructionWeights.hpp
+++ b/src/instructionWeights.hpp
@@ -19,17 +19,17 @@ along with RandomX. If not, see.
#pragma once
-#define WT_ADD_64 15
+#define WT_ADD_64 12
#define WT_ADD_32 2
-#define WT_SUB_64 15
+#define WT_SUB_64 12
#define WT_SUB_32 2
#define WT_MUL_64 23
#define WT_MULH_64 10
#define WT_MUL_32 15
#define WT_IMUL_32 15
#define WT_IMULH_64 6
-#define WT_DIV_64 1
-#define WT_IDIV_64 1
+#define WT_DIV_64 4
+#define WT_IDIV_64 4
#define WT_AND_64 4
#define WT_AND_32 2
#define WT_OR_64 4
diff --git a/src/program.inc b/src/program.inc
index 66b9147..79a7dda 100644
--- a/src/program.inc
+++ b/src/program.inc
@@ -5,10 +5,10 @@ rx_i_0: ;CALL
mov ecx, r9d
test bl, 63
jnz short rx_body_0
- call rx_read_l1
+ call rx_read_l2
rx_body_0:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -19,20 +19,23 @@ rx_body_0:
ja short rx_i_1
call rx_i_30
-rx_i_1: ;IMULH_64
+rx_i_1: ;DIV_64
dec ebx
jz rx_finish
xor r15, 06afc2fa4h
mov ecx, r15d
test bl, 63
jnz short rx_body_1
- call rx_read_l1
+ call rx_read_l2
rx_body_1:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- imul rcx
- mov rax, rdx
+ mov ecx, 1
+ mov edx, r10d
+ test edx, edx
+ cmovne ecx, edx
+ xor edx, edx
+ div rcx
mov r12, rax
rx_i_2: ;JUMP
@@ -62,10 +65,10 @@ rx_i_3: ;FPDIV
mov ecx, r13d
test bl, 63
jnz short rx_body_3
- call rx_read_l1
+ call rx_read_l2
rx_body_3:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm9
movaps xmm1, xmm0
@@ -84,9 +87,9 @@ rx_i_4: ;MULH_64
mov ecx, r14d
test bl, 63
jnz short rx_body_4
- call rx_read_l1
+ call rx_read_l2
rx_body_4:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r14
mul rcx
@@ -104,13 +107,13 @@ rx_i_5: ;MUL_32
mov ecx, r15d
test bl, 63
jnz short rx_body_5
- call rx_read_l2
+ call rx_read_l1
rx_body_5:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, r12d
+ mov eax, 1037420699
imul rax, rcx
mov r12, rax
@@ -139,9 +142,9 @@ rx_i_7: ;FPADD
mov ecx, r10d
test bl, 63
jnz short rx_body_7
- call rx_read_l1
+ call rx_read_l2
rx_body_7:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm6
movaps xmm6, xmm0
@@ -157,32 +160,34 @@ rx_i_8: ;XOR_64
mov ecx, r13d
test bl, 63
jnz short rx_body_8
- call rx_read_l1
+ call rx_read_l2
rx_body_8:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- xor rax, 1344700093
+ xor rax, r11
mov rcx, rax
mov eax, r12d
xor eax, 050267ebdh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_9: ;IMULH_64
+rx_i_9: ;DIV_64
dec ebx
jz rx_finish
xor r14, 085121c54h
mov ecx, r14d
test bl, 63
jnz short rx_body_9
- call rx_read_l1
+ call rx_read_l2
rx_body_9:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, 565870810
- imul rcx
+ ; magic divide by 565870810
+ mov rcx, 8750690209911200579
+ mul rcx
mov rax, rdx
+ shr rax, 28
mov r10, rax
rx_i_10: ;AND_64
@@ -196,7 +201,7 @@ rx_i_10: ;AND_64
rx_body_10:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, -727859809
+ and rax, r10
mov r13, rax
rx_i_11: ;FPADD
@@ -206,9 +211,9 @@ rx_i_11: ;FPADD
mov ecx, r10d
test bl, 63
jnz short rx_body_11
- call rx_read_l2
+ call rx_read_l1
rx_body_11:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm4, xmm0
@@ -224,9 +229,9 @@ rx_i_12: ;FPSQRT
mov ecx, r10d
test bl, 63
jnz short rx_body_12
- call rx_read_l2
+ call rx_read_l1
rx_body_12:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm8, xmm0
@@ -276,9 +281,9 @@ rx_i_15: ;RET
mov ecx, r11d
test bl, 63
jnz short rx_body_15
- call rx_read_l2
+ call rx_read_l1
rx_body_15:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r14d
@@ -314,9 +319,9 @@ rx_i_17: ;FPMUL
mov ecx, r11d
test bl, 63
jnz short rx_body_17
- call rx_read_l1
+ call rx_read_l2
rx_body_17:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm4
movaps xmm1, xmm0
@@ -386,10 +391,10 @@ rx_i_21: ;ROR_64
mov ecx, r8d
test bl, 63
jnz short rx_body_21
- call rx_read_l2
+ call rx_read_l1
rx_body_21:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r9
ror rax, cl
@@ -406,10 +411,10 @@ rx_i_22: ;ADD_64
mov ecx, r13d
test bl, 63
jnz short rx_body_22
- call rx_read_l1
+ call rx_read_l2
rx_body_22:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
add rax, r8
mov rcx, rax
@@ -429,7 +434,7 @@ rx_i_23: ;MUL_64
rx_body_23:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r11
+ imul rax, 1283724485
mov r8, rax
rx_i_24: ;IMUL_32
@@ -439,10 +444,10 @@ rx_i_24: ;IMUL_32
mov ecx, r8d
test bl, 63
jnz short rx_body_24
- call rx_read_l1
+ call rx_read_l2
rx_body_24:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r15d
@@ -460,10 +465,10 @@ rx_i_25: ;FPMUL
mov ecx, r12d
test bl, 63
jnz short rx_body_25
- call rx_read_l2
+ call rx_read_l1
rx_body_25:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm9
movaps xmm1, xmm0
@@ -502,9 +507,9 @@ rx_i_27: ;FPMUL
mov ecx, r12d
test bl, 63
jnz short rx_body_27
- call rx_read_l2
+ call rx_read_l1
rx_body_27:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm3
movaps xmm1, xmm0
@@ -523,21 +528,21 @@ rx_i_28: ;AND_32
rx_body_28:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and eax, r13d
+ and eax, 565865719
mov r14, rax
-rx_i_29: ;ADD_64
+rx_i_29: ;SUB_64
dec ebx
jz rx_finish
xor r12, 0be2e7c42h
mov ecx, r12d
test bl, 63
jnz short rx_body_29
- call rx_read_l2
+ call rx_read_l1
rx_body_29:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 1944166515
+ sub rax, r13
mov r14, rax
rx_i_30: ;FPADD
@@ -561,13 +566,12 @@ rx_i_31: ;ROR_64
mov ecx, r14d
test bl, 63
jnz short rx_body_31
- call rx_read_l2
+ call rx_read_l1
rx_body_31:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r11
- ror rax, cl
+ ror rax, 55
mov r14, rax
rx_i_32: ;AND_32
@@ -577,11 +581,11 @@ rx_i_32: ;AND_32
mov ecx, r12d
test bl, 63
jnz short rx_body_32
- call rx_read_l1
+ call rx_read_l2
rx_body_32:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- and eax, -1936869641
+ and eax, r14d
mov r9, rax
rx_i_33: ;MUL_64
@@ -591,9 +595,9 @@ rx_i_33: ;MUL_64
mov ecx, r9d
test bl, 63
jnz short rx_body_33
- call rx_read_l2
+ call rx_read_l1
rx_body_33:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r15
mov r12, rax
@@ -622,9 +626,9 @@ rx_i_35: ;CALL
mov ecx, r15d
test bl, 63
jnz short rx_body_35
- call rx_read_l1
+ call rx_read_l2
rx_body_35:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r8, rax
cmp r9d, -2040787098
@@ -655,9 +659,9 @@ rx_i_37: ;FPSUB
mov ecx, r12d
test bl, 63
jnz short rx_body_37
- call rx_read_l1
+ call rx_read_l2
rx_body_37:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm2
movaps xmm9, xmm0
@@ -687,10 +691,10 @@ rx_i_39: ;ADD_64
mov ecx, r14d
test bl, 63
jnz short rx_body_39
- call rx_read_l1
+ call rx_read_l2
rx_body_39:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
add rax, r14
mov r14, rax
@@ -722,9 +726,9 @@ rx_i_41: ;JUMP
mov ecx, r9d
test bl, 63
jnz short rx_body_41
- call rx_read_l1
+ call rx_read_l2
rx_body_41:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r9, rax
cmp r14d, -1070581824
@@ -737,26 +741,26 @@ rx_i_42: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_42
- call rx_read_l1
+ call rx_read_l2
rx_body_42:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm6
movaps xmm6, xmm0
-rx_i_43: ;ADD_32
+rx_i_43: ;SUB_64
dec ebx
jz rx_finish
xor r12, 02b2a2eech
mov ecx, r12d
test bl, 63
jnz short rx_body_43
- call rx_read_l1
+ call rx_read_l2
rx_body_43:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- add eax, 1693705407
+ sub rax, r8
mov rcx, rax
mov eax, r11d
xor eax, 064f3e4bfh
@@ -785,10 +789,10 @@ rx_i_45: ;FPSUB
mov ecx, r12d
test bl, 63
jnz short rx_body_45
- call rx_read_l2
+ call rx_read_l1
rx_body_45:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm2
movaps xmm5, xmm0
@@ -800,9 +804,9 @@ rx_i_46: ;ADD_64
mov ecx, r8d
test bl, 63
jnz short rx_body_46
- call rx_read_l2
+ call rx_read_l1
rx_body_46:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
add rax, r9
mov rcx, rax
@@ -818,10 +822,10 @@ rx_i_47: ;JUMP
mov ecx, r12d
test bl, 63
jnz short rx_body_47
- call rx_read_l2
+ call rx_read_l1
rx_body_47:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r13d
@@ -873,10 +877,10 @@ rx_i_50: ;AND_64
mov ecx, r9d
test bl, 63
jnz short rx_body_50
- call rx_read_l1
+ call rx_read_l2
rx_body_50:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
and rax, r10
mov rcx, rax
@@ -892,11 +896,11 @@ rx_i_51: ;SUB_64
mov ecx, r10d
test bl, 63
jnz short rx_body_51
- call rx_read_l2
+ call rx_read_l1
rx_body_51:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, 419241919
+ sub rax, r15
mov r15, rax
rx_i_52: ;FPSQRT
@@ -906,9 +910,9 @@ rx_i_52: ;FPSQRT
mov ecx, r11d
test bl, 63
jnz short rx_body_52
- call rx_read_l2
+ call rx_read_l1
rx_body_52:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm7, xmm0
@@ -929,20 +933,20 @@ rx_body_53:
je short rx_i_54
ret
-rx_i_54: ;IMUL_32
+rx_i_54: ;IMULH_64
dec ebx
jz rx_finish
xor r11, 060638de0h
mov ecx, r11d
test bl, 63
jnz short rx_body_54
- call rx_read_l2
+ call rx_read_l1
rx_body_54:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- mov rax, 282209221
- imul rax, rcx
+ mov rcx, r8
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r12d
xor eax, 010d22bc5h
@@ -970,58 +974,62 @@ rx_body_55:
and eax, 2047
movhpd qword ptr [rsi + rax * 8], xmm3
-rx_i_56: ;IMULH_64
+rx_i_56: ;DIV_64
dec ebx
jz rx_finish
xor r14, 0f1456b8eh
mov ecx, r14d
test bl, 63
jnz short rx_body_56
- call rx_read_l1
+ call rx_read_l2
rx_body_56:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r15
- imul rcx
+ ; magic divide by 4244198545
+ add rax, 1 ; increment the dividend before multiplying ...
+ sbb rax, 0 ; ... and take the carry back out, so an all-ones dividend stays as it was instead of wrapping to 0
+ mov rcx, 9333701248213440683 ; 64-bit multiplier that stands in for the constant divisor
+ mul rcx ; unsigned 128-bit product in rdx:rax
mov rax, rdx
+ shr rax, 31 ; post-shift of the product's high half gives the quotient
mov rcx, rax
mov eax, r8d
xor eax, 0fcf95491h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_57: ;SUB_32
+rx_i_57: ;MUL_64
dec ebx
jz rx_finish
xor r9, 010dc4571h
mov ecx, r9d
test bl, 63
jnz short rx_body_57
- call rx_read_l2
+ call rx_read_l1
rx_body_57:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub eax, r14d
+ imul rax, 172123015
mov rcx, rax
mov eax, r15d
xor eax, 0a426387h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_58: ;IMUL_32
+rx_i_58: ;IMULH_64
dec ebx
jz rx_finish
xor r14, 0bcec0ebah
mov ecx, r14d
test bl, 63
jnz short rx_body_58
- call rx_read_l2
+ call rx_read_l1
rx_body_58:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r13d
- imul rax, rcx
+ mov rcx, r13
+ imul rcx
+ mov rax, rdx
mov r8, rax
rx_i_59: ;FPSUB
@@ -1045,9 +1053,9 @@ rx_i_60: ;CALL
mov ecx, r15d
test bl, 63
jnz short rx_body_60
- call rx_read_l1
+ call rx_read_l2
rx_body_60:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -1112,9 +1120,9 @@ rx_i_64: ;SUB_64
mov ecx, r13d
test bl, 63
jnz short rx_body_64
- call rx_read_l1
+ call rx_read_l2
rx_body_64:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
sub rax, r15
mov r9, rax
@@ -1126,9 +1134,9 @@ rx_i_65: ;JUMP
mov ecx, r13d
test bl, 63
jnz short rx_body_65
- call rx_read_l1
+ call rx_read_l2
rx_body_65:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r11, rax
cmp r8d, 1498056607
@@ -1141,10 +1149,10 @@ rx_i_66: ;FPDIV
mov ecx, r15d
test bl, 63
jnz short rx_body_66
- call rx_read_l2
+ call rx_read_l1
rx_body_66:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm3
movaps xmm1, xmm0
@@ -1178,9 +1186,9 @@ rx_i_68: ;FPADD
mov ecx, r13d
test bl, 63
jnz short rx_body_68
- call rx_read_l1
+ call rx_read_l2
rx_body_68:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm2
movaps xmm4, xmm0
@@ -1196,27 +1204,29 @@ rx_i_69: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_69
- call rx_read_l2
+ call rx_read_l1
rx_body_69:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm8, xmm0
-rx_i_70: ;MUL_64
+rx_i_70: ;MULH_64
dec ebx
jz rx_finish
xor r8, 0bbbec3fah
mov ecx, r8d
test bl, 63
jnz short rx_body_70
- call rx_read_l2
+ call rx_read_l1
rx_body_70:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r9
+ mov rcx, r9
+ mul rcx
+ mov rax, rdx
mov r13, rax
rx_i_71: ;FPMUL
@@ -1262,9 +1272,9 @@ rx_i_73: ;FPDIV
mov ecx, r12d
test bl, 63
jnz short rx_body_73
- call rx_read_l2
+ call rx_read_l1
rx_body_73:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm3
movaps xmm1, xmm0
@@ -1284,7 +1294,7 @@ rx_body_74:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, rax, -1431647438
+ imul rax, r13 ; register source uses the two-operand form; three-operand IMUL only accepts an immediate
mov rcx, rax
mov eax, r9d
xor eax, 0aaaacb32h
@@ -1298,9 +1308,9 @@ rx_i_75: ;CALL
mov ecx, r14d
test bl, 63
jnz short rx_body_75
- call rx_read_l2
+ call rx_read_l1
rx_body_75:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r13, rax
cmp r11d, -1160798683
@@ -1314,9 +1324,9 @@ rx_i_76: ;FPADD
mov ecx, r11d
test bl, 63
jnz short rx_body_76
- call rx_read_l1
+ call rx_read_l2
rx_body_76:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm3
movaps xmm7, xmm0
@@ -1332,9 +1342,9 @@ rx_i_77: ;RET
mov ecx, r14d
test bl, 63
jnz short rx_body_77
- call rx_read_l2
+ call rx_read_l1
rx_body_77:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -1368,9 +1378,9 @@ rx_i_79: ;CALL
mov ecx, r11d
test bl, 63
jnz short rx_body_79
- call rx_read_l1
+ call rx_read_l2
rx_body_79:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -1388,11 +1398,12 @@ rx_i_80: ;ROR_64
mov ecx, r13d
test bl, 63
jnz short rx_body_80
- call rx_read_l2
+ call rx_read_l1
rx_body_80:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- ror rax, 4
+ mov rcx, r11
+ ror rax, cl
mov rcx, rax
mov eax, r11d
xor eax, 01a681d13h
@@ -1410,7 +1421,7 @@ rx_i_81: ;AND_64
rx_body_81:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, r13
+ and rax, 338325607
mov r8, rax
rx_i_82: ;JUMP
@@ -1432,20 +1443,22 @@ rx_body_82:
cmp r12d, -68969733
jo rx_i_145
-rx_i_83: ;IMULH_64
+rx_i_83: ;DIV_64
dec ebx
jz rx_finish
xor r10, 0d9b6a533h
mov ecx, r10d
test bl, 63
jnz short rx_body_83
- call rx_read_l1
+ call rx_read_l2
rx_body_83:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- imul rcx
+ ; magic divide by 91850728
+ mov rcx, 13477737914993774191
+ mul rcx
mov rax, rdx
+ shr rax, 26
mov r12, rax
rx_i_84: ;SAR_64
@@ -1455,12 +1468,11 @@ rx_i_84: ;SAR_64
mov ecx, r15d
test bl, 63
jnz short rx_body_84
- call rx_read_l2
+ call rx_read_l1
rx_body_84:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- sar rax, cl
+ sar rax, 45
mov rcx, rax
mov eax, r13d
xor eax, 0ec5c52e6h
@@ -1478,7 +1490,7 @@ rx_i_85: ;MUL_64
rx_body_85:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r8
+ imul rax, 20014507
mov r10, rax
rx_i_86: ;AND_64
@@ -1499,7 +1511,7 @@ rx_body_86:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_87: ;ADD_32
+rx_i_87: ;SUB_64
dec ebx
jz rx_finish
xor r9, 0d75a0ecfh
@@ -1511,7 +1523,7 @@ rx_body_87:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add eax, r12d
+ sub rax, r12
mov r8, rax
rx_i_88: ;ROR_64
@@ -1537,9 +1549,9 @@ rx_i_89: ;MUL_64
mov ecx, r9d
test bl, 63
jnz short rx_body_89
- call rx_read_l2
+ call rx_read_l1
rx_body_89:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r8
mov rcx, rax
@@ -1555,9 +1567,9 @@ rx_i_90: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_90
- call rx_read_l1
+ call rx_read_l2
rx_body_90:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm3
movaps xmm6, xmm0
@@ -1587,9 +1599,9 @@ rx_i_92: ;JUMP
mov ecx, r8d
test bl, 63
jnz short rx_body_92
- call rx_read_l2
+ call rx_read_l1
rx_body_92:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r12, rax
cmp r14d, 1288893603
@@ -1621,10 +1633,10 @@ rx_i_94: ;CALL
mov ecx, r13d
test bl, 63
jnz short rx_body_94
- call rx_read_l1
+ call rx_read_l2
rx_body_94:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r8, rax
cmp r13d, -343122976
@@ -1656,13 +1668,13 @@ rx_i_96: ;MUL_32
mov ecx, r11d
test bl, 63
jnz short rx_body_96
- call rx_read_l2
+ call rx_read_l1
rx_body_96:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, -1354397081
+ mov eax, r11d
imul rax, rcx
mov r11, rax
@@ -1673,9 +1685,9 @@ rx_i_97: ;FPDIV
mov ecx, r15d
test bl, 63
jnz short rx_body_97
- call rx_read_l1
+ call rx_read_l2
rx_body_97:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm9
movaps xmm1, xmm0
@@ -1694,9 +1706,9 @@ rx_i_98: ;SUB_64
mov ecx, r14d
test bl, 63
jnz short rx_body_98
- call rx_read_l1
+ call rx_read_l2
rx_body_98:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
sub rax, r15
mov r14, rax
@@ -1708,9 +1720,9 @@ rx_i_99: ;FPMUL
mov ecx, r9d
test bl, 63
jnz short rx_body_99
- call rx_read_l2
+ call rx_read_l1
rx_body_99:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm6
movaps xmm1, xmm0
@@ -1747,7 +1759,7 @@ rx_i_101: ;SUB_64
rx_body_101:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r8
+ sub rax, 1732300336
mov r11, rax
rx_i_102: ;FPMUL
@@ -1774,9 +1786,9 @@ rx_i_103: ;MUL_64
mov ecx, r10d
test bl, 63
jnz short rx_body_103
- call rx_read_l1
+ call rx_read_l2
rx_body_103:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r13
mov rcx, rax
@@ -1792,9 +1804,9 @@ rx_i_104: ;IMUL_32
mov ecx, r11d
test bl, 63
jnz short rx_body_104
- call rx_read_l1
+ call rx_read_l2
rx_body_104:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
mov rax, -1913070089
@@ -1805,7 +1817,7 @@ rx_body_104:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_105: ;MULH_64
+rx_i_105: ;MUL_32
dec ebx
jz rx_finish
xor r13, 036a51f72h
@@ -1817,9 +1829,9 @@ rx_body_105:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r15
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r15d
+ imul rax, rcx
mov rcx, rax
mov eax, r14d
xor eax, 09c8724edh
@@ -1914,9 +1926,9 @@ rx_i_110: ;SHR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_110
- call rx_read_l1
+ call rx_read_l2
rx_body_110:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r9
shr rax, cl
@@ -1933,9 +1945,9 @@ rx_i_111: ;CALL
mov ecx, r8d
test bl, 63
jnz short rx_body_111
- call rx_read_l1
+ call rx_read_l2
rx_body_111:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -1953,32 +1965,34 @@ rx_i_112: ;SUB_64
mov ecx, r12d
test bl, 63
jnz short rx_body_112
- call rx_read_l2
+ call rx_read_l1
rx_body_112:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r13
+ sub rax, -1025977295
mov rcx, rax
mov eax, r14d
xor eax, 0c2d8d431h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_113: ;MUL_64
+rx_i_113: ;MULH_64
dec ebx
jz rx_finish
xor r10, 07a4f8cbbh
mov ecx, r10d
test bl, 63
jnz short rx_body_113
- call rx_read_l1
+ call rx_read_l2
rx_body_113:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r9
+ mov rcx, r9
+ mul rcx
+ mov rax, rdx
mov r13, rax
-rx_i_114: ;IMUL_32
+rx_i_114: ;IMULH_64
dec ebx
jz rx_finish
xor r13, 06e83e2cdh
@@ -1989,9 +2003,9 @@ rx_i_114: ;IMUL_32
rx_body_114:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r15d
- imul rax, rcx
+ mov rcx, r15
+ imul rcx
+ mov rax, rdx
mov r14, rax
rx_i_115: ;IDIV_64
@@ -2001,25 +2015,18 @@ rx_i_115: ;IDIV_64
mov ecx, r14d
test bl, 63
jnz short rx_body_115
- call rx_read_l2
+ call rx_read_l1
rx_body_115:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov edx, r10d
- cmp edx, -1
- jne short safe_idiv_115
- mov rcx, rax
- rol rcx, 1
- dec rcx
- jz short result_idiv_115
-safe_idiv_115:
- mov ecx, 1
- test edx, edx
- cmovne ecx, edx
- movsxd rcx, ecx
- cqo
- idiv rcx
-result_idiv_115:
+ ; magic divide by 587029837
+ mov rdx, 527204905636414983 ; precomputed multiplier that stands in for the constant divisor
+ imul rdx ; signed 128-bit product in rdx:rax
+ mov rax, rdx ; keep only the high half of the product
+ xor edx, edx ; clear rdx first, so the flags read by sets below come from the sar
+ sar rax, 24 ; arithmetic post-shift
+ sets dl ; dl = 1 when the shifted value is negative
+ add rax, rdx ; add that 1 to negative results
mov r14, rax
rx_i_116: ;IMUL_32
@@ -2042,7 +2049,7 @@ rx_body_116:
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_117: ;IMULH_64
+rx_i_117: ;DIV_64
dec ebx
jz rx_finish
xor r11, 015f2012bh
@@ -2053,9 +2060,11 @@ rx_i_117: ;IMULH_64
rx_body_117:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, -1205826972
- imul rcx
+ ; magic divide by 3089140324
+ mov rcx, 12823658721283834045
+ mul rcx
mov rax, rdx
+ shr rax, 31
mov rcx, rax
mov eax, r15d
xor eax, 0b8208a64h
@@ -2069,9 +2078,9 @@ rx_i_118: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_118
- call rx_read_l2
+ call rx_read_l1
rx_body_118:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm5
movaps xmm6, xmm0
@@ -2097,10 +2106,10 @@ rx_i_120: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_120
- call rx_read_l1
+ call rx_read_l2
rx_body_120:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm4
movaps xmm8, xmm0
@@ -2112,9 +2121,9 @@ rx_i_121: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_121
- call rx_read_l1
+ call rx_read_l2
rx_body_121:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm5
movaps xmm8, xmm0
@@ -2126,9 +2135,9 @@ rx_i_122: ;CALL
mov ecx, r10d
test bl, 63
jnz short rx_body_122
- call rx_read_l1
+ call rx_read_l2
rx_body_122:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r14d
@@ -2139,7 +2148,7 @@ rx_body_122:
jno short rx_i_123
call rx_i_192
-rx_i_123: ;ADD_64
+rx_i_123: ;ADD_32
dec ebx
jz rx_finish
xor r13, 073e9f58ah
@@ -2150,7 +2159,7 @@ rx_i_123: ;ADD_64
rx_body_123:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r15
+ add eax, 1530846772
mov r13, rax
rx_i_124: ;JUMP
@@ -2160,9 +2169,9 @@ rx_i_124: ;JUMP
mov ecx, r12d
test bl, 63
jnz short rx_body_124
- call rx_read_l2
+ call rx_read_l1
rx_body_124:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -2179,13 +2188,13 @@ rx_i_125: ;MUL_32
mov ecx, r8d
test bl, 63
jnz short rx_body_125
- call rx_read_l2
+ call rx_read_l1
rx_body_125:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, r14d
+ mov eax, 1774711622
imul rax, rcx
mov r14, rax
@@ -2196,9 +2205,9 @@ rx_i_126: ;FPMUL
mov ecx, r8d
test bl, 63
jnz short rx_body_126
- call rx_read_l1
+ call rx_read_l2
rx_body_126:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm6
movaps xmm1, xmm0
@@ -2229,10 +2238,10 @@ rx_i_128: ;MUL_64
mov ecx, r13d
test bl, 63
jnz short rx_body_128
- call rx_read_l2
+ call rx_read_l1
rx_body_128:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r9
mov r9, rax
@@ -2252,7 +2261,7 @@ rx_body_129:
cmp r13d, -590624856
jge rx_i_154
-rx_i_130: ;DIV_64
+rx_i_130: ;IDIV_64
dec ebx
jz rx_finish
xor r9, 077c3b332h
@@ -2263,9 +2272,14 @@ rx_i_130: ;DIV_64
rx_body_130:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, -281794782
+ ; magic divide by -281794782
+ mov rdx, -8786110448882479839
+ imul rdx
+ mov rax, rdx
xor edx, edx
- div rcx
+ sar rax, 27
+ sets dl
+ add rax, rdx
mov rcx, rax
mov eax, r11d
xor eax, 0ef342722h
@@ -2317,7 +2331,7 @@ rx_i_133: ;OR_64
rx_body_133:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, -1000526796
+ or rax, r13
mov rcx, rax
mov eax, r15d
xor eax, 0c45d2c34h
@@ -2335,7 +2349,7 @@ rx_i_134: ;ADD_64
rx_body_134:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 1516102347
+ add rax, r8
mov r13, rax
rx_i_135: ;FPMUL
@@ -2383,10 +2397,10 @@ rx_i_137: ;SHR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_137
- call rx_read_l1
+ call rx_read_l2
rx_body_137:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r9
shr rax, cl
@@ -2420,7 +2434,7 @@ rx_body_139:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 515364082
+ add rax, r8
mov rcx, rax
mov eax, r11d
xor eax, 01eb7d4f2h
@@ -2434,9 +2448,9 @@ rx_i_140: ;IMUL_32
mov ecx, r14d
test bl, 63
jnz short rx_body_140
- call rx_read_l2
+ call rx_read_l1
rx_body_140:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r11d
@@ -2469,9 +2483,9 @@ rx_i_142: ;JUMP
mov ecx, r11d
test bl, 63
jnz short rx_body_142
- call rx_read_l2
+ call rx_read_l1
rx_body_142:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r10d
@@ -2481,39 +2495,39 @@ rx_body_142:
cmp r12d, 1365939282
js rx_i_257
-rx_i_143: ;MUL_32
+rx_i_143: ;IMUL_32
dec ebx
jz rx_finish
xor r15, 037f4b5d0h
mov ecx, r15d
test bl, 63
jnz short rx_body_143
- call rx_read_l2
+ call rx_read_l1
rx_body_143:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r11d
+ movsxd rcx, eax
+ movsxd rax, r11d
imul rax, rcx
mov r9, rax
-rx_i_144: ;IMUL_32
+rx_i_144: ;IMULH_64
dec ebx
jz rx_finish
xor r10, 02e59e00ah
mov ecx, r10d
test bl, 63
jnz short rx_body_144
- call rx_read_l2
+ call rx_read_l1
rx_body_144:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r11d
- imul rax, rcx
+ mov rcx, -1304483355
+ imul rcx
+ mov rax, rdx
mov r15, rax
-rx_i_145: ;IMUL_32
+rx_i_145: ;IMULH_64
dec ebx
jz rx_finish
xor r13, 08d5c798h
@@ -2524,9 +2538,9 @@ rx_i_145: ;IMUL_32
rx_body_145:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r11d
- imul rax, rcx
+ mov rcx, r11
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r10d
xor eax, 0dd491985h
@@ -2562,14 +2576,14 @@ rx_body_147:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, rax, 1784404616
+ imul rax, r11 ; register source uses the two-operand form; three-operand IMUL only accepts an immediate
mov rcx, rax
mov eax, r12d
xor eax, 06a5bda88h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_148: ;ADD_32
+rx_i_148: ;SUB_64
dec ebx
jz rx_finish
xor r10, 0783e5c4eh
@@ -2580,7 +2594,7 @@ rx_i_148: ;ADD_32
rx_body_148:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add eax, r14d
+ sub rax, r14
mov rcx, rax
mov eax, r10d
xor eax, 08c783d2ch
@@ -2607,7 +2621,7 @@ rx_body_149:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_150: ;IMUL_32
+rx_i_150: ;IMULH_64
dec ebx
jz rx_finish
xor r9, 01504ca7ah
@@ -2618,9 +2632,9 @@ rx_i_150: ;IMUL_32
rx_body_150:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r8d
- imul rax, rcx
+ mov rcx, -933976796
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 0c854a524h
@@ -2638,7 +2652,7 @@ rx_i_151: ;AND_64
rx_body_151:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, r13
+ and rax, -2018584590
mov rcx, rax
mov eax, r11d
xor eax, 087aed7f2h
@@ -2688,12 +2702,12 @@ rx_i_154: ;MUL_32
mov ecx, r10d
test bl, 63
jnz short rx_body_154
- call rx_read_l2
+ call rx_read_l1
rx_body_154:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, r13d
+ mov eax, -820047839
imul rax, rcx
mov r10, rax
@@ -2704,9 +2718,9 @@ rx_i_155: ;ROL_64
mov ecx, r11d
test bl, 63
jnz short rx_body_155
- call rx_read_l1
+ call rx_read_l2
rx_body_155:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r10
rol rax, cl
@@ -2723,9 +2737,9 @@ rx_i_156: ;IMUL_32
mov ecx, r10d
test bl, 63
jnz short rx_body_156
- call rx_read_l2
+ call rx_read_l1
rx_body_156:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r15d
@@ -2757,7 +2771,7 @@ rx_i_158: ;ADD_64
rx_body_158:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 1233402159
+ add rax, r13
mov r10, rax
rx_i_159: ;CALL
@@ -2767,9 +2781,9 @@ rx_i_159: ;CALL
mov ecx, r13d
test bl, 63
jnz short rx_body_159
- call rx_read_l1
+ call rx_read_l2
rx_body_159:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r13d
@@ -2780,7 +2794,7 @@ rx_body_159:
ja short rx_i_160
call rx_i_181
-rx_i_160: ;ADD_32
+rx_i_160: ;SUB_64
dec ebx
jz rx_finish
xor r14, 0b1685b90h
@@ -2792,7 +2806,7 @@ rx_body_160:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add eax, 1518778665
+ sub rax, r14
mov rcx, rax
mov eax, r10d
xor eax, 05a86b929h
@@ -2806,18 +2820,16 @@ rx_i_161: ;IDIV_64
mov ecx, r15d
test bl, 63
jnz short rx_body_161
- call rx_read_l1
+ call rx_read_l2
rx_body_161:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov edx, r14d
cmp edx, -1
- jne short safe_idiv_161
- mov rcx, rax
- rol rcx, 1
- dec rcx
- jz short result_idiv_161
-safe_idiv_161:
+ jne short body_idiv_161 ; any divisor other than -1 takes the regular division path
+ neg rax ; divisor is -1: negate the dividend instead of executing idiv
+ jmp short result_idiv_161 ; skip the division sequence
+body_idiv_161:
mov ecx, 1
test edx, edx
cmovne ecx, edx
@@ -2838,23 +2850,22 @@ rx_i_162: ;SHL_64
rx_body_162:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- shl rax, cl
+ shl rax, 7
mov r13, rax
-rx_i_163: ;ADD_32
+rx_i_163: ;SUB_64
dec ebx
jz rx_finish
xor r12, 0e3486c0ah
mov ecx, r12d
test bl, 63
jnz short rx_body_163
- call rx_read_l2
+ call rx_read_l1
rx_body_163:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add eax, -2101130488
+ sub rax, r8
mov rcx, rax
mov eax, r14d
xor eax, 082c34b08h
@@ -2909,12 +2920,11 @@ rx_i_166: ;SHR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_166
- call rx_read_l2
+ call rx_read_l1
rx_body_166:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- shr rax, cl
+ shr rax, 62
mov rcx, rax
mov eax, r13d
xor eax, 0bb67f8abh
@@ -2986,9 +2996,9 @@ rx_i_170: ;FPSQRT
mov ecx, r8d
test bl, 63
jnz short rx_body_170
- call rx_read_l2
+ call rx_read_l1
rx_body_170:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm6, xmm0
@@ -2997,7 +3007,7 @@ rx_body_170:
and eax, 32767
movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_171: ;IMUL_32
+rx_i_171: ;IMULH_64
dec ebx
jz rx_finish
xor r15, 09901e05bh
@@ -3008,9 +3018,9 @@ rx_i_171: ;IMUL_32
rx_body_171:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r12d
- imul rax, rcx
+ mov rcx, r12
+ imul rcx
+ mov rax, rdx
mov r12, rax
rx_i_172: ;SUB_64
@@ -3024,7 +3034,7 @@ rx_i_172: ;SUB_64
rx_body_172:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r11
+ sub rax, -478081934
mov r12, rax
rx_i_173: ;MUL_64
@@ -3039,7 +3049,7 @@ rx_body_173:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r12
+ imul rax, -1386172772
mov rcx, rax
mov eax, r12d
xor eax, 0ad60ae9ch
@@ -3088,11 +3098,11 @@ rx_i_176: ;SUB_64
mov ecx, r9d
test bl, 63
jnz short rx_body_176
- call rx_read_l2
+ call rx_read_l1
rx_body_176:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r14
+ sub rax, -2101315181
mov r10, rax
rx_i_177: ;ADD_64
@@ -3102,11 +3112,11 @@ rx_i_177: ;ADD_64
mov ecx, r10d
test bl, 63
jnz short rx_body_177
- call rx_read_l2
+ call rx_read_l1
rx_body_177:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r10
+ add rax, 794235831
mov rcx, rax
mov eax, r13d
xor eax, 02f5713b7h
@@ -3120,9 +3130,9 @@ rx_i_178: ;RET
mov ecx, r15d
test bl, 63
jnz short rx_body_178
- call rx_read_l1
+ call rx_read_l2
rx_body_178:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -3140,9 +3150,9 @@ rx_i_179: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_179
- call rx_read_l1
+ call rx_read_l2
rx_body_179:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm2
movaps xmm8, xmm0
@@ -3154,11 +3164,11 @@ rx_i_180: ;AND_32
mov ecx, r15d
test bl, 63
jnz short rx_body_180
- call rx_read_l2
+ call rx_read_l1
rx_body_180:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and eax, 1995308563
+ and eax, r9d
mov rcx, rax
mov eax, r9d
xor eax, 076edfe13h
@@ -3172,10 +3182,10 @@ rx_i_181: ;CALL
mov ecx, r10d
test bl, 63
jnz short rx_body_181
- call rx_read_l2
+ call rx_read_l1
rx_body_181:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r10, rax
cmp r12d, -1612576918
@@ -3208,7 +3218,7 @@ rx_i_183: ;ADD_64
rx_body_183:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 137260710
+ add rax, r11
mov r10, rax
rx_i_184: ;XOR_32
@@ -3218,12 +3228,12 @@ rx_i_184: ;XOR_32
mov ecx, r12d
test bl, 63
jnz short rx_body_184
- call rx_read_l2
+ call rx_read_l1
rx_body_184:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- xor eax, 790123591
+ xor eax, r13d
mov r12, rax
rx_i_185: ;JUMP
@@ -3233,9 +3243,9 @@ rx_i_185: ;JUMP
mov ecx, r10d
test bl, 63
jnz short rx_body_185
- call rx_read_l1
+ call rx_read_l2
rx_body_185:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r9d
@@ -3257,7 +3267,7 @@ rx_body_186:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, r15
+ or rax, -1252263008
mov rcx, rax
mov eax, r10d
xor eax, 0b55bfba0h
@@ -3271,9 +3281,9 @@ rx_i_187: ;FPMUL
mov ecx, r13d
test bl, 63
jnz short rx_body_187
- call rx_read_l2
+ call rx_read_l1
rx_body_187:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm6
movaps xmm1, xmm0
@@ -3288,10 +3298,10 @@ rx_i_188: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_188
- call rx_read_l1
+ call rx_read_l2
rx_body_188:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm3
movaps xmm4, xmm0
@@ -3320,9 +3330,9 @@ rx_i_190: ;RET
mov ecx, r12d
test bl, 63
jnz short rx_body_190
- call rx_read_l2
+ call rx_read_l1
rx_body_190:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r13, rax
cmp rsp, rdi
@@ -3336,9 +3346,9 @@ rx_i_191: ;FPSQRT
mov ecx, r15d
test bl, 63
jnz short rx_body_191
- call rx_read_l1
+ call rx_read_l2
rx_body_191:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm6, xmm0
@@ -3388,9 +3398,9 @@ rx_i_194: ;FPMUL
mov ecx, r12d
test bl, 63
jnz short rx_body_194
- call rx_read_l2
+ call rx_read_l1
rx_body_194:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm2
movaps xmm1, xmm0
@@ -3413,22 +3423,21 @@ rx_i_195: ;SHL_64
rx_body_195:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- shl rax, cl
+ shl rax, 27
mov r9, rax
-rx_i_196: ;ADD_32
+rx_i_196: ;SUB_64
dec ebx
jz rx_finish
xor r8, 0c2a9f41bh
mov ecx, r8d
test bl, 63
jnz short rx_body_196
- call rx_read_l2
+ call rx_read_l1
rx_body_196:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add eax, -1907903895
+ sub rax, r8
mov rcx, rax
mov eax, r13d
xor eax, 08e47b269h
@@ -3442,44 +3451,48 @@ rx_i_197: ;MUL_64
mov ecx, r12d
test bl, 63
jnz short rx_body_197
- call rx_read_l1
+ call rx_read_l2
rx_body_197:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r15
mov r11, rax
-rx_i_198: ;MUL_64
+rx_i_198: ;MULH_64
dec ebx
jz rx_finish
xor r14, 0c8d95bbbh
mov ecx, r14d
test bl, 63
jnz short rx_body_198
- call rx_read_l1
+ call rx_read_l2
rx_body_198:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r14
+ mov rcx, r14
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r8d
xor eax, 01149cba0h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_199: ;MUL_64
+rx_i_199: ;MULH_64
dec ebx
jz rx_finish
xor r13, 050049e2eh
mov ecx, r13d
test bl, 63
jnz short rx_body_199
- call rx_read_l2
+ call rx_read_l1
rx_body_199:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r10
+ mov rcx, r10
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r10d
xor eax, 0d0e71e9ah
@@ -3530,9 +3543,9 @@ rx_i_202: ;FPADD
mov ecx, r13d
test bl, 63
jnz short rx_body_202
- call rx_read_l2
+ call rx_read_l1
rx_body_202:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm9
movaps xmm5, xmm0
@@ -3544,9 +3557,9 @@ rx_i_203: ;FPSUB
mov ecx, r10d
test bl, 63
jnz short rx_body_203
- call rx_read_l1
+ call rx_read_l2
rx_body_203:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm2
movaps xmm7, xmm0
@@ -3562,9 +3575,9 @@ rx_i_204: ;MUL_64
mov ecx, r9d
test bl, 63
jnz short rx_body_204
- call rx_read_l1
+ call rx_read_l2
rx_body_204:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r15
mov rcx, rax
@@ -3606,7 +3619,7 @@ rx_body_206:
subpd xmm0, xmm7
movaps xmm4, xmm0
-rx_i_207: ;IMULH_64
+rx_i_207: ;IDIV_64
dec ebx
jz rx_finish
xor r9, 039ccdd30h
@@ -3618,9 +3631,14 @@ rx_body_207:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r12
- imul rcx
+ ; magic divide by 314297476
+ mov rdx, 1969376361274661135
+ imul rdx
mov rax, rdx
+ xor edx, edx
+ sar rax, 25
+ sets dl
+ add rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 012bbcc84h
@@ -3638,7 +3656,7 @@ rx_i_208: ;MUL_64
rx_body_208:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r12
+ imul rax, -486588965
mov r10, rax
rx_i_209: ;XOR_64
@@ -3653,7 +3671,7 @@ rx_body_209:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- xor rax, -1016364182
+ xor rax, r15
mov rcx, rax
mov eax, r12d
xor eax, 0c36b836ah
@@ -3667,13 +3685,13 @@ rx_i_210: ;MUL_32
mov ecx, r12d
test bl, 63
jnz short rx_body_210
- call rx_read_l2
+ call rx_read_l1
rx_body_210:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, -1027162400
+ mov eax, r12d
imul rax, rcx
mov rcx, rax
mov eax, r15d
@@ -3688,11 +3706,12 @@ rx_i_211: ;ROR_64
mov ecx, r12d
test bl, 63
jnz short rx_body_211
- call rx_read_l2
+ call rx_read_l1
rx_body_211:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- ror rax, 27
+ mov rcx, r9
+ ror rax, cl
mov rcx, rax
mov eax, r11d
xor eax, 0212e615h
@@ -3750,7 +3769,7 @@ rx_body_214:
shl rax, cl
mov r14, rax
-rx_i_215: ;ADD_64
+rx_i_215: ;ADD_32
dec ebx
jz rx_finish
xor r15, 08359265eh
@@ -3762,7 +3781,7 @@ rx_body_215:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r12
+ add eax, r12d
mov r10, rax
rx_i_216: ;MUL_64
@@ -3772,9 +3791,9 @@ rx_i_216: ;MUL_64
mov ecx, r12d
test bl, 63
jnz short rx_body_216
- call rx_read_l1
+ call rx_read_l2
rx_body_216:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r13
mov rcx, rax
@@ -3783,7 +3802,7 @@ rx_body_216:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_217: ;MUL_32
+rx_i_217: ;IMUL_32
dec ebx
jz rx_finish
xor r8, 040d5b526h
@@ -3794,8 +3813,8 @@ rx_i_217: ;MUL_32
rx_body_217:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r9d
+ movsxd rcx, eax
+ movsxd rax, r9d
imul rax, rcx
mov rcx, rax
mov eax, r10d
@@ -3810,9 +3829,9 @@ rx_i_218: ;FPSQRT
mov ecx, r11d
test bl, 63
jnz short rx_body_218
- call rx_read_l2
+ call rx_read_l1
rx_body_218:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm3, xmm0
@@ -3832,7 +3851,7 @@ rx_i_219: ;OR_64
rx_body_219:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, -740915304
+ or rax, r10
mov rcx, rax
mov eax, r15d
xor eax, 0d3d68798h
@@ -3859,7 +3878,7 @@ rx_body_220:
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_221: ;IMUL_32
+rx_i_221: ;IMULH_64
dec ebx
jz rx_finish
xor r9, 0a3deb512h
@@ -3870,9 +3889,9 @@ rx_i_221: ;IMUL_32
rx_body_221:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r15d
- imul rax, rcx
+ mov rcx, 2146087761
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r11d
xor eax, 07feab351h
@@ -3886,9 +3905,9 @@ rx_i_222: ;FPMUL
mov ecx, r9d
test bl, 63
jnz short rx_body_222
- call rx_read_l2
+ call rx_read_l1
rx_body_222:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm5
movaps xmm1, xmm0
@@ -3926,31 +3945,31 @@ rx_i_224: ;XOR_32
mov ecx, r12d
test bl, 63
jnz short rx_body_224
- call rx_read_l1
+ call rx_read_l2
rx_body_224:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- xor eax, r14d
+ xor eax, -452933987
mov rcx, rax
mov eax, r11d
xor eax, 0e500c69dh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_225: ;IMUL_32
+rx_i_225: ;IMULH_64
dec ebx
jz rx_finish
xor r13, 0c558367eh
mov ecx, r13d
test bl, 63
jnz short rx_body_225
- call rx_read_l2
+ call rx_read_l1
rx_body_225:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r10d
- imul rax, rcx
+ mov rcx, r10
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r12d
xor eax, 0fe304a4ah
@@ -3983,9 +4002,9 @@ rx_i_227: ;FPMUL
mov ecx, r11d
test bl, 63
jnz short rx_body_227
- call rx_read_l1
+ call rx_read_l2
rx_body_227:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm7
movaps xmm1, xmm0
@@ -4018,9 +4037,9 @@ rx_i_229: ;IMUL_32
mov ecx, r11d
test bl, 63
jnz short rx_body_229
- call rx_read_l2
+ call rx_read_l1
rx_body_229:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r12d
@@ -4130,12 +4149,12 @@ rx_i_235: ;MUL_32
mov ecx, r13d
test bl, 63
jnz short rx_body_235
- call rx_read_l2
+ call rx_read_l1
rx_body_235:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, r12d
+ mov eax, 212286089
imul rax, rcx
mov rcx, rax
mov eax, r15d
@@ -4150,9 +4169,9 @@ rx_i_236: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_236
- call rx_read_l2
+ call rx_read_l1
rx_body_236:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm4
movaps xmm3, xmm0
@@ -4164,9 +4183,9 @@ rx_i_237: ;JUMP
mov ecx, r15d
test bl, 63
jnz short rx_body_237
- call rx_read_l2
+ call rx_read_l1
rx_body_237:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r11, rax
cmp r12d, -121899164
@@ -4179,10 +4198,10 @@ rx_i_238: ;FPADD
mov ecx, r8d
test bl, 63
jnz short rx_body_238
- call rx_read_l2
+ call rx_read_l1
rx_body_238:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm6
movaps xmm7, xmm0
@@ -4212,10 +4231,10 @@ rx_i_240: ;IMUL_32
mov ecx, r9d
test bl, 63
jnz short rx_body_240
- call rx_read_l1
+ call rx_read_l2
rx_body_240:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
mov rax, -423830277
@@ -4247,11 +4266,11 @@ rx_i_242: ;MULH_64
mov ecx, r12d
test bl, 63
jnz short rx_body_242
- call rx_read_l2
+ call rx_read_l1
rx_body_242:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, 319324914
+ mov rcx, r12
mul rcx
mov rax, rdx
mov rcx, rax
@@ -4271,7 +4290,7 @@ rx_i_243: ;OR_64
rx_body_243:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, 1198180774
+ or rax, r9
mov r14, rax
rx_i_244: ;ROR_64
@@ -4281,9 +4300,9 @@ rx_i_244: ;ROR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_244
- call rx_read_l2
+ call rx_read_l1
rx_body_244:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r14
ror rax, cl
@@ -4300,33 +4319,35 @@ rx_i_245: ;AND_32
mov ecx, r13d
test bl, 63
jnz short rx_body_245
- call rx_read_l1
+ call rx_read_l2
rx_body_245:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- and eax, -1546539637
+ and eax, r10d
mov rcx, rax
mov eax, r12d
xor eax, 0a3d1ad8bh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_246: ;IMULH_64
+rx_i_246: ;DIV_64
dec ebx
jz rx_finish
xor r15, 027eeaa2eh
mov ecx, r15d
test bl, 63
jnz short rx_body_246
- call rx_read_l2
+ call rx_read_l1
rx_body_246:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r9
- imul rcx
+ ; magic divide by 4138158808
+ mov rcx, 9572876028959826425
+ mul rcx
mov rax, rdx
+ shr rax, 31
mov r12, rax
rx_i_247: ;MUL_32
@@ -4349,21 +4370,21 @@ rx_body_247:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_248: ;MULH_64
+rx_i_248: ;MUL_32
dec ebx
jz rx_finish
xor r8, 0649df46fh
mov ecx, r8d
test bl, 63
jnz short rx_body_248
- call rx_read_l2
+ call rx_read_l1
rx_body_248:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r15
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r15d
+ imul rax, rcx
mov rcx, rax
mov eax, r9d
xor eax, 07b10fc32h
@@ -4377,10 +4398,10 @@ rx_i_249: ;IMUL_32
mov ecx, r15d
test bl, 63
jnz short rx_body_249
- call rx_read_l2
+ call rx_read_l1
rx_body_249:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r11d
@@ -4391,18 +4412,18 @@ rx_body_249:
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_250: ;SUB_32
+rx_i_250: ;MUL_64
dec ebx
jz rx_finish
xor r13, 083eafe6fh
mov ecx, r13d
test bl, 63
jnz short rx_body_250
- call rx_read_l2
+ call rx_read_l1
rx_body_250:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub eax, r8d
+ imul rax, r8
mov rcx, rax
mov eax, r14d
xor eax, 031115b87h
@@ -4416,9 +4437,9 @@ rx_i_251: ;FPMUL
mov ecx, r13d
test bl, 63
jnz short rx_body_251
- call rx_read_l2
+ call rx_read_l1
rx_body_251:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm2
movaps xmm1, xmm0
@@ -4437,12 +4458,11 @@ rx_i_252: ;SHL_64
mov ecx, r14d
test bl, 63
jnz short rx_body_252
- call rx_read_l2
+ call rx_read_l1
rx_body_252:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- shl rax, cl
+ shl rax, 53
mov r14, rax
rx_i_253: ;CALL
@@ -4490,9 +4510,9 @@ rx_i_255: ;FPADD
mov ecx, r9d
test bl, 63
jnz short rx_body_255
- call rx_read_l1
+ call rx_read_l2
rx_body_255:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm6, xmm0
@@ -4501,7 +4521,7 @@ rx_body_255:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_256: ;MUL_64
+rx_i_256: ;MULH_64
dec ebx
jz rx_finish
xor r8, 08375472ch
@@ -4513,7 +4533,9 @@ rx_body_256:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r15
+ mov rcx, r15
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 0f8942c0h
@@ -4527,9 +4549,9 @@ rx_i_257: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_257
- call rx_read_l2
+ call rx_read_l1
rx_body_257:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm3, xmm0
@@ -4545,10 +4567,10 @@ rx_i_258: ;MUL_32
mov ecx, r11d
test bl, 63
jnz short rx_body_258
- call rx_read_l1
+ call rx_read_l2
rx_body_258:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
mov eax, r14d
@@ -4580,10 +4602,10 @@ rx_i_260: ;FPSUB
mov ecx, r13d
test bl, 63
jnz short rx_body_260
- call rx_read_l2
+ call rx_read_l1
rx_body_260:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm5
movaps xmm9, xmm0
@@ -4595,10 +4617,10 @@ rx_i_261: ;FPDIV
mov ecx, r14d
test bl, 63
jnz short rx_body_261
- call rx_read_l2
+ call rx_read_l1
rx_body_261:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm3
movaps xmm1, xmm0
@@ -4622,7 +4644,7 @@ rx_body_262:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, r13
+ and rax, -1569587450
mov rcx, rax
mov eax, r11d
xor eax, 0a271ff06h
@@ -4636,10 +4658,10 @@ rx_i_263: ;FPMUL
mov ecx, r11d
test bl, 63
jnz short rx_body_263
- call rx_read_l1
+ call rx_read_l2
rx_body_263:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm8
movaps xmm1, xmm0
@@ -4654,9 +4676,9 @@ rx_i_264: ;FPMUL
mov ecx, r11d
test bl, 63
jnz short rx_body_264
- call rx_read_l2
+ call rx_read_l1
rx_body_264:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm3
movaps xmm1, xmm0
@@ -4671,10 +4693,10 @@ rx_i_265: ;FPADD
mov ecx, r13d
test bl, 63
jnz short rx_body_265
- call rx_read_l2
+ call rx_read_l1
rx_body_265:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm8
movaps xmm2, xmm0
@@ -4690,9 +4712,9 @@ rx_i_266: ;CALL
mov ecx, r13d
test bl, 63
jnz short rx_body_266
- call rx_read_l1
+ call rx_read_l2
rx_body_266:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r10, rax
cmp r12d, 136160027
@@ -4710,7 +4732,8 @@ rx_i_267: ;ROL_64
rx_body_267:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- rol rax, 56
+ mov rcx, r10
+ rol rax, cl
mov r11, rax
rx_i_268: ;JUMP
@@ -4720,10 +4743,10 @@ rx_i_268: ;JUMP
mov ecx, r12d
test bl, 63
jnz short rx_body_268
- call rx_read_l2
+ call rx_read_l1
rx_body_268:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r13, rax
cmp r15d, -2062812966
@@ -4740,8 +4763,7 @@ rx_i_269: ;ROL_64
rx_body_269:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- rol rax, cl
+ rol rax, 50
mov rcx, rax
mov eax, r10d
xor eax, 01ba81447h
@@ -4777,7 +4799,7 @@ rx_body_271:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, -2032281772
+ mov eax, r10d
imul rax, rcx
mov rcx, rax
mov eax, r9d
@@ -4792,9 +4814,9 @@ rx_i_272: ;AND_64
mov ecx, r12d
test bl, 63
jnz short rx_body_272
- call rx_read_l2
+ call rx_read_l1
rx_body_272:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
and rax, r12
mov r13, rax
@@ -4826,9 +4848,9 @@ rx_i_274: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_274
- call rx_read_l2
+ call rx_read_l1
rx_body_274:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm4
movaps xmm6, xmm0
@@ -4837,24 +4859,26 @@ rx_body_274:
and eax, 2047
movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_275: ;DIV_64
+rx_i_275: ;IDIV_64
dec ebx
jz rx_finish
xor r10, 0788eceb7h
mov ecx, r10d
test bl, 63
jnz short rx_body_275
- call rx_read_l2
+ call rx_read_l1
rx_body_275:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, 1
- mov edx, r11d
- test edx, edx
- cmovne ecx, edx
+ ; magic divide by -333089764
+ mov rdx, -7433071640624659213
+ imul rdx
+ mov rax, rdx
xor edx, edx
- div rcx
+ sar rax, 27
+ sets dl
+ add rax, rdx
mov r13, rax
rx_i_276: ;JUMP
@@ -4864,10 +4888,10 @@ rx_i_276: ;JUMP
mov ecx, r9d
test bl, 63
jnz short rx_body_276
- call rx_read_l1
+ call rx_read_l2
rx_body_276:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -4884,9 +4908,9 @@ rx_i_277: ;IMUL_32
mov ecx, r11d
test bl, 63
jnz short rx_body_277
- call rx_read_l2
+ call rx_read_l1
rx_body_277:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r10d
@@ -4922,9 +4946,9 @@ rx_i_279: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_279
- call rx_read_l1
+ call rx_read_l2
rx_body_279:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm9, xmm0
@@ -4933,20 +4957,22 @@ rx_body_279:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_280: ;IMULH_64
+rx_i_280: ;DIV_64
dec ebx
jz rx_finish
xor r12, 066246b43h
mov ecx, r12d
test bl, 63
jnz short rx_body_280
- call rx_read_l2
+ call rx_read_l1
rx_body_280:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r11
- imul rcx
+ ; magic divide by 555412224
+ mov rcx, 2228867111296024113
+ mul rcx
mov rax, rdx
+ shr rax, 26
mov rcx, rax
mov eax, r13d
xor eax, 0211aeb00h
@@ -4964,7 +4990,7 @@ rx_i_281: ;SUB_64
rx_body_281:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ sub rax, -202979002
mov rcx, rax
mov eax, r11d
xor eax, 0f3e6c946h
@@ -4982,7 +5008,7 @@ rx_i_282: ;SUB_64
rx_body_282:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, 1367326224
+ sub rax, r12
mov r11, rax
rx_i_283: ;ADD_64
@@ -4992,12 +5018,12 @@ rx_i_283: ;ADD_64
mov ecx, r9d
test bl, 63
jnz short rx_body_283
- call rx_read_l1
+ call rx_read_l2
rx_body_283:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- add rax, -1156732976
+ add rax, r12
mov rcx, rax
mov eax, r12d
xor eax, 0bb0da7d0h
@@ -5023,7 +5049,7 @@ rx_body_284:
and eax, 32767
movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_285: ;MUL_32
+rx_i_285: ;IMUL_32
dec ebx
jz rx_finish
xor r8, 09adb333bh
@@ -5034,8 +5060,8 @@ rx_i_285: ;MUL_32
rx_body_285:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r8d
+ movsxd rcx, eax
+ movsxd rax, r8d
imul rax, rcx
mov r14, rax
@@ -5070,21 +5096,14 @@ rx_body_287:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov edx, r15d
- cmp edx, -1
- jne short safe_idiv_287
- mov rcx, rax
- rol rcx, 1
- dec rcx
- jz short result_idiv_287
-safe_idiv_287:
- mov ecx, 1
- test edx, edx
- cmovne ecx, edx
- movsxd rcx, ecx
- cqo
- idiv rcx
-result_idiv_287:
+ ; magic divide by 1227278330
+ mov rdx, 8069498232143512385
+ imul rdx
+ mov rax, rdx
+ xor edx, edx
+ sar rax, 29
+ sets dl
+ add rax, rdx
mov rcx, rax
mov eax, r8d
xor eax, 04926c7fah
@@ -5116,9 +5135,9 @@ rx_i_289: ;FPMUL
mov ecx, r14d
test bl, 63
jnz short rx_body_289
- call rx_read_l2
+ call rx_read_l1
rx_body_289:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm9
movaps xmm1, xmm0
@@ -5133,9 +5152,9 @@ rx_i_290: ;FPSUB
mov ecx, r15d
test bl, 63
jnz short rx_body_290
- call rx_read_l2
+ call rx_read_l1
rx_body_290:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm8
movaps xmm9, xmm0
@@ -5147,9 +5166,9 @@ rx_i_291: ;RET
mov ecx, r13d
test bl, 63
jnz short rx_body_291
- call rx_read_l2
+ call rx_read_l1
rx_body_291:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r14d
@@ -5167,11 +5186,12 @@ rx_i_292: ;ROL_64
mov ecx, r13d
test bl, 63
jnz short rx_body_292
- call rx_read_l1
+ call rx_read_l2
rx_body_292:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- rol rax, 23
+ mov rcx, r8
+ rol rax, cl
mov r10, rax
rx_i_293: ;FPADD
@@ -5181,9 +5201,9 @@ rx_i_293: ;FPADD
mov ecx, r9d
test bl, 63
jnz short rx_body_293
- call rx_read_l2
+ call rx_read_l1
rx_body_293:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm8, xmm0
@@ -5195,9 +5215,9 @@ rx_i_294: ;RET
mov ecx, r14d
test bl, 63
jnz short rx_body_294
- call rx_read_l2
+ call rx_read_l1
rx_body_294:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r8d
@@ -5215,10 +5235,10 @@ rx_i_295: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_295
- call rx_read_l1
+ call rx_read_l2
rx_body_295:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm8
movaps xmm7, xmm0
@@ -5230,9 +5250,9 @@ rx_i_296: ;FPSQRT
mov ecx, r14d
test bl, 63
jnz short rx_body_296
- call rx_read_l1
+ call rx_read_l2
rx_body_296:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm8, xmm0
@@ -5258,9 +5278,9 @@ rx_i_298: ;FPSUB
mov ecx, r14d
test bl, 63
jnz short rx_body_298
- call rx_read_l1
+ call rx_read_l2
rx_body_298:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm7
movaps xmm6, xmm0
@@ -5277,7 +5297,7 @@ rx_body_299:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 21400308
+ add rax, r10
mov rcx, rax
mov eax, r12d
xor eax, 01468af4h
@@ -5291,10 +5311,10 @@ rx_i_300: ;FPSUB
mov ecx, r12d
test bl, 63
jnz short rx_body_300
- call rx_read_l2
+ call rx_read_l1
rx_body_300:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm8
movaps xmm2, xmm0
@@ -5342,9 +5362,9 @@ rx_i_303: ;FPADD
mov ecx, r14d
test bl, 63
jnz short rx_body_303
- call rx_read_l1
+ call rx_read_l2
rx_body_303:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm3
movaps xmm9, xmm0
@@ -5364,21 +5384,21 @@ rx_i_304: ;MUL_64
rx_body_304:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r15
+ imul rax, 2007686513
mov r13, rax
-rx_i_305: ;SUB_32
+rx_i_305: ;MUL_64
dec ebx
jz rx_finish
xor r11, 03c6c62b8h
mov ecx, r11d
test bl, 63
jnz short rx_body_305
- call rx_read_l2
+ call rx_read_l1
rx_body_305:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub eax, -65873120
+ imul rax, rax, r15
mov r10, rax
rx_i_306: ;ADD_64
@@ -5388,11 +5408,11 @@ rx_i_306: ;ADD_64
mov ecx, r15d
test bl, 63
jnz short rx_body_306
- call rx_read_l2
+ call rx_read_l1
rx_body_306:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r15
+ add rax, 400578979
mov r13, rax
rx_i_307: ;SHL_64
@@ -5406,8 +5426,7 @@ rx_i_307: ;SHL_64
rx_body_307:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- shl rax, cl
+ shl rax, 33
mov r10, rax
rx_i_308: ;MUL_64
@@ -5417,9 +5436,9 @@ rx_i_308: ;MUL_64
mov ecx, r11d
test bl, 63
jnz short rx_body_308
- call rx_read_l2
+ call rx_read_l1
rx_body_308:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r13
mov r15, rax
@@ -5431,9 +5450,9 @@ rx_i_309: ;IMUL_32
mov ecx, r9d
test bl, 63
jnz short rx_body_309
- call rx_read_l1
+ call rx_read_l2
rx_body_309:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
mov rax, -1652850028
@@ -5482,20 +5501,20 @@ rx_body_311:
andps xmm0, xmm1
movaps xmm4, xmm0
-rx_i_312: ;MULH_64
+rx_i_312: ;MUL_32
dec ebx
jz rx_finish
xor r13, 0b18904cdh
mov ecx, r13d
test bl, 63
jnz short rx_body_312
- call rx_read_l1
+ call rx_read_l2
rx_body_312:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, -1147928648
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r14d
+ imul rax, rcx
mov r10, rax
rx_i_313: ;ROR_64
@@ -5523,9 +5542,9 @@ rx_i_314: ;IMUL_32
mov ecx, r15d
test bl, 63
jnz short rx_body_314
- call rx_read_l1
+ call rx_read_l2
rx_body_314:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r9d
@@ -5543,9 +5562,9 @@ rx_i_315: ;XOR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_315
- call rx_read_l1
+ call rx_read_l2
rx_body_315:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
xor rax, r15
mov r9, rax
@@ -5592,11 +5611,12 @@ rx_i_318: ;ROR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_318
- call rx_read_l1
+ call rx_read_l2
rx_body_318:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- ror rax, 41
+ mov rcx, r11
+ ror rax, cl
mov rcx, rax
mov eax, r15d
xor eax, 061cb9db8h
@@ -5610,12 +5630,11 @@ rx_i_319: ;SHR_64
mov ecx, r13d
test bl, 63
jnz short rx_body_319
- call rx_read_l2
+ call rx_read_l1
rx_body_319:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r15
- shr rax, cl
+ shr rax, 46
mov rcx, rax
mov eax, r11d
xor eax, 01f931a08h
@@ -5640,19 +5659,19 @@ rx_body_320:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm2
-rx_i_321: ;MUL_32
+rx_i_321: ;IMUL_32
dec ebx
jz rx_finish
xor r11, 0a7bae383h
mov ecx, r11d
test bl, 63
jnz short rx_body_321
- call rx_read_l1
+ call rx_read_l2
rx_body_321:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r9d
+ movsxd rcx, eax
+ movsxd rax, r9d
imul rax, rcx
mov rcx, rax
mov eax, r12d
@@ -5680,19 +5699,21 @@ rx_body_322:
jno short rx_i_323
call rx_i_343
-rx_i_323: ;MUL_64
+rx_i_323: ;MULH_64
dec ebx
jz rx_finish
xor r14, 07b07664bh
mov ecx, r14d
test bl, 63
jnz short rx_body_323
- call rx_read_l1
+ call rx_read_l2
rx_body_323:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, rax, -696924877
+ mov rcx, r14
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r14d
xor eax, 0d675c533h
@@ -5731,10 +5752,10 @@ rx_i_325: ;OR_32
rx_body_325:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or eax, -281580460
+ or eax, r8d
mov r13, rax
-rx_i_326: ;MUL_64
+rx_i_326: ;MULH_64
dec ebx
jz rx_finish
xor r11, 0d1b27540h
@@ -5746,14 +5767,16 @@ rx_body_326:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r8
+ mov rcx, -1233771581
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 0b67623c3h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_327: ;IMULH_64
+rx_i_327: ;DIV_64
dec ebx
jz rx_finish
xor r9, 09665f98dh
@@ -5765,9 +5788,11 @@ rx_body_327:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r15
- imul rcx
+ ; magic divide by 1572662125
+ mov rcx, 12594593786994192665
+ mul rcx
mov rax, rdx
+ shr rax, 30
mov r12, rax
rx_i_328: ;SHR_64
@@ -5781,8 +5806,7 @@ rx_i_328: ;SHR_64
rx_body_328:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r13
- shr rax, cl
+ shr rax, 18
mov r9, rax
rx_i_329: ;RET
@@ -5792,9 +5816,9 @@ rx_i_329: ;RET
mov ecx, r11d
test bl, 63
jnz short rx_body_329
- call rx_read_l1
+ call rx_read_l2
rx_body_329:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r11, rax
cmp rsp, rdi
@@ -5808,13 +5832,13 @@ rx_i_330: ;MUL_32
mov ecx, r9d
test bl, 63
jnz short rx_body_330
- call rx_read_l1
+ call rx_read_l2
rx_body_330:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, r13d
+ mov eax, -1349816041
imul rax, rcx
mov rcx, rax
mov eax, r11d
@@ -5829,10 +5853,10 @@ rx_i_331: ;FPADD
mov ecx, r9d
test bl, 63
jnz short rx_body_331
- call rx_read_l1
+ call rx_read_l2
rx_body_331:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm3
movaps xmm9, xmm0
@@ -5863,12 +5887,12 @@ rx_i_333: ;OR_64
mov ecx, r14d
test bl, 63
jnz short rx_body_333
- call rx_read_l2
+ call rx_read_l1
rx_body_333:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, -175125848
+ or rax, r12
mov r11, rax
rx_i_334: ;ADD_64
@@ -5878,10 +5902,10 @@ rx_i_334: ;ADD_64
mov ecx, r8d
test bl, 63
jnz short rx_body_334
- call rx_read_l1
+ call rx_read_l2
rx_body_334:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
add rax, r13
mov r8, rax
@@ -5893,9 +5917,9 @@ rx_i_335: ;SUB_64
mov ecx, r15d
test bl, 63
jnz short rx_body_335
- call rx_read_l1
+ call rx_read_l2
rx_body_335:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
sub rax, r8
mov rcx, rax
@@ -5916,8 +5940,7 @@ rx_body_336:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- ror rax, cl
+ ror rax, 42
mov rcx, rax
mov eax, r11d
xor eax, 02644c5ah
@@ -5949,10 +5972,10 @@ rx_i_338: ;MUL_64
mov ecx, r12d
test bl, 63
jnz short rx_body_338
- call rx_read_l2
+ call rx_read_l1
rx_body_338:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r12
mov r11, rax
@@ -5978,9 +6001,9 @@ rx_i_340: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_340
- call rx_read_l1
+ call rx_read_l2
rx_body_340:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm5, xmm0
@@ -6012,9 +6035,9 @@ rx_i_342: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_342
- call rx_read_l2
+ call rx_read_l1
rx_body_342:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm2
movaps xmm3, xmm0
@@ -6045,25 +6068,27 @@ rx_i_344: ;FPSUB
mov ecx, r10d
test bl, 63
jnz short rx_body_344
- call rx_read_l2
+ call rx_read_l1
rx_body_344:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm6
movaps xmm5, xmm0
-rx_i_345: ;MUL_64
+rx_i_345: ;MULH_64
dec ebx
jz rx_finish
xor r12, 0bbbcdbach
mov ecx, r12d
test bl, 63
jnz short rx_body_345
- call rx_read_l1
+ call rx_read_l2
rx_body_345:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r13
+ mov rcx, r13
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 0ef03b0ddh
@@ -6077,9 +6102,9 @@ rx_i_346: ;AND_32
mov ecx, r12d
test bl, 63
jnz short rx_body_346
- call rx_read_l1
+ call rx_read_l2
rx_body_346:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
and eax, r15d
mov rcx, rax
@@ -6127,9 +6152,9 @@ rx_i_349: ;OR_64
mov ecx, r8d
test bl, 63
jnz short rx_body_349
- call rx_read_l2
+ call rx_read_l1
rx_body_349:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
or rax, r15
mov r13, rax
@@ -6141,9 +6166,9 @@ rx_i_350: ;CALL
mov ecx, r9d
test bl, 63
jnz short rx_body_350
- call rx_read_l1
+ call rx_read_l2
rx_body_350:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -6161,9 +6186,9 @@ rx_i_351: ;MUL_64
mov ecx, r11d
test bl, 63
jnz short rx_body_351
- call rx_read_l2
+ call rx_read_l1
rx_body_351:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
imul rax, r10
mov r13, rax
@@ -6205,18 +6230,20 @@ rx_body_353:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm7
-rx_i_354: ;MUL_64
+rx_i_354: ;MULH_64
dec ebx
jz rx_finish
xor r13, 02412fc10h
mov ecx, r13d
test bl, 63
jnz short rx_body_354
- call rx_read_l2
+ call rx_read_l1
rx_body_354:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r13
+ mov rcx, r13
+ mul rcx
+ mov rax, rdx
mov r13, rax
rx_i_355: ;MUL_64
@@ -6226,9 +6253,9 @@ rx_i_355: ;MUL_64
mov ecx, r10d
test bl, 63
jnz short rx_body_355
- call rx_read_l1
+ call rx_read_l2
rx_body_355:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r14
mov rcx, rax
@@ -6237,19 +6264,19 @@ rx_body_355:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_356: ;SUB_64
+rx_i_356: ;MUL_64
dec ebx
jz rx_finish
xor r10, 01cd85d80h
mov ecx, r10d
test bl, 63
jnz short rx_body_356
- call rx_read_l2
+ call rx_read_l1
rx_body_356:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ imul rax, r10
mov r11, rax
rx_i_357: ;ADD_64
@@ -6259,27 +6286,27 @@ rx_i_357: ;ADD_64
mov ecx, r10d
test bl, 63
jnz short rx_body_357
- call rx_read_l2
+ call rx_read_l1
rx_body_357:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 820073637
+ add rax, r11
mov r11, rax
-rx_i_358: ;IMUL_32
+rx_i_358: ;IMULH_64
dec ebx
jz rx_finish
xor r13, 088fa6e5ah
mov ecx, r13d
test bl, 63
jnz short rx_body_358
- call rx_read_l2
+ call rx_read_l1
rx_body_358:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r11d
- imul rax, rcx
+ mov rcx, r11
+ imul rcx
+ mov rax, rdx
mov r9, rax
rx_i_359: ;FPSUB
@@ -6289,10 +6316,10 @@ rx_i_359: ;FPSUB
mov ecx, r10d
test bl, 63
jnz short rx_body_359
- call rx_read_l2
+ call rx_read_l1
rx_body_359:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm9
movaps xmm4, xmm0
@@ -6350,7 +6377,7 @@ rx_i_362: ;SUB_64
rx_body_362:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, 1082179469
+ sub rax, r9
mov rcx, rax
mov eax, r15d
xor eax, 04080bf8dh
@@ -6364,9 +6391,9 @@ rx_i_363: ;FPMUL
mov ecx, r12d
test bl, 63
jnz short rx_body_363
- call rx_read_l1
+ call rx_read_l2
rx_body_363:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm6
movaps xmm1, xmm0
@@ -6390,19 +6417,19 @@ rx_body_364:
mov rax, rdx
mov r8, rax
-rx_i_365: ;MUL_32
+rx_i_365: ;IMUL_32
dec ebx
jz rx_finish
xor r15, 02db4444ah
mov ecx, r15d
test bl, 63
jnz short rx_body_365
- call rx_read_l2
+ call rx_read_l1
rx_body_365:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r9d
+ movsxd rcx, eax
+ movsxd rax, r9d
imul rax, rcx
mov rcx, rax
mov eax, r12d
@@ -6417,9 +6444,9 @@ rx_i_366: ;IMUL_32
mov ecx, r12d
test bl, 63
jnz short rx_body_366
- call rx_read_l2
+ call rx_read_l1
rx_body_366:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r8d
@@ -6437,43 +6464,44 @@ rx_i_367: ;ROR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_367
- call rx_read_l2
+ call rx_read_l1
rx_body_367:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r9
- ror rax, cl
+ ror rax, 18
mov r12, rax
-rx_i_368: ;SUB_64
+rx_i_368: ;SUB_32
dec ebx
jz rx_finish
xor r10, 0a14836bah
mov ecx, r10d
test bl, 63
jnz short rx_body_368
- call rx_read_l1
+ call rx_read_l2
rx_body_368:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ sub eax, r10d
mov r8, rax
-rx_i_369: ;IMULH_64
+rx_i_369: ;DIV_64
dec ebx
jz rx_finish
xor r9, 053fe22e2h
mov ecx, r9d
test bl, 63
jnz short rx_body_369
- call rx_read_l1
+ call rx_read_l2
rx_body_369:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r13
- imul rcx
+ ; magic divide by 470792991
+ mov rcx, 1314739240972876203
+ mul rcx
mov rax, rdx
+ shr rax, 25
mov r9, rax
rx_i_370: ;FPSUB
@@ -6483,9 +6511,9 @@ rx_i_370: ;FPSUB
mov ecx, r15d
test bl, 63
jnz short rx_body_370
- call rx_read_l1
+ call rx_read_l2
rx_body_370:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm6
movaps xmm6, xmm0
@@ -6520,9 +6548,9 @@ rx_i_372: ;SHL_64
mov ecx, r10d
test bl, 63
jnz short rx_body_372
- call rx_read_l2
+ call rx_read_l1
rx_body_372:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r13
shl rax, cl
@@ -6535,9 +6563,9 @@ rx_i_373: ;FPMUL
mov ecx, r15d
test bl, 63
jnz short rx_body_373
- call rx_read_l2
+ call rx_read_l1
rx_body_373:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm8
movaps xmm1, xmm0
@@ -6569,11 +6597,11 @@ rx_i_375: ;ADD_64
mov ecx, r9d
test bl, 63
jnz short rx_body_375
- call rx_read_l1
+ call rx_read_l2
rx_body_375:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- add rax, r15
+ add rax, -332030999
mov rcx, rax
mov eax, r12d
xor eax, 0ec359be9h
@@ -6591,7 +6619,7 @@ rx_i_376: ;ADD_64
rx_body_376:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 476136066
+ add rax, r9
mov rcx, rax
mov eax, r8d
xor eax, 01c614282h
@@ -6612,20 +6640,20 @@ rx_body_377:
subpd xmm0, xmm3
movaps xmm7, xmm0
-rx_i_378: ;MULH_64
+rx_i_378: ;MUL_32
dec ebx
jz rx_finish
xor r12, 082aa21ach
mov ecx, r12d
test bl, 63
jnz short rx_body_378
- call rx_read_l1
+ call rx_read_l2
rx_body_378:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, 547725353
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r14d
+ imul rax, rcx
mov r15, rax
rx_i_379: ;ROR_64
@@ -6635,25 +6663,26 @@ rx_i_379: ;ROR_64
mov ecx, r10d
test bl, 63
jnz short rx_body_379
- call rx_read_l2
+ call rx_read_l1
rx_body_379:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- ror rax, 56
+ mov rcx, r9
+ ror rax, cl
mov r13, rax
-rx_i_380: ;SUB_32
+rx_i_380: ;MUL_64
dec ebx
jz rx_finish
xor r11, 0229e3d6eh
mov ecx, r11d
test bl, 63
jnz short rx_body_380
- call rx_read_l1
+ call rx_read_l2
rx_body_380:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- sub eax, -1443002912
+ imul rax, rax, r10
mov rcx, rax
mov eax, r13d
xor eax, 0a9fd85e0h
@@ -6667,10 +6696,10 @@ rx_i_381: ;XOR_32
mov ecx, r8d
test bl, 63
jnz short rx_body_381
- call rx_read_l2
+ call rx_read_l1
rx_body_381:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
xor eax, r14d
mov r9, rax
@@ -6682,9 +6711,9 @@ rx_i_382: ;ROL_64
mov ecx, r14d
test bl, 63
jnz short rx_body_382
- call rx_read_l1
+ call rx_read_l2
rx_body_382:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
rol rax, 55
mov r11, rax
@@ -6718,7 +6747,7 @@ rx_i_384: ;XOR_64
rx_body_384:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- xor rax, r11
+ xor rax, 1413715044
mov rcx, rax
mov eax, r9d
xor eax, 054439464h
@@ -6750,26 +6779,26 @@ rx_i_386: ;FPADD
mov ecx, r9d
test bl, 63
jnz short rx_body_386
- call rx_read_l2
+ call rx_read_l1
rx_body_386:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm8
movaps xmm9, xmm0
-rx_i_387: ;SUB_64
+rx_i_387: ;SUB_32
dec ebx
jz rx_finish
xor r9, 0d4f7bc6ah
mov ecx, r9d
test bl, 63
jnz short rx_body_387
- call rx_read_l2
+ call rx_read_l1
rx_body_387:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r15
+ sub eax, r15d
mov r9, rax
rx_i_388: ;RET
@@ -6779,9 +6808,9 @@ rx_i_388: ;RET
mov ecx, r8d
test bl, 63
jnz short rx_body_388
- call rx_read_l2
+ call rx_read_l1
rx_body_388:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -6799,9 +6828,9 @@ rx_i_389: ;JUMP
mov ecx, r11d
test bl, 63
jnz short rx_body_389
- call rx_read_l1
+ call rx_read_l2
rx_body_389:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r14, rax
cmp r9d, -350609584
@@ -6842,11 +6871,12 @@ rx_i_392: ;SAR_64
mov ecx, r14d
test bl, 63
jnz short rx_body_392
- call rx_read_l2
+ call rx_read_l1
rx_body_392:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sar rax, 0
+ mov rcx, r9
+ sar rax, cl
mov rcx, rax
mov eax, r13d
xor eax, 08c4a0f0dh
@@ -6864,7 +6894,7 @@ rx_i_393: ;AND_64
rx_body_393:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, 552339548
+ and rax, r12
mov rcx, rax
mov eax, r13d
xor eax, 020ec085ch
@@ -6878,14 +6908,14 @@ rx_i_394: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_394
- call rx_read_l2
+ call rx_read_l1
rx_body_394:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm9
movaps xmm6, xmm0
-rx_i_395: ;IMUL_32
+rx_i_395: ;IMULH_64
dec ebx
jz rx_finish
xor r8, 04ae4fe8ch
@@ -6897,9 +6927,9 @@ rx_body_395:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r13d
- imul rax, rcx
+ mov rcx, r13
+ imul rcx
+ mov rax, rdx
mov r8, rax
rx_i_396: ;ROR_64
@@ -6909,9 +6939,9 @@ rx_i_396: ;ROR_64
mov ecx, r10d
test bl, 63
jnz short rx_body_396
- call rx_read_l1
+ call rx_read_l2
rx_body_396:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
ror rax, 62
mov rcx, rax
@@ -6920,19 +6950,19 @@ rx_body_396:
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_397: ;SUB_64
+rx_i_397: ;SUB_32
dec ebx
jz rx_finish
xor r8, 0916f3819h
mov ecx, r8d
test bl, 63
jnz short rx_body_397
- call rx_read_l1
+ call rx_read_l2
rx_body_397:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r12
+ sub eax, r12d
mov rcx, rax
mov eax, r10d
xor eax, 0146db5dfh
@@ -6946,11 +6976,12 @@ rx_i_398: ;SHR_64
mov ecx, r8d
test bl, 63
jnz short rx_body_398
- call rx_read_l1
+ call rx_read_l2
rx_body_398:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- shr rax, 44
+ mov rcx, r8
+ shr rax, cl
mov rcx, rax
mov eax, r11d
xor eax, 0724e7136h
@@ -6982,11 +7013,11 @@ rx_i_400: ;AND_64
mov ecx, r13d
test bl, 63
jnz short rx_body_400
- call rx_read_l1
+ call rx_read_l2
rx_body_400:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- and rax, r11
+ and rax, -1800645748
mov rcx, rax
mov eax, r14d
xor eax, 094ac538ch
@@ -7000,9 +7031,9 @@ rx_i_401: ;FPSUB
mov ecx, r13d
test bl, 63
jnz short rx_body_401
- call rx_read_l1
+ call rx_read_l2
rx_body_401:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm4
movaps xmm6, xmm0
@@ -7027,7 +7058,7 @@ rx_body_402:
je short rx_i_403
ret
-rx_i_403: ;IMUL_32
+rx_i_403: ;IMULH_64
dec ebx
jz rx_finish
xor r9, 0e59500f7h
@@ -7038,29 +7069,29 @@ rx_i_403: ;IMUL_32
rx_body_403:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r12d
- imul rax, rcx
+ mov rcx, r12
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r11d
xor eax, 01ff394a0h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_404: ;MULH_64
+rx_i_404: ;MUL_32
dec ebx
jz rx_finish
xor r15, 05b8ceb2fh
mov ecx, r15d
test bl, 63
jnz short rx_body_404
- call rx_read_l1
+ call rx_read_l2
rx_body_404:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r8
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r8d
+ imul rax, rcx
mov r15, rax
rx_i_405: ;CALL
@@ -7070,9 +7101,9 @@ rx_i_405: ;CALL
mov ecx, r8d
test bl, 63
jnz short rx_body_405
- call rx_read_l1
+ call rx_read_l2
rx_body_405:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r12d
@@ -7090,9 +7121,9 @@ rx_i_406: ;FPDIV
mov ecx, r9d
test bl, 63
jnz short rx_body_406
- call rx_read_l2
+ call rx_read_l1
rx_body_406:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm7
movaps xmm1, xmm0
@@ -7111,10 +7142,10 @@ rx_i_407: ;FPSUB
mov ecx, r14d
test bl, 63
jnz short rx_body_407
- call rx_read_l2
+ call rx_read_l1
rx_body_407:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm9
movaps xmm8, xmm0
@@ -7126,11 +7157,11 @@ rx_i_408: ;MUL_64
mov ecx, r15d
test bl, 63
jnz short rx_body_408
- call rx_read_l1
+ call rx_read_l2
rx_body_408:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r9
+ imul rax, 693109961
mov rcx, rax
mov eax, r10d
xor eax, 0295004c9h
@@ -7159,9 +7190,9 @@ rx_i_410: ;RET
mov ecx, r15d
test bl, 63
jnz short rx_body_410
- call rx_read_l1
+ call rx_read_l2
rx_body_410:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov r8, rax
cmp rsp, rdi
@@ -7229,33 +7260,33 @@ rx_i_414: ;AND_64
mov ecx, r14d
test bl, 63
jnz short rx_body_414
- call rx_read_l1
+ call rx_read_l2
rx_body_414:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- and rax, r8
+ and rax, -378293327
mov rcx, rax
mov eax, r10d
xor eax, 0e973b3b1h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_415: ;IMUL_32
+rx_i_415: ;IMULH_64
dec ebx
jz rx_finish
xor r8, 08c3e59a1h
mov ecx, r8d
test bl, 63
jnz short rx_body_415
- call rx_read_l1
+ call rx_read_l2
rx_body_415:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- mov rax, -538093385
- imul rax, rcx
+ mov rcx, r8
+ imul rcx
+ mov rax, rdx
mov r9, rax
rx_i_416: ;FPADD
@@ -7291,18 +7322,20 @@ rx_body_417:
sub rax, r12
mov r10, rax
-rx_i_418: ;MUL_64
+rx_i_418: ;MULH_64
dec ebx
jz rx_finish
xor r10, 02bd61c5fh
mov ecx, r10d
test bl, 63
jnz short rx_body_418
- call rx_read_l1
+ call rx_read_l2
rx_body_418:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r11
+ mov rcx, r11
+ mul rcx
+ mov rax, rdx
mov r10, rax
rx_i_419: ;OR_64
@@ -7312,9 +7345,9 @@ rx_i_419: ;OR_64
mov ecx, r9d
test bl, 63
jnz short rx_body_419
- call rx_read_l1
+ call rx_read_l2
rx_body_419:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
or rax, r14
mov rcx, rax
@@ -7334,8 +7367,7 @@ rx_i_420: ;ROR_64
rx_body_420:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r11
- ror rax, cl
+ ror rax, 38
mov r9, rax
rx_i_421: ;CALL
@@ -7345,33 +7377,33 @@ rx_i_421: ;CALL
mov ecx, r12d
test bl, 63
jnz short rx_body_421
- call rx_read_l2
+ call rx_read_l1
rx_body_421:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r10, rax
cmp r8d, -1600409762
jo short rx_i_422
call rx_i_31
-rx_i_422: ;MUL_32
+rx_i_422: ;IMUL_32
dec ebx
jz rx_finish
xor r11, 04dd16ca4h
mov ecx, r11d
test bl, 63
jnz short rx_body_422
- call rx_read_l2
+ call rx_read_l1
rx_body_422:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r10d
+ movsxd rcx, eax
+ movsxd rax, r10d
imul rax, rcx
mov r13, rax
-rx_i_423: ;SUB_64
+rx_i_423: ;MUL_64
dec ebx
jz rx_finish
xor r12, 04df5ce05h
@@ -7382,7 +7414,7 @@ rx_i_423: ;SUB_64
rx_body_423:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ imul rax, r10
mov rcx, rax
mov eax, r15d
xor eax, 0a5d40d0ah
@@ -7396,10 +7428,10 @@ rx_i_424: ;FPADD
mov ecx, r13d
test bl, 63
jnz short rx_body_424
- call rx_read_l2
+ call rx_read_l1
rx_body_424:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm7
movaps xmm9, xmm0
@@ -7408,7 +7440,7 @@ rx_body_424:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_425: ;MUL_32
+rx_i_425: ;IMUL_32
dec ebx
jz rx_finish
xor r8, 0a3c5391dh
@@ -7419,25 +7451,27 @@ rx_i_425: ;MUL_32
rx_body_425:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, r10d
+ movsxd rcx, eax
+ movsxd rax, r10d
imul rax, rcx
mov r14, rax
-rx_i_426: ;IMULH_64
+rx_i_426: ;DIV_64
dec ebx
jz rx_finish
xor r12, 09dd55ba0h
mov ecx, r12d
test bl, 63
jnz short rx_body_426
- call rx_read_l2
+ call rx_read_l1
rx_body_426:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r9
- imul rcx
+ ; magic divide by 3704238575
+ mov rcx, 1336782190693946083
+ mul rcx
mov rax, rdx
+ shr rax, 28
mov rcx, rax
mov eax, r14d
xor eax, 0dcca31efh
@@ -7456,7 +7490,7 @@ rx_body_427:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r11
+ mov rcx, -2146332428
mul rcx
mov rax, rdx
mov rcx, rax
@@ -7492,11 +7526,11 @@ rx_i_429: ;MUL_64
mov ecx, r12d
test bl, 63
jnz short rx_body_429
- call rx_read_l2
+ call rx_read_l1
rx_body_429:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, rax, 1990438276
+ imul rax, rax, r9
mov r15, rax
rx_i_430: ;FPADD
@@ -7540,14 +7574,14 @@ rx_i_432: ;SUB_64
mov ecx, r10d
test bl, 63
jnz short rx_body_432
- call rx_read_l2
+ call rx_read_l1
rx_body_432:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ sub rax, 876274173
mov r8, rax
-rx_i_433: ;ADD_64
+rx_i_433: ;ADD_32
dec ebx
jz rx_finish
xor r13, 0bbb88499h
@@ -7558,7 +7592,7 @@ rx_i_433: ;ADD_64
rx_body_433:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r12
+ add eax, 1193456495
mov rcx, rax
mov eax, r12d
xor eax, 04722b36fh
@@ -7572,9 +7606,9 @@ rx_i_434: ;FPDIV
mov ecx, r13d
test bl, 63
jnz short rx_body_434
- call rx_read_l2
+ call rx_read_l1
rx_body_434:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
divpd xmm0, xmm3
movaps xmm1, xmm0
@@ -7598,7 +7632,7 @@ rx_body_435:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, r15
+ imul rax, 1971717631
mov rcx, rax
mov eax, r9d
xor eax, 0758605ffh
@@ -7612,9 +7646,9 @@ rx_i_436: ;FPADD
mov ecx, r15d
test bl, 63
jnz short rx_body_436
- call rx_read_l2
+ call rx_read_l1
rx_body_436:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm2
movaps xmm7, xmm0
@@ -7665,11 +7699,11 @@ rx_i_439: ;OR_64
mov ecx, r13d
test bl, 63
jnz short rx_body_439
- call rx_read_l2
+ call rx_read_l1
rx_body_439:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- or rax, r15
+ or rax, -1299288575
mov rcx, rax
mov eax, r10d
xor eax, 0b28e6e01h
@@ -7705,7 +7739,7 @@ rx_body_441:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 529736748
+ add rax, r14
mov rcx, rax
mov eax, r9d
xor eax, 01f93242ch
@@ -7753,9 +7787,9 @@ rx_i_444: ;FPSUB
mov ecx, r8d
test bl, 63
jnz short rx_body_444
- call rx_read_l2
+ call rx_read_l1
rx_body_444:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm7
movaps xmm5, xmm0
@@ -7881,7 +7915,7 @@ rx_body_451:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, -287502157
+ add rax, r10
mov r8, rax
rx_i_452: ;RET
@@ -7891,9 +7925,9 @@ rx_i_452: ;RET
mov ecx, r13d
test bl, 63
jnz short rx_body_452
- call rx_read_l1
+ call rx_read_l2
rx_body_452:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r11d
@@ -7904,20 +7938,20 @@ rx_body_452:
je short rx_i_453
ret
-rx_i_453: ;IMUL_32
+rx_i_453: ;IMULH_64
dec ebx
jz rx_finish
xor r11, 0a2096aa4h
mov ecx, r11d
test bl, 63
jnz short rx_body_453
- call rx_read_l1
+ call rx_read_l2
rx_body_453:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r14d
- imul rax, rcx
+ mov rcx, r14
+ imul rcx
+ mov rax, rdx
mov r8, rax
rx_i_454: ;FPADD
@@ -7927,9 +7961,9 @@ rx_i_454: ;FPADD
mov ecx, r13d
test bl, 63
jnz short rx_body_454
- call rx_read_l1
+ call rx_read_l2
rx_body_454:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm9
movaps xmm4, xmm0
@@ -7960,11 +7994,11 @@ rx_i_456: ;AND_64
mov ecx, r9d
test bl, 63
jnz short rx_body_456
- call rx_read_l2
+ call rx_read_l1
rx_body_456:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and rax, r11
+ and rax, 401943615
mov rcx, rax
mov eax, r9d
xor eax, 017f52c3fh
@@ -7983,7 +8017,7 @@ rx_body_457:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, r10
+ sub rax, 1482178870
mov rcx, rax
mov eax, r10d
xor eax, 058584136h
@@ -7997,14 +8031,15 @@ rx_i_458: ;SAR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_458
- call rx_read_l1
+ call rx_read_l2
rx_body_458:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- sar rax, 22
+ mov rcx, r8
+ sar rax, cl
mov r14, rax
-rx_i_459: ;SUB_64
+rx_i_459: ;MUL_64
dec ebx
jz rx_finish
xor r9, 0346f46adh
@@ -8015,14 +8050,14 @@ rx_i_459: ;SUB_64
rx_body_459:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, 381354340
+ imul rax, rax, r9
mov rcx, rax
mov eax, r13d
xor eax, 016bb0164h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_460: ;ADD_64
+rx_i_460: ;ADD_32
dec ebx
jz rx_finish
xor r11, 098ab71fch
@@ -8033,7 +8068,7 @@ rx_i_460: ;ADD_64
rx_body_460:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r14
+ add eax, -347784553
mov rcx, rax
mov eax, r12d
xor eax, 0eb453a97h
@@ -8047,11 +8082,11 @@ rx_i_461: ;XOR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_461
- call rx_read_l2
+ call rx_read_l1
rx_body_461:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- xor rax, r13
+ xor rax, 1659853721
mov rcx, rax
mov eax, r12d
xor eax, 062ef5b99h
@@ -8065,14 +8100,14 @@ rx_i_462: ;ADD_64
mov ecx, r10d
test bl, 63
jnz short rx_body_462
- call rx_read_l2
+ call rx_read_l1
rx_body_462:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, -1734323376
+ add rax, r8
mov r15, rax
-rx_i_463: ;ADD_64
+rx_i_463: ;ADD_32
dec ebx
jz rx_finish
xor r9, 08c29341h
@@ -8083,7 +8118,7 @@ rx_i_463: ;ADD_64
rx_body_463:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, r15
+ add eax, r15d
mov r10, rax
rx_i_464: ;MUL_64
@@ -8111,14 +8146,14 @@ rx_i_465: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_465
- call rx_read_l2
+ call rx_read_l1
rx_body_465:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm5
movaps xmm2, xmm0
-rx_i_466: ;MUL_32
+rx_i_466: ;IMUL_32
dec ebx
jz rx_finish
xor r13, 05c541c42h
@@ -8130,8 +8165,8 @@ rx_body_466:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, 282682508
+ movsxd rcx, eax
+ mov rax, 282682508
imul rax, rcx
mov r9, rax
@@ -8150,7 +8185,7 @@ rx_body_467:
addpd xmm0, xmm9
movaps xmm8, xmm0
-rx_i_468: ;IMUL_32
+rx_i_468: ;IMULH_64
dec ebx
jz rx_finish
xor r8, 091044dc3h
@@ -8162,9 +8197,9 @@ rx_body_468:
xor rbp, rcx
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- mov rax, -13394825
- imul rax, rcx
+ mov rcx, r8
+ imul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r8d
xor eax, 0ff339c77h
@@ -8178,12 +8213,12 @@ rx_i_469: ;MUL_32
mov ecx, r9d
test bl, 63
jnz short rx_body_469
- call rx_read_l1
+ call rx_read_l2
rx_body_469:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov ecx, eax
- mov eax, 294019485
+ mov eax, r9d
imul rax, rcx
mov rcx, rax
mov eax, r9d
@@ -8198,9 +8233,9 @@ rx_i_470: ;OR_64
mov ecx, r14d
test bl, 63
jnz short rx_body_470
- call rx_read_l1
+ call rx_read_l2
rx_body_470:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
or rax, r11
mov rcx, rax
@@ -8216,9 +8251,9 @@ rx_i_471: ;IMUL_32
mov ecx, r14d
test bl, 63
jnz short rx_body_471
- call rx_read_l1
+ call rx_read_l2
rx_body_471:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
movsxd rax, r13d
@@ -8232,10 +8267,10 @@ rx_i_472: ;JUMP
mov ecx, r9d
test bl, 63
jnz short rx_body_472
- call rx_read_l2
+ call rx_read_l1
rx_body_472:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r10, rax
cmp r10d, 1738497427
@@ -8252,7 +8287,7 @@ rx_i_473: ;MUL_64
rx_body_473:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- imul rax, rax, -751043211
+ imul rax, rax, r11
mov r12, rax
rx_i_474: ;JUMP
@@ -8262,10 +8297,10 @@ rx_i_474: ;JUMP
mov ecx, r9d
test bl, 63
jnz short rx_body_474
- call rx_read_l2
+ call rx_read_l1
rx_body_474:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov r15, rax
cmp r15d, -233120543
@@ -8278,9 +8313,9 @@ rx_i_475: ;FPSUB
mov ecx, r10d
test bl, 63
jnz short rx_body_475
- call rx_read_l2
+ call rx_read_l1
rx_body_475:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm9
movaps xmm7, xmm0
@@ -8306,9 +8341,9 @@ rx_i_477: ;FPADD
mov ecx, r12d
test bl, 63
jnz short rx_body_477
- call rx_read_l1
+ call rx_read_l2
rx_body_477:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm9
movaps xmm6, xmm0
@@ -8324,9 +8359,9 @@ rx_i_478: ;MUL_64
mov ecx, r14d
test bl, 63
jnz short rx_body_478
- call rx_read_l1
+ call rx_read_l2
rx_body_478:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
imul rax, r10
mov r12, rax
@@ -8363,7 +8398,7 @@ rx_body_480:
addpd xmm0, xmm4
movaps xmm6, xmm0
-rx_i_481: ;IMUL_32
+rx_i_481: ;IMULH_64
dec ebx
jz rx_finish
xor r14, 0225ba1f9h
@@ -8374,9 +8409,9 @@ rx_i_481: ;IMUL_32
rx_body_481:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- movsxd rcx, eax
- movsxd rax, r13d
- imul rax, rcx
+ mov rcx, r13
+ imul rcx
+ mov rax, rdx
mov r12, rax
rx_i_482: ;AND_32
@@ -8386,11 +8421,11 @@ rx_i_482: ;AND_32
mov ecx, r14d
test bl, 63
jnz short rx_body_482
- call rx_read_l2
+ call rx_read_l1
rx_body_482:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- and eax, r12d
+ and eax, 1304556205
mov r11, rax
rx_i_483: ;FPADD
@@ -8429,10 +8464,10 @@ rx_i_485: ;JUMP
mov ecx, r13d
test bl, 63
jnz short rx_body_485
- call rx_read_l2
+ call rx_read_l1
rx_body_485:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r15d
@@ -8453,7 +8488,7 @@ rx_i_486: ;ADD_64
rx_body_486:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- add rax, 942846898
+ add rax, r8
mov rcx, rax
mov eax, r8d
xor eax, 03832b3b2h
@@ -8471,7 +8506,7 @@ rx_i_487: ;SUB_64
rx_body_487:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- sub rax, -333279706
+ sub rax, r9
mov r11, rax
rx_i_488: ;IMUL_32
@@ -8481,9 +8516,9 @@ rx_i_488: ;IMUL_32
mov ecx, r12d
test bl, 63
jnz short rx_body_488
- call rx_read_l1
+ call rx_read_l2
rx_body_488:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
movsxd rcx, eax
mov rax, 297357073
@@ -8517,10 +8552,10 @@ rx_i_490: ;ROR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_490
- call rx_read_l2
+ call rx_read_l1
rx_body_490:
xor rbp, rcx
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
mov rcx, r9
ror rax, cl
@@ -8537,9 +8572,9 @@ rx_i_491: ;FPADD
mov ecx, r8d
test bl, 63
jnz short rx_body_491
- call rx_read_l2
+ call rx_read_l1
rx_body_491:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
addpd xmm0, xmm9
movaps xmm7, xmm0
@@ -8555,25 +8590,20 @@ rx_i_492: ;IDIV_64
mov ecx, r9d
test bl, 63
jnz short rx_body_492
- call rx_read_l2
+ call rx_read_l1
rx_body_492:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov edx, r9d
- cmp edx, -1
- jne short safe_idiv_492
+ ; magic divide by -1779388031
mov rcx, rax
- rol rcx, 1
- dec rcx
- jz short result_idiv_492
-safe_idiv_492:
- mov ecx, 1
- test edx, edx
- cmovne ecx, edx
- movsxd rcx, ecx
- cqo
- idiv rcx
-result_idiv_492:
+ mov rdx, 7315366159790064091
+ imul rdx
+ mov rax, rdx
+ xor edx, edx
+ sub rax, rcx
+ sar rax, 30
+ sets dl
+ add rax, rdx
mov r12, rax
rx_i_493: ;FPSUB
@@ -8590,20 +8620,20 @@ rx_body_493:
subpd xmm0, xmm9
movaps xmm4, xmm0
-rx_i_494: ;MULH_64
+rx_i_494: ;MUL_32
dec ebx
jz rx_finish
xor r10, 0b0d50e46h
mov ecx, r10d
test bl, 63
jnz short rx_body_494
- call rx_read_l2
+ call rx_read_l1
rx_body_494:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r11
- mul rcx
- mov rax, rdx
+ mov ecx, eax
+ mov eax, r11d
+ imul rax, rcx
mov r14, rax
rx_i_495: ;FPMUL
@@ -8613,9 +8643,9 @@ rx_i_495: ;FPMUL
mov ecx, r11d
test bl, 63
jnz short rx_body_495
- call rx_read_l1
+ call rx_read_l2
rx_body_495:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
mulpd xmm0, xmm2
movaps xmm1, xmm0
@@ -8623,7 +8653,7 @@ rx_body_495:
andps xmm0, xmm1
movaps xmm8, xmm0
-rx_i_496: ;DIV_64
+rx_i_496: ;IDIV_64
dec ebx
jz rx_finish
xor r14, 0fe757b73h
@@ -8634,9 +8664,14 @@ rx_i_496: ;DIV_64
rx_body_496:
and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, -359802064
+ ; magic divide by -359802064
+ mov rdx, -860153514353783887
+ imul rdx
+ mov rax, rdx
xor edx, edx
- div rcx
+ sar rax, 24
+ sets dl
+ add rax, rdx
mov r9, rax
rx_i_497: ;FPMUL
@@ -8678,19 +8713,19 @@ rx_body_498:
and eax, 2047
movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_499: ;MUL_32
+rx_i_499: ;IMUL_32
dec ebx
jz rx_finish
xor r12, 08925556bh
mov ecx, r12d
test bl, 63
jnz short rx_body_499
- call rx_read_l2
+ call rx_read_l1
rx_body_499:
- and ecx, 32767
+ and ecx, 2047
mov rax, qword ptr [rsi+rcx*8]
- mov ecx, eax
- mov eax, -1795485757
+ movsxd rcx, eax
+ mov rax, -1795485757
imul rax, rcx
mov r8, rax
@@ -8701,9 +8736,9 @@ rx_i_500: ;FPSQRT
mov ecx, r10d
test bl, 63
jnz short rx_body_500
- call rx_read_l1
+ call rx_read_l2
rx_body_500:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
andps xmm0, xmm10
sqrtpd xmm2, xmm0
@@ -8733,10 +8768,10 @@ rx_i_502: ;RET
mov ecx, r10d
test bl, 63
jnz short rx_body_502
- call rx_read_l1
+ call rx_read_l2
rx_body_502:
xor rbp, rcx
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
mov rcx, rax
mov eax, r9d
@@ -8754,9 +8789,9 @@ rx_i_503: ;FPSUB
mov ecx, r13d
test bl, 63
jnz short rx_body_503
- call rx_read_l1
+ call rx_read_l2
rx_body_503:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm2
movaps xmm9, xmm0
@@ -8790,9 +8825,9 @@ rx_i_505: ;FPSUB
mov ecx, r12d
test bl, 63
jnz short rx_body_505
- call rx_read_l2
+ call rx_read_l1
rx_body_505:
- and ecx, 32767
+ and ecx, 2047
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm4
movaps xmm8, xmm0
@@ -8808,9 +8843,9 @@ rx_i_506: ;FPSUB
mov ecx, r9d
test bl, 63
jnz short rx_body_506
- call rx_read_l1
+ call rx_read_l2
rx_body_506:
- and ecx, 2047
+ and ecx, 32767
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
subpd xmm0, xmm9
movaps xmm3, xmm0
@@ -8887,12 +8922,11 @@ rx_i_511: ;SHR_64
mov ecx, r11d
test bl, 63
jnz short rx_body_511
- call rx_read_l1
+ call rx_read_l2
rx_body_511:
- and ecx, 2047
+ and ecx, 32767
mov rax, qword ptr [rsi+rcx*8]
- mov rcx, r10
- shr rax, cl
+ shr rax, 56
mov r11, rax
jmp rx_i_0