diff --git a/makefile b/makefile index 4b51084..21584cb 100644 --- a/makefile +++ b/makefile @@ -12,6 +12,9 @@ OBJDIR=obj LDFLAGS=-lpthread TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o) +ifeq ($(PLATFORM),x86_64) + ROBJS += $(OBJDIR)/JitCompilerX86-static.o +endif all: release test @@ -57,6 +60,9 @@ $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@ +$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_r.inc read_f.inc)) | $(OBJDIR) + $(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@ + $(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@ diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 0d61f43..bb0e106 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -54,7 +54,7 @@ namespace RandomX { (this->*generator)(instr, i); } - void AssemblyGeneratorX86::gena(Instruction& instr) { + void AssemblyGeneratorX86::genar(Instruction& instr) { asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; switch (instr.loca & 7) { @@ -63,7 +63,7 @@ namespace RandomX { case 2: case 3: asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; - 
asmCode << "\tcall rx_read_dataset" << std::endl; + asmCode << "\tcall rx_read_dataset_r" << std::endl; return; case 4: @@ -80,6 +80,33 @@ namespace RandomX { } } + + void AssemblyGeneratorX86::genaf(Instruction& instr) { + asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; + switch (instr.loca & 7) + { + case 0: + case 1: + case 2: + case 3: + asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; + asmCode << "\tcall rx_read_dataset_f" << std::endl; + return; + + case 4: + asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl; + asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; + asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl; + return; + + default: + asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl; + asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; + asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl; + return; + } + } + void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) { switch (instr.locb & 7) { @@ -87,8 +114,6 @@ namespace RandomX { case 1: case 2: case 3: - case 4: - case 5: asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl; asmCode << "\t" << instrx86 << " rax, cl" << std::endl; return; @@ -133,26 +158,7 @@ namespace RandomX { } void AssemblyGeneratorX86::genbf(Instruction& instr, const char* instrx86) { - asmCode << "\tand rax, -2048" << std::endl; - asmCode << "\tcvtsi2sd xmm0, rax" << std::endl; - switch (instr.locb & 7) - { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl; - return; - default: - convertible_t bimm; - bimm.f64 = (double)instr.imm32; - asmCode << "\tmov rax, " << bimm.i64 << std::endl; - asmCode << "\tmovd xmm1, rax" << std::endl; - asmCode << "\t" << instrx86 << " xmm0, xmm1" 
<< std::endl; - return; - } + asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl; } void AssemblyGeneratorX86::gencr(Instruction& instr) { @@ -165,7 +171,7 @@ namespace RandomX { asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl; if (trace) { - asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl; + asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl; } return; @@ -178,76 +184,75 @@ namespace RandomX { asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl; if (trace) { - asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl; + asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl; } return; default: asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl; if (trace) { - asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl; + asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl; } } } - void AssemblyGeneratorX86::gencf(Instruction& instr) { + void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) { + if(!alwaysLow) + asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl; + const char* store = (!alwaysLow && (instr.locc & 8)) ? 
"movhpd" : "movlpd"; switch (instr.locc & 7) { - case 0: - asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; - asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; - asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; - asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl; - break; + case 4: + asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; + asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; + asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl; + asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; + break; - case 1: - case 2: - case 3: - asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; - asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; - asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; - asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl; - break; - - default: - asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl; - break; + case 5: + case 6: + case 7: + asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl; + asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; + asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl; + asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; + break; } if (trace) { - asmCode << "\tmovd qword ptr [rsi + rdi * 8 + 262144], xmm0" << std::endl; + asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl; } } void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tadd rax, "; genbr1(instr); gencr(instr); } void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) { - 
gena(instr); + genar(instr); asmCode << "\tadd eax, "; genbr132(instr); gencr(instr); } void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tsub rax, "; genbr1(instr); gencr(instr); } void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tsub eax, "; genbr132(instr); gencr(instr); } void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\timul rax, "; if ((instr.locb & 7) >= 6) { asmCode << "rax, "; @@ -257,7 +262,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmov rcx, "; genbr1(instr); asmCode << "\tmul rcx" << std::endl; @@ -266,7 +271,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmov ecx, eax" << std::endl; asmCode << "\tmov eax, "; genbr132(instr); @@ -275,7 +280,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmovsxd rcx, eax" << std::endl; if ((instr.locb & 7) >= 6) { asmCode << "\tmov rax, " << instr.imm32 << std::endl; @@ -288,7 +293,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmov rcx, "; genbr1(instr); asmCode << "\timul rcx" << std::endl; @@ -297,7 +302,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) { - gena(instr); + genar(instr); if ((instr.locb & 7) >= 6) { if (instr.imm32 == 0) { asmCode << "\tmov ecx, 1" << std::endl; @@ -318,7 +323,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmov edx, "; genbr132(instr); asmCode << "\tcmp edx, -1" << std::endl; @@ -339,123 +344,125 @@ namespace RandomX { } void 
AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tand rax, "; genbr1(instr); gencr(instr); } void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tand eax, "; genbr132(instr); gencr(instr); } void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tor rax, "; genbr1(instr); gencr(instr); } void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tor eax, "; genbr132(instr); gencr(instr); } void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\txor rax, "; genbr1(instr); gencr(instr); } void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\txor eax, "; genbr132(instr); gencr(instr); } void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) { - gena(instr); + genar(instr); genbr0(instr, "shl"); gencr(instr); } void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) { - gena(instr); + genar(instr); genbr0(instr, "shr"); gencr(instr); } void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) { - gena(instr); + genar(instr); genbr0(instr, "sar"); gencr(instr); } void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) { - gena(instr); + genar(instr); genbr0(instr, "rol"); gencr(instr); } void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) { - gena(instr); + genar(instr); genbr0(instr, "ror"); gencr(instr); } void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) { - gena(instr); - genbf(instr, "addsd"); + genaf(instr); + genbf(instr, "addpd"); gencf(instr); } void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) { - gena(instr); - genbf(instr, "subsd"); + genaf(instr); + genbf(instr, "subpd"); gencf(instr); } void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) { - gena(instr); - asmCode << 
"\tor rax, 2048" << std::endl; - genbf(instr, "mulsd"); + genaf(instr); + genbf(instr, "mulpd"); + asmCode << "\tmovaps xmm1, xmm0" << std::endl; + asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl; + asmCode << "\tandps xmm0, xmm1" << std::endl; gencf(instr); } void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) { - gena(instr); - asmCode << "\tor rax, 2048" << std::endl; - genbf(instr, "divsd"); + genaf(instr); + genbf(instr, "divpd"); + asmCode << "\tmovaps xmm1, xmm0" << std::endl; + asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl; + asmCode << "\tandps xmm0, xmm1" << std::endl; gencf(instr); } void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) { - gena(instr); - asmCode << "\tmov rcx, 9223372036854773760" << std::endl; - asmCode << "\tand rax, rcx" << std::endl; - asmCode << "\tcvtsi2sd xmm0, rax" << std::endl; - asmCode << "\tsqrtsd xmm0, xmm0" << std::endl; + genaf(instr); + asmCode << "\tandps xmm0, xmm10" << std::endl; + asmCode << "\tsqrtpd xmm0, xmm0" << std::endl; gencf(instr); } void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tmov rcx, rax" << std::endl; asmCode << "\tshl eax, 13" << std::endl; asmCode << "\tand rcx, -2048" << std::endl; asmCode << "\tand eax, 24576" << std::endl; - asmCode << "\tcvtsi2sd xmm0, rcx" << std::endl; + asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl; asmCode << "\tor eax, 40896" << std::endl; asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl; asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl; - gencf(instr); + gencf(instr, true); } static inline const char* jumpCondition(Instruction& instr, bool invert = false) { @@ -481,7 +488,7 @@ namespace RandomX { } void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl; asmCode << "\t" << jumpCondition(instr); asmCode << " 
short taken_call_" << i << std::endl; @@ -489,14 +496,14 @@ namespace RandomX { asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl; asmCode << "taken_call_" << i << ":" << std::endl; if (trace) { - asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl; + asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl; } asmCode << "\tpush rax" << std::endl; asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl; } void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) { - gena(instr); + genar(instr); asmCode << "\tcmp rsp, rbp" << std::endl; asmCode << "\tje short not_taken_ret_" << i << std::endl; asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl; diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index e61fa26..3097a94 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -38,13 +38,14 @@ namespace RandomX { static InstructionGenerator engine[256]; std::stringstream asmCode; - void gena(Instruction&); + void genar(Instruction&); + void genaf(Instruction&); void genbr0(Instruction&, const char*); void genbr1(Instruction&); void genbr132(Instruction&); void genbf(Instruction&, const char*); void gencr(Instruction&); - void gencf(Instruction&); + void gencf(Instruction&, bool); void generateCode(Instruction&, int); diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index 5ef3cd7..7803003 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -26,9 +26,7 @@ along with RandomX. If not, see. 
namespace RandomX { CompiledVirtualMachine::CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) { -#if !defined(_M_X64) && !defined(__x86_64__) - throw std::runtime_error("Compiled VM only supports x86-64 CPUs"); -#endif + } void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) { @@ -51,7 +49,7 @@ namespace RandomX { void CompiledVirtualMachine::execute() { //executeProgram(reg, mem, scratchpad, readDataset); compiler.getProgramFunc()(reg, mem, scratchpad); -#ifdef TRACE +#ifdef TRACEVM for (int32_t i = InstructionCount - 1; i >= 0; --i) { std::cout << std::hex << tracepad[i].u64 << std::endl; } diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index e7e9299..0932cfe 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -18,7 +18,7 @@ along with RandomX. If not, see. */ #pragma once -//#define TRACE +//#define TRACEVM #include "VirtualMachine.hpp" #include "JitCompilerX86.hpp" @@ -34,7 +34,7 @@ namespace RandomX { return compiler.getCode(); } private: -#ifdef TRACE +#ifdef TRACEVM convertible_t tracepad[InstructionCount]; #endif JitCompilerX86 compiler; diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 2bb4d75..c436ef7 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -44,9 +44,11 @@ namespace RandomX { *(((uint32_t*)&reg) + i) = gen(); } FPINIT(); - for (int i = 0; i < 8; ++i) { - reg.f[i].f64 = (double)reg.f[i].i64; + for (int i = 0; i < RegistersCount; ++i) { + reg.f[i].lo.f64 = (double)reg.f[i].lo.i64; + reg.f[i].hi.f64 = (double)reg.f[i].hi.i64; } + //std::cout << reg; p.initialize(gen); mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7; mem.mx = *(((uint32_t*)seed) + 5); @@ -97,52 +99,36 @@ namespace RandomX { convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) { switch (inst.locb & 7) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - return reg.r[inst.regb % 
RegistersCount]; - case 6: - case 7: - convertible_t temp; - temp.i64 = inst.imm32; //sign-extend imm32 - return temp; + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + return reg.r[inst.regb % RegistersCount]; + case 6: + case 7: + convertible_t temp; + temp.i64 = inst.imm32; //sign-extend imm32 + return temp; } } convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) { switch (inst.locb & 7) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - return reg.r[inst.regb % RegistersCount]; - case 6: - case 7: - convertible_t temp; - temp.u64 = inst.imm8; - return temp; - } - } - - double InterpretedVirtualMachine::loadbf(Instruction& inst) { - switch (inst.locb & 7) - { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - return reg.f[inst.regb % RegistersCount].f64; - case 6: - case 7: - return (double)inst.imm32; + case 0: + case 1: + case 2: + case 3: + return reg.r[inst.regb % RegistersCount]; + case 4: + case 5: + case 6: + case 7: + convertible_t temp; + temp.u64 = inst.imm8; + return temp; } } @@ -150,43 +136,61 @@ namespace RandomX { addr_t addr; switch (inst.locc & 7) { - case 0: - addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; - return scratchpad[addr % ScratchpadL2]; + case 0: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + return scratchpad[addr % ScratchpadL2]; - case 1: - case 2: - case 3: - addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; - return scratchpad[addr % ScratchpadL1]; + case 1: + case 2: + case 3: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + return scratchpad[addr % ScratchpadL1]; - case 4: - case 5: - case 6: - case 7: - return reg.r[inst.regc % RegistersCount]; + case 4: + case 5: + case 6: + case 7: + return reg.r[inst.regc % RegistersCount]; } } - convertible_t& InterpretedVirtualMachine::getcf(Instruction& inst) { + void InterpretedVirtualMachine::writecf(Instruction& inst, fpu_reg_t& regc) { addr_t addr; switch (inst.locc & 7) { - case 
0: - addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; - return scratchpad[addr % ScratchpadL2]; + case 4: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + scratchpad[addr % ScratchpadL2] = (inst.locc & 8) ? regc.hi : regc.lo; + break; - case 1: - case 2: - case 3: - addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; - return scratchpad[addr % ScratchpadL1]; + case 5: + case 6: + case 7: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + scratchpad[addr % ScratchpadL1] = (inst.locc & 8) ? regc.hi : regc.lo; - case 4: - case 5: - case 6: - case 7: - return reg.f[inst.regc % RegistersCount]; + default: + break; + } + } + + void InterpretedVirtualMachine::writecflo(Instruction& inst, fpu_reg_t& regc) { + addr_t addr; + switch (inst.locc & 7) + { + case 4: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + scratchpad[addr % ScratchpadL2] = regc.lo; + break; + + case 5: + case 6: + case 7: + addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc; + scratchpad[addr % ScratchpadL1] = regc.lo; + + default: + break; } } @@ -194,22 +198,18 @@ namespace RandomX { if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl; #define FPU_RETIRE(x) x(a, b, c); \ + writecf(inst, c); \ if(trace) { \ - convertible_t bc; \ - bc.f64 = b; \ - std::cout << std::hex << /*a.u64 << " " << bc.u64 << " " <<*/ c.u64 << std::endl; \ + std::cout << std::hex << ((inst.locc & 8) ? 
c.hi.u64 : c.lo.u64) << std::endl; \ } \ if(fpuCheck) { \ - convertible_t bc; \ - if(c.f64 != c.f64) { \ + if(c.hi.f64 != c.hi.f64 || c.lo.f64 != c.lo.f64) { \ std::stringstream ss; \ - bc.f64 = b; \ - ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \ + ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \ throw std::runtime_error(ss.str()); \ - } else if (std::fpclassify(c.f64) == FP_SUBNORMAL) {\ + } else if (std::fpclassify(c.hi.f64) == FP_SUBNORMAL || std::fpclassify(c.lo.f64) == FP_SUBNORMAL) {\ std::stringstream ss; \ - bc.f64 = b; \ - ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \ + ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \ throw std::runtime_error(ss.str()); \ } \ } @@ -220,8 +220,13 @@ namespace RandomX { #define INC_COUNT(x) #endif -#define FPU_RETIRE_NB(x) x(a, b, c); \ - if(trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl; +#define FPU_RETIRE_FPSQRT(x) FPSQRT(a, b, c); \ + writecf(inst, c); \ + if(trace) std::cout << std::hex << ((inst.locc & 8) ? 
c.hi.u64 : c.lo.u64) << std::endl; + +#define FPU_RETIRE_FPROUND(x) FPROUND(a, b, c); \ + writecflo(inst, c); \ + if(trace) std::cout << std::hex << c.lo.u64 << std::endl; #define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ INC_COUNT(x) \ @@ -242,17 +247,17 @@ namespace RandomX { #define FPU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ INC_COUNT(x) \ convertible_t a = loada(inst); \ - double b = loadbf(inst); \ - convertible_t& c = getcf(inst); \ + fpu_reg_t& b = reg.f[inst.regb % RegistersCount]; \ + fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \ FPU_RETIRE(x) \ } #define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ INC_COUNT(x) \ convertible_t a = loada(inst); \ - convertible_t b; \ - convertible_t& c = getcf(inst); \ - FPU_RETIRE_NB(x) \ + fpu_reg_t b; \ + fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \ + FPU_RETIRE_##x(x) \ } ALU_INST(ADD_64) diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index 5a6f49a..b8fd98f 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -18,7 +18,7 @@ along with RandomX. If not, see. */ #pragma once -#define STATS +//#define STATS #include "VirtualMachine.hpp" #include "Program.hpp" #include @@ -88,9 +88,9 @@ namespace RandomX { convertible_t loada(Instruction&); convertible_t loadbr0(Instruction&); convertible_t loadbr1(Instruction&); - double loadbf(Instruction&); convertible_t& getcr(Instruction&); - convertible_t& getcf(Instruction&); + void writecf(Instruction&, fpu_reg_t&); + void writecflo(Instruction&, fpu_reg_t&); void stackPush(convertible_t& c) { stack.push_back(c); diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S new file mode 100644 index 0000000..be156ef --- /dev/null +++ b/src/JitCompilerX86-static.S @@ -0,0 +1,58 @@ +;# Copyright (c) 2018 tevador +;# +;# This file is part of RandomX. 
+;# +;# RandomX is free software: you can redistribute it and/or modify +;# it under the terms of the GNU General Public License as published by +;# the Free Software Foundation, either version 3 of the License, or +;# (at your option) any later version. +;# +;# RandomX is distributed in the hope that it will be useful, +;# but WITHOUT ANY WARRANTY; without even the implied warranty of +;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;# GNU General Public License for more details. +;# +;# You should have received a copy of the GNU General Public License +;# along with RandomX. If not, see <http://www.gnu.org/licenses/>. + +.intel_syntax noprefix +#if defined(__APPLE__) +.text +#else +.section .text +#endif +#if defined(__WIN32__) || defined(__APPLE__) +#define DECL(x) _##x +#else +#define DECL(x) x +#endif +.global DECL(randomx_program_prologue) +.global DECL(randomx_program_begin) +.global DECL(randomx_program_epilogue) +.global DECL(randomx_program_read_r) +.global DECL(randomx_program_read_f) +.global DECL(randomx_program_end) + +.align 64 +DECL(randomx_program_prologue): + #include "asm/program_prologue_linux.inc" + +.align 64 +DECL(randomx_program_begin): + nop + +.align 64 +DECL(randomx_program_epilogue): + #include "asm/program_epilogue_linux.inc" + +.align 64 +DECL(randomx_program_read_r): + #include "asm/program_read_r.inc" + +.align 64 +DECL(randomx_program_read_f): + #include "asm/program_read_f.inc" + +.align 64 +DECL(randomx_program_end): + nop \ No newline at end of file diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm new file mode 100644 index 0000000..d7d3d4b --- /dev/null +++ b/src/JitCompilerX86-static.asm @@ -0,0 +1,59 @@ +;# Copyright (c) 2018 tevador +;# +;# This file is part of RandomX. 
+;# +;# RandomX is free software: you can redistribute it and/or modify +;# it under the terms of the GNU General Public License as published by +;# the Free Software Foundation, either version 3 of the License, or +;# (at your option) any later version. +;# +;# RandomX is distributed in the hope that it will be useful, +;# but WITHOUT ANY WARRANTY; without even the implied warranty of +;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;# GNU General Public License for more details. +;# +;# You should have received a copy of the GNU General Public License +;# along with RandomX. If not, see <http://www.gnu.org/licenses/>. + +_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE + +PUBLIC randomx_program_prologue +PUBLIC randomx_program_begin +PUBLIC randomx_program_epilogue +PUBLIC randomx_program_read_r +PUBLIC randomx_program_read_f +PUBLIC randomx_program_end + +ALIGN 64 +randomx_program_prologue PROC + include asm/program_prologue_win64.inc +randomx_program_prologue ENDP + +ALIGN 64 +randomx_program_begin PROC + nop +randomx_program_begin ENDP + +ALIGN 64 +randomx_program_epilogue PROC + include asm/program_epilogue_win64.inc +randomx_program_epilogue ENDP + +ALIGN 64 +randomx_program_read_r PROC + include asm/program_read_r.inc +randomx_program_read_r ENDP + +ALIGN 64 +randomx_program_read_f PROC + include asm/program_read_f.inc +randomx_program_read_f ENDP + +ALIGN 64 +randomx_program_end PROC + nop +randomx_program_end ENDP + +_RANDOMX_JITX86_STATIC ENDS + +END \ No newline at end of file diff --git a/src/JitCompilerX86-static.hpp b/src/JitCompilerX86-static.hpp new file mode 100644 index 0000000..6052283 --- /dev/null +++ b/src/JitCompilerX86-static.hpp @@ -0,0 +1,27 @@ +/* +Copyright (c) 2018 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see <http://www.gnu.org/licenses/>. +*/ + +extern "C" { + void randomx_program_prologue(); + void randomx_program_begin(); + void randomx_program_epilogue(); + void randomx_program_read_r(); + void randomx_program_read_f(); + void randomx_program_end(); +} \ No newline at end of file diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index fe10229..b03a330 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -34,6 +34,16 @@ along with RandomX. If not, see. namespace RandomX { +#if !defined(_M_X64) && !defined(__x86_64__) + JitCompilerX86::JitCompilerX86() { + throw std::runtime_error("JIT compiler only supports x86-64 CPUs"); + } + + void JitCompilerX86::generateProgram(Pcg32& gen) { + + } +#else + /* REGISTER ALLOCATION: @@ -41,7 +51,7 @@ namespace RandomX { rbx -> MemoryRegisters& memory rcx -> temporary rdx -> temporary - rsi -> convertible_t& scratchpad + rsi -> convertible_t* scratchpad rdi -> "ic" (instruction counter) rbp -> beginning of VM stack rsp -> end of VM stack @@ -63,6 +73,7 @@ namespace RandomX { xmm7 -> "f7" xmm8 -> "f0" xmm9 -> "f1" + xmm10 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff STACK STRUCTURE: @@ -81,127 +92,23 @@ namespace RandomX { */ - constexpr uint8_t ic3 = ((InstructionCount + 1) >> 24); - constexpr uint8_t ic2 = ((InstructionCount + 1) >> 16); - constexpr uint8_t ic1 = ((InstructionCount + 1) >> 8); - constexpr uint8_t ic0 = ((InstructionCount + 1) >> 0); +#include "JitCompilerX86-static.hpp" - const uint8_t prologue[] = { - 0x53, //push rbx - 0x55, //push rbp -#ifdef _WIN32 - 0x57, //push rdi - 0x56, //push rsi -#endif - 0x41, 0x54, //push r12 - 0x41, 0x55, //push r13 - 0x41, 0x56, //push r14 - 
0x41, 0x57, //push r15 -#ifdef _WIN32 - 0x48, 0x83, 0xec, 0x48, //sub rsp,0x48 - 0xf3, 0x0f, 0x7f, 0x74, 0x24, 0x30, //movdqu XMMWORD PTR[rsp + 0x30],xmm6 - 0xf3, 0x0f, 0x7f, 0x7c, 0x24, 0x20, //movdqu XMMWORD PTR[rsp + 0x20],xmm7 - 0xf3, 0x44, 0x0f, 0x7f, 0x44, 0x24, 0x10, //movdqu XMMWORD PTR[rsp + 0x10],xmm8 - 0xf3, 0x44, 0x0f, 0x7f, 0x0c, 0x24, //movdqu XMMWORD PTR[rsp],xmm9 - 0x51, //push rcx - 0x48, 0x8b, 0xda, //mov rbx,rdx - 0x49, 0x8b, 0xf0, //mov rsi,r8 -#else - 0x57, //push rdi - 0x48, 0x8b, 0xde, //mov rbx, rsi - 0x48, 0x8b, 0xf2, //mov rsi, rdx - 0x48, 0x8b, 0xcf, //mov rcx, rdi -#endif - 0x48, 0x8b, 0xec, //mov rbp,rsp - 0x48, 0xc7, 0xc7, ic0, ic1, ic2, ic3, //mov rdi, "InstructionCount" - 0x4c, 0x8b, 0x01, //mov r8,QWORD PTR[rcx] - 0x4c, 0x8b, 0x49, 0x08, //mov r9,QWORD PTR[rcx+0x8] - 0x4c, 0x8b, 0x51, 0x10, //mov r10,QWORD PTR[rcx+0x10] - 0x4c, 0x8b, 0x59, 0x18, //mov r11,QWORD PTR[rcx+0x18] - 0x4c, 0x8b, 0x61, 0x20, //mov r12,QWORD PTR[rcx+0x20] - 0x4c, 0x8b, 0x69, 0x28, //mov r13,QWORD PTR[rcx+0x28] - 0x4c, 0x8b, 0x71, 0x30, //mov r14,QWORD PTR[rcx+0x30] - 0x4c, 0x8b, 0x79, 0x38, //mov r15,QWORD PTR[rcx+0x38] - 0xc7, 0x44, 0x24, 0xf8, 0xc0, 0x9f, 0x00, //mov DWORD PTR[rsp-0x8],0x9fc0 - 0x00, - 0x0f, 0xae, 0x54, 0x24, 0xf8, //ldmxcsr DWORD PTR[rsp-0x8] - 0xf2, 0x4c, 0x0f, 0x2a, 0x41, 0x40, //cvtsi2sd xmm8,QWORD PTR[rcx+0x40] - 0xf2, 0x4c, 0x0f, 0x2a, 0x49, 0x48, //cvtsi2sd xmm9,QWORD PTR[rcx+0x48] - 0xf2, 0x48, 0x0f, 0x2a, 0x51, 0x50, //cvtsi2sd xmm2,QWORD PTR[rcx+0x50] - 0xf2, 0x48, 0x0f, 0x2a, 0x59, 0x58, //cvtsi2sd xmm3,QWORD PTR[rcx+0x58] - 0xf2, 0x48, 0x0f, 0x2a, 0x61, 0x60, //cvtsi2sd xmm4,QWORD PTR[rcx+0x60] - 0xf2, 0x48, 0x0f, 0x2a, 0x69, 0x68, //cvtsi2sd xmm5,QWORD PTR[rcx+0x68] - 0xf2, 0x48, 0x0f, 0x2a, 0x71, 0x70, //cvtsi2sd xmm6,QWORD PTR[rcx+0x70] - 0xf2, 0x48, 0x0f, 0x2a, 0x79, 0x78, //cvtsi2sd xmm7,QWORD PTR[rcx+0x78] - }; + const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; + const uint8_t* codeProgramBegin = 
(uint8_t*)&randomx_program_begin; + const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue; + const uint8_t* codeReadDatasetR = (uint8_t*)&randomx_program_read_r; + const uint8_t* codeReadDatasetF = (uint8_t*)&randomx_program_read_f; + const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end; - const uint8_t epilogue[] = { - 0x48, 0x8b, 0xe5, //mov rsp,rbp - 0x59, //pop rcx - 0x4c, 0x89, 0x01, //mov QWORD PTR [rcx],r8 - 0x4c, 0x89, 0x49, 0x08, //mov QWORD PTR [rcx+0x8],r9 - 0x4c, 0x89, 0x51, 0x10, //mov QWORD PTR [rcx+0x10],r10 - 0x4c, 0x89, 0x59, 0x18, //mov QWORD PTR [rcx+0x18],r11 - 0x4c, 0x89, 0x61, 0x20, //mov QWORD PTR [rcx+0x20],r12 - 0x4c, 0x89, 0x69, 0x28, //mov QWORD PTR [rcx+0x28],r13 - 0x4c, 0x89, 0x71, 0x30, //mov QWORD PTR [rcx+0x30],r14 - 0x4c, 0x89, 0x79, 0x38, //mov QWORD PTR [rcx+0x38],r15 - 0x66, 0x4c, 0x0f, 0x7e, 0x41, 0x40, //movq QWORD PTR [rcx+0x40],xmm8 - 0x66, 0x4c, 0x0f, 0x7e, 0x49, 0x48, //movq QWORD PTR [rcx+0x48],xmm9 - 0x66, 0x48, 0x0f, 0x7e, 0x51, 0x50, //movq QWORD PTR [rcx+0x50],xmm2 - 0x66, 0x48, 0x0f, 0x7e, 0x59, 0x58, //movq QWORD PTR [rcx+0x58],xmm3 - 0x66, 0x48, 0x0f, 0x7e, 0x61, 0x60, //movq QWORD PTR [rcx+0x60],xmm4 - 0x66, 0x48, 0x0f, 0x7e, 0x69, 0x68, //movq QWORD PTR [rcx+0x68],xmm5 - 0x66, 0x48, 0x0f, 0x7e, 0x71, 0x70, //movq QWORD PTR [rcx+0x70],xmm6 - 0x66, 0x48, 0x0f, 0x7e, 0x79, 0x78, //movq QWORD PTR [rcx+0x78],xmm7 -#ifdef _WIN32 - 0xf3, 0x44, 0x0f, 0x6f, 0x0c, 0x24, //movdqu xmm9,XMMWORD PTR [rsp] - 0xf3, 0x44, 0x0f, 0x6f, 0x44, 0x24, 0x10, //movdqu xmm8,XMMWORD PTR [rsp+0x10] - 0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x20, //movdqu xmm7,XMMWORD PTR [rsp+0x20] - 0xf3, 0x0f, 0x6f, 0x74, 0x24, 0x30, //movdqu xmm6,XMMWORD PTR [rsp+0x30] - 0x48, 0x83, 0xc4, 0x48, //add rsp,0x48 -#endif - 0x41, 0x5f, //pop r15 - 0x41, 0x5e, //pop r14 - 0x41, 0x5d, //pop r13 - 0x41, 0x5c, //pop r12 -#ifdef _WIN32 - 0x5e, //pop rsi - 0x5f, //pop rdi -#endif - 0x5d, //pop rbp - 0x5b, //pop rbx - 0xc3, //ret - }; + const int32_t 
prologueSize = codeProgramBegin - codePrologue; + const int32_t epilogueSize = codeReadDatasetR - codeEpilogue; + const int32_t readDatasetRSize = codeReadDatasetF - codeReadDatasetR; + const int32_t readDatasetFSize = codeProgramEnd - codeReadDatasetF; - //41 bytes -> 1 cache line - const uint8_t readDatasetSub[] = { - 0x8b, 0x13, //mov edx,DWORD PTR [rbx] - 0x48, 0x8b, 0x43, 0x08, //mov rax,QWORD PTR [rbx+0x8] - 0x48, 0x8b, 0x04, 0x10, //mov rax,QWORD PTR [rax+rdx*1] - 0x83, 0x03, 0x08, //add DWORD PTR [rbx],0x8 - 0x33, 0x4b, 0x04, //xor ecx,DWORD PTR [rbx+0x4] - 0x89, 0x4b, 0x04, //mov DWORD PTR [rbx+0x4],ecx - 0xf7, 0xc1, 0xf8, 0xff, 0x00, 0x00, //test ecx,0xfff8 - 0x75, 0x0d, //jne - 0x83, 0xe1, 0xf8, //and ecx,0xfffffff8 - 0x89, 0x0b, //mov DWORD PTR [rbx],ecx - 0x48, 0x8b, 0x53, 0x08, //mov rdx,QWORD PTR [rbx+0x8] - 0x0f, 0x18, 0x0c, 0x0a, //prefetcht0 BYTE PTR [rdx+rcx*1] - 0xc3, //ret - }; - - constexpr int getNumCacheLines(size_t size) { - return (size + (CacheLineSize - 1)) / CacheLineSize; - } - - constexpr int32_t align(int32_t pos, int32_t align) { - return ((pos - 1) / align + 1) * align; - } - - constexpr int32_t readDatasetSubOffset = CodeSize - CacheLineSize * getNumCacheLines(sizeof(readDatasetSub)); - constexpr int32_t epilogueOffset = readDatasetSubOffset - CacheLineSize * getNumCacheLines(sizeof(epilogue)); - constexpr int32_t startOffsetAligned = align(sizeof(prologue), CacheLineSize); + const int32_t readDatasetFOffset = CodeSize - readDatasetFSize; + const int32_t readDatasetROffset = readDatasetFOffset - readDatasetRSize; + const int32_t epilogueOffset = readDatasetROffset - epilogueSize; JitCompilerX86::JitCompilerX86() { #ifdef _WIN32 @@ -213,24 +120,16 @@ namespace RandomX { if (code == (uint8_t*)-1) throw std::runtime_error("mmap failed"); #endif - memcpy(code, prologue, sizeof(prologue)); - codePos = sizeof(prologue); - if (startOffsetAligned - codePos > 4) { - emitByte(0xeb); - emitByte(startOffsetAligned - (codePos + 1)); - } - else 
{ - while (codePos < startOffsetAligned) - emitByte(0x90); //nop - } - memcpy(code + readDatasetSubOffset, readDatasetSub, sizeof(readDatasetSub)); - memcpy(code + epilogueOffset, epilogue, sizeof(epilogue)); + memcpy(code, codePrologue, prologueSize); + memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize - epilogueSize, codeEpilogue, epilogueSize); + memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize, codeReadDatasetR, readDatasetRSize); + memcpy(code + CodeSize - readDatasetFSize, codeReadDatasetF, readDatasetFSize); } void JitCompilerX86::generateProgram(Pcg32& gen) { instructionOffsets.clear(); callOffsets.clear(); - codePos = startOffsetAligned; + codePos = prologueSize; Instruction instr; for (unsigned i = 0; i < ProgramLength; ++i) { for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) { @@ -247,7 +146,6 @@ namespace RandomX { instructionOffsets.push_back(codePos); emit(0x840fcfff); //dec edx; jz emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative) - gena(instr); auto generator = engine[instr.opcode]; (this->*generator)(instr, i); } @@ -258,11 +156,10 @@ namespace RandomX { } } - void JitCompilerX86::gena(Instruction& instr) { + void JitCompilerX86::genar(Instruction& instr) { emit(uint16_t(0x8149)); //xor emitByte(0xf0 + (instr.rega % RegistersCount)); emit(instr.addra); - int32_t pc; switch (instr.loca & 7) { case 0: @@ -272,7 +169,7 @@ namespace RandomX { emit(uint16_t(0x8b41)); //mov emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega emitByte(0xe8); //call - emit(readDatasetSubOffset - (codePos + 4)); + emit(readDatasetROffset - (codePos + 4)); return; case 4: @@ -293,8 +190,44 @@ namespace RandomX { } } + void JitCompilerX86::genaf(Instruction& instr) { + emit(uint16_t(0x8149)); //xor + emitByte(0xf0 + (instr.rega % RegistersCount)); + emit(instr.addra); + switch (instr.loca & 7) + { + case 0: + case 1: + case 2: + case 3: + emit(uint16_t(0x8b41)); //mov + emitByte(0xc8 + (instr.rega % 
RegistersCount)); //ecx, rega + emitByte(0xe8); //call + emit(readDatasetFOffset - (codePos + 4)); + return; + + case 4: + emit(uint16_t(0x8b41)); //mov + emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega + emitByte(0x25); //and + emit(ScratchpadL2 - 1); //whole scratchpad + emitByte(0xf3); + emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8] + return; + + default: + emit(uint16_t(0x8b41)); //mov + emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega + emitByte(0x25); //and + emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad + emitByte(0xf3); + emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8] + return; + } + } + void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) { - if ((instr.locb & 7) <= 5) { + if ((instr.locb & 7) <= 3) { emit(uint16_t(0x8b49)); //mov emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb emitByte(0x48); //REX.W @@ -330,126 +263,117 @@ namespace RandomX { } void JitCompilerX86::genbf(Instruction& instr, uint8_t opcode) { - emit(0x48f2fffff8002548); //and rax,0xfffffffffffff800; cvtsi2sd xmm0,rax - emit(uint16_t(0x2a0f)); - emitByte(0xc0); - if ((instr.locb & 7) <= 5) { - int regb = (instr.regb % RegistersCount); - emitByte(0xf2); //xxxsd xmm0,regb - if (regb <= 1) { - emitByte(0x41); //REX - } - emitByte(0x0f); - emitByte(opcode); - emitByte(0xc0 + regb); - } - else { - convertible_t bimm; - bimm.f64 = (double)instr.imm32; - emit(uint16_t(0xb848)); //movabs rax,imm64 - emit(bimm.i64); - emitByte(0x66); //movq xmm1,rax - emit(0xc86e0f48); - emit(uint16_t(0x0ff2)); //xxxsd xmm0,xmm1 - emitByte(opcode); - emitByte(0xc1); + int regb = (instr.regb % RegistersCount); + emitByte(0x66); //xxxpd xmm0,regb + if (regb <= 1) { + emitByte(0x41); //REX } + emitByte(0x0f); + emitByte(opcode); + emitByte(0xc0 + regb); + } + + + void JitCompilerX86::scratchpadStoreR(Instruction& instr, uint32_t scratchpadSize) { + emit(0x41c88b48); //mov rcx, rax; REX + emitByte(0x8b); // mov + 
emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc + emitByte(0x35); // xor eax + emit(instr.addrc); + emitByte(0x25); //and + emit(scratchpadSize - 1); + emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx } void JitCompilerX86::gencr(Instruction& instr) { switch (instr.locc & 7) { - case 0: - emit(0x41c88b48); //mov rcx, rax; REX - emitByte(0x8b); // mov - emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc - emitByte(0x35); // xor eax - emit(instr.addrc); - emitByte(0x25); //and - emit(ScratchpadL2 - 1); //whole scratchpad - emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx - break; + case 0: + scratchpadStoreR(instr, ScratchpadL2); + break; - case 1: - case 2: - case 3: - emit(0x41c88b48); //mov rcx, rax; REX - emitByte(0x8b); // mov - emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc - emitByte(0x35); // xor eax - emit(instr.addrc); - emitByte(0x25); //and - emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad - emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx - break; + case 1: + case 2: + case 3: + scratchpadStoreR(instr, ScratchpadL1); + break; - default: - emit(uint16_t(0x8b4c)); //mov - emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax - break; + default: + emit(uint16_t(0x8b4c)); //mov + emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax + break; } } - void JitCompilerX86::gencf(Instruction& instr) { + void JitCompilerX86::scratchpadStoreF(Instruction& instr, int regc, uint32_t scratchpadSize, bool storeHigh) { + emit(uint16_t(0x8b41)); //mov + emitByte(0xc0 + regc); //eax, regc + emitByte(0x35); // xor eax + emit(instr.addrc); + emitByte(0x25); //and + emit(scratchpadSize - 1); + emitByte(0x66); //movhpd/movlpd QWORD PTR [rsi+rax*8], regc + if (regc <= 1) { + emitByte(0x44); //REX + } + emitByte(0x0f); + emitByte(storeHigh ? 
0x17 : 0x13); + emitByte(4 + 8 * regc); + emitByte(0xc6); + } + + void JitCompilerX86::gencf(Instruction& instr, bool alwaysLow = false) { int regc = (instr.regc % RegistersCount); - switch (instr.locc & 7) - { - case 0: - emit(uint16_t(0x8b41)); //mov - emitByte(0xc0 + regc); //eax, regc - emitByte(0x35); // xor eax - emit(instr.addrc); - emitByte(0x25); //and - emit(ScratchpadL2 - 1); //whole scratchpad - emit(uint16_t(0x4866)); //prefix - emit(0xc6047e0f); // movq QWORD PTR [rsi+rax*8],xmm0 - break; - - case 1: - case 2: - case 3: - emit(uint16_t(0x8b41)); //mov - emitByte(0xc0 + regc); //eax, regc - emitByte(0x35); // xor eax - emit(instr.addrc); - emitByte(0x25); //and - emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad - emit(uint16_t(0x4866)); //prefix - emit(0xc6047e0f); // movq QWORD PTR [rsi+rax*8],xmm0 - break; - - default: - emitByte(0xf2); + if (!alwaysLow) { if (regc <= 1) { emitByte(0x44); //REX } - emit(uint16_t(0x100f)); //movsd + emit(uint16_t(0x280f)); //movaps emitByte(0xc0 + 8 * regc); // regc, xmm0 - break; + } + switch (instr.locc & 7) + { + case 4: + scratchpadStoreF(instr, regc, ScratchpadL2, !alwaysLow && (instr.locc & 8)); + break; + + case 5: + case 6: + case 7: + scratchpadStoreF(instr, regc, ScratchpadL1, !alwaysLow && (instr.locc & 8)); + break; + + default: + break; } } void JitCompilerX86::h_ADD_64(Instruction& instr, int i) { + genar(instr); genbr1(instr, 0x0349, 0x0548); gencr(instr); } void JitCompilerX86::h_ADD_32(Instruction& instr, int i) { + genar(instr); genbr132(instr, 0x0341, 0x05); gencr(instr); } void JitCompilerX86::h_SUB_64(Instruction& instr, int i) { + genar(instr); genbr1(instr, 0x2b49, 0x2d48); gencr(instr); } void JitCompilerX86::h_SUB_32(Instruction& instr, int i) { + genar(instr); genbr132(instr, 0x2b41, 0x2d); gencr(instr); } void JitCompilerX86::h_MUL_64(Instruction& instr, int i) { + genar(instr); if ((instr.locb & 7) <= 5) { emitByte(0x49); //REX emit(uint16_t(0xaf0f)); // imul rax, r64 @@ -464,6 +388,7 
@@ namespace RandomX { } void JitCompilerX86::h_MULH_64(Instruction& instr, int i) { + genar(instr); if ((instr.locb & 7) <= 5) { emit(uint16_t(0x8b49)); //mov rcx, r64 emitByte(0xc8 + (instr.regb % RegistersCount)); @@ -481,6 +406,7 @@ namespace RandomX { } void JitCompilerX86::h_MUL_32(Instruction& instr, int i) { + genar(instr); emit(uint16_t(0xc88b)); //mov ecx, eax if ((instr.locb & 7) <= 5) { emit(uint16_t(0x8b41)); // mov eax, r32 @@ -495,6 +421,7 @@ namespace RandomX { } void JitCompilerX86::h_IMUL_32(Instruction& instr, int i) { + genar(instr); emitByte(0x48); emit(uint16_t(0xc863)); //movsxd rcx,eax if ((instr.locb & 7) <= 5) { @@ -511,6 +438,7 @@ namespace RandomX { } void JitCompilerX86::h_IMULH_64(Instruction& instr, int i) { + genar(instr); if ((instr.locb & 7) <= 5) { emit(uint16_t(0x8b49)); //mov rcx, r64 emitByte(0xc8 + (instr.regb % RegistersCount)); @@ -528,6 +456,7 @@ namespace RandomX { } void JitCompilerX86::h_DIV_64(Instruction& instr, int i) { + genar(instr); if ((instr.locb & 7) <= 5) { emitByte(0xb9); //mov ecx, 1 emit(1); @@ -546,6 +475,7 @@ namespace RandomX { } void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) { + genar(instr); if ((instr.locb & 7) <= 5) { emit(uint16_t(0x8b41)); //mov edx, r32 emitByte(0xd0 + (instr.regb % RegistersCount)); @@ -563,100 +493,127 @@ namespace RandomX { } void JitCompilerX86::h_AND_64(Instruction& instr, int i) { + genar(instr); genbr1(instr, 0x2349, 0x2548); gencr(instr); } void JitCompilerX86::h_AND_32(Instruction& instr, int i) { + genar(instr); genbr132(instr, 0x2341, 0x25); gencr(instr); } void JitCompilerX86::h_OR_64(Instruction& instr, int i) { + genar(instr); genbr1(instr, 0x0b49, 0x0d48); gencr(instr); } void JitCompilerX86::h_OR_32(Instruction& instr, int i) { + genar(instr); genbr132(instr, 0x0b41, 0x0d); gencr(instr); } void JitCompilerX86::h_XOR_64(Instruction& instr, int i) { + genar(instr); genbr1(instr, 0x3349, 0x3548); gencr(instr); } void JitCompilerX86::h_XOR_32(Instruction& 
instr, int i) { + genar(instr); genbr132(instr, 0x3341, 0x35); gencr(instr); } void JitCompilerX86::h_SHL_64(Instruction& instr, int i) { + genar(instr); genbr0(instr, 0xe0d3, 0xe0c1); gencr(instr); } void JitCompilerX86::h_SHR_64(Instruction& instr, int i) { + genar(instr); genbr0(instr, 0xe8d3, 0xe8c1); gencr(instr); } void JitCompilerX86::h_SAR_64(Instruction& instr, int i) { + genar(instr); genbr0(instr, 0xf8d3, 0xf8c1); gencr(instr); } void JitCompilerX86::h_ROL_64(Instruction& instr, int i) { + genar(instr); genbr0(instr, 0xc0d3, 0xc0c1); gencr(instr); } void JitCompilerX86::h_ROR_64(Instruction& instr, int i) { + genar(instr); genbr0(instr, 0xc8d3, 0xc8c1); gencr(instr); } void JitCompilerX86::h_FPADD(Instruction& instr, int i) { + genaf(instr); genbf(instr, 0x58); gencf(instr); } void JitCompilerX86::h_FPSUB(Instruction& instr, int i) { + genaf(instr); genbf(instr, 0x5c); gencf(instr); } void JitCompilerX86::h_FPMUL(Instruction& instr, int i) { - emit(uint16_t(0x0d48)); //or rax,0x800 - emit(0x00000800); + genaf(instr); genbf(instr, 0x59); + emit(0x00c9c20f66c8280f); //movaps xmm1,xmm0; cmpeqpd xmm1,xmm1 + emit(uint16_t(0x540f)); //andps xmm0,xmm1 + emitByte(0xc1); gencf(instr); } void JitCompilerX86::h_FPDIV(Instruction& instr, int i) { - emit(uint16_t(0x0d48)); //or rax,0x800 - emit(0x00000800); + genaf(instr); genbf(instr, 0x5e); + emit(0x00c9c20f66c8280f); //movaps xmm1,xmm0; cmpeqpd xmm1,xmm1 + emit(uint16_t(0x540f)); //andps xmm0,xmm1 + emitByte(0xc1); gencf(instr); } void JitCompilerX86::h_FPSQRT(Instruction& instr, int i) { - emit(uint16_t(0xb948)); //or movabs rcx, imm64 - emit(0x7ffffffffffff800); - emit(0xc02a0f48f2c12348); //and rax,rcx; cvtsi2sd xmm0,rax - emit(0xc0510ff2); //sqrtsd xmm0,xmm0 + genaf(instr); + emit(0xc0510f66c2540f41); //andps xmm0,xmm10; sqrtpd xmm0,xmm0 gencf(instr); } void JitCompilerX86::h_FPROUND(Instruction& instr, int i) { + genar(instr); emit(0x81480de0c1c88b48); emit(0x600025fffff800e1); - emit(0x0dc12a0f48f20000); + 
emit(uint16_t(0x0000)); + emitByte(0xf2); + int regc = (instr.regc % RegistersCount); + if (regc <= 1) { + emitByte(0x4c); //REX + } + else { + emitByte(0x48); //REX + } + emit(uint16_t(0x2a0f)); + emitByte(0xc1 + 8 * regc); + emitByte(0x0d); emit(0xf824448900009fc0); emit(0x2454ae0f); //ldmxcsr DWORD PTR [rsp-0x8] emitByte(0xf8); - gencf(instr); + gencf(instr, true); } static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { @@ -682,6 +639,7 @@ namespace RandomX { } void JitCompilerX86::h_CALL(Instruction& instr, int i) { + genar(instr); emit(uint16_t(0x8141)); //cmp regb, imm32 emitByte(0xf8 + (instr.regb % RegistersCount)); emit(instr.imm32); @@ -707,6 +665,7 @@ namespace RandomX { } void JitCompilerX86::h_RET(Instruction& instr, int i) { + genar(instr); int crlen = 0; if ((instr.locc & 7) <= 3) { crlen = 17; @@ -756,4 +715,6 @@ namespace RandomX { INST_HANDLE(CALL) INST_HANDLE(RET) }; + +#endif } \ No newline at end of file diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index c453ba1..e2c432c 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -58,13 +58,16 @@ namespace RandomX { std::vector instructionOffsets; std::vector callOffsets; - void gena(Instruction&); + void genar(Instruction&); + void genaf(Instruction&); void genbr0(Instruction&, uint16_t, uint16_t); void genbr1(Instruction&, uint16_t, uint16_t); void genbr132(Instruction&, uint16_t, uint8_t); void genbf(Instruction&, uint8_t); + void scratchpadStoreR(Instruction&, uint32_t); + void scratchpadStoreF(Instruction&, int, uint32_t, bool); void gencr(Instruction&); - void gencf(Instruction&); + void gencf(Instruction&, bool); void generateCode(Instruction&, int); void fixCallOffsets(); diff --git a/src/TestAluFpu.cpp b/src/TestAluFpu.cpp index f2fe387..de90083 100644 --- a/src/TestAluFpu.cpp +++ b/src/TestAluFpu.cpp @@ -21,33 +21,36 @@ along with RandomX. If not, see. 
#include #include #include "instructions.hpp" -#include "Pcg32.hpp" //#define DEBUG using namespace RandomX; -typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&); - -uint64_t rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) { - convertible_t a, b, c; - a.u64 = mode; - FPROUND(a, b, c); -#ifdef DEBUG - a.f64 = convertToDouble(x); - b.f64 = convertToDouble(y); - std::cout << std::hex << (uint64_t)x << " -> " << a.u64 << std::endl; - std::cout << std::hex << (uint64_t)y << " -> " << b.u64 << std::endl; - std::cout << std::dec; -#endif - a.i64 = x; - b.i64 = y; - op(a, b, c); - return c.u64; -} +typedef void(*FpuOperation)(convertible_t&, fpu_reg_t&, fpu_reg_t&); #define CATCH_CONFIG_MAIN #include "catch.hpp" +uint64_t rxRound(uint32_t mode, int64_t x, int64_t y, FpuOperation op, bool hiEqualsLo = true) { + convertible_t a; + fpu_reg_t b, c; + a.u64 = mode; + FPROUND(a, b, c); + if (hiEqualsLo) { + a.i32lo = x; + a.i32hi = x; + } + else { + a.i64 = x; + } + b.lo.i64 = y; + b.hi.i64 = y; + op(a, b, c); + if (hiEqualsLo) { + CHECK(c.lo.u64 == c.hi.u64); + } + return c.lo.u64; +} + #define RX_EXECUTE_U64(va, vb, INST) do { \ a.u64 = va; \ b.u64 = vb; \ @@ -273,118 +276,126 @@ TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") { TEST_CASE("Denormal results are not produced", "[FTZ]") { FPINIT(); - convertible_t a, b, c; - a.i64 = 2048; - FPDIV(a, DBL_MAX, c); + convertible_t a; + fpu_reg_t b; + a.i64 = 1; + b.lo.f64 = DBL_MAX; + FPDIV(a, b, b); #ifdef DEBUG - std::cout << a.i64 << " / " << DBL_MAX << " = " << std::hex << c.u64 << std::endl; + std::cout << a.i64 << " / " << DBL_MAX << " = " << std::hex << b.lo.u64 << std::endl; #endif - REQUIRE(std::fpclassify(c.f64) != FP_SUBNORMAL); - b.f64 = c.f64; + CHECK(std::fpclassify(b.lo.f64) != FP_SUBNORMAL); a.i64 = 0; - FPSUB_64(a, b, c); + FPSUB(a, b, b); #ifdef DEBUG - std::cout << a.i64 << " - " << b.f64 << " = " << std::hex << c.u64 << std::endl; + std::cout << a.i64 << " - " << 
b.lo.f64 << " = " << std::hex << b.lo.u64 << std::endl; #endif - CHECK(std::fpclassify(c.f64) != FP_SUBNORMAL); + CHECK(std::fpclassify(b.lo.f64) != FP_SUBNORMAL); } TEST_CASE("NaN results are not produced", "[NAN]") { FPINIT(); - convertible_t a, c; + convertible_t a; + fpu_reg_t b; a.i64 = 0; - FPDIV(a, 0, c); - CHECK(std::fpclassify(c.f64) != FP_NAN); - FPMUL(a, std::numeric_limits::infinity(), c); - CHECK(std::fpclassify(c.f64) != FP_NAN); + b.lo.f64 = 0; + FPDIV(a, b, b); + CHECK(std::fpclassify(b.lo.f64) != FP_NAN); + b.lo.f64 = std::numeric_limits::infinity(); + FPMUL(a, b, b); + CHECK(std::fpclassify(b.lo.f64) != FP_NAN); } -volatile int64_t fpAdda = 7379480244170225589; -volatile int64_t fpAddb = -438072579179686797; -volatile int64_t fpSuba = 2939258788088626026; -volatile int64_t fpSubb = 4786131045320678734; -volatile int64_t fpMula1 = 8399833736388895639; -volatile int64_t fpMulb1 = 5671608020317594922; -volatile int64_t fpMula2 = -7094299423744805450; -volatile int64_t fpMulb2 = 4982086006202596504; -volatile int64_t fpDiva1 = 8399833736388895639; -volatile int64_t fpDivb1 = 5671608020317594922; -volatile int64_t fpDiva2 = -7434878587645025912; -volatile int64_t fpDivb2 = 5266243837734830806; -volatile int64_t fpSqrta = -7594301562963134542; +volatile int64_t fpRounda = 7379480244170225589; +volatile int32_t fpAdda = -2110701072; +volatile int64_t fpAddb = 5822431907862180274; +volatile int32_t fpSuba = -1651770302; +volatile int64_t fpSubb = 4982086006202596504; +volatile int32_t fpMula1 = 122885310; +volatile int64_t fpMulb1 = 6036690890763685020; +volatile int32_t fpMula2 = -1952486466; +volatile int64_t fpMulb2 = 5693689137909219638; +volatile int32_t fpDiva1 = -1675630642; +volatile int64_t fpDivb1 = -3959960229647489051; +volatile int32_t fpDiva2 = -1651770302; +volatile int64_t fpDivb2 = 4982086006202596504; +volatile int32_t fpSqrta1 = 440505508; +volatile int32_t fpSqrta2 = -2147483648; TEST_CASE("IEEE-754 compliance", "[FPU]") { FPINIT(); - 
convertible_t a, b, c; + convertible_t a; + fpu_reg_t b, c; + b.lo.f64 = 0.0; - a.i64 = 2048; - FPDIV(a, 0, c); - CHECK(c.f64 == std::numeric_limits::infinity()); + a.i64 = 1; + FPDIV(a, b, c); + CHECK(c.lo.f64 == std::numeric_limits::infinity()); - a.i64 = -2048; - FPDIV(a, 0, c); - CHECK(c.f64 == -std::numeric_limits::infinity()); + a.i64 = -1; + FPDIV(a, b, c); + CHECK(c.lo.f64 == -std::numeric_limits::infinity()); #ifdef DEBUG std::cout << "FPROUND" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); - CHECK(rxRound(RoundDown, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); - CHECK(rxRound(RoundUp, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); - CHECK(rxRound(RoundToZero, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); - - CHECK(rxRound(RoundToNearest, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); - CHECK(rxRound(RoundDown, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); - CHECK(rxRound(RoundUp, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); - CHECK(rxRound(RoundToZero, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); + CHECK(rxRound(RoundToNearest, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU); + CHECK(rxRound(RoundDown, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU); + CHECK(rxRound(RoundUp, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU); + CHECK(rxRound(RoundToZero, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU); #ifdef DEBUG std::cout << "FPADD" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U); - CHECK(rxRound(RoundDown, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U); - CHECK(rxRound(RoundUp, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U); - CHECK(rxRound(RoundToZero, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U); + CHECK(rxRound(RoundToNearest, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b2U); + CHECK(rxRound(RoundDown, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b1U); + CHECK(rxRound(RoundUp, fpAdda, fpAddb, &FPADD) == 
0x50cd6ef8bd0671b2U); + CHECK(rxRound(RoundToZero, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b1U); #ifdef DEBUG std::cout << "FPSUB" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U); - CHECK(rxRound(RoundDown, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U); - CHECK(rxRound(RoundUp, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U); - CHECK(rxRound(RoundToZero, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U); + CHECK(rxRound(RoundToNearest, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c99U); + CHECK(rxRound(RoundDown, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c99U); + CHECK(rxRound(RoundUp, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c98U); + CHECK(rxRound(RoundToZero, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c98U); #ifdef DEBUG std::cout << "FPMUL" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U); - CHECK(rxRound(RoundDown, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U); - CHECK(rxRound(RoundUp, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U); - CHECK(rxRound(RoundToZero, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U); + CHECK(rxRound(RoundToNearest, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24542U); + CHECK(rxRound(RoundDown, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24541U); + CHECK(rxRound(RoundUp, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24542U); + CHECK(rxRound(RoundToZero, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24541U); - CHECK(rxRound(RoundToNearest, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U); - CHECK(rxRound(RoundDown, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U); - CHECK(rxRound(RoundUp, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U); - CHECK(rxRound(RoundToZero, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U); + CHECK(rxRound(RoundToNearest, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a7470U); + CHECK(rxRound(RoundDown, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a7470U); + 
CHECK(rxRound(RoundUp, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a746fU); + CHECK(rxRound(RoundToZero, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a746fU); #ifdef DEBUG std::cout << "FPDIV" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU); - CHECK(rxRound(RoundDown, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU); - CHECK(rxRound(RoundUp, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU); - CHECK(rxRound(RoundToZero, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU); + CHECK(rxRound(RoundToNearest, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb0aU); + CHECK(rxRound(RoundDown, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb09U); + CHECK(rxRound(RoundUp, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb0aU); + CHECK(rxRound(RoundToZero, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb09U); - CHECK(rxRound(RoundToNearest, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU); - CHECK(rxRound(RoundDown, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU); - CHECK(rxRound(RoundUp, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU); - CHECK(rxRound(RoundToZero, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU); + CHECK(rxRound(RoundToNearest, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71cU); + CHECK(rxRound(RoundDown, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71cU); + CHECK(rxRound(RoundUp, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71bU); + CHECK(rxRound(RoundToZero, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71bU); #ifdef DEBUG std::cout << "FPSQRT" << std::endl; #endif - CHECK(rxRound(RoundToNearest, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU); - CHECK(rxRound(RoundDown, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU); - CHECK(rxRound(RoundUp, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU); - CHECK(rxRound(RoundToZero, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU); + CHECK(rxRound(RoundToNearest, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19dU); + CHECK(rxRound(RoundDown, fpSqrta1, 0, &FPSQRT) == 
0x40d47f0e46ebc19cU); + CHECK(rxRound(RoundUp, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19dU); + CHECK(rxRound(RoundToZero, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19cU); + + CHECK(rxRound(RoundToNearest, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bcdU); + CHECK(rxRound(RoundDown, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bccU); + CHECK(rxRound(RoundUp, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bcdU); + CHECK(rxRound(RoundToZero, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bccU); } diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp index 21c52ac..103d245 100644 --- a/src/VirtualMachine.cpp +++ b/src/VirtualMachine.cpp @@ -24,8 +24,19 @@ along with RandomX. If not, see. #include "t1ha/t1ha.h" #include "blake2/blake2.h" #include +#include + +std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { + for (int i = 0; i < RandomX::RegistersCount; ++i) + os << std::hex << "r" << i << " = " << rf.r[i].u64 << std::endl << std::dec; + for (int i = 0; i < RandomX::RegistersCount; ++i) + os << std::hex << "f" << i << " = " << rf.f[i].hi.u64 << " (" << rf.f[i].hi.f64 << ")" << std::endl + << " = " << rf.f[i].lo.u64 << " (" << rf.f[i].lo.f64 << ")" << std::endl << std::dec; + return os; +} namespace RandomX { + VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) { mem.ds.dataset = nullptr; } @@ -83,9 +94,10 @@ namespace RandomX { } void VirtualMachine::getResult(void* out) { - uint64_t smallState[sizeof(RegisterFile) / sizeof(uint64_t) + 2]; + constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2; + uint64_t smallState[smallStateLength]; memcpy(smallState, ®, sizeof(RegisterFile)); - smallState[17] = t1ha2_atonce128(&smallState[16], scratchpad, ScratchpadSize, reg.r[0].u64); + smallState[smallStateLength - 1] = t1ha2_atonce128(&smallState[smallStateLength - 2], scratchpad, ScratchpadSize, reg.r[0].u64); blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0); } } \ No newline at end of 
file diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp index 569718c..f7fdcd0 100644 --- a/src/VirtualMachine.hpp +++ b/src/VirtualMachine.hpp @@ -32,11 +32,14 @@ namespace RandomX { virtual void initializeProgram(const void* seed) = 0; virtual void execute() = 0; void getResult(void*); + const RegisterFile& getRegisterFile() { + return reg; + } protected: bool softAes, lightClient; - RegisterFile reg; - MemoryRegisters mem; DatasetReadFunc readDataset; + alignas(16) RegisterFile reg; + MemoryRegisters mem; alignas(16) convertible_t scratchpad[ScratchpadLength]; }; } \ No newline at end of file diff --git a/src/asm/program_epilogue_linux.inc b/src/asm/program_epilogue_linux.inc new file mode 100644 index 0000000..414c9ba --- /dev/null +++ b/src/asm/program_epilogue_linux.inc @@ -0,0 +1,12 @@ + #include "program_epilogue_store.inc" + + ;# restore callee-saved registers - System V AMD64 ABI + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + + ;# program finished + ret 0 \ No newline at end of file diff --git a/src/asm/program_epilogue_store.inc b/src/asm/program_epilogue_store.inc new file mode 100644 index 0000000..b7b779b --- /dev/null +++ b/src/asm/program_epilogue_store.inc @@ -0,0 +1,22 @@ + ;# unroll VM stack + mov rsp, rbp + + ;# save VM register values + pop rcx + mov qword ptr [rcx+0], r8 + mov qword ptr [rcx+8], r9 + mov qword ptr [rcx+16], r10 + mov qword ptr [rcx+24], r11 + mov qword ptr [rcx+32], r12 + mov qword ptr [rcx+40], r13 + mov qword ptr [rcx+48], r14 + mov qword ptr [rcx+56], r15 + movdqa xmmword ptr [rcx+64], xmm8 + movdqa xmmword ptr [rcx+80], xmm9 + movdqa xmmword ptr [rcx+96], xmm2 + movdqa xmmword ptr [rcx+112], xmm3 + lea rcx, [rcx+64] + movdqa xmmword ptr [rcx+64], xmm4 + movdqa xmmword ptr [rcx+80], xmm5 + movdqa xmmword ptr [rcx+96], xmm6 + movdqa xmmword ptr [rcx+112], xmm7 \ No newline at end of file diff --git a/src/asm/program_epilogue_win64.inc b/src/asm/program_epilogue_win64.inc new file mode 100644 index 
0000000..220bed8 --- /dev/null +++ b/src/asm/program_epilogue_win64.inc @@ -0,0 +1,20 @@ + include program_epilogue_store.inc + + ;# restore callee-saved registers - Microsoft x64 calling convention + movdqu xmm10, xmmword ptr [rsp] + movdqu xmm9, xmmword ptr [rsp+16] + movdqu xmm8, xmmword ptr [rsp+32] + movdqu xmm7, xmmword ptr [rsp+48] + movdqu xmm6, xmmword ptr [rsp+64] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbp + pop rbx + + ;# program finished + ret 0 \ No newline at end of file diff --git a/src/asm/program_prologue_linux.inc b/src/asm/program_prologue_linux.inc new file mode 100644 index 0000000..8d09d88 --- /dev/null +++ b/src/asm/program_prologue_linux.inc @@ -0,0 +1,17 @@ + ;# callee-saved registers - System V AMD64 ABI + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + ;# function arguments + push rdi ;# RegisterFile& registerFile + mov rbx, rsi ;# MemoryRegisters& memory + mov rsi, rdx ;# convertible_t* scratchpad + mov rcx, rdi + + #include "program_prologue_load.inc" + + jmp randomx_program_begin \ No newline at end of file diff --git a/src/asm/program_prologue_load.inc b/src/asm/program_prologue_load.inc new file mode 100644 index 0000000..df44c08 --- /dev/null +++ b/src/asm/program_prologue_load.inc @@ -0,0 +1,63 @@ + mov rbp, rsp ;# beginning of VM stack + mov rdi, 1048577 ;# number of VM instructions to execute + 1 + + xorps xmm10, xmm10 + cmpeqpd xmm10, xmm10 + psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff + + ;# reset rounding mode + mov dword ptr [rsp-8], 40896 + ldmxcsr dword ptr [rsp-8] + + ;# load integer registers + mov r8, qword ptr [rcx+0] + mov r9, qword ptr [rcx+8] + mov r10, qword ptr [rcx+16] + mov r11, qword ptr [rcx+24] + mov r12, qword ptr [rcx+32] + mov r13, qword ptr [rcx+40] + mov r14, qword ptr [rcx+48] + mov r15, qword ptr [rcx+56] + + ;# initialize floating point registers + xorps xmm8, xmm8 + cvtsi2sd xmm8, qword ptr [rcx+72] + 
pslldq xmm8, 8 + cvtsi2sd xmm8, qword ptr [rcx+64] + + xorps xmm9, xmm9 + cvtsi2sd xmm9, qword ptr [rcx+88] + pslldq xmm9, 8 + cvtsi2sd xmm9, qword ptr [rcx+80] + + xorps xmm2, xmm2 + cvtsi2sd xmm2, qword ptr [rcx+104] + pslldq xmm2, 8 + cvtsi2sd xmm2, qword ptr [rcx+96] + + xorps xmm3, xmm3 + cvtsi2sd xmm3, qword ptr [rcx+120] + pslldq xmm3, 8 + cvtsi2sd xmm3, qword ptr [rcx+112] + + lea rcx, [rcx+64] + + xorps xmm4, xmm4 + cvtsi2sd xmm4, qword ptr [rcx+72] + pslldq xmm4, 8 + cvtsi2sd xmm4, qword ptr [rcx+64] + + xorps xmm5, xmm5 + cvtsi2sd xmm5, qword ptr [rcx+88] + pslldq xmm5, 8 + cvtsi2sd xmm5, qword ptr [rcx+80] + + xorps xmm6, xmm6 + cvtsi2sd xmm6, qword ptr [rcx+104] + pslldq xmm6, 8 + cvtsi2sd xmm6, qword ptr [rcx+96] + + xorps xmm7, xmm7 + cvtsi2sd xmm7, qword ptr [rcx+120] + pslldq xmm7, 8 + cvtsi2sd xmm7, qword ptr [rcx+112] \ No newline at end of file diff --git a/src/asm/program_prologue_win64.inc b/src/asm/program_prologue_win64.inc new file mode 100644 index 0000000..6059904 --- /dev/null +++ b/src/asm/program_prologue_win64.inc @@ -0,0 +1,24 @@ + ;# callee-saved registers - Microsoft x64 calling convention + push rbx + push rbp + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + movdqu xmmword ptr [rsp+64], xmm6 + movdqu xmmword ptr [rsp+48], xmm7 + movdqu xmmword ptr [rsp+32], xmm8 + movdqu xmmword ptr [rsp+16], xmm9 + movdqu xmmword ptr [rsp+0], xmm10 + + ;# function arguments + push rcx ;# RegisterFile& registerFile + mov rbx, rdx ;# MemoryRegisters& memory + mov rsi, r8 ;# convertible_t* scratchpad + + include program_prologue_load.inc + + jmp randomx_program_begin \ No newline at end of file diff --git a/src/asm/program_read_f.inc b/src/asm/program_read_f.inc new file mode 100644 index 0000000..1d70dab --- /dev/null +++ b/src/asm/program_read_f.inc @@ -0,0 +1,13 @@ + mov edx, dword ptr [rbx] ;# ma + mov rax, qword ptr [rbx+8] ;# dataset + cvtdq2pd xmm0, qword ptr [rax+rdx] + add dword ptr [rbx], 8 + xor ecx, dword 
ptr [rbx+4] ;# mx + mov dword ptr [rbx+4], ecx + test ecx, 65528 + jne short rx_read_dataset_f_ret + and ecx, -8 + mov dword ptr [rbx], ecx + prefetcht0 byte ptr [rax+rcx] +rx_read_dataset_f_ret: + ret 0 \ No newline at end of file diff --git a/src/asm/program_read_r.inc b/src/asm/program_read_r.inc new file mode 100644 index 0000000..b3102dc --- /dev/null +++ b/src/asm/program_read_r.inc @@ -0,0 +1,13 @@ + mov eax, dword ptr [rbx] ;# ma + mov rdx, qword ptr [rbx+8] ;# dataset + mov rax, qword ptr [rdx+rax] + add dword ptr [rbx], 8 + xor ecx, dword ptr [rbx+4] ;# mx + mov dword ptr [rbx+4], ecx + test ecx, 65528 + jne short rx_read_dataset_r_ret + and ecx, -8 + mov dword ptr [rbx], ecx + prefetcht0 byte ptr [rdx+rcx] +rx_read_dataset_r_ret: + ret 0 \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index 761d9f5..0bfc834 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -20,6 +20,7 @@ along with RandomX. If not, see. #pragma once #include +#include namespace RandomX { @@ -59,6 +60,15 @@ namespace RandomX { uint64_t u64; int32_t i32; uint32_t u32; + struct { + int32_t i32lo; + int32_t i32hi; + }; + }; + + struct fpu_reg_t { + convertible_t lo; + convertible_t hi; }; constexpr int ProgramLength = 512; @@ -96,10 +106,10 @@ namespace RandomX { struct RegisterFile { convertible_t r[RegistersCount]; - convertible_t f[RegistersCount]; + fpu_reg_t f[RegistersCount]; }; - static_assert(sizeof(RegisterFile) == 2 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile"); + static_assert(sizeof(RegisterFile) == 3 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile"); typedef convertible_t(*DatasetReadFunc)(addr_t, MemoryRegisters&); @@ -108,4 +118,6 @@ namespace RandomX { extern "C" { void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, DatasetReadFunc); } -} \ No newline at end of file +} + +std::ostream& operator<<(std::ostream& os, const 
RandomX::RegisterFile& rf); diff --git a/src/executeProgram-win64.asm b/src/executeProgram-win64.asm index e5ff87d..356428c 100644 --- a/src/executeProgram-win64.asm +++ b/src/executeProgram-win64.asm @@ -1,19 +1,19 @@ -; Copyright (c) 2018 tevador -; -; This file is part of RandomX. -; -; RandomX is free software: you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation, either version 3 of the License, or -; (at your option) any later version. -; -; RandomX is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with RandomX. If not, see. +;# Copyright (c) 2018 tevador +;# +;# This file is part of RandomX. +;# +;# RandomX is free software: you can redistribute it and/or modify +;# it under the terms of the GNU General Public License as published by +;# the Free Software Foundation, either version 3 of the License, or +;# (at your option) any later version. +;# +;# RandomX is distributed in the hope that it will be useful, +;# but WITHOUT ANY WARRANTY; without even the implied warranty of +;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;# GNU General Public License for more details. +;# +;# You should have received a copy of the GNU General Public License +;# along with RandomX. If not, see. 
PUBLIC executeProgram @@ -47,6 +47,7 @@ executeProgram PROC ; xmm7 -> "f7" ; xmm8 -> "f0" ; xmm9 -> "f1" + ; xmm10 -> absolute value mask ; STACK STRUCTURE: ; | @@ -71,11 +72,12 @@ executeProgram PROC push r13 push r14 push r15 - sub rsp, 64 - movdqu xmmword ptr [rsp+48], xmm6 - movdqu xmmword ptr [rsp+32], xmm7 - movdqu xmmword ptr [rsp+16], xmm8 - movdqu xmmword ptr [rsp+0], xmm9 + sub rsp, 80 + movdqu xmmword ptr [rsp+64], xmm6 + movdqu xmmword ptr [rsp+48], xmm7 + movdqu xmmword ptr [rsp+32], xmm8 + movdqu xmmword ptr [rsp+16], xmm9 + movdqu xmmword ptr [rsp+0], xmm10 ; function arguments push rcx ; RegisterFile& registerFile @@ -86,7 +88,15 @@ executeProgram PROC mov rbp, rsp ; beginning of VM stack mov rdi, 1048577 ; number of VM instructions to execute + 1 - ; load VM register values + xorps xmm10, xmm10 + cmpeqpd xmm10, xmm10 + psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff + + ; reset rounding mode + mov dword ptr [rsp-8], 40896 + ldmxcsr dword ptr [rsp-8] + + ; load integer registers mov r8, qword ptr [rcx+0] mov r9, qword ptr [rcx+8] mov r10, qword ptr [rcx+16] @@ -95,16 +105,56 @@ executeProgram PROC mov r13, qword ptr [rcx+40] mov r14, qword ptr [rcx+48] mov r15, qword ptr [rcx+56] - mov dword ptr [rsp-8], 40896 - ldmxcsr dword ptr [rsp-8] + + ; load register f0 hi, lo + xorps xmm8, xmm8 + cvtsi2sd xmm8, qword ptr [rcx+72] + pslldq xmm8, 8 cvtsi2sd xmm8, qword ptr [rcx+64] - cvtsi2sd xmm9, qword ptr [rcx+72] - cvtsi2sd xmm2, qword ptr [rcx+80] - cvtsi2sd xmm3, qword ptr [rcx+88] - cvtsi2sd xmm4, qword ptr [rcx+96] - cvtsi2sd xmm5, qword ptr [rcx+104] - cvtsi2sd xmm6, qword ptr [rcx+112] + + ; load register f1 hi, lo + xorps xmm9, xmm9 + cvtsi2sd xmm9, qword ptr [rcx+88] + pslldq xmm9, 8 + cvtsi2sd xmm9, qword ptr [rcx+80] + + ; load register f2 hi, lo + xorps xmm2, xmm2 + cvtsi2sd xmm2, qword ptr [rcx+104] + pslldq xmm2, 8 + cvtsi2sd xmm2, qword ptr [rcx+96] + + ; load register f3 hi, lo + xorps xmm3, xmm3 + cvtsi2sd 
xmm3, qword ptr [rcx+120] + pslldq xmm3, 8 + cvtsi2sd xmm3, qword ptr [rcx+112] + + lea rcx, [rcx+64] + + ; load register f4 hi, lo + xorps xmm4, xmm4 + cvtsi2sd xmm4, qword ptr [rcx+72] + pslldq xmm4, 8 + cvtsi2sd xmm4, qword ptr [rcx+64] + + ; load register f5 hi, lo + xorps xmm5, xmm5 + cvtsi2sd xmm5, qword ptr [rcx+88] + pslldq xmm5, 8 + cvtsi2sd xmm5, qword ptr [rcx+80] + + ; load register f6 hi, lo + xorps xmm6, xmm6 + cvtsi2sd xmm6, qword ptr [rcx+104] + pslldq xmm6, 8 + cvtsi2sd xmm6, qword ptr [rcx+96] + + ; load register f7 hi, lo + xorps xmm7, xmm7 cvtsi2sd xmm7, qword ptr [rcx+120] + pslldq xmm7, 8 + cvtsi2sd xmm7, qword ptr [rcx+112] ; program body @@ -125,21 +175,23 @@ rx_finish: mov qword ptr [rcx+40], r13 mov qword ptr [rcx+48], r14 mov qword ptr [rcx+56], r15 - movd qword ptr [rcx+64], xmm8 - movd qword ptr [rcx+72], xmm9 - movd qword ptr [rcx+80], xmm2 - movd qword ptr [rcx+88], xmm3 - movd qword ptr [rcx+96], xmm4 - movd qword ptr [rcx+104], xmm5 - movd qword ptr [rcx+112], xmm6 - movd qword ptr [rcx+120], xmm7 + movdqa xmmword ptr [rcx+64], xmm8 + movdqa xmmword ptr [rcx+80], xmm9 + movdqa xmmword ptr [rcx+96], xmm2 + movdqa xmmword ptr [rcx+112], xmm3 + lea rcx, [rcx+64] + movdqa xmmword ptr [rcx+64], xmm4 + movdqa xmmword ptr [rcx+80], xmm5 + movdqa xmmword ptr [rcx+96], xmm6 + movdqa xmmword ptr [rcx+112], xmm7 ; load callee-saved registers - movdqu xmm9, xmmword ptr [rsp] - movdqu xmm8, xmmword ptr [rsp+16] - movdqu xmm7, xmmword ptr [rsp+32] - movdqu xmm6, xmmword ptr [rsp+48] - add rsp, 64 + movdqu xmm10, xmmword ptr [rsp] + movdqu xmm9, xmmword ptr [rsp+16] + movdqu xmm8, xmmword ptr [rsp+32] + movdqu xmm7, xmmword ptr [rsp+48] + movdqu xmm6, xmmword ptr [rsp+64] + add rsp, 80 pop r15 pop r14 pop r13 @@ -171,7 +223,7 @@ rx_read_dataset: pop r8 ret 0 -rx_read_dataset_full: +rx_read_dataset_r: mov edx, dword ptr [rbx] ; ma mov rax, qword ptr [rbx+8] ; dataset mov rax, qword ptr [rax+rdx] @@ -179,12 +231,27 @@ rx_read_dataset_full: xor ecx, 
dword ptr [rbx+4] ; mx mov dword ptr [rbx+4], ecx test ecx, 0FFF8h - jne short rx_read_dataset_full_ret + jne short rx_read_dataset_r_ret and ecx, -8 mov dword ptr [rbx], ecx mov rdx, qword ptr [rbx+8] prefetcht0 byte ptr [rdx+rcx] -rx_read_dataset_full_ret: +rx_read_dataset_r_ret: + ret 0 + +rx_read_dataset_f: + mov edx, dword ptr [rbx] ; ma + mov rax, qword ptr [rbx+8] ; dataset + cvtdq2pd xmm0, qword ptr [rax+rdx] + add dword ptr [rbx], 8 + xor ecx, dword ptr [rbx+4] ; mx + mov dword ptr [rbx+4], ecx + test ecx, 0FFF8h + jne short rx_read_dataset_f_ret + and ecx, -8 + mov dword ptr [rbx], ecx + prefetcht0 byte ptr [rax+rcx] +rx_read_dataset_f_ret: ret 0 executeProgram ENDP diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp index e25b6cc..bb99ca7 100644 --- a/src/instructionWeights.hpp +++ b/src/instructionWeights.hpp @@ -19,15 +19,15 @@ along with RandomX. If not, see. #pragma once -#define WT_ADD_64 10 +#define WT_ADD_64 11 #define WT_ADD_32 2 -#define WT_SUB_64 10 +#define WT_SUB_64 11 #define WT_SUB_32 2 -#define WT_MUL_64 21 +#define WT_MUL_64 23 #define WT_MULH_64 10 #define WT_MUL_32 15 #define WT_IMUL_32 15 -#define WT_IMULH_64 10 +#define WT_IMULH_64 6 #define WT_DIV_64 1 #define WT_IDIV_64 1 #define WT_AND_64 4 @@ -47,8 +47,9 @@ along with RandomX. If not, see. 
#define WT_FPDIV 8 #define WT_FPSQRT 6 #define WT_FPROUND 2 -#define WT_CALL 24 -#define WT_RET 18 +#define WT_CALL 20 +#define WT_RET 22 + constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \ WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \ @@ -60,6 +61,7 @@ WT_SAR_64 + WT_ROL_64 + WT_ROR_64 + WT_FPADD + WT_FPSUB + WT_FPMUL \ static_assert(wtSum == 256, "Sum of instruction weights must be 256"); +#define REP0(x) #define REP1(x) x, #define REP2(x) REP1(x) x, #define REP3(x) REP2(x) x, @@ -86,6 +88,16 @@ static_assert(wtSum == 256, #define REP24(x) REP23(x) x, #define REP25(x) REP24(x) x, #define REP26(x) REP25(x) x, +#define REP27(x) REP26(x) x, +#define REP28(x) REP27(x) x, +#define REP29(x) REP28(x) x, +#define REP30(x) REP29(x) x, +#define REP31(x) REP30(x) x, +#define REP32(x) REP31(x) x, +#define REP33(x) REP32(x) x, +#define REP40(x) REP32(x) REP8(x) +#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) +#define REP256(x) REP128(x) REP128(x) #define REPNX(x,N) REP##N(x) #define REPN(x,N) REPNX(x,N) #define NUM(x) x diff --git a/src/instructions.hpp b/src/instructions.hpp index 7afb916..2321be6 100644 --- a/src/instructions.hpp +++ b/src/instructions.hpp @@ -22,16 +22,10 @@ along with RandomX. If not, see. namespace RandomX { - inline double convertToDouble(int64_t x) { - return (double)(x &-2048L); - } - - inline double convertToDoubleNonZero(int64_t x) { - return (double)((x & -2048L) | 2048); - } - - inline double convertToDoubleNonNegative(int64_t x) { - return (double)(x & 9223372036854773760L); + //Clears the 11 least-significant bits before conversion. This is done so the number + //fits exactly into the 52-bit mantissa without rounding. 
+ inline double convertSigned52(int64_t x) { + return (double)(x & -2048L); } extern "C" { @@ -59,27 +53,11 @@ namespace RandomX { void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c); bool JMP_COND(uint8_t, convertible_t&, int32_t); void FPINIT(); - void FPADD(convertible_t& a, double b, convertible_t& c); - void FPSUB(convertible_t& a, double b, convertible_t& c); - void FPMUL(convertible_t& a, double b, convertible_t& c); - void FPDIV(convertible_t& a, double b, convertible_t& c); - void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c); - void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c); - - inline void FPADD_64(convertible_t& a, convertible_t& b, convertible_t& c) { - FPADD(a, b.f64, c); - } - - inline void FPSUB_64(convertible_t& a, convertible_t& b, convertible_t& c) { - FPSUB(a, b.f64, c); - } - - inline void FPMUL_64(convertible_t& a, convertible_t& b, convertible_t& c) { - FPMUL(a, b.f64, c); - } - - inline void FPDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) { - FPDIV(a, b.f64, c); - } + void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); + void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); + void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); + void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); + void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); + void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); } } \ No newline at end of file diff --git a/src/instructionsPortable.cpp b/src/instructionsPortable.cpp index 5207c1e..790506b 100644 --- a/src/instructionsPortable.cpp +++ b/src/instructionsPortable.cpp @@ -17,7 +17,6 @@ You should have received a copy of the GNU General Public License along with RandomX. If not, see. 
*/ //#define DEBUG -//#define FTZ #include "instructions.hpp" #include "intrinPortable.h" #pragma STDC FENV_ACCESS on @@ -154,19 +153,17 @@ static inline int32_t safeSub(int32_t a, int32_t b) { #define subOverflow __subOverflow #endif -static double FlushDenormal(double x) { - if (std::fpclassify(x) == FP_SUBNORMAL) { - return 0; +static inline double FlushDenormalNaN(double x) { + int fpc = std::fpclassify(x); + if (fpc == FP_SUBNORMAL || fpc == FP_NAN) { + return 0.0; } return x; } -#ifdef FTZ -#undef FTZ -#define FTZ(x) FlushDenormal(x) -#else -#define FTZ(x) x -#endif +static inline double FlushNaN(double x) { + return x != x ? 0.0 : x; +} namespace RandomX { @@ -286,37 +283,95 @@ namespace RandomX { } void FPINIT() { - setRoundMode(FE_TONEAREST); - } - - void FPADD(convertible_t& a, double b, convertible_t& c) { - c.f64 = FTZ(convertToDouble(a.i64) + b); - } - - void FPSUB(convertible_t& a, double b, convertible_t& c) { - c.f64 = FTZ(convertToDouble(a.i64) - b); - } - - void FPMUL(convertible_t& a, double b, convertible_t& c) { - c.f64 = FTZ(convertToDoubleNonZero(a.i64) * b); - } - - void FPDIV(convertible_t& a, double b, convertible_t& c) { - c.f64 = FTZ(convertToDoubleNonZero(a.i64) / b); - } - - void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c) { #ifdef __SSE2__ - double d = convertToDoubleNonNegative(a.i64); - c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_pd(&d))); + _mm_setcsr(0x9FC0); //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled #else - c.f64 = FTZ(sqrt(convertToDoubleNonNegative(a.i64))); + setRoundMode(FE_TONEAREST); #endif - } - void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c) { - c.f64 = convertToDouble(a.i64); + void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { +#ifdef __SSE2__ + __m128i ai = _mm_loadl_epi64((const __m128i*)&a); + __m128d ad = _mm_cvtepi32_pd(ai); + __m128d bd = _mm_load_pd(&b.lo.f64); + __m128d cd = _mm_add_pd(ad, bd); + 
_mm_store_pd(&c.lo.f64, cd); +#else + double alo = (double)a.i32lo; + double ahi = (double)a.i32hi; + c.lo.f64 = alo + b.lo.f64; + c.hi.f64 = ahi + b.hi.f64; +#endif + } + + void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { +#ifdef __SSE2__ + __m128i ai = _mm_loadl_epi64((const __m128i*)&a); + __m128d ad = _mm_cvtepi32_pd(ai); + __m128d bd = _mm_load_pd(&b.lo.f64); + __m128d cd = _mm_sub_pd(ad, bd); + _mm_store_pd(&c.lo.f64, cd); +#else + double alo = (double)a.i32lo; + double ahi = (double)a.i32hi; + c.lo.f64 = alo - b.lo.f64; + c.hi.f64 = ahi - b.hi.f64; +#endif + } + + void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { +#ifdef __SSE2__ + __m128i ai = _mm_loadl_epi64((const __m128i*)&a); + __m128d ad = _mm_cvtepi32_pd(ai); + __m128d bd = _mm_load_pd(&b.lo.f64); + __m128d cd = _mm_mul_pd(ad, bd); + __m128d mask = _mm_cmpeq_pd(cd, cd); + cd = _mm_and_pd(cd, mask); + _mm_store_pd(&c.lo.f64, cd); +#else + double alo = (double)a.i32lo; + double ahi = (double)a.i32hi; + c.lo.f64 = FlushNaN(alo * b.lo.f64); + c.hi.f64 = FlushNaN(ahi * b.hi.f64); +#endif + } + + void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { +#ifdef __SSE2__ + __m128i ai = _mm_loadl_epi64((const __m128i*)&a); + __m128d ad = _mm_cvtepi32_pd(ai); + __m128d bd = _mm_load_pd(&b.lo.f64); + __m128d cd = _mm_div_pd(ad, bd); + __m128d mask = _mm_cmpeq_pd(cd, cd); + cd = _mm_and_pd(cd, mask); + _mm_store_pd(&c.lo.f64, cd); +#else + double alo = (double)a.i32lo; + double ahi = (double)a.i32hi; + c.lo.f64 = FlushDenormalNaN(alo / b.lo.f64); + c.hi.f64 = FlushDenormalNaN(ahi / b.hi.f64); +#endif + } + + void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { +#ifdef __SSE2__ + __m128i ai = _mm_loadl_epi64((const __m128i*)&a); + __m128d ad = _mm_cvtepi32_pd(ai); + const __m128d absmask = _mm_castsi128_pd(_mm_set1_epi64x(~(1LL << 63))); + ad = _mm_and_pd(ad, absmask); + __m128d cd = _mm_sqrt_pd(ad); + _mm_store_pd(&c.lo.f64, cd); +#else + double alo = (double)a.i32lo; + double ahi = 
(double)a.i32hi; + c.lo.f64 = sqrt(std::abs(alo)); + c.hi.f64 = sqrt(std::abs(ahi)); +#endif + } + + void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { + c.lo.f64 = convertSigned52(a.i64); switch (a.u64 & 3) { case RoundDown: #ifdef DEBUG diff --git a/src/main.cpp b/src/main.cpp index fc95c8b..8bb5492 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,14 +79,6 @@ void readInt(int argc, char** argv, int& out, int defaultValue) { out = defaultValue; } -std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { - for (int i = 0; i < RandomX::RegistersCount; ++i) - os << std::hex << "r" << i << " = " << rf.r[i].u64 << std::endl << std::dec; - for (int i = 0; i < RandomX::RegistersCount; ++i) - os << std::hex << "f" << i << " = " << rf.f[i].u64 << " (" << rf.f[i].f64 << ")" << std::endl << std::dec; - return os; -} - class AtomicHash { public: AtomicHash() { @@ -282,7 +274,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if(programCount == 1000) - std::cout << "Reference result: f6bf06465d5fa1b1dc919140b9e9f9e210b07ae6d662988458a172e9a267eb3f" << std::endl; + std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl; std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl; /*if (threadCount == 1 && !compiled) { auto ivm = (RandomX::InterpretedVirtualMachine*)vms[0]; diff --git a/src/program.inc b/src/program.inc index b41c7b5..081647f 100644 --- a/src/program.inc +++ b/src/program.inc @@ -1,82 +1,12 @@ -rx_i_0: ;SUB_64 +rx_i_0: ;RET dec edi - js rx_finish - xor r14, 087d93944h - mov eax, r14d - and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - sub rax, r10 - mov r14, rax - -rx_i_1: ;IMULH_64 - dec edi - js rx_finish - xor r8, 0d7a5aadbh - mov eax, r8d - and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - mov rcx, -615634046 - imul rcx - mov rax, rdx - mov rcx, rax - mov eax, r14d - xor eax, 
0db4e2b82h - and eax, 2047 - mov qword ptr [rsi + rax * 8], rcx - -rx_i_2: ;RET - dec edi - js rx_finish - xor r10, 06ffcedb1h - mov eax, r10d - and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - cmp rsp, rbp - je short not_taken_ret_2 - cmp r8d, 330026357 - jo short not_taken_ret_2 - xor rax, qword ptr [rsp + 8] - mov r13, rax - ret 8 -not_taken_ret_2: - mov r13, rax - -rx_i_3: ;RET - dec edi - js rx_finish - xor r9, 0a35d739ch - mov eax, r9d - and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - cmp rsp, rbp - je short not_taken_ret_3 - cmp r14d, 2014518380 - jno short not_taken_ret_3 - xor rax, qword ptr [rsp + 8] - mov rcx, rax - mov eax, r11d - xor eax, 078131c6ch - and eax, 2047 - mov qword ptr [rsi + rax * 8], rcx - ret 8 -not_taken_ret_3: - mov rcx, rax - mov eax, r11d - xor eax, 078131c6ch - and eax, 2047 - mov qword ptr [rsi + rax * 8], rcx - -rx_i_4: ;RET - dec edi - js rx_finish + jz rx_finish xor r9, 0ca9788ah mov eax, r9d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_4 - cmp r11d, 445530481 - ja short not_taken_ret_4 + je short not_taken_ret_0 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r12d @@ -84,117 +14,120 @@ rx_i_4: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_4: +not_taken_ret_0: mov rcx, rax mov eax, r12d xor eax, 01a8e4171h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_5: ;AND_64 +rx_i_1: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r15, 06afc2fa4h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r and rax, r10 mov r12, rax -rx_i_6: ;CALL +rx_i_2: ;CALL dec edi - js rx_finish + jz rx_finish xor r15, 097210f7bh mov eax, r15d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r11d, 1348521207 - jno short taken_call_6 + jno short taken_call_2 mov rcx, rax mov eax, r9d xor eax, 05060ccf7h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_7 -taken_call_6: + jmp rx_i_3 +taken_call_2: push rax - call rx_i_51 + call rx_i_47 -rx_i_7: 
;FPROUND +rx_i_3: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r13, 082c73195h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, rax shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm8, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - movsd xmm8, xmm0 + mov eax, r8d + xor eax, 06bb1a0b2h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm8 -rx_i_8: ;MUL_32 +rx_i_4: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r14, 077daefb4h mov eax, r14d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - mov ecx, eax - mov eax, r14d - imul rax, rcx + mov rcx, r14 + mul rcx + mov rax, rdx mov rcx, rax mov eax, r9d xor eax, 06ce10c20h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_9: ;IMUL_32 +rx_i_5: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r15, 0379f9ee0h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r12d imul rax, rcx mov r12, rax -rx_i_10: ;MULH_64 +rx_i_6: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r8, 03bae7272h mov ecx, r8d - call rx_read_dataset - mov rcx, r15 - mul rcx - mov rax, rdx + call rx_read_dataset_r + imul rax, r15 mov rcx, rax mov eax, r9d xor eax, 098a649d1h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_11: ;FPADD +rx_i_7: ;FPADD dec edi - js rx_finish + jz rx_finish xor r10, 0e264ed81h mov eax, r10d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - movsd xmm6, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm6 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 057c8c41bh + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_12: ;SHL_64 +rx_i_8: ;SHL_64 dec edi - js rx_finish + jz rx_finish xor r13, 068c1e5d2h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r shl rax, 47 mov rcx, rax mov eax, r12d @@ -202,9 +135,9 @@ rx_i_12: ;SHL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_13: ;AND_64 
+rx_i_9: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r14, 085121c54h mov eax, r14d and eax, 32767 @@ -212,78 +145,72 @@ rx_i_13: ;AND_64 and rax, 565870810 mov r10, rax -rx_i_14: ;OR_64 +rx_i_10: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r8, 052efde3eh mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r or rax, -727859809 mov r13, rax -rx_i_15: ;FPADD +rx_i_11: ;FPADD dec edi - js rx_finish + jz rx_finish xor r10, 0a9bf8aa1h mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm5 - movsd xmm4, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm5 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0852d40d8h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_16: ;CALL +rx_i_12: ;CALL dec edi - js rx_finish + jz rx_finish xor r10, 0db2691ch mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r cmp r8d, -1763940407 - jge short taken_call_16 + jge short taken_call_12 mov r8, rax - jmp rx_i_17 -taken_call_16: + jmp rx_i_13 +taken_call_12: push rax - call rx_i_39 + call rx_i_35 -rx_i_17: ;FPSUB +rx_i_13: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r12, 061c0d34dh mov ecx, r12d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - mov eax, r9d - xor eax, 04f2f223ch - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm3 + movaps xmm9, xmm0 -rx_i_18: ;SHR_64 +rx_i_14: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r10, 0e761d1beh mov ecx, r10d - call rx_read_dataset - mov rcx, r9 - shr rax, cl + call rx_read_dataset_r + shr rax, 4 mov rcx, rax mov eax, r10d xor eax, 03c1a72f8h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_19: ;RET +rx_i_15: ;RET dec edi - js rx_finish + jz rx_finish xor r11, 074ddb688h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_19 - cmp r11d, 1183529144 - js short not_taken_ret_19 + je short not_taken_ret_15 xor rax, qword ptr [rsp + 8] mov 
rcx, rax mov eax, r14d @@ -291,16 +218,16 @@ rx_i_19: ;RET and eax, 32767 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_19: +not_taken_ret_15: mov rcx, rax mov eax, r14d xor eax, 0468b38b8h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_20: ;ADD_64 +rx_i_16: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r14, 06be90627h mov eax, r14d and eax, 2047 @@ -312,78 +239,78 @@ rx_i_20: ;ADD_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_21: ;FPMUL +rx_i_17: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0fbc6fc35h mov eax, r11d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm4 - movsd xmm4, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm4 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0f77ffe16h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_22: ;FPSUB +rx_i_18: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r14, 0c28ca080h mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm4 - movsd xmm3, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm4 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0869baa81h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm3 -rx_i_23: ;FPSUB +rx_i_19: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r13, 0ac009c30h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm8 - mov eax, r15d - xor eax, 0e92dc022h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm8 + movaps xmm7, xmm0 -rx_i_24: ;FPMUL +rx_i_20: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r13, 0ecca967dh mov ecx, r13d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - movsd xmm7, xmm0 + call rx_read_dataset_f + mulpd 
xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0aad81365h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_25: ;FPADD +rx_i_21: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0977f0284h mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r15d - xor eax, 0db5e0aafh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm7, xmm0 -rx_i_26: ;ADD_32 +rx_i_22: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r13, 080bdfefah mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r add eax, r8d mov rcx, rax mov eax, r10d @@ -391,21 +318,21 @@ rx_i_26: ;ADD_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_27: ;MUL_64 +rx_i_23: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r15, 0e1e0d3c4h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r imul rax, r11 mov r8, rax -rx_i_28: ;IMULH_64 +rx_i_24: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r8, 070d3b8c7h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r15 imul rcx mov rax, rdx @@ -415,52 +342,53 @@ rx_i_28: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_29: ;FPMUL +rx_i_25: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 01cf77a04h mov ecx, r12d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - movsd xmm6, xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 0baf5c2d4h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_30: ;IMULH_64 +rx_i_26: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 0e311468ch mov ecx, r11d - call rx_read_dataset - mov rcx, r13 - imul rcx - mov rax, rdx + call rx_read_dataset_r + movsxd rcx, eax + movsxd rax, r13d + imul rax, rcx mov rcx, rax mov eax, r9d xor 
eax, 0306ff9ech and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_31: ;FPMUL +rx_i_27: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 01fd9911ah mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm3 - mov eax, r14d - xor eax, 04b5d4e80h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm6, xmm0 -rx_i_32: ;XOR_64 +rx_i_28: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r13, 067df757eh mov eax, r13d and eax, 2047 @@ -468,56 +396,51 @@ rx_i_32: ;XOR_64 xor rax, r13 mov r14, rax -rx_i_33: ;SUB_64 +rx_i_29: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r12, 0be2e7c42h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r sub rax, 1944166515 mov r14, rax -rx_i_34: ;FPADD +rx_i_30: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 084d067f7h mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4485208588087721984 - movd xmm1, rax - addsd xmm0, xmm1 - mov eax, r15d - xor eax, 0dd52e4f9h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm7, xmm0 -rx_i_35: ;FPADD +rx_i_31: ;FPADD dec edi - js rx_finish + jz rx_finish xor r14, 0d352ce37h mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm6, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 01e2da792h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_36: ;XOR_64 +rx_i_32: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r12, 0a1f248dah mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r xor rax, -1936869641 mov r9, rax -rx_i_37: ;MULH_64 +rx_i_33: ;MULH_64 dec edi - js 
rx_finish + jz rx_finish xor r9, 0554720fch mov eax, r9d and eax, 2047 @@ -527,70 +450,69 @@ rx_i_37: ;MULH_64 mov rax, rdx mov r12, rax -rx_i_38: ;CALL +rx_i_34: ;CALL dec edi - js rx_finish + jz rx_finish xor r13, 0665e91f1h mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r14d, -380224718 - js short taken_call_38 + js short taken_call_34 mov r15, rax - jmp rx_i_39 -taken_call_38: + jmp rx_i_35 +taken_call_34: push rax - call rx_i_112 + call rx_i_108 -rx_i_39: ;CALL +rx_i_35: ;RET dec edi - js rx_finish + jz rx_finish xor r15, 05ef1be79h mov eax, r15d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - cmp r9d, -2040787098 - js short taken_call_39 + cmp rsp, rbp + je short not_taken_ret_35 + xor rax, qword ptr [rsp + 8] + mov r8, rax + ret 8 +not_taken_ret_35: mov r8, rax - jmp rx_i_40 -taken_call_39: - push rax - call rx_i_62 -rx_i_40: ;FPMUL +rx_i_36: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r8, 012ec7e3ah mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm6 - mov eax, r15d - xor eax, 07a07ae2ah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_41: ;FPMUL +rx_i_37: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 0d0706601h mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4480846364313387008 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0bca81c78h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm9 -rx_i_42: ;SUB_64 +rx_i_38: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 064056913h mov eax, r9d and eax, 2047 @@ -598,26 +520,24 @@ 
rx_i_42: ;SUB_64 sub rax, r14 mov r10, rax -rx_i_43: ;ADD_32 +rx_i_39: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r14, 02c1f1eb0h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r add eax, r14d mov r14, rax -rx_i_44: ;RET +rx_i_40: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 068fd9009h mov eax, r10d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_44 - cmp r12d, -1297973554 - jns short not_taken_ret_44 + je short not_taken_ret_40 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r9d @@ -625,51 +545,44 @@ rx_i_44: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_44: +not_taken_ret_40: mov rcx, rax mov eax, r9d xor eax, 0b2a27eceh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_45: ;CALL +rx_i_41: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 037a30933h mov eax, r9d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp r14d, -1070581824 - jo short taken_call_45 + jo short taken_call_41 mov r9, rax - jmp rx_i_46 -taken_call_45: + jmp rx_i_42 +taken_call_41: push rax - call rx_i_131 + call rx_i_127 -rx_i_46: ;FPSUB +rx_i_42: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 0bc1de9f6h mov eax, r15d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4739074351570092032 - movd xmm1, rax - subsd xmm0, xmm1 - mov eax, r14d - xor eax, 029260733h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm6 + movaps xmm6, xmm0 -rx_i_47: ;SUB_64 +rx_i_43: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r12, 02b2a2eech mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r sub rax, 1693705407 mov rcx, rax mov eax, r11d @@ -677,39 +590,32 @@ rx_i_47: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_48: ;ROL_64 +rx_i_44: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r11, 0685817abh mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r 
mov rcx, r9 rol rax, cl mov r15, rax -rx_i_49: ;FPSUB +rx_i_45: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r12, 08cd244ebh mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4478227626472767488 - movd xmm1, rax - subsd xmm0, xmm1 - mov eax, r13d - xor eax, 0977132cdh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm2 + movaps xmm5, xmm0 -rx_i_50: ;ADD_64 +rx_i_46: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r8, 06d8f4254h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r add rax, r9 mov rcx, rax mov eax, r8d @@ -717,56 +623,55 @@ rx_i_50: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_51: ;CALL +rx_i_47: ;CALL dec edi - js rx_finish + jz rx_finish xor r12, 05ba232c6h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r cmp r10d, 119251505 - jbe short taken_call_51 + jbe short taken_call_47 mov rcx, rax mov eax, r13d xor eax, 071ba231h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_52 -taken_call_51: + jmp rx_i_48 +taken_call_47: push rax - call rx_i_135 + call rx_i_131 -rx_i_52: ;FPSQRT +rx_i_48: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r8, 0aaed618fh mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 020e5d9e9h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm9 -rx_i_53: ;FPMUL +rx_i_49: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r8, 0f96c6a45h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm3 - mov eax, r13d - xor eax, 0c56b47bh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + 
mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 -rx_i_54: ;OR_32 +rx_i_50: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r9, 0da3e4842h mov eax, r9d and eax, 32767 @@ -778,53 +683,51 @@ rx_i_54: ;OR_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_55: ;SUB_64 +rx_i_51: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r10, 0302b676ah mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r sub rax, 419241919 mov r15, rax -rx_i_56: ;CALL +rx_i_52: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 0fa88f48bh mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp r13d, -534426193 - js short taken_call_56 + js short taken_call_52 mov rcx, rax mov eax, r15d xor eax, 0e0254dafh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_57 -taken_call_56: + jmp rx_i_53 +taken_call_52: push rax - call rx_i_98 + call rx_i_94 -rx_i_57: ;RET +rx_i_53: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 03dff9b9eh mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_57 - cmp r8d, 2028798189 - jno short not_taken_ret_57 + je short not_taken_ret_53 xor rax, qword ptr [rsp + 8] mov r13, rax ret 8 -not_taken_ret_57: +not_taken_ret_53: mov r13, rax -rx_i_58: ;IMULH_64 +rx_i_54: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r11, 060638de0h mov eax, r11d and eax, 2047 @@ -838,22 +741,26 @@ rx_i_58: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_59: ;FPMUL +rx_i_55: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r10, 0dda983d4h mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm5 - movsd xmm3, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 07c79cddh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm3 -rx_i_60: ;AND_64 
+rx_i_56: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r14, 0f1456b8eh mov eax, r14d and eax, 32767 @@ -865,9 +772,9 @@ rx_i_60: ;AND_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_61: ;MUL_64 +rx_i_57: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r9, 010dc4571h mov eax, r9d and eax, 2047 @@ -879,55 +786,48 @@ rx_i_61: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_62: ;IDIV_64 +rx_i_58: ;IDIV_64 dec edi - js rx_finish + jz rx_finish xor r14, 0bcec0ebah mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] mov edx, r13d cmp edx, -1 - jne short safe_idiv_62 + jne short safe_idiv_58 mov rcx, rax rol rcx, 1 dec rcx - jz short result_idiv_62 -safe_idiv_62: + jz short result_idiv_58 +safe_idiv_58: mov ecx, 1 test edx, edx cmovne ecx, edx movsxd rcx, ecx cqo idiv rcx -result_idiv_62: +result_idiv_58: mov r8, rax -rx_i_63: ;FPSUB +rx_i_59: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r11, 0980dd402h mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm8 - mov eax, r15d - xor eax, 04f4e2c91h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm8 + movaps xmm7, xmm0 -rx_i_64: ;RET +rx_i_60: ;RET dec edi - js rx_finish + jz rx_finish xor r15, 03de14d1eh mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_64 - cmp r11d, 2075529029 - jo short not_taken_ret_64 + je short not_taken_ret_60 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r11d @@ -935,137 +835,133 @@ rx_i_64: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_64: +not_taken_ret_60: mov rcx, rax mov eax, r11d xor eax, 07bb60f45h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_65: ;CALL +rx_i_61: ;CALL dec edi - js rx_finish + jz rx_finish xor r13, 05058ce64h mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r15d, 1933164545 - jns short 
taken_call_65 + jns short taken_call_61 + mov r11, rax + jmp rx_i_62 +taken_call_61: + push rax + call rx_i_120 + +rx_i_62: ;FPMUL + dec edi + jz rx_finish + xor r15, 0c3089414h + mov ecx, r15d + call rx_read_dataset_f + mulpd xmm0, xmm8 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 05c4789e3h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm2 + +rx_i_63: ;FPMUL + dec edi + jz rx_finish + xor r9, 065cf272eh + mov eax, r9d + and eax, 2047 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm7 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 + +rx_i_64: ;SUB_64 + dec edi + jz rx_finish + xor r13, 0ae54dfbfh + mov ecx, r13d + call rx_read_dataset_r + sub rax, r15 + mov r9, rax + +rx_i_65: ;CALL + dec edi + jz rx_finish + xor r13, 07b366ce6h + mov ecx, r13d + call rx_read_dataset_r + cmp r8d, 1498056607 + js short taken_call_65 mov r11, rax jmp rx_i_66 taken_call_65: push rax - call rx_i_124 + call rx_i_129 -rx_i_66: ;FPMUL +rx_i_66: ;FPSQRT dec edi - js rx_finish - xor r15, 0c3089414h - mov ecx, r15d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4744280396844236800 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm2, xmm0 - -rx_i_67: ;FPMUL - dec edi - js rx_finish - xor r9, 065cf272eh - mov eax, r9d - and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4480946344868970496 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r8d - xor eax, 0be13d69eh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 - -rx_i_68: ;SUB_64 - dec edi - js rx_finish - xor r13, 0ae54dfbfh - mov ecx, r13d - call rx_read_dataset - sub rax, r15 - mov r9, rax - -rx_i_69: ;CALL - dec edi - js rx_finish - xor r13, 07b366ce6h - mov ecx, r13d - call rx_read_dataset - cmp r8d, 1498056607 - js short taken_call_69 - mov r11, rax - jmp rx_i_70 -taken_call_69: - push rax - call rx_i_133 - -rx_i_70: 
;FPSQRT - dec edi - js rx_finish + jz rx_finish xor r15, 015a1b689h mov ecx, r15d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm9, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 07305e78h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_71: ;CALL +rx_i_67: ;CALL dec edi - js rx_finish + jz rx_finish xor r14, 088393ba0h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp r13d, 2031541081 - jns short taken_call_71 + jns short taken_call_67 mov r9, rax - jmp rx_i_72 -taken_call_71: + jmp rx_i_68 +taken_call_67: push rax - call rx_i_83 + call rx_i_79 -rx_i_72: ;FPSUB +rx_i_68: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r13, 03aa5c3a4h mov ecx, r13d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm2 - movsd xmm4, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm2 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 03c51ef39h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_73: ;FPADD +rx_i_69: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 0376c9c27h mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm5 - mov eax, r8d - xor eax, 098c2e84dh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm5 + movaps xmm8, xmm0 -rx_i_74: ;MULH_64 +rx_i_70: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r8, 0bbbec3fah mov eax, r8d and eax, 2047 @@ -1075,67 +971,58 @@ rx_i_74: ;MULH_64 mov rax, rdx mov r13, rax -rx_i_75: ;FPMUL +rx_i_71: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r14, 0e9efb350h mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4743866573565984768 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r15d - xor eax, 056660eedh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + 
cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_76: ;CALL +rx_i_72: ;CALL dec edi - js rx_finish + jz rx_finish xor r13, 0f4e51e28h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r cmp r9d, -631091751 - jno short taken_call_76 + jno short taken_call_72 mov rcx, rax mov eax, r11d xor eax, 0da624dd9h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_77 -taken_call_76: + jmp rx_i_73 +taken_call_72: push rax - call rx_i_195 + call rx_i_191 -rx_i_77: ;FPROUND +rx_i_73: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r12, 0c24ddbd4h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov rcx, rax shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm2, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - mov eax, r10d - xor eax, 040624270h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 -rx_i_78: ;MUL_64 +rx_i_74: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r8, 04c4b0c7fh mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r imul rax, rax, -1431647438 mov rcx, rax mov eax, r9d @@ -1143,44 +1030,44 @@ rx_i_78: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_79: ;CALL +rx_i_75: ;RET dec edi - js rx_finish + jz rx_finish xor r14, 03bcc02e3h mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - cmp r11d, -1160798683 - jo short taken_call_79 + cmp rsp, rbp + je short not_taken_ret_75 + xor rax, qword ptr [rsp + 8] + mov r13, rax + ret 8 +not_taken_ret_75: mov r13, rax - jmp rx_i_80 -taken_call_79: - push rax - call rx_i_206 -rx_i_80: ;FPADD +rx_i_76: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 04b0ff63eh mov eax, r11d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm7, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm7, xmm0 + mov eax, r15d + 
xor eax, 083bc0396h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm7 -rx_i_81: ;RET +rx_i_77: ;RET dec edi - js rx_finish + jz rx_finish xor r14, 0b956b3e8h mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_81 - cmp r15d, 982695034 - jo short not_taken_ret_81 + je short not_taken_ret_77 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r11d @@ -1188,16 +1075,16 @@ rx_i_81: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_81: +not_taken_ret_77: mov rcx, rax mov eax, r11d xor eax, 03a92bc7ah and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_82: ;MUL_32 +rx_i_78: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0edeca680h mov eax, r9d and eax, 2047 @@ -1207,72 +1094,69 @@ rx_i_82: ;MUL_32 imul rax, rcx mov r15, rax -rx_i_83: ;CALL +rx_i_79: ;RET dec edi - js rx_finish + jz rx_finish xor r11, 0fbdddcb5h mov eax, r11d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - cmp r13d, 1800043331 - jbe short taken_call_83 + cmp rsp, rbp + je short not_taken_ret_79 + xor rax, qword ptr [rsp + 8] + mov rcx, rax + mov eax, r11d + xor eax, 06b4a7b43h + and eax, 2047 + mov qword ptr [rsi + rax * 8], rcx + ret 8 +not_taken_ret_79: mov rcx, rax mov eax, r11d xor eax, 06b4a7b43h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_84 -taken_call_83: - push rax - call rx_i_97 -rx_i_84: ;FPADD +rx_i_80: ;FPADD dec edi - js rx_finish + jz rx_finish xor r13, 09cec97a1h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4736212432215605248 - movd xmm1, rax - addsd xmm0, xmm1 - mov eax, r11d - xor eax, 01a681d13h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm3, xmm0 -rx_i_85: ;OR_64 +rx_i_81: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r15, 078228167h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r or rax, r13 mov r8, rax 
-rx_i_86: ;CALL +rx_i_82: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 078cae1ffh mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp r12d, -68969733 - jo short taken_call_86 + jo short taken_call_82 mov rcx, rax mov eax, r10d xor eax, 0fbe39afbh and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_87 -taken_call_86: + jmp rx_i_83 +taken_call_82: push rax - call rx_i_149 + call rx_i_145 -rx_i_87: ;AND_64 +rx_i_83: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r10, 0d9b6a533h mov eax, r10d and eax, 32767 @@ -1280,12 +1164,12 @@ rx_i_87: ;AND_64 and rax, r10 mov r12, rax -rx_i_88: ;ROR_64 +rx_i_84: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r15, 0e9e75336h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r10 ror rax, cl mov rcx, rax @@ -1294,9 +1178,9 @@ rx_i_88: ;ROR_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_89: ;MUL_64 +rx_i_85: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r13, 04c0d378ah mov eax, r13d and eax, 2047 @@ -1304,12 +1188,12 @@ rx_i_89: ;MUL_64 imul rax, r8 mov r10, rax -rx_i_90: ;OR_64 +rx_i_86: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r11, 04386e368h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r or rax, r8 mov rcx, rax mov eax, r12d @@ -1317,9 +1201,9 @@ rx_i_90: ;OR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_91: ;SUB_64 +rx_i_87: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 0d75a0ecfh mov eax, r9d and eax, 2047 @@ -1327,24 +1211,26 @@ rx_i_91: ;SUB_64 sub rax, r12 mov r8, rax -rx_i_92: ;FPADD +rx_i_88: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 031bb7f7ah mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm6 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0c149906eh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_93: ;MUL_64 +rx_i_89: ;MUL_64 dec edi - 
js rx_finish + jz rx_finish xor r9, 03b45ecebh mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r imul rax, r8 mov rcx, rax mov eax, r10d @@ -1352,85 +1238,77 @@ rx_i_93: ;MUL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_94: ;FPADD +rx_i_90: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0ee08e76bh mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - mov eax, r14d - xor eax, 0b435cf2dh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm6, xmm0 -rx_i_95: ;FPMUL +rx_i_91: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 042e28e94h mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - mov eax, r12d - xor eax, 0b723c20bh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_96: ;CALL +rx_i_92: ;CALL dec edi - js rx_finish + jz rx_finish xor r8, 0729260e1h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r cmp r14d, 1288893603 - jge short taken_call_96 + jge short taken_call_92 mov r12, rax - jmp rx_i_97 -taken_call_96: + jmp rx_i_93 +taken_call_92: push rax - call rx_i_174 + call rx_i_170 -rx_i_97: ;FPADD +rx_i_93: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0bfcebaf4h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm2 - movsd xmm2, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm2 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 07e48a0d8h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm2 -rx_i_98: ;RET +rx_i_94: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 0ea326630h mov eax, r13d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_98 - cmp r13d, 
-343122976 - jns short not_taken_ret_98 + je short not_taken_ret_94 xor rax, qword ptr [rsp + 8] mov r8, rax ret 8 -not_taken_ret_98: +not_taken_ret_94: mov r8, rax -rx_i_99: ;MUL_64 +rx_i_95: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r13, 0b5451a2dh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r imul rax, r10 mov rcx, rax mov eax, r15d @@ -1438,90 +1316,91 @@ rx_i_99: ;MUL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_100: ;IMUL_32 +rx_i_96: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 04f912ef8h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax mov rax, -1354397081 imul rax, rcx mov r11, rax -rx_i_101: ;FPSQRT +rx_i_97: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r15, 0acc45b3bh mov ecx, r15d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm5, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0c477e850h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_102: ;SUB_64 +rx_i_98: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r14, 09900a4e8h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r sub rax, r15 mov r14, rax -rx_i_103: ;FPDIV +rx_i_99: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r9, 0841b2984h mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4743144143516073984 - movd xmm1, rax - divsd xmm0, xmm1 - movsd xmm4, xmm0 + call rx_read_dataset_f + divpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 04c21df83h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_104: ;ADD_64 +rx_i_100: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 07ebea48fh mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r add rax, r9 mov r14, rax -rx_i_105: ;SUB_32 +rx_i_101: ;SUB_64 dec edi - js 
rx_finish + jz rx_finish xor r10, 0631209d3h mov eax, r10d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - sub eax, r8d + sub rax, r8 mov r11, rax -rx_i_106: ;FPDIV +rx_i_102: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r10, 0e50bf07ah mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm3 - mov eax, r15d - xor eax, 03ec98420h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_107: ;MUL_64 +rx_i_103: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r10, 02b7096f1h mov eax, r10d and eax, 32767 @@ -1533,9 +1412,9 @@ rx_i_107: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_108: ;IMULH_64 +rx_i_104: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r11, 075deaf71h mov eax, r11d and eax, 32767 @@ -1549,12 +1428,12 @@ rx_i_108: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_109: ;MUL_32 +rx_i_105: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 036a51f72h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, r15d imul rax, rcx @@ -1564,70 +1443,73 @@ rx_i_109: ;MUL_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_110: ;FPMUL +rx_i_106: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 07b512986h mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm3 - movsd xmm4, xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 03cb2505h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_111: ;CALL +rx_i_107: ;CALL dec edi - js rx_finish + jz rx_finish xor r12, 0f1d2e50h mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r11d, 1917037441 - jl short taken_call_111 + jl short 
taken_call_107 mov rcx, rax mov eax, r14d xor eax, 07243ab81h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_112 -taken_call_111: + jmp rx_i_108 +taken_call_107: push rax - call rx_i_147 + call rx_i_143 -rx_i_112: ;FPDIV +rx_i_108: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r9, 07327ba60h mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm5 - movsd xmm9, xmm0 + call rx_read_dataset_f + divpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0678b65beh + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_113: ;FPADD +rx_i_109: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 0594e37deh mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm2 - mov eax, r11d - xor eax, 094ab5a5ch - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm2 + movaps xmm3, xmm0 -rx_i_114: ;ROL_64 +rx_i_110: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r9, 04cdf5ebah mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r9 rol rax, cl mov rcx, rax @@ -1636,27 +1518,31 @@ rx_i_114: ;ROL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_115: ;CALL +rx_i_111: ;RET dec edi - js rx_finish + jz rx_finish xor r8, 02e16c97ch mov ecx, r8d - call rx_read_dataset - cmp r14d, 1562606859 - jge short taken_call_115 + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_111 + xor rax, qword ptr [rsp + 8] + mov rcx, rax + mov eax, r12d + xor eax, 05d237d0bh + and eax, 32767 + mov qword ptr [rsi + rax * 8], rcx + ret 8 +not_taken_ret_111: mov rcx, rax mov eax, r12d xor eax, 05d237d0bh and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_116 -taken_call_115: - push rax - call rx_i_216 -rx_i_116: ;SUB_64 +rx_i_112: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r12, 0d42ddbd4h mov 
eax, r12d and eax, 2047 @@ -1668,43 +1554,43 @@ rx_i_116: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_117: ;MUL_32 +rx_i_113: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r10, 07a4f8cbbh mov ecx, r10d - call rx_read_dataset - mov ecx, eax - mov eax, r9d - imul rax, rcx + call rx_read_dataset_r + mov rcx, r9 + mul rcx + mov rax, rdx mov r13, rax -rx_i_118: ;IMULH_64 +rx_i_114: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r13, 06e83e2cdh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r15 imul rcx mov rax, rdx mov r14, rax -rx_i_119: ;OR_64 +rx_i_115: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r14, 0336c980eh mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r or rax, r10 mov r14, rax -rx_i_120: ;IMULH_64 +rx_i_116: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r10, 0d122702eh mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r mov rcx, -1850776691 imul rcx mov rax, rdx @@ -1714,9 +1600,9 @@ rx_i_120: ;IMULH_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_121: ;AND_64 +rx_i_117: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r11, 015f2012bh mov eax, r11d and eax, 2047 @@ -1728,177 +1614,160 @@ rx_i_121: ;AND_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_122: ;FPSUB +rx_i_118: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 037ddf43dh mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm5 - mov eax, r14d - xor eax, 0d0b219d0h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm5 + movaps xmm6, xmm0 -rx_i_123: ;FPSUB +rx_i_119: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 0bba475f3h mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - mov eax, r13d - xor eax, 02401488h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr 
[rsi + rax * 8] + subpd xmm0, xmm3 + movaps xmm5, xmm0 -rx_i_124: ;FPADD +rx_i_120: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0e5561e3eh mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm4 - mov eax, r8d - xor eax, 04d46f867h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm4 + movaps xmm8, xmm0 -rx_i_125: ;FPMUL +rx_i_121: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 03ab8f73h mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm5 - mov eax, r8d - xor eax, 0808a2d8bh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_126: ;CALL +rx_i_122: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 04e0dbd40h mov ecx, r10d - call rx_read_dataset - cmp r11d, 2029448233 - jo short taken_call_126 + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_122 + xor rax, qword ptr [rsp + 8] + mov rcx, rax + mov eax, r14d + xor eax, 078f6ec29h + and eax, 2047 + mov qword ptr [rsi + rax * 8], rcx + ret 8 +not_taken_ret_122: mov rcx, rax mov eax, r14d xor eax, 078f6ec29h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_127 -taken_call_126: - push rax - call rx_i_196 -rx_i_127: ;SUB_64 +rx_i_123: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r13, 073e9f58ah mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - sub rax, r15 + add eax, r15d mov r13, rax -rx_i_128: ;CALL +rx_i_124: ;CALL dec edi - js rx_finish + jz rx_finish xor r12, 0e3fa3670h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r cmp r11d, 1719505436 - jns short taken_call_128 + jns short taken_call_124 mov rcx, rax mov eax, r11d xor eax, 0667d921ch and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_129 -taken_call_128: + jmp 
rx_i_125 +taken_call_124: push rax - call rx_i_241 + call rx_i_237 -rx_i_129: ;IMUL_32 +rx_i_125: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r8, 0ebec27cdh mov ecx, r8d - call rx_read_dataset - movsxd rcx, eax - movsxd rax, r14d + call rx_read_dataset_r + mov ecx, eax + mov eax, r14d imul rax, rcx mov r14, rax -rx_i_130: ;FPDIV +rx_i_126: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r8, 01feb5264h mov eax, r8d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm6 - mov eax, r10d - xor eax, 04b88e021h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm2, xmm0 -rx_i_131: ;IMULH_64 +rx_i_127: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0405f500fh mov ecx, r9d - call rx_read_dataset - mov rcx, r10 - imul rcx - mov rax, rdx + call rx_read_dataset_r + movsxd rcx, eax + movsxd rax, r10d + imul rax, rcx mov r8, rax -rx_i_132: ;MUL_64 +rx_i_128: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r13, 0459f1154h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r imul rax, r9 mov r9, rax -rx_i_133: ;CALL +rx_i_129: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 081918b4ch mov eax, r9d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r13d, -590624856 - jge short taken_call_133 + jge short taken_call_129 mov r9, rax - jmp rx_i_134 -taken_call_133: + jmp rx_i_130 +taken_call_129: push rax - call rx_i_158 + call rx_i_154 -rx_i_134: ;OR_64 +rx_i_130: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r9, 077c3b332h mov eax, r9d and eax, 2047 @@ -1910,17 +1779,15 @@ rx_i_134: ;OR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_135: ;RET +rx_i_131: ;RET dec edi - js rx_finish + jz rx_finish xor r12, 05792310bh mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_135 - cmp r15d, 
-537890955 - jns short not_taken_ret_135 + je short not_taken_ret_131 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r15d @@ -1928,33 +1795,28 @@ rx_i_135: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_135: +not_taken_ret_131: mov rcx, rax mov eax, r15d xor eax, 0dff06f75h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_136: ;FPADD +rx_i_132: ;FPADD dec edi - js rx_finish + jz rx_finish xor r10, 0ebc6e10h mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - mov eax, r15d - xor eax, 0b0c38959h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm6 + movaps xmm7, xmm0 -rx_i_137: ;XOR_64 +rx_i_133: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r14, 0822f8b60h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r xor rax, -1000526796 mov rcx, rax mov eax, r15d @@ -1962,45 +1824,44 @@ rx_i_137: ;XOR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_138: ;ADD_64 +rx_i_134: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r10, 0d0f18593h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r add rax, 1516102347 mov r13, rax -rx_i_139: ;FPMUL +rx_i_135: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 088212ef9h mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm3 - mov eax, r8d - xor eax, 0b29f3d2ah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_140: ;FPSQRT +rx_i_136: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r8, 01ae56e03h mov ecx, r8d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm5, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0efd7799dh + and eax, 2047 + movhpd qword 
ptr [rsi + rax * 8], xmm5 -rx_i_141: ;ROL_64 +rx_i_137: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r11, 015a24231h mov eax, r11d and eax, 32767 @@ -2009,25 +1870,23 @@ rx_i_141: ;ROL_64 rol rax, cl mov r11, rax -rx_i_142: ;RET +rx_i_138: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 02fd380c5h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_142 - cmp r9d, -1910517416 - jbe short not_taken_ret_142 + je short not_taken_ret_138 xor rax, qword ptr [rsp + 8] mov r10, rax ret 8 -not_taken_ret_142: +not_taken_ret_138: mov r10, rax -rx_i_143: ;ADD_64 +rx_i_139: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r9, 093172470h mov eax, r9d and eax, 2047 @@ -2039,9 +1898,9 @@ rx_i_143: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_144: ;IMUL_32 +rx_i_140: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r14, 052543553h mov eax, r14d and eax, 2047 @@ -2051,40 +1910,40 @@ rx_i_144: ;IMUL_32 imul rax, rcx mov r14, rax -rx_i_145: ;FPADD +rx_i_141: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 02f636da1h mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4478407513863094272 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm9, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm2 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 099ff9ffdh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_146: ;CALL +rx_i_142: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 0b11a4f2ch mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp r12d, 1365939282 - js short taken_call_146 + js short taken_call_142 mov rcx, rax mov eax, r10d xor eax, 0516a9452h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_147 -taken_call_146: + jmp rx_i_143 +taken_call_142: push rax - call rx_i_261 + call rx_i_257 -rx_i_147: ;IMUL_32 +rx_i_143: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r15, 037f4b5d0h mov eax, r15d and eax, 2047 @@ -2094,9 +1953,9 @@ 
rx_i_147: ;IMUL_32 imul rax, rcx mov r9, rax -rx_i_148: ;IMULH_64 +rx_i_144: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r10, 02e59e00ah mov eax, r10d and eax, 2047 @@ -2106,12 +1965,12 @@ rx_i_148: ;IMULH_64 mov rax, rdx mov r15, rax -rx_i_149: ;IMULH_64 +rx_i_145: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r13, 08d5c798h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r11 imul rcx mov rax, rdx @@ -2121,21 +1980,21 @@ rx_i_149: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_150: ;IMULH_64 +rx_i_146: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 02327e6e2h mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, r12 - imul rcx - mov rax, rdx + movsxd rcx, eax + movsxd rax, r12d + imul rax, rcx mov r10, rax -rx_i_151: ;MULH_64 +rx_i_147: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r13, 03a7df043h mov eax, r13d and eax, 2047 @@ -2149,12 +2008,12 @@ rx_i_151: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_152: ;SUB_64 +rx_i_148: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r10, 0783e5c4eh mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r sub rax, r14 mov rcx, rax mov eax, r10d @@ -2162,9 +2021,9 @@ rx_i_152: ;SUB_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_153: ;MUL_32 +rx_i_149: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 0aa0f5b2fh mov eax, r12d and eax, 2047 @@ -2178,9 +2037,9 @@ rx_i_153: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_154: ;DIV_64 +rx_i_150: ;DIV_64 dec edi - js rx_finish + jz rx_finish xor r9, 01504ca7ah mov eax, r9d and eax, 2047 @@ -2197,9 +2056,9 @@ rx_i_154: ;DIV_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_155: ;OR_32 +rx_i_151: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r9, 0ea72a7cfh mov eax, r9d and eax, 2047 @@ -2211,31 +2070,35 @@ rx_i_155: ;OR_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_156: ;ROR_64 +rx_i_152: ;ROR_64 dec edi - js 
rx_finish + jz rx_finish xor r13, 0ad0e7a88h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r10 ror rax, cl mov r10, rax -rx_i_157: ;FPDIV +rx_i_153: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r15, 0fd95ab87h mov ecx, r15d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm2 - movsd xmm8, xmm0 + call rx_read_dataset_f + divpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 + mov eax, r8d + xor eax, 09111c981h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm8 -rx_i_158: ;MUL_32 +rx_i_154: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r10, 0256697b0h mov eax, r10d and eax, 2047 @@ -2245,12 +2108,12 @@ rx_i_158: ;MUL_32 imul rax, rcx mov r10, rax -rx_i_159: ;ROR_64 +rx_i_155: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r11, 0d23f3b78h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r10 ror rax, cl mov rcx, rax @@ -2259,29 +2122,29 @@ rx_i_159: ;ROR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_160: ;IMUL_32 +rx_i_156: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r10, 098917533h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r15d imul rax, rcx mov r15, rax -rx_i_161: ;ADD_64 +rx_i_157: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r10, 0dfac3efch mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r add rax, r12 mov r14, rax -rx_i_162: ;ADD_64 +rx_i_158: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 0a64de090h mov eax, r15d and eax, 2047 @@ -2289,30 +2152,34 @@ rx_i_162: ;ADD_64 add rax, 1233402159 mov r10, rax -rx_i_163: ;CALL +rx_i_159: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 0952a3abbh mov ecx, r13d - call rx_read_dataset - cmp r15d, -8571241 - jbe short taken_call_163 + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_159 + xor rax, qword ptr [rsp + 8] + mov rcx, rax + mov eax, r13d + xor eax, 0ff7d3697h + 
and eax, 2047 + mov qword ptr [rsi + rax * 8], rcx + ret 8 +not_taken_ret_159: mov rcx, rax mov eax, r13d xor eax, 0ff7d3697h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_164 -taken_call_163: - push rax - call rx_i_185 -rx_i_164: ;SUB_64 +rx_i_160: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r14, 0b1685b90h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r sub rax, 1518778665 mov rcx, rax mov eax, r10d @@ -2320,18 +2187,18 @@ rx_i_164: ;SUB_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_165: ;OR_64 +rx_i_161: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r15, 0ea992531h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r or rax, r14 mov r8, rax -rx_i_166: ;SAR_64 +rx_i_162: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r9, 01fd57a4ah mov eax, r9d and eax, 2047 @@ -2340,12 +2207,12 @@ rx_i_166: ;SAR_64 sar rax, cl mov r13, rax -rx_i_167: ;SUB_64 +rx_i_163: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r12, 0e3486c0ah mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r sub rax, -2101130488 mov rcx, rax mov eax, r14d @@ -2353,9 +2220,9 @@ rx_i_167: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_168: ;MUL_32 +rx_i_164: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 01f0c2737h mov eax, r12d and eax, 2047 @@ -2369,17 +2236,15 @@ rx_i_168: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_169: ;RET +rx_i_165: ;RET dec edi - js rx_finish + jz rx_finish xor r12, 0debb493eh mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_169 - cmp r8d, 1682991196 - jno short not_taken_ret_169 + je short not_taken_ret_165 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r9d @@ -2387,16 +2252,16 @@ rx_i_169: ;RET and eax, 32767 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_169: +not_taken_ret_165: mov rcx, rax mov eax, r9d xor eax, 06450685ch and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_170: ;ROL_64 
+rx_i_166: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r9, 0fe684081h mov eax, r9d and eax, 2047 @@ -2409,47 +2274,42 @@ rx_i_170: ;ROL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_171: ;FPMUL +rx_i_167: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0d10371ch mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4739242735460941824 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm2, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm4 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 02a58510fh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm2 -rx_i_172: ;FPSQRT +rx_i_168: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r12, 071b15effh mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - mov eax, r15d - xor eax, 08d1a76f8h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm7, xmm0 -rx_i_173: ;RET +rx_i_169: ;RET dec edi - js rx_finish + jz rx_finish xor r11, 072790347h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_173 - cmp r10d, -1286357107 - ja short not_taken_ret_173 + je short not_taken_ret_169 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r14d @@ -2457,31 +2317,31 @@ rx_i_173: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_173: +not_taken_ret_169: mov rcx, rax mov eax, r14d xor eax, 0b353bf8dh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_174: ;CALL +rx_i_170: ;CALL dec edi - js rx_finish + jz rx_finish xor r8, 04ae8a020h mov eax, r8d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r10d, -1541051751 - jl short taken_call_174 + jl short taken_call_170 mov r14, rax - jmp rx_i_175 
-taken_call_174: + jmp rx_i_171 +taken_call_170: push rax - call rx_i_208 + call rx_i_204 -rx_i_175: ;IMULH_64 +rx_i_171: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r15, 09901e05bh mov eax, r15d and eax, 2047 @@ -2491,9 +2351,9 @@ rx_i_175: ;IMULH_64 mov rax, rdx mov r12, rax -rx_i_176: ;SUB_64 +rx_i_172: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r13, 050e8c510h mov eax, r13d and eax, 2047 @@ -2501,25 +2361,23 @@ rx_i_176: ;SUB_64 sub rax, r11 mov r12, rax -rx_i_177: ;MULH_64 +rx_i_173: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r14, 05422cf8fh mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, r12 - mul rcx - mov rax, rdx + imul rax, r12 mov rcx, rax mov eax, r12d xor eax, 0ad60ae9ch and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_178: ;FPROUND +rx_i_174: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r12, 0a025c3dbh mov eax, r12d and eax, 2047 @@ -2528,37 +2386,40 @@ rx_i_178: ;FPROUND shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm6, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - movsd xmm6, xmm0 + mov eax, r14d + xor eax, 02be6989fh + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm6 -rx_i_179: ;SAR_64 +rx_i_175: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r13, 08f74c11h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 sar rax, cl mov r8, rax -rx_i_180: ;SUB_64 +rx_i_176: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 01f2ed5f1h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r sub rax, r14 mov r10, rax -rx_i_181: ;ADD_64 +rx_i_177: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r10, 0d2072c79h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r add rax, r10 mov rcx, rax mov eax, r13d @@ -2566,16 +2427,14 @@ rx_i_181: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_182: ;RET +rx_i_178: ;RET dec edi - js rx_finish + jz rx_finish xor r15, 0a8e51933h mov ecx, r15d - 
call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_182 - cmp r12d, -1016679819 - js short not_taken_ret_182 + je short not_taken_ret_178 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r12d @@ -2583,31 +2442,26 @@ rx_i_182: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_182: +not_taken_ret_178: mov rcx, rax mov eax, r12d xor eax, 0c366b275h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_183: ;FPADD +rx_i_179: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0934ad492h mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm2 - mov eax, r8d - xor eax, 01a997aebh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm2 + movaps xmm8, xmm0 -rx_i_184: ;XOR_64 +rx_i_180: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r15, 01cb3ce1fh mov eax, r15d and eax, 2047 @@ -2619,39 +2473,32 @@ rx_i_184: ;XOR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_185: ;CALL +rx_i_181: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 023c7845fh mov ecx, r10d - call rx_read_dataset - cmp r12d, -1612576918 - ja short taken_call_185 + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_181 + xor rax, qword ptr [rsp + 8] + mov r10, rax + ret 8 +not_taken_ret_181: mov r10, rax - jmp rx_i_186 -taken_call_185: - push rax - call rx_i_215 -rx_i_186: ;FPSUB +rx_i_182: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r8, 0f8884327h mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4746551310076084224 - movd xmm1, rax - subsd xmm0, xmm1 - mov eax, r14d - xor eax, 07c8d12a5h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm7 + movaps xmm6, xmm0 -rx_i_187: ;ADD_64 +rx_i_183: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r13, 013070461h mov eax, r13d and eax, 2047 @@ -2659,40 +2506,40 @@ 
rx_i_187: ;ADD_64 add rax, 137260710 mov r10, rax -rx_i_188: ;SAR_64 +rx_i_184: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r12, 04764cdf7h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r sar rax, 40 mov r12, rax -rx_i_189: ;CALL +rx_i_185: ;CALL dec edi - js rx_finish + jz rx_finish xor r10, 03c41026fh mov eax, r10d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp r15d, -1510284125 - jbe short taken_call_189 + jbe short taken_call_185 mov rcx, rax mov eax, r9d xor eax, 0a5fae4a3h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_190 -taken_call_189: + jmp rx_i_186 +taken_call_185: push rax - call rx_i_250 + call rx_i_246 -rx_i_190: ;XOR_32 +rx_i_186: ;XOR_32 dec edi - js rx_finish + jz rx_finish xor r9, 0cded414bh mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r xor eax, r15d mov rcx, rax mov eax, r10d @@ -2700,113 +2547,99 @@ rx_i_190: ;XOR_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_191: ;FPDIV +rx_i_187: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r13, 05c6d64a8h mov ecx, r13d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm6 - mov eax, r13d - xor eax, 020dcdd88h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + divpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 -rx_i_192: ;FPMUL +rx_i_188: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 04659becbh mov eax, r9d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4746030173290233856 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r12d - xor eax, 075253031h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_193: ;FPROUND +rx_i_189: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r11, 0c52741d5h mov ecx, r11d 
- call rx_read_dataset + call rx_read_dataset_r mov rcx, rax shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm5, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - mov eax, r13d - xor eax, 0e6f1a3b7h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 -rx_i_194: ;RET +rx_i_190: ;RET dec edi - js rx_finish + jz rx_finish xor r12, 0217bf5f3h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_194 - cmp r8d, -1901851954 - jns short not_taken_ret_194 + je short not_taken_ret_190 xor rax, qword ptr [rsp + 8] mov r13, rax ret 8 -not_taken_ret_194: +not_taken_ret_190: mov r13, rax -rx_i_195: ;CALL +rx_i_191: ;CALL dec edi - js rx_finish + jz rx_finish xor r15, 0884f3526h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r cmp r11d, 1687119072 - jno short taken_call_195 + jno short taken_call_191 mov rcx, rax mov eax, r14d xor eax, 0648f64e0h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_196 -taken_call_195: + jmp rx_i_192 +taken_call_191: push rax - call rx_i_279 + call rx_i_275 -rx_i_196: ;CALL +rx_i_192: ;CALL dec edi - js rx_finish + jz rx_finish xor r8, 0d76edad3h mov eax, r8d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r14d, -117628864 - jns short taken_call_196 + jns short taken_call_192 mov r8, rax - jmp rx_i_197 -taken_call_196: + jmp rx_i_193 +taken_call_192: push rax - call rx_i_309 + call rx_i_305 -rx_i_197: ;MUL_32 +rx_i_193: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 0e9939ach mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, r12d imul rax, rcx @@ -2816,35 +2649,39 @@ rx_i_197: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_198: ;FPMUL +rx_i_194: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 0f21ca520h mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - movsd xmm5, xmm0 
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 040eb9f47h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_199: ;ROL_64 +rx_i_195: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r10, 09405152ch mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 rol rax, cl mov r9, rax -rx_i_200: ;SUB_64 +rx_i_196: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r8, 0c2a9f41bh mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r sub rax, -1907903895 mov rcx, rax mov eax, r13d @@ -2852,20 +2689,18 @@ rx_i_200: ;SUB_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_201: ;MULH_64 +rx_i_197: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 0229208efh mov ecx, r12d - call rx_read_dataset - mov rcx, r15 - mul rcx - mov rax, rdx + call rx_read_dataset_r + imul rax, r15 mov r11, rax -rx_i_202: ;MULH_64 +rx_i_198: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r14, 0c8d95bbbh mov eax, r14d and eax, 32767 @@ -2879,12 +2714,12 @@ rx_i_202: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_203: ;MULH_64 +rx_i_199: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r13, 050049e2eh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r10 mul rcx mov rax, rdx @@ -2894,60 +2729,59 @@ rx_i_203: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_204: ;FPSUB +rx_i_200: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r10, 0c63b99e8h mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm2 - movsd xmm4, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm2 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0b05ce8abh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_205: ;FPADD +rx_i_201: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0cdda801dh mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 
- cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - movsd xmm4, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 040cfe68eh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_206: ;FPSUB +rx_i_202: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r13, 0fa44b04ah mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm9 - mov eax, r13d - xor eax, 0b44dbc71h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm9 + movaps xmm5, xmm0 -rx_i_207: ;FPSUB +rx_i_203: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r10, 0d73e472ch mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4478539544748097536 - movd xmm1, rax - subsd xmm0, xmm1 - movsd xmm7, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm2 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 09bdff355h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_208: ;MUL_64 +rx_i_204: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r9, 01af8ab1dh mov eax, r9d and eax, 32767 @@ -2959,45 +2793,37 @@ rx_i_208: ;MUL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_209: ;FPDIV +rx_i_205: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r14, 094e997c5h mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm8 - mov eax, r13d - xor eax, 0a1f8d8c7h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm8 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 -rx_i_210: ;FPMUL +rx_i_206: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0e836a177h mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4483350864878108672 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r12d - xor 
eax, 0d01fb731h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm7 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_211: ;AND_32 +rx_i_207: ;AND_32 dec edi - js rx_finish + jz rx_finish xor r9, 039ccdd30h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r and eax, r12d mov rcx, rax mov eax, r9d @@ -3005,18 +2831,18 @@ rx_i_211: ;AND_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_212: ;MUL_64 +rx_i_208: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r9, 0f4f126c5h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r imul rax, r12 mov r10, rax -rx_i_213: ;SHR_64 +rx_i_209: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r8, 0b84811f1h mov eax, r8d and eax, 2047 @@ -3028,12 +2854,12 @@ rx_i_213: ;SHR_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_214: ;MUL_32 +rx_i_210: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 0c5efc90ah mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, -1027162400 imul rax, rcx @@ -3043,27 +2869,22 @@ rx_i_214: ;MUL_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_215: ;FPADD +rx_i_211: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0ce533072h mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r11d - xor eax, 0212e615h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm3, xmm0 -rx_i_216: ;MUL_64 +rx_i_212: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r13, 06b465fdbh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r imul rax, r13 mov rcx, rax mov eax, r15d @@ -3071,9 +2892,9 @@ rx_i_216: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_217: ;IMUL_32 +rx_i_213: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 02dd1d503h mov eax, r13d and eax, 2047 @@ -3083,9 +2904,9 
@@ rx_i_217: ;IMUL_32 imul rax, rcx mov r14, rax -rx_i_218: ;ROL_64 +rx_i_214: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r9, 0a159f313h mov eax, r9d and eax, 2047 @@ -3094,21 +2915,21 @@ rx_i_218: ;ROL_64 rol rax, cl mov r14, rax -rx_i_219: ;SUB_64 +rx_i_215: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r15, 08359265eh mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r sub rax, r12 mov r10, rax -rx_i_220: ;MUL_64 +rx_i_216: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 080696de3h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r imul rax, r13 mov rcx, rax mov eax, r15d @@ -3116,9 +2937,9 @@ rx_i_220: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_221: ;IMUL_32 +rx_i_217: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r8, 040d5b526h mov eax, r8d and eax, 2047 @@ -3132,26 +2953,26 @@ rx_i_221: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_222: ;CALL +rx_i_218: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 083c0bd93h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp r8d, -585552250 - jge short taken_call_222 + jge short taken_call_218 mov r11, rax - jmp rx_i_223 -taken_call_222: + jmp rx_i_219 +taken_call_218: push rax - call rx_i_244 + call rx_i_240 -rx_i_223: ;XOR_64 +rx_i_219: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r8, 0ca37f668h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r xor rax, -740915304 mov rcx, rax mov eax, r15d @@ -3159,24 +2980,24 @@ rx_i_223: ;XOR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_224: ;IMULH_64 +rx_i_220: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0bb44c384h mov ecx, r9d - call rx_read_dataset - mov rcx, r11 - imul rcx - mov rax, rdx + call rx_read_dataset_r + movsxd rcx, eax + movsxd rax, r11d + imul rax, rcx mov rcx, rax mov eax, r11d xor eax, 0903fd173h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_225: ;IMULH_64 +rx_i_221: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor 
r9, 0a3deb512h mov eax, r9d and eax, 2047 @@ -3190,37 +3011,43 @@ rx_i_225: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_226: ;FPMUL +rx_i_222: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 084a02d64h mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm5 - movsd xmm7, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0d7601963h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_227: ;FPSUB +rx_i_223: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r8, 01e5cc085h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - movsd xmm2, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm3 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 07fca59eeh + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm2 -rx_i_228: ;SAR_64 +rx_i_224: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r12, 053982440h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r14 sar rax, cl mov rcx, rax @@ -3229,12 +3056,12 @@ rx_i_228: ;SAR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_229: ;DIV_64 +rx_i_225: ;DIV_64 dec edi - js rx_finish + jz rx_finish xor r13, 0c558367eh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov ecx, 1 mov edx, r10d test edx, edx @@ -3247,98 +3074,100 @@ rx_i_229: ;DIV_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_230: ;CALL +rx_i_226: ;CALL dec edi - js rx_finish + jz rx_finish xor r10, 040139b65h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r cmp r8d, -1752488808 - jno short taken_call_230 + jno short taken_call_226 mov rcx, rax mov eax, r8d xor eax, 0978b2498h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_231 -taken_call_230: + jmp rx_i_227 +taken_call_226: 
push rax - call rx_i_332 + call rx_i_328 -rx_i_231: ;FPDIV +rx_i_227: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r11, 0fa312dbdh mov eax, r11d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4479585788803153920 - movd xmm1, rax - divsd xmm0, xmm1 - movsd xmm3, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm7 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0aabe2a0ah + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm3 -rx_i_232: ;CALL +rx_i_228: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 0b64246c0h mov eax, r11d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r10d, -2099304 - jns short taken_call_232 + jns short taken_call_228 mov rcx, rax mov eax, r15d xor eax, 0ffdff798h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_233 -taken_call_232: + jmp rx_i_229 +taken_call_228: push rax - call rx_i_287 + call rx_i_283 -rx_i_233: ;IMULH_64 +rx_i_229: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 05c535836h mov ecx, r11d - call rx_read_dataset - mov rcx, r12 - imul rcx - mov rax, rdx + call rx_read_dataset_r + movsxd rcx, eax + movsxd rax, r12d + imul rax, rcx mov rcx, rax mov eax, r13d xor eax, 013e8b2e0h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_234: ;FPMUL +rx_i_230: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r15, 0f394972eh mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4737156465540726784 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm5, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 01dc2b4f6h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm5 -rx_i_235: ;RET +rx_i_231: ;RET dec edi - js rx_finish + jz rx_finish xor r9, 0bb56428dh mov ecx, r9d - 
call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_235 - cmp r9d, -422974038 - js short not_taken_ret_235 + je short not_taken_ret_231 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r9d @@ -3346,46 +3175,43 @@ rx_i_235: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_235: +not_taken_ret_231: mov rcx, rax mov eax, r9d xor eax, 0e6c9edaah and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_236: ;FPDIV +rx_i_232: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r15, 09ab46ab3h mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm3 - mov eax, r15d - xor eax, 07e732935h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_237: ;CALL +rx_i_233: ;CALL dec edi - js rx_finish + jz rx_finish xor r13, 08eb2cd76h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r cmp r12d, 392389867 - jo short taken_call_237 + jo short taken_call_233 mov r14, rax - jmp rx_i_238 -taken_call_237: + jmp rx_i_234 +taken_call_233: push rax - call rx_i_272 + call rx_i_268 -rx_i_238: ;FPROUND +rx_i_234: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r15, 0ba687578h mov eax, r15d and eax, 2047 @@ -3394,21 +3220,17 @@ rx_i_238: ;FPROUND shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm4, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - mov eax, r12d - xor eax, 04d2e9e7dh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 -rx_i_239: ;IMUL_32 +rx_i_235: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 0b6cb9ff2h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r12d imul rax, rcx @@ -3418,48 +3240,45 @@ rx_i_239: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_240: ;FPADD 
+rx_i_236: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 03ad196ach mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm4 - mov eax, r11d - xor eax, 0b2ab82cdh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm4 + movaps xmm3, xmm0 -rx_i_241: ;CALL +rx_i_237: ;CALL dec edi - js rx_finish + jz rx_finish xor r15, 0fab4600h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r cmp r12d, -121899164 - jge short taken_call_241 + jge short taken_call_237 mov r11, rax - jmp rx_i_242 -taken_call_241: + jmp rx_i_238 +taken_call_237: push rax - call rx_i_299 + call rx_i_295 -rx_i_242: ;FPADD +rx_i_238: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0158f119fh mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - movsd xmm7, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm6 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0331bbf8h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm7 -rx_i_243: ;ADD_64 +rx_i_239: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r13, 044f30b3fh mov eax, r13d and eax, 2047 @@ -3467,47 +3286,49 @@ rx_i_243: ;ADD_64 add rax, r10 mov r10, rax -rx_i_244: ;IMULH_64 +rx_i_240: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0d65d29f9h mov ecx, r9d - call rx_read_dataset - mov rcx, -423830277 - imul rcx - mov rax, rdx + call rx_read_dataset_r + movsxd rcx, eax + mov rax, -423830277 + imul rax, rcx mov r8, rax -rx_i_245: ;FPADD +rx_i_241: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 0ce5260adh mov ecx, r11d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm7, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm3 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0bc2423ebh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_246: ;MUL_32 +rx_i_242: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r12, 01119b0f9h mov eax, r12d and eax, 2047 mov 
rax, qword ptr [rsi + rax * 8] - mov ecx, eax - mov eax, 319324914 - imul rax, rcx + mov rcx, 319324914 + mul rcx + mov rax, rdx mov rcx, rax mov eax, r10d xor eax, 0130882f2h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_247: ;XOR_64 +rx_i_243: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r12, 0d6c2ce3dh mov eax, r12d and eax, 2047 @@ -3515,27 +3336,22 @@ rx_i_247: ;XOR_64 xor rax, 1198180774 mov r14, rax -rx_i_248: ;FPADD +rx_i_244: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 0c6a6248h mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - mov eax, r9d - xor eax, 0b4a1fad6h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm6 + movaps xmm9, xmm0 -rx_i_249: ;XOR_64 +rx_i_245: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r13, 084505739h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r xor rax, -1546539637 mov rcx, rax mov eax, r12d @@ -3543,9 +3359,9 @@ rx_i_249: ;XOR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_250: ;AND_64 +rx_i_246: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r15, 027eeaa2eh mov eax, r15d and eax, 2047 @@ -3553,12 +3369,12 @@ rx_i_250: ;AND_64 and rax, r9 mov r12, rax -rx_i_251: ;IMUL_32 +rx_i_247: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r10, 0c4de0296h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r14d imul rax, rcx @@ -3568,9 +3384,9 @@ rx_i_251: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_252: ;MUL_32 +rx_i_248: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r8, 0649df46fh mov eax, r8d and eax, 2047 @@ -3584,24 +3400,24 @@ rx_i_252: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_253: ;IMULH_64 +rx_i_249: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r15, 0499552cch mov ecx, r15d - call rx_read_dataset - mov rcx, r11 - imul rcx - mov rax, rdx + call 
rx_read_dataset_r + movsxd rcx, eax + movsxd rax, r11d + imul rax, rcx mov rcx, rax mov eax, r13d xor eax, 0e1afcff9h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_254: ;MUL_64 +rx_i_250: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r13, 083eafe6fh mov eax, r13d and eax, 2047 @@ -3613,75 +3429,83 @@ rx_i_254: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_255: ;FPMUL +rx_i_251: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r13, 0a25a4d8ah mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - movsd xmm4, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 05ed767a3h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_256: ;ROL_64 +rx_i_252: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r14, 08a75ad41h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 rol rax, cl mov r14, rax -rx_i_257: ;CALL +rx_i_253: ;CALL dec edi - js rx_finish + jz rx_finish xor r14, 057f3f596h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp r15d, 1699431947 - jns short taken_call_257 + jns short taken_call_253 mov rcx, rax mov eax, r13d xor eax, 0654b460bh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_258 -taken_call_257: + jmp rx_i_254 +taken_call_253: push rax - call rx_i_371 + call rx_i_367 -rx_i_258: ;FPSUB +rx_i_254: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r14, 04cfb709eh mov ecx, r14d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm4 - movsd xmm8, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm4 + movaps xmm8, xmm0 + mov eax, r8d + xor eax, 0c251872eh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm8 -rx_i_259: ;FPADD +rx_i_255: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 0b96ec9ech mov ecx, r9d - call rx_read_dataset - and rax, 
-2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm5 - movsd xmm6, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm5 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 0ae781d10h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm6 -rx_i_260: ;MULH_64 +rx_i_256: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r8, 08375472ch mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r15 mul rcx mov rax, rdx @@ -3691,28 +3515,28 @@ rx_i_260: ;MULH_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_261: ;FPADD +rx_i_257: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0d75a8c3fh mov ecx, r12d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4741056264732147712 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm3, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm5 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0373b1b6fh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm3 -rx_i_262: ;IMUL_32 +rx_i_258: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 064fdbda0h mov eax, r11d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - movsxd rcx, eax - movsxd rax, r14d + mov ecx, eax + mov eax, r14d imul rax, rcx mov rcx, rax mov eax, r9d @@ -3720,54 +3544,46 @@ rx_i_262: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_263: ;FPADD +rx_i_259: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 02e36a073h mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r11d - xor eax, 06c1856f0h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm3, xmm0 -rx_i_264: ;FPMUL +rx_i_260: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r13, 0f94e9fa9h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4743938178866479104 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r9d - xor eax, 0576a8e8fh - and 
eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm9, xmm0 -rx_i_265: ;FPSQRT +rx_i_261: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r14, 02346171ch mov ecx, r14d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm3, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0745a48e9h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm3 -rx_i_266: ;OR_32 +rx_i_262: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r10, 01c42baa6h mov eax, r10d and eax, 2047 @@ -3779,98 +3595,91 @@ rx_i_266: ;OR_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_267: ;FPDIV +rx_i_263: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r11, 0b39b140h mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm8 - mov eax, r14d - xor eax, 0d8823dc5h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + divpd xmm0, xmm8 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm6, xmm0 -rx_i_268: ;FPMUL +rx_i_264: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 01a07d201h mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm3 - mov eax, r15d - xor eax, 0df89f274h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_269: ;FPADD +rx_i_265: ;FPADD dec edi - js rx_finish + jz rx_finish xor r13, 07a3eb340h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm8 - movsd xmm2, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, 
xmm8 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 04c559414h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm2 -rx_i_270: ;RET +rx_i_266: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 03d0a3a89h mov eax, r13d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_270 - cmp r12d, 136160027 - jbe short not_taken_ret_270 + je short not_taken_ret_266 xor rax, qword ptr [rsp + 8] mov r10, rax ret 8 -not_taken_ret_270: +not_taken_ret_266: mov r10, rax -rx_i_271: ;ROR_64 +rx_i_267: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r8, 0c6c7b37h mov eax, r8d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, r10 - ror rax, cl + ror rax, 56 mov r11, rax -rx_i_272: ;CALL +rx_i_268: ;CALL dec edi - js rx_finish + jz rx_finish xor r12, 0c2510cebh mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r15d, -2062812966 - jl short taken_call_272 + jl short taken_call_268 mov r13, rax - jmp rx_i_273 -taken_call_272: + jmp rx_i_269 +taken_call_268: push rax - call rx_i_385 + call rx_i_381 -rx_i_273: ;ROR_64 +rx_i_269: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r11, 0c80cc899h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 ror rax, cl mov rcx, rax @@ -3879,27 +3688,24 @@ rx_i_273: ;ROR_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_274: ;FPMUL +rx_i_270: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0eb355caah mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - mov eax, r15d - xor eax, 03981662bh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 -rx_i_275: ;MUL_32 +rx_i_271: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 0c6f12299h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, -2032281772 imul rax, rcx @@ -3909,9 +3715,9 @@ 
rx_i_275: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_276: ;OR_32 +rx_i_272: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r12, 0695a5dd2h mov eax, r12d and eax, 2047 @@ -3919,71 +3725,71 @@ rx_i_276: ;OR_32 or eax, r12d mov r13, rax -rx_i_277: ;CALL +rx_i_273: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 0d315e4dch mov eax, r9d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r12d, 1670848568 - jl short taken_call_277 + jl short taken_call_273 mov rcx, rax mov eax, r13d xor eax, 063972038h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_278 -taken_call_277: + jmp rx_i_274 +taken_call_273: push rax - call rx_i_376 + call rx_i_372 -rx_i_278: ;FPSUB +rx_i_274: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 0b66ca7e0h mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4745257761179172864 - movd xmm1, rax - subsd xmm0, xmm1 - movsd xmm6, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm4 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 06a2b2b5bh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm6 -rx_i_279: ;OR_64 +rx_i_275: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r10, 0788eceb7h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r or rax, r11 mov r13, rax -rx_i_280: ;CALL +rx_i_276: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 0c6ac5edah mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r cmp r11d, -1236180570 - jns short taken_call_280 + jns short taken_call_276 mov rcx, rax mov eax, r12d xor eax, 0b65161a6h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_281 -taken_call_280: + jmp rx_i_277 +taken_call_276: push rax - call rx_i_408 + call rx_i_404 -rx_i_281: ;IMUL_32 +rx_i_277: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 0c9549789h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r10d imul rax, rcx @@ -3993,33 +3799,35 @@ rx_i_281: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 
8], rcx -rx_i_282: ;FPSUB +rx_i_278: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 0a2bc66c9h mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm7 - movsd xmm4, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm7 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 02d00ad10h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_283: ;FPSUB +rx_i_279: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 0f1a91458h mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4742807966216880128 - movd xmm1, rax - subsd xmm0, xmm1 - movsd xmm9, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm5 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0475ade01h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm9 -rx_i_284: ;AND_64 +rx_i_280: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r12, 066246b43h mov eax, r12d and eax, 2047 @@ -4031,12 +3839,12 @@ rx_i_284: ;AND_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_285: ;SUB_64 +rx_i_281: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r10, 05a762727h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r sub rax, r10 mov rcx, rax mov eax, r11d @@ -4044,18 +3852,18 @@ rx_i_285: ;SUB_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_286: ;MUL_64 +rx_i_282: ;SUB_32 dec edi - js rx_finish + jz rx_finish xor r15, 0de1ab603h mov ecx, r15d - call rx_read_dataset - imul rax, rax, 1367326224 + call rx_read_dataset_r + sub eax, 1367326224 mov r11, rax -rx_i_287: ;ADD_32 +rx_i_283: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r9, 0df4d084fh mov eax, r9d and eax, 32767 @@ -4067,48 +3875,43 @@ rx_i_287: ;ADD_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_288: ;FPSUB +rx_i_284: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 0e68f36ach mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4477945591619387392 - movd xmm1, rax - subsd xmm0, xmm1 - movsd xmm9, xmm0 + call 
rx_read_dataset_f + subpd xmm0, xmm6 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0936f2960h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_289: ;IMUL_32 +rx_i_285: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r8, 09adb333bh mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r8d imul rax, rcx mov r14, rax -rx_i_290: ;FPADD +rx_i_286: ;FPADD dec edi - js rx_finish + jz rx_finish xor r14, 082f5e36ch mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r15d - xor eax, 0546e75d1h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm7, xmm0 -rx_i_291: ;OR_64 +rx_i_287: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r11, 049547c9ch mov eax, r11d and eax, 2047 @@ -4120,9 +3923,9 @@ rx_i_291: ;OR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_292: ;MUL_64 +rx_i_288: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r10, 08716ac8bh mov eax, r10d and eax, 2047 @@ -4134,48 +3937,40 @@ rx_i_292: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_293: ;FPDIV +rx_i_289: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r14, 0efef52b5h mov eax, r14d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm9 - mov eax, r8d - xor eax, 046affb49h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_294: ;FPMUL +rx_i_290: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r15, 060665748h mov ecx, r15d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm8 - mov eax, r9d - xor eax, 02f4d18d7h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm8 + movaps 
xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm9, xmm0 -rx_i_295: ;RET +rx_i_291: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 0ddf4bd1ah mov eax, r13d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_295 - cmp r14d, 1988795765 - js short not_taken_ret_295 + je short not_taken_ret_291 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r14d @@ -4183,16 +3978,16 @@ rx_i_295: ;RET and eax, 32767 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_295: +not_taken_ret_291: mov rcx, rax mov eax, r14d xor eax, 0768a9d75h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_296: ;ROR_64 +rx_i_292: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r13, 05a87cc3dh mov eax, r13d and eax, 32767 @@ -4200,30 +3995,23 @@ rx_i_296: ;ROR_64 ror rax, 23 mov r10, rax -rx_i_297: ;FPSUB +rx_i_293: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 0c61f4279h mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm5 - mov eax, r8d - xor eax, 014844990h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm5 + movaps xmm8, xmm0 -rx_i_298: ;RET +rx_i_294: ;RET dec edi - js rx_finish + jz rx_finish xor r14, 0f3b9d85h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_298 - cmp r15d, -276467273 - jo short not_taken_ret_298 + je short not_taken_ret_294 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r8d @@ -4231,48 +4019,43 @@ rx_i_298: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_298: +not_taken_ret_294: mov rcx, rax mov eax, r8d xor eax, 0ef8571b7h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_299: ;FPSUB +rx_i_295: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 0f42798fdh mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm8 - mov eax, r15d - xor eax, 08a66e69fh - and eax, 2047 - movd qword ptr [rsi + rax * 
8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm8 + movaps xmm7, xmm0 -rx_i_300: ;CALL +rx_i_296: ;CALL dec edi - js rx_finish + jz rx_finish xor r14, 018738758h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp r9d, -207252278 - jns short taken_call_300 + jns short taken_call_296 mov rcx, rax mov eax, r8d xor eax, 0f3a594cah and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_301 -taken_call_300: + jmp rx_i_297 +taken_call_296: push rax - call rx_i_399 + call rx_i_395 -rx_i_301: ;ADD_64 +rx_i_297: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 0de3b9d9bh mov eax, r15d and eax, 2047 @@ -4280,24 +4063,19 @@ rx_i_301: ;ADD_64 add rax, r10 mov r14, rax -rx_i_302: ;FPSUB +rx_i_298: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r14, 084f53637h mov eax, r14d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm7 - mov eax, r14d - xor eax, 0d10f7c42h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm7 + movaps xmm6, xmm0 -rx_i_303: ;ADD_64 +rx_i_299: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r12, 042f4897h mov eax, r12d and eax, 2047 @@ -4309,59 +4087,58 @@ rx_i_303: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_304: ;FPSUB +rx_i_300: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r12, 095765693h mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4478628807791607808 - movd xmm1, rax - subsd xmm0, xmm1 - mov eax, r10d - xor eax, 09d24b005h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm8 + movaps xmm2, xmm0 -rx_i_305: ;FPMUL +rx_i_301: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r8, 0a0ec5eech mov ecx, r8d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm5 - movsd xmm7, xmm0 + call rx_read_dataset_f + mulpd xmm0, 
xmm5 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0433cf2d6h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm7 -rx_i_306: ;ADD_64 +rx_i_302: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 0f6f8c345h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r add rax, r10 mov r11, rax -rx_i_307: ;FPADD +rx_i_303: ;FPADD dec edi - js rx_finish + jz rx_finish xor r14, 082a3e965h mov eax, r14d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0bb9ee490h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_308: ;MUL_64 +rx_i_304: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 04940c652h mov eax, r12d and eax, 2047 @@ -4369,37 +4146,37 @@ rx_i_308: ;MUL_64 imul rax, r15 mov r13, rax -rx_i_309: ;MUL_64 +rx_i_305: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r11, 03c6c62b8h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r imul rax, rax, -65873120 mov r10, rax -rx_i_310: ;ADD_32 +rx_i_306: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 08b34cdfch mov ecx, r15d - call rx_read_dataset - add eax, r15d + call rx_read_dataset_r + add rax, r15 mov r13, rax -rx_i_311: ;SAR_64 +rx_i_307: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r15, 04c36adb1h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 sar rax, cl mov r10, rax -rx_i_312: ;MUL_64 +rx_i_308: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r11, 0a4213b21h mov eax, r11d and eax, 2047 @@ -4407,9 +4184,9 @@ rx_i_312: ;MUL_64 imul rax, r13 mov r15, rax -rx_i_313: ;IMULH_64 +rx_i_309: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r9, 090c42304h mov eax, r9d and eax, 32767 @@ -4423,72 +4200,62 @@ rx_i_313: ;IMULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_314: ;FPMUL 
+rx_i_310: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 0f78e1c8ch mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4746554338141274112 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm7, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 07c9816c0h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_315: ;FPMUL +rx_i_311: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r8, 0ff8848cfh mov ecx, r8d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4744327281034395648 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r12d - xor eax, 05cf21a31h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm4 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_316: ;MUL_32 +rx_i_312: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 0b18904cdh mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, -1147928648 imul rax, rcx mov r10, rax -rx_i_317: ;FPADD +rx_i_313: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0a0d0befh mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4478056002024898560 - movd xmm1, rax - addsd xmm0, xmm1 - mov eax, r14d - xor eax, 09500d514h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm5 + movaps xmm6, xmm0 -rx_i_318: ;IMUL_32 +rx_i_314: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r15, 01e3c65f7h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r9d imul rax, rcx @@ -4498,9 +4265,9 @@ rx_i_318: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_319: ;SHR_64 +rx_i_315: ;SHR_64 dec edi - js rx_finish + jz 
rx_finish xor r9, 02e36ddafh mov eax, r9d and eax, 32767 @@ -4509,16 +4276,14 @@ rx_i_319: ;SHR_64 shr rax, cl mov r9, rax -rx_i_320: ;RET +rx_i_316: ;RET dec edi - js rx_finish + jz rx_finish xor r14, 05b0cb5bbh mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_320 - cmp r10d, 906151187 - jl short not_taken_ret_320 + je short not_taken_ret_316 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r8d @@ -4526,47 +4291,37 @@ rx_i_320: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_320: +not_taken_ret_316: mov rcx, rax mov eax, r8d xor eax, 03602c513h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_321: ;FPADD +rx_i_317: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 0c74e7415h mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm7 - mov eax, r13d - xor eax, 0b5bc8h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm7 + movaps xmm5, xmm0 -rx_i_322: ;FPADD +rx_i_318: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 057621d9ah mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - mov eax, r15d - xor eax, 061cb9db8h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm3 + movaps xmm7, xmm0 -rx_i_323: ;ROL_64 +rx_i_319: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r13, 08ee02d99h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r15 rol rax, cl mov rcx, rax @@ -4575,20 +4330,22 @@ rx_i_323: ;ROL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_324: ;FPADD +rx_i_320: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 013461188h mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm4 - movsd xmm2, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm4 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 02bdc7349h + and eax, 2047 + movlpd qword ptr [rsi 
+ rax * 8], xmm2 -rx_i_325: ;IMUL_32 +rx_i_321: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 0a7bae383h mov eax, r11d and eax, 32767 @@ -4602,27 +4359,31 @@ rx_i_325: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_326: ;CALL +rx_i_322: ;RET dec edi - js rx_finish + jz rx_finish xor r14, 08215399bh mov ecx, r14d - call rx_read_dataset - cmp r11d, 1411981860 - jo short taken_call_326 + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_322 + xor rax, qword ptr [rsp + 8] + mov rcx, rax + mov eax, r11d + xor eax, 054292224h + and eax, 2047 + mov qword ptr [rsi + rax * 8], rcx + ret 8 +not_taken_ret_322: mov rcx, rax mov eax, r11d xor eax, 054292224h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_327 -taken_call_326: - push rax - call rx_i_347 -rx_i_327: ;MULH_64 +rx_i_323: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r14, 07b07664bh mov eax, r14d and eax, 32767 @@ -4636,22 +4397,24 @@ rx_i_327: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_328: ;FPSQRT +rx_i_324: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r9, 0f956baffh mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0944856d4h + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_329: ;SHL_64 +rx_i_325: ;SHL_64 dec edi - js rx_finish + jz rx_finish xor r11, 0708ab9d1h mov eax, r11d and eax, 2047 @@ -4659,12 +4422,12 @@ rx_i_329: ;SHL_64 shl rax, 24 mov r13, rax -rx_i_330: ;MULH_64 +rx_i_326: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r11, 0d1b27540h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r8 mul rcx mov rax, rdx @@ -4674,9 +4437,9 @@ rx_i_330: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_331: ;AND_64 +rx_i_327: ;AND_64 dec 
edi - js rx_finish + jz rx_finish xor r9, 09665f98dh mov eax, r9d and eax, 2047 @@ -4684,9 +4447,9 @@ rx_i_331: ;AND_64 and rax, r15 mov r12, rax -rx_i_332: ;ROL_64 +rx_i_328: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r12, 0fb9c32adh mov eax, r12d and eax, 2047 @@ -4695,31 +4458,29 @@ rx_i_332: ;ROL_64 rol rax, cl mov r9, rax -rx_i_333: ;RET +rx_i_329: ;RET dec edi - js rx_finish + jz rx_finish xor r11, 0e1110623h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_333 - cmp r8d, 842221018 - jl short not_taken_ret_333 + je short not_taken_ret_329 xor rax, qword ptr [rsp + 8] mov r11, rax ret 8 -not_taken_ret_333: +not_taken_ret_329: mov r11, rax -rx_i_334: ;IMUL_32 +rx_i_330: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0f6a93f19h mov eax, r9d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - movsxd rcx, eax - movsxd rax, r13d + mov ecx, eax + mov eax, r13d imul rax, rcx mov rcx, rax mov eax, r11d @@ -4727,36 +4488,33 @@ rx_i_334: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_335: ;FPADD +rx_i_331: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 0bc9bbe4ah mov eax, r9d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - mov eax, r9d - xor eax, 0ba4d4c0fh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm9, xmm0 -rx_i_336: ;FPADD +rx_i_332: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0f253cd4eh mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - movsd xmm3, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm6 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0116c919eh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm3 -rx_i_337: ;XOR_64 +rx_i_333: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r14, 0f009758bh mov eax, r14d and eax, 2047 @@ 
-4764,18 +4522,18 @@ rx_i_337: ;XOR_64 xor rax, -175125848 mov r11, rax -rx_i_338: ;ADD_32 +rx_i_334: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r8, 0dda04168h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r add eax, r13d mov r8, rax -rx_i_339: ;SUB_64 +rx_i_335: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r15, 03e6cfb73h mov eax, r15d and eax, 32767 @@ -4787,27 +4545,22 @@ rx_i_339: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_340: ;FPADD +rx_i_336: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 0aea0a435h mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm2 - mov eax, r11d - xor eax, 02644c5ah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm2 + movaps xmm3, xmm0 -rx_i_341: ;ADD_32 +rx_i_337: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r8, 03d6c4ab2h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r add eax, r12d mov rcx, rax mov eax, r13d @@ -4815,9 +4568,9 @@ rx_i_341: ;ADD_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_342: ;MUL_64 +rx_i_338: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 0d428a742h mov eax, r12d and eax, 2047 @@ -4825,38 +4578,28 @@ rx_i_342: ;MUL_64 imul rax, r12 mov r11, rax -rx_i_343: ;FPADD +rx_i_339: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 04596ef73h mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - mov eax, r10d - xor eax, 07c8317fah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm6 + movaps xmm2, xmm0 -rx_i_344: ;FPSUB +rx_i_340: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 0e51629cch mov ecx, r15d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm5 - mov eax, r13d - xor eax, 038b653beh - and eax, 2047 - movd qword ptr [rsi + rax * 
8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm5 + movaps xmm5, xmm0 -rx_i_345: ;MUL_32 +rx_i_341: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 019eb9ea5h mov eax, r12d and eax, 2047 @@ -4870,60 +4613,51 @@ rx_i_345: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_346: ;FPMUL +rx_i_342: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 09ccc7abah mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - mov eax, r11d - xor eax, 0319de2d3h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm3, xmm0 -rx_i_347: ;SHR_64 +rx_i_343: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r14, 056f6cf0bh mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, r13 - shr rax, cl + shr rax, 48 mov rcx, rax mov eax, r15d xor eax, 0d9a469a9h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_348: ;FPMUL +rx_i_344: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r10, 03ef9bcc4h mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4744717476367958016 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r13d - xor eax, 0627d9feah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 -rx_i_349: ;MULH_64 +rx_i_345: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r12, 0bbbcdbach mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r13 mul rcx mov rax, rdx @@ -4933,12 +4667,12 @@ rx_i_349: ;MULH_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_350: ;XOR_64 +rx_i_346: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r12, 0ae9d1e96h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r xor rax, r15 mov rcx, rax mov eax, 
r13d @@ -4946,9 +4680,9 @@ rx_i_350: ;XOR_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_351: ;ADD_64 +rx_i_347: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r14, 070c34d69h mov eax, r14d and eax, 2047 @@ -4956,23 +4690,23 @@ rx_i_351: ;ADD_64 add rax, r10 mov r13, rax -rx_i_352: ;FPSUB +rx_i_348: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r13, 0523ff904h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4741412628788674560 - movd xmm1, rax - subsd xmm0, xmm1 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm3 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 039c35461h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_353: ;XOR_32 +rx_i_349: ;XOR_32 dec edi - js rx_finish + jz rx_finish xor r8, 018e0e5ddh mov eax, r8d and eax, 2047 @@ -4980,77 +4714,81 @@ rx_i_353: ;XOR_32 xor eax, r15d mov r13, rax -rx_i_354: ;CALL +rx_i_350: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 09bd050f0h mov eax, r9d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp r9d, -980411581 - jbe short taken_call_354 + jbe short taken_call_350 mov rcx, rax mov eax, r12d xor eax, 0c5901b43h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_355 -taken_call_354: + jmp rx_i_351 +taken_call_350: push rax - call rx_i_356 + call rx_i_352 -rx_i_355: ;MULH_64 +rx_i_351: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r11, 0a3a5906fh mov ecx, r11d - call rx_read_dataset - mov rcx, r10 + call rx_read_dataset_r + imul rax, r10 + mov r13, rax + +rx_i_352: ;FPADD + dec edi + jz rx_finish + xor r10, 0afc9af2bh + mov ecx, r10d + call rx_read_dataset_f + addpd xmm0, xmm6 + movaps xmm2, xmm0 + mov eax, r10d + xor eax, 03bf686f2h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm2 + +rx_i_353: ;FPMUL + dec edi + jz rx_finish + xor r13, 02e65278bh + mov eax, r13d + and eax, 2047 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + 
cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0b3c9f7aeh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 + +rx_i_354: ;MULH_64 + dec edi + jz rx_finish + xor r13, 02412fc10h + mov ecx, r13d + call rx_read_dataset_r + mov rcx, r13 mul rcx mov rax, rdx mov r13, rax -rx_i_356: ;FPADD +rx_i_355: ;MUL_64 dec edi - js rx_finish - xor r10, 0afc9af2bh - mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm6 - movsd xmm2, xmm0 - -rx_i_357: ;FPMUL - dec edi - js rx_finish - xor r13, 02e65278bh - mov eax, r13d - and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - movsd xmm7, xmm0 - -rx_i_358: ;MUL_32 - dec edi - js rx_finish - xor r13, 02412fc10h - mov ecx, r13d - call rx_read_dataset - mov ecx, eax - mov eax, r13d - imul rax, rcx - mov r13, rax - -rx_i_359: ;MUL_64 - dec edi - js rx_finish + jz rx_finish xor r10, 06bd6e65fh mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r imul rax, r14 mov rcx, rax mov eax, r8d @@ -5058,18 +4796,18 @@ rx_i_359: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_360: ;MUL_64 +rx_i_356: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r10, 01cd85d80h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r imul rax, r10 mov r11, rax -rx_i_361: ;ADD_64 +rx_i_357: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r10, 0f7daed36h mov eax, r10d and eax, 2047 @@ -5077,9 +4815,9 @@ rx_i_361: ;ADD_64 add rax, 820073637 mov r11, rax -rx_i_362: ;DIV_64 +rx_i_358: ;DIV_64 dec edi - js rx_finish + jz rx_finish xor r13, 088fa6e5ah mov eax, r13d and eax, 2047 @@ -5092,48 +4830,49 @@ rx_i_362: ;DIV_64 div rcx mov r9, rax -rx_i_363: ;FPSUB +rx_i_359: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r10, 0714fc2cdh mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm9 - movsd xmm4, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm9 + 
movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0f16b9be3h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_364: ;FPMUL +rx_i_360: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r10, 0c2d110b5h mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm8 - mov eax, r8d - xor eax, 0c41a4103h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm8 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_365: ;FPSQRT +rx_i_361: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r15, 01d125a7fh mov ecx, r15d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm6, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 0ad0b81f5h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_366: ;SUB_64 +rx_i_362: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 0ed8954bdh mov eax, r9d and eax, 2047 @@ -5145,36 +4884,33 @@ rx_i_366: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_367: ;FPMUL +rx_i_363: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 09f75887bh mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm6 - mov eax, r11d - xor eax, 05415334dh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm6 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm3, xmm0 -rx_i_368: ;MUL_32 +rx_i_364: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r11, 0badaf867h mov ecx, r11d - call rx_read_dataset - mov ecx, eax - mov eax, r8d - imul rax, rcx + call rx_read_dataset_r + mov rcx, r8 + mul rcx + mov rax, rdx mov r8, rax -rx_i_369: ;IMUL_32 +rx_i_365: ;IMUL_32 dec edi - js rx_finish + 
jz rx_finish xor r15, 02db4444ah mov eax, r15d and eax, 2047 @@ -5188,9 +4924,9 @@ rx_i_369: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_370: ;IMUL_32 +rx_i_366: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 0bff7218fh mov eax, r12d and eax, 2047 @@ -5204,29 +4940,31 @@ rx_i_370: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_371: ;FPADD +rx_i_367: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 04d14cb3ah mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - movsd xmm4, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0ad9b92e8h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm4 -rx_i_372: ;MUL_64 +rx_i_368: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r10, 0a14836bah mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r imul rax, r10 mov r8, rax -rx_i_373: ;AND_64 +rx_i_369: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r9, 053fe22e2h mov eax, r9d and eax, 32767 @@ -5234,32 +4972,36 @@ rx_i_373: ;AND_64 and rax, r13 mov r9, rax -rx_i_374: ;FPSUB +rx_i_370: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 010e1fb24h mov eax, r15d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm6 - movsd xmm6, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm6 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 0a120e0edh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_375: ;FPADD +rx_i_371: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0ebbd5cc9h mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - movsd xmm5, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0c40fe413h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm5 -rx_i_376: ;ROL_64 +rx_i_372: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r10, 098ab79d7h mov eax, r10d and 
eax, 2047 @@ -5268,43 +5010,37 @@ rx_i_376: ;ROL_64 rol rax, cl mov r9, rax -rx_i_377: ;FPDIV +rx_i_373: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r15, 056438b3h mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm8 - mov eax, r12d - xor eax, 05655fac9h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm8 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_378: ;FPMUL +rx_i_374: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0dbcce604h mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm2 - mov eax, r10d - xor eax, 03507e810h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm2, xmm0 -rx_i_379: ;ADD_64 +rx_i_375: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r9, 0edea6200h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r add rax, r15 mov rcx, rax mov eax, r12d @@ -5312,9 +5048,9 @@ rx_i_379: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_380: ;ADD_64 +rx_i_376: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r14, 05e61b279h mov eax, r14d and eax, 2047 @@ -5326,47 +5062,42 @@ rx_i_380: ;ADD_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_381: ;FPSUB +rx_i_377: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r14, 0fc1fb433h mov ecx, r14d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - mov eax, r15d - xor eax, 0d822f28fh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + subpd xmm0, xmm3 + movaps xmm7, xmm0 -rx_i_382: ;MUL_32 +rx_i_378: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 082aa21ach mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, 547725353 
imul rax, rcx mov r15, rax -rx_i_383: ;FPADD +rx_i_379: ;FPADD dec edi - js rx_finish + jz rx_finish xor r10, 05dba41fbh mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4741471142953353216 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm5, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 03a2dc429h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_384: ;MUL_64 +rx_i_380: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r11, 0229e3d6eh mov eax, r11d and eax, 32767 @@ -5378,45 +5109,49 @@ rx_i_384: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_385: ;SAR_64 +rx_i_381: ;SAR_64 dec edi - js rx_finish + jz rx_finish xor r8, 019816ff9h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r14 sar rax, cl mov r9, rax -rx_i_386: ;FPADD +rx_i_382: ;FPADD dec edi - js rx_finish + jz rx_finish xor r14, 036b5b81fh mov ecx, r14d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm3, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm3 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0a6a2e0b1h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm3 -rx_i_387: ;FPSUB +rx_i_383: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r15, 05f798ec3h mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm4 - movsd xmm5, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm4 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0c9f5cc22h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm5 -rx_i_388: ;SHR_64 +rx_i_384: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r10, 05b459fd7h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r11 shr rax, cl mov rcx, rax @@ -5425,12 +5160,12 @@ rx_i_388: ;SHR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_389: ;MUL_64 +rx_i_385: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r15, 
0c91749bbh mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r imul rax, r12 mov rcx, rax mov eax, r13d @@ -5438,40 +5173,33 @@ rx_i_389: ;MUL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_390: ;FPADD +rx_i_386: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 0575b4bdch mov ecx, r9d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm8 - mov eax, r9d - xor eax, 05702d58dh - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm8 + movaps xmm9, xmm0 -rx_i_391: ;MUL_64 +rx_i_387: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r9, 0d4f7bc6ah mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r imul rax, r15 mov r9, rax -rx_i_392: ;RET +rx_i_388: ;RET dec edi - js rx_finish + jz rx_finish xor r8, 08a949356h mov eax, r8d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_392 - cmp r13d, -1600627518 - jo short not_taken_ret_392 + je short not_taken_ret_388 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r11d @@ -5479,61 +5207,51 @@ rx_i_392: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_392: +not_taken_ret_388: mov rcx, rax mov eax, r11d xor eax, 0a0985cc2h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_393: ;CALL +rx_i_389: ;CALL dec edi - js rx_finish + jz rx_finish xor r11, 06531ad2eh mov eax, r11d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp r9d, -350609584 - jge short taken_call_393 + jge short taken_call_389 mov r14, rax - jmp rx_i_394 -taken_call_393: + jmp rx_i_390 +taken_call_389: push rax - call rx_i_425 + call rx_i_421 -rx_i_394: ;FPADD +rx_i_390: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 02914abeah mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm4 - mov eax, r11d - xor eax, 0e5c5acbbh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd 
xmm0, xmm4 + movaps xmm3, xmm0 -rx_i_395: ;FPADD +rx_i_391: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0473a41f0h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - mov eax, r14d - xor eax, 0aba2155fh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm6, xmm0 -rx_i_396: ;ROR_64 +rx_i_392: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r14, 01ebc1f0dh mov eax, r14d and eax, 2047 @@ -5545,9 +5263,9 @@ rx_i_396: ;ROR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_397: ;OR_32 +rx_i_393: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r14, 0742e95b1h mov eax, r14d and eax, 2047 @@ -5559,67 +5277,55 @@ rx_i_397: ;OR_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_398: ;FPADD +rx_i_394: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0db885c2ch mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r14d - xor eax, 0910e8628h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm6, xmm0 -rx_i_399: ;IDIV_64 +rx_i_395: ;IDIV_64 dec edi - js rx_finish + jz rx_finish xor r8, 04ae4fe8ch mov eax, r8d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] mov edx, r13d cmp edx, -1 - jne short safe_idiv_399 + jne short safe_idiv_395 mov rcx, rax rol rcx, 1 dec rcx - jz short result_idiv_399 -safe_idiv_399: + jz short result_idiv_395 +safe_idiv_395: mov ecx, 1 test edx, edx cmovne ecx, edx movsxd rcx, ecx cqo idiv rcx -result_idiv_399: +result_idiv_395: mov r8, rax -rx_i_400: ;FPADD +rx_i_396: ;FPADD dec edi - js rx_finish + jz rx_finish xor r10, 07b41862bh mov ecx, r10d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4737472108072796160 - movd xmm1, rax - addsd xmm0, xmm1 - mov eax, r12d - xor eax, 01ee1c837h - and eax, 2047 
- movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm7 + movaps xmm4, xmm0 -rx_i_401: ;MUL_64 +rx_i_397: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r8, 0916f3819h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r imul rax, r12 mov rcx, rax mov eax, r10d @@ -5627,39 +5333,35 @@ rx_i_401: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_402: ;ROL_64 +rx_i_398: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r8, 04eb6fd2ah mov eax, r8d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] - mov rcx, r8 - rol rax, cl + rol rax, 44 mov rcx, rax mov eax, r11d xor eax, 0724e7136h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_403: ;FPDIV +rx_i_399: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r11, 0899a98cfh mov ecx, r11d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm2 - mov eax, r14d - xor eax, 0fb6f7016h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + divpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm6, xmm0 -rx_i_404: ;OR_32 +rx_i_400: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r13, 0aae75db6h mov eax, r13d and eax, 32767 @@ -5671,86 +5373,84 @@ rx_i_404: ;OR_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_405: ;FPMUL +rx_i_401: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r13, 032e81f25h mov eax, r13d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4742100199122403328 - movd xmm1, rax - mulsd xmm0, xmm1 - movsd xmm6, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm4 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 03ea60344h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm6 -rx_i_406: ;RET +rx_i_402: ;RET dec edi - js rx_finish + jz rx_finish xor r9, 0fa1a07ffh mov ecx, r9d - call rx_read_dataset + call 
rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_406 - cmp r8d, -1580915312 - jl short not_taken_ret_406 + je short not_taken_ret_402 xor rax, qword ptr [rsp + 8] mov r14, rax ret 8 -not_taken_ret_406: +not_taken_ret_402: mov r14, rax -rx_i_407: ;IDIV_64 +rx_i_403: ;IDIV_64 dec edi - js rx_finish + jz rx_finish xor r9, 0e59500f7h mov eax, r9d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] mov edx, r12d cmp edx, -1 - jne short safe_idiv_407 + jne short safe_idiv_403 mov rcx, rax rol rcx, 1 dec rcx - jz short result_idiv_407 -safe_idiv_407: + jz short result_idiv_403 +safe_idiv_403: mov ecx, 1 test edx, edx cmovne ecx, edx movsxd rcx, ecx cqo idiv rcx -result_idiv_407: +result_idiv_403: mov rcx, rax mov eax, r11d xor eax, 01ff394a0h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_408: ;MUL_32 +rx_i_404: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r15, 05b8ceb2fh mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, r8d imul rax, rcx mov r15, rax -rx_i_409: ;RET +rx_i_405: ;RET dec edi - js rx_finish + jz rx_finish xor r8, 0f61082a3h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_409 - cmp r10d, 1795880641 - ja short not_taken_ret_409 + je short not_taken_ret_405 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r12d @@ -5758,16 +5458,16 @@ rx_i_409: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_409: +not_taken_ret_405: mov rcx, rax mov eax, r12d xor eax, 06b0af6c1h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_410: ;FPROUND +rx_i_406: ;FPROUND dec edi - js rx_finish + jz rx_finish xor r9, 0af6886b7h mov eax, r9d and eax, 2047 @@ -5776,33 +5476,33 @@ rx_i_410: ;FPROUND shl eax, 13 and rcx, -2048 and eax, 24576 - cvtsi2sd xmm0, rcx + cvtsi2sd xmm9, rcx or eax, 40896 mov dword ptr [rsp - 8], eax ldmxcsr dword ptr [rsp - 8] - movsd xmm9, xmm0 + mov eax, r9d + xor eax, 09862adefh + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], 
xmm9 -rx_i_411: ;FPMUL +rx_i_407: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r14, 09699566fh mov ecx, r14d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - mov eax, r8d - xor eax, 0904eec66h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_412: ;MUL_64 +rx_i_408: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r15, 066e79fa6h mov ecx, r15d - call rx_read_dataset + call rx_read_dataset_r imul rax, r9 mov rcx, rax mov eax, r10d @@ -5810,86 +5510,79 @@ rx_i_412: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_413: ;MULH_64 +rx_i_409: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r11, 04b6caa9ah mov ecx, r11d - call rx_read_dataset - mov rcx, r15 - mul rcx - mov rax, rdx + call rx_read_dataset_r + imul rax, r15 mov r8, rax -rx_i_414: ;RET +rx_i_410: ;RET dec edi - js rx_finish + jz rx_finish xor r15, 0d17f245eh mov eax, r15d and eax, 32767 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_414 - cmp r12d, -1371608768 - jl short not_taken_ret_414 + je short not_taken_ret_410 xor rax, qword ptr [rsp + 8] mov r8, rax ret 8 -not_taken_ret_414: +not_taken_ret_410: mov r8, rax -rx_i_415: ;RET +rx_i_411: ;RET dec edi - js rx_finish + jz rx_finish xor r12, 0364f10e7h mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_415 - cmp r13d, -1265436998 - jbe short not_taken_ret_415 + je short not_taken_ret_411 xor rax, qword ptr [rsp + 8] mov r12, rax ret 8 -not_taken_ret_415: +not_taken_ret_411: mov r12, rax -rx_i_416: ;FPSQRT +rx_i_412: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r10, 0ac90e7ah mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm3, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax 
* 8] + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm3, xmm0 + mov eax, r11d + xor eax, 0bbd2640ah + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm3 -rx_i_417: ;FPDIV +rx_i_413: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r11, 04b6037abh mov eax, r11d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm2 - mov eax, r12d - xor eax, 043989376h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_418: ;OR_64 +rx_i_414: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r14, 06c01554dh mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r or rax, r8 mov rcx, rax mov eax, r10d @@ -5897,55 +5590,57 @@ rx_i_418: ;OR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_419: ;DIV_64 +rx_i_415: ;DIV_64 dec edi - js rx_finish + jz rx_finish xor r8, 08c3e59a1h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov ecx, -538093385 xor edx, edx div rcx mov r9, rax -rx_i_420: ;FPSUB +rx_i_416: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r12, 0f3fafde9h mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - movsd xmm5, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm3 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0f84b5382h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_421: ;SUB_64 +rx_i_417: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r10, 03c6481fah mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r sub rax, r12 mov r10, rax -rx_i_422: ;MULH_64 +rx_i_418: ;MULH_64 dec edi - js rx_finish + jz rx_finish xor r10, 02bd61c5fh mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r11 mul rcx mov rax, rdx mov r10, rax -rx_i_423: ;XOR_64 +rx_i_419: ;XOR_64 dec edi - js rx_finish + jz rx_finish 
xor r9, 0b6ab9d32h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r xor rax, r14 mov rcx, rax mov eax, r14d @@ -5953,50 +5648,52 @@ rx_i_423: ;XOR_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_424: ;FPADD +rx_i_420: ;FPADD dec edi - js rx_finish + jz rx_finish xor r9, 0f9690ceah mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm3 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm3 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 08f7bb3ech + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_425: ;CALL +rx_i_421: ;RET dec edi - js rx_finish + jz rx_finish xor r12, 01ada0f39h mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - cmp r8d, -1600409762 - jno short taken_call_425 + cmp rsp, rbp + je short not_taken_ret_421 + xor rax, qword ptr [rsp + 8] + mov r10, rax + ret 8 +not_taken_ret_421: mov r10, rax - jmp rx_i_426 -taken_call_425: - push rax - call rx_i_35 -rx_i_426: ;IMUL_32 +rx_i_422: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 04dd16ca4h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r10d imul rax, rcx mov r13, rax -rx_i_427: ;MUL_64 +rx_i_423: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 04df5ce05h mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r imul rax, r10 mov rcx, rax mov eax, r15d @@ -6004,31 +5701,33 @@ rx_i_427: ;MUL_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_428: ;FPADD +rx_i_424: ;FPADD dec edi - js rx_finish + jz rx_finish xor r13, 01ad12ce2h mov ecx, r13d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm7 - movsd xmm9, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm7 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 0565ae8aah + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm9 -rx_i_429: ;IMUL_32 +rx_i_425: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r8, 0a3c5391dh mov ecx, r8d 
- call rx_read_dataset + call rx_read_dataset_r movsxd rcx, eax movsxd rax, r10d imul rax, rcx mov r14, rax -rx_i_430: ;AND_64 +rx_i_426: ;AND_64 dec edi - js rx_finish + jz rx_finish xor r12, 09dd55ba0h mov eax, r12d and eax, 2047 @@ -6040,9 +5739,9 @@ rx_i_430: ;AND_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_431: ;MUL_32 +rx_i_427: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r11, 0d6cae9aeh mov eax, r11d and eax, 2047 @@ -6056,17 +5755,15 @@ rx_i_431: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_432: ;RET +rx_i_428: ;RET dec edi - js rx_finish + jz rx_finish xor r11, 0f807a961h mov eax, r11d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_432 - cmp r12d, -474453201 - jl short not_taken_ret_432 + je short not_taken_ret_428 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r8d @@ -6074,86 +5771,88 @@ rx_i_432: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_432: +not_taken_ret_428: mov rcx, rax mov eax, r8d xor eax, 0e3b86b2fh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_433: ;MULH_64 +rx_i_429: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 0650a4102h mov eax, r12d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, 1990438276 - mul rcx - mov rax, rdx + imul rax, rax, 1990438276 mov r15, rax -rx_i_434: ;FPADD +rx_i_430: ;FPADD dec edi - js rx_finish + jz rx_finish xor r14, 019cc0e5h mov ecx, r14d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4744016937443393536 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm5, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm8 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 058891433h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm5 -rx_i_435: ;FPADD +rx_i_431: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0ed17ab58h mov ecx, r12d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm5 - movsd xmm5, xmm0 + call rx_read_dataset_f + addpd 
xmm0, xmm5 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 019fe4aadh + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_436: ;SUB_64 +rx_i_432: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r10, 01c3b321fh mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r sub rax, r10 mov r8, rax -rx_i_437: ;SUB_64 +rx_i_433: ;ADD_32 dec edi - js rx_finish + jz rx_finish xor r13, 0bbb88499h mov ecx, r13d - call rx_read_dataset - sub rax, r12 + call rx_read_dataset_r + add eax, r12d mov rcx, rax mov eax, r12d xor eax, 04722b36fh and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_438: ;FPSQRT +rx_i_434: ;FPSQRT dec edi - js rx_finish + jz rx_finish xor r13, 0167edabdh mov ecx, r13d - call rx_read_dataset - mov rcx, 9223372036854773760 - and rax, rcx - cvtsi2sd xmm0, rax - sqrtsd xmm0, xmm0 - movsd xmm9, xmm0 + call rx_read_dataset_f + andps xmm0, xmm10 + sqrtpd xmm0, xmm0 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 08c1cfc74h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_439: ;MUL_64 +rx_i_435: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r15, 0b940480ah mov eax, r15d and eax, 2047 @@ -6165,56 +5864,50 @@ rx_i_439: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_440: ;FPADD +rx_i_436: ;FPADD dec edi - js rx_finish + jz rx_finish xor r15, 0bfc3ca8bh mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4481057281345060864 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm7, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm2 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0bfa76c43h + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_441: ;FPDIV +rx_i_437: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r8, 098a6bcf7h mov ecx, r8d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm3 - mov eax, r8d - xor eax, 025dac800h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + call 
rx_read_dataset_f + divpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_442: ;FPMUL +rx_i_438: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r10, 0325b38ebh mov ecx, r10d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - mov eax, r12d - xor eax, 0b7c490eeh - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm4, xmm0 -rx_i_443: ;XOR_32 +rx_i_439: ;XOR_32 dec edi - js rx_finish + jz rx_finish xor r13, 05e807e81h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r xor eax, r15d mov rcx, rax mov eax, r10d @@ -6222,29 +5915,27 @@ rx_i_443: ;XOR_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_444: ;RET +rx_i_440: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 062f83728h mov eax, r10d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_444 - cmp r12d, 2127765370 - jns short not_taken_ret_444 + je short not_taken_ret_440 xor rax, qword ptr [rsp + 8] mov r9, rax ret 8 -not_taken_ret_444: +not_taken_ret_440: mov r9, rax -rx_i_445: ;ADD_64 +rx_i_441: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r14, 0d18ec075h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r add rax, 529736748 mov rcx, rax mov eax, r9d @@ -6252,35 +5943,33 @@ rx_i_445: ;ADD_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_446: ;CALL +rx_i_442: ;CALL dec edi - js rx_finish + jz rx_finish xor r14, 0a53dd1bh mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r cmp r15d, 799523062 - jbe short taken_call_446 + jbe short taken_call_442 mov rcx, rax mov eax, r11d xor eax, 02fa7c0f6h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_447 -taken_call_446: + jmp rx_i_443 +taken_call_442: push rax - call rx_i_13 + call rx_i_9 -rx_i_447: ;RET +rx_i_443: ;RET dec edi - js rx_finish + jz rx_finish xor 
r14, 0232d1285h mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_447 - cmp r12d, 1332855833 - jno short not_taken_ret_447 + je short not_taken_ret_443 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r9d @@ -6288,32 +5977,36 @@ rx_i_447: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_447: +not_taken_ret_443: mov rcx, rax mov eax, r9d xor eax, 04f71c419h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_448: ;FPMUL +rx_i_444: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r8, 042455dd8h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm7 - movsd xmm5, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm7 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0ce416070h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm5 -rx_i_449: ;ADD_64 +rx_i_445: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r13, 09ae009b2h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r add rax, r11 mov rcx, rax mov eax, r9d @@ -6321,12 +6014,12 @@ rx_i_449: ;ADD_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_450: ;MUL_32 +rx_i_446: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 01734708eh mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov eax, r15d imul rax, rcx @@ -6336,35 +6029,32 @@ rx_i_450: ;MUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_451: ;FPSUB +rx_i_447: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r8, 01596d0e8h mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm7 - movsd xmm5, xmm0 + call rx_read_dataset_f + subpd xmm0, xmm7 + movaps xmm5, xmm0 + mov eax, r13d + xor eax, 0b384d4afh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm5 -rx_i_452: ;FPSUB +rx_i_448: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 
0390cfdb0h mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm3 - mov eax, r9d - xor eax, 0a700e3f3h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm3 + movaps xmm9, xmm0 -rx_i_453: ;ROR_64 +rx_i_449: ;ROR_64 dec edi - js rx_finish + jz rx_finish xor r8, 04f27744bh mov eax, r8d and eax, 2047 @@ -6372,12 +6062,12 @@ rx_i_453: ;ROR_64 ror rax, 28 mov r8, rax -rx_i_454: ;ROL_64 +rx_i_450: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r8, 04e2c76ffh mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r12 rol rax, cl mov rcx, rax @@ -6386,25 +6076,23 @@ rx_i_454: ;ROL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_455: ;ADD_64 +rx_i_451: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r8, 0c4d99ac9h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r add rax, -287502157 mov r8, rax -rx_i_456: ;RET +rx_i_452: ;RET dec edi - js rx_finish + jz rx_finish xor r13, 040130b88h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_456 - cmp r11d, -495064539 - jl short not_taken_ret_456 + je short not_taken_ret_452 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r11d @@ -6412,41 +6100,41 @@ rx_i_456: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_456: +not_taken_ret_452: mov rcx, rax mov eax, r11d xor eax, 0e27dea25h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_457: ;IMULH_64 +rx_i_453: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r11, 0a2096aa4h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r14 imul rcx mov rax, rdx mov r8, rax -rx_i_458: ;FPADD +rx_i_454: ;FPADD dec edi - js rx_finish + jz rx_finish xor r13, 081314291h mov eax, r13d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4746671349487894528 - movd xmm1, rax - addsd xmm0, 
xmm1 - movsd xmm4, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 07e41c60fh + and eax, 2047 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_459: ;XOR_64 +rx_i_455: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r8, 059263cdbh mov eax, r8d and eax, 2047 @@ -6454,12 +6142,12 @@ rx_i_459: ;XOR_64 xor rax, r9 mov r8, rax -rx_i_460: ;OR_32 +rx_i_456: ;OR_32 dec edi - js rx_finish + jz rx_finish xor r9, 010e8fe6h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r or eax, r11d mov rcx, rax mov eax, r9d @@ -6467,12 +6155,12 @@ rx_i_460: ;OR_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_461: ;SUB_64 +rx_i_457: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 09de1a3efh mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r sub rax, r10 mov rcx, rax mov eax, r10d @@ -6480,21 +6168,21 @@ rx_i_461: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_462: ;ROL_64 +rx_i_458: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r11, 05c79df6eh mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r rol rax, 22 mov r14, rax -rx_i_463: ;MUL_64 +rx_i_459: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r9, 0346f46adh mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r imul rax, rax, 381354340 mov rcx, rax mov eax, r13d @@ -6502,12 +6190,12 @@ rx_i_463: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_464: ;SUB_64 +rx_i_460: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r11, 098ab71fch mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r sub rax, r14 mov rcx, rax mov eax, r12d @@ -6515,12 +6203,12 @@ rx_i_464: ;SUB_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_465: ;SHR_64 +rx_i_461: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r11, 0c814e926h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r13 shr rax, cl mov rcx, rax @@ -6529,9 +6217,9 @@ rx_i_465: ;SHR_64 and eax, 2047 mov qword ptr [rsi 
+ rax * 8], rcx -rx_i_466: ;ADD_64 +rx_i_462: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r10, 0c64b4a9eh mov eax, r10d and eax, 2047 @@ -6539,9 +6227,9 @@ rx_i_466: ;ADD_64 add rax, -1734323376 mov r15, rax -rx_i_467: ;SUB_64 +rx_i_463: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r9, 08c29341h mov eax, r9d and eax, 2047 @@ -6549,12 +6237,12 @@ rx_i_467: ;SUB_64 sub rax, r15 mov r10, rax -rx_i_468: ;MUL_64 +rx_i_464: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 06ff587fdh mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r imul rax, r15 mov rcx, rax mov eax, r13d @@ -6562,24 +6250,19 @@ rx_i_468: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_469: ;FPADD +rx_i_465: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 0b62c0003h mov eax, r12d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm5 - mov eax, r10d - xor eax, 0d11c1242h - and eax, 32767 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm5 + movaps xmm2, xmm0 -rx_i_470: ;IMUL_32 +rx_i_466: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r13, 05c541c42h mov eax, r13d and eax, 2047 @@ -6589,56 +6272,51 @@ rx_i_470: ;IMUL_32 imul rax, rcx mov r9, rax -rx_i_471: ;FPADD +rx_i_467: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0cbb33f81h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r8d - xor eax, 0ad38e588h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm8, xmm0 -rx_i_472: ;IDIV_64 +rx_i_468: ;IDIV_64 dec edi - js rx_finish + jz rx_finish xor r8, 091044dc3h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov edx, -13394825 cmp edx, -1 - jne short safe_idiv_472 + jne short safe_idiv_468 mov rcx, rax rol rcx, 1 dec rcx - jz short result_idiv_472 -safe_idiv_472: + jz short 
result_idiv_468 +safe_idiv_468: mov ecx, 1 test edx, edx cmovne ecx, edx movsxd rcx, ecx cqo idiv rcx -result_idiv_472: +result_idiv_468: mov rcx, rax mov eax, r8d xor eax, 0ff339c77h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_473: ;IMUL_32 +rx_i_469: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r9, 0c0186beh mov ecx, r9d - call rx_read_dataset - movsxd rcx, eax - mov rax, 294019485 + call rx_read_dataset_r + mov ecx, eax + mov eax, 294019485 imul rax, rcx mov rcx, rax mov eax, r9d @@ -6646,12 +6324,12 @@ rx_i_473: ;IMUL_32 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_474: ;XOR_32 +rx_i_470: ;XOR_32 dec edi - js rx_finish + jz rx_finish xor r14, 090849e3eh mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r xor eax, r11d mov rcx, rax mov eax, r14d @@ -6659,9 +6337,9 @@ rx_i_474: ;XOR_32 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_475: ;IMUL_32 +rx_i_471: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r14, 0cedba9b6h mov eax, r14d and eax, 32767 @@ -6671,105 +6349,91 @@ rx_i_475: ;IMUL_32 imul rax, rcx mov r14, rax -rx_i_476: ;CALL +rx_i_472: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 038f4b9d6h mov eax, r9d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r10d, 1738497427 - jl short taken_call_476 + jl short taken_call_472 mov r10, rax - jmp rx_i_477 -taken_call_476: + jmp rx_i_473 +taken_call_472: push rax - call rx_i_12 + call rx_i_8 -rx_i_477: ;MULH_64 +rx_i_473: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r14, 01fb7637dh mov eax, r14d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] - mov rcx, -751043211 - mul rcx - mov rax, rdx + imul rax, rax, -751043211 mov r12, rax -rx_i_478: ;CALL +rx_i_474: ;CALL dec edi - js rx_finish + jz rx_finish xor r9, 0b5c0b4d4h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r cmp r15d, -233120543 - jo short taken_call_478 + jo short taken_call_474 mov r15, rax - jmp rx_i_479 -taken_call_478: + jmp rx_i_475 +taken_call_474: push rax - call rx_i_73 
+ call rx_i_69 -rx_i_479: ;FPSUB +rx_i_475: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r10, 0910dcdeeh mov eax, r10d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm9 - mov eax, r15d - xor eax, 04a936216h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm9 + movaps xmm7, xmm0 -rx_i_480: ;FPSUB +rx_i_476: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r8, 07ab3b5a4h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm2 - mov eax, r9d - xor eax, 0b01bb14ch - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm2 + movaps xmm9, xmm0 -rx_i_481: ;FPADD +rx_i_477: ;FPADD dec edi - js rx_finish + jz rx_finish xor r12, 07a29ec63h mov eax, r12d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, -4487871971018670080 - movd xmm1, rax - addsd xmm0, xmm1 - movsd xmm6, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm6, xmm0 + mov eax, r14d + xor eax, 0e81fc7a6h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm6 -rx_i_482: ;MULH_64 +rx_i_478: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r14, 02d3d7e7fh mov ecx, r14d - call rx_read_dataset - mov rcx, r10 - mul rcx - mov rax, rdx + call rx_read_dataset_r + imul rax, r10 mov r12, rax -rx_i_483: ;MUL_64 +rx_i_479: ;MUL_64 dec edi - js rx_finish + jz rx_finish xor r12, 09b49c793h mov eax, r12d and eax, 2047 @@ -6781,24 +6445,19 @@ rx_i_483: ;MUL_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_484: ;FPSUB +rx_i_480: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r9, 0a9cc4f01h mov eax, r9d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm4 - mov eax, r14d - xor eax, 0d8750eeh - and eax, 32767 - movd qword ptr [rsi + rax * 
8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm4 + movaps xmm6, xmm0 -rx_i_485: ;DIV_64 +rx_i_481: ;DIV_64 dec edi - js rx_finish + jz rx_finish xor r14, 0225ba1f9h mov eax, r14d and eax, 2047 @@ -6811,61 +6470,54 @@ rx_i_485: ;DIV_64 div rcx mov r12, rax -rx_i_486: ;XOR_64 +rx_i_482: ;XOR_64 dec edi - js rx_finish + jz rx_finish xor r14, 044a0f592h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r xor rax, r12 mov r11, rax -rx_i_487: ;FPADD +rx_i_483: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 07f71f219h mov ecx, r11d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4743722256075587584 - movd xmm1, rax - addsd xmm0, xmm1 - mov eax, r14d - xor eax, 0545908cah - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm6 + movaps xmm6, xmm0 -rx_i_488: ;ROL_64 +rx_i_484: ;ROL_64 dec edi - js rx_finish + jz rx_finish xor r12, 07027bacdh mov ecx, r12d - call rx_read_dataset + call rx_read_dataset_r rol rax, 37 mov r11, rax -rx_i_489: ;CALL +rx_i_485: ;CALL dec edi - js rx_finish + jz rx_finish xor r13, 03a04647h mov ecx, r13d - call rx_read_dataset + call rx_read_dataset_r cmp r8d, 554879918 - jno short taken_call_489 + jno short taken_call_485 mov rcx, rax mov eax, r15d xor eax, 02112cbaeh and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_490 -taken_call_489: + jmp rx_i_486 +taken_call_485: push rax - call rx_i_62 + call rx_i_58 -rx_i_490: ;ADD_64 +rx_i_486: ;ADD_64 dec edi - js rx_finish + jz rx_finish xor r15, 0ad072937h mov eax, r15d and eax, 2047 @@ -6877,18 +6529,18 @@ rx_i_490: ;ADD_64 and eax, 32767 mov qword ptr [rsi + rax * 8], rcx -rx_i_491: ;SUB_64 +rx_i_487: ;SUB_64 dec edi - js rx_finish + jz rx_finish xor r11, 07f78ad34h mov ecx, r11d - call rx_read_dataset + call rx_read_dataset_r sub rax, -333279706 mov r11, rax -rx_i_492: ;IMULH_64 +rx_i_488: ;IMULH_64 dec edi - js rx_finish + jz rx_finish xor r12, 0d8b1788eh mov eax, r12d and eax, 32767 
@@ -6898,146 +6550,132 @@ rx_i_492: ;IMULH_64 mov rax, rdx mov r12, rax -rx_i_493: ;CALL +rx_i_489: ;CALL dec edi - js rx_finish + jz rx_finish xor r10, 0b2ec9f3ah mov eax, r10d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp r15d, -1127175870 - jge short taken_call_493 + jge short taken_call_489 mov rcx, rax mov eax, r8d xor eax, 0bcd0a942h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_494 -taken_call_493: + jmp rx_i_490 +taken_call_489: push rax - call rx_i_79 + call rx_i_75 -rx_i_494: ;FPADD +rx_i_490: ;FPADD dec edi - js rx_finish + jz rx_finish xor r11, 015c7f598h mov ecx, r11d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - mov eax, r15d - xor eax, 0ab8bd68h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm7, xmm0 -rx_i_495: ;FPADD +rx_i_491: ;FPADD dec edi - js rx_finish + jz rx_finish xor r8, 0902da6bdh mov ecx, r8d - call rx_read_dataset - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - movsd xmm7, xmm0 + call rx_read_dataset_f + addpd xmm0, xmm9 + movaps xmm7, xmm0 + mov eax, r15d + xor eax, 0b0f0fca4h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm7 -rx_i_496: ;OR_64 +rx_i_492: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r9, 0491090d9h mov ecx, r9d - call rx_read_dataset + call rx_read_dataset_r or rax, r9 mov r12, rax -rx_i_497: ;FPSUB +rx_i_493: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r8, 09de81282h mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4740027165670637568 - movd xmm1, rax - subsd xmm0, xmm1 - mov eax, r12d - xor eax, 02feb2fd7h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm9 + movaps xmm4, xmm0 -rx_i_498: ;MUL_32 +rx_i_494: ;MUL_32 dec edi - js rx_finish + jz rx_finish xor r10, 0b0d50e46h mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r mov ecx, eax mov 
eax, r11d imul rax, rcx mov r14, rax -rx_i_499: ;FPMUL +rx_i_495: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r11, 0e276cad1h mov eax, r11d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mov rax, 4739626697148596224 - movd xmm1, rax - mulsd xmm0, xmm1 - mov eax, r8d - xor eax, 02d12bd27h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm2 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_500: ;OR_64 +rx_i_496: ;OR_64 dec edi - js rx_finish + jz rx_finish xor r14, 0fe757b73h mov ecx, r14d - call rx_read_dataset + call rx_read_dataset_r or rax, -359802064 mov r9, rax -rx_i_501: ;FPDIV +rx_i_497: ;FPDIV dec edi - js rx_finish + jz rx_finish xor r8, 08d25742eh mov eax, r8d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - divsd xmm0, xmm3 - mov eax, r8d - xor eax, 0a800c041h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + divpd xmm0, xmm3 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 -rx_i_502: ;FPMUL +rx_i_498: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r15, 0e066fd15h mov eax, r15d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - movsd xmm8, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 + mov eax, r8d + xor eax, 09dc5a1f9h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm8 -rx_i_503: ;IMUL_32 +rx_i_499: ;IMUL_32 dec edi - js rx_finish + jz rx_finish xor r12, 08925556bh mov eax, r12d and eax, 2047 @@ -7047,31 +6685,31 @@ rx_i_503: ;IMUL_32 imul rax, rcx mov r8, rax -rx_i_504: ;CALL +rx_i_500: ;CALL dec edi - js rx_finish + jz rx_finish xor r10, 04bc870ebh mov eax, r10d and eax, 
32767 mov rax, qword ptr [rsi + rax * 8] cmp r13d, 1243939650 - jl short taken_call_504 + jl short taken_call_500 mov rcx, rax mov eax, r10d xor eax, 04a250342h and eax, 32767 mov qword ptr [rsi + rax * 8], rcx - jmp rx_i_505 -taken_call_504: + jmp rx_i_501 +taken_call_500: push rax - call rx_i_3 + call rx_i_511 -rx_i_505: ;SHR_64 +rx_i_501: ;SHR_64 dec edi - js rx_finish + jz rx_finish xor r8, 07d46c503h mov ecx, r8d - call rx_read_dataset + call rx_read_dataset_r mov rcx, r10 shr rax, cl mov rcx, rax @@ -7080,16 +6718,14 @@ rx_i_505: ;SHR_64 and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_506: ;RET +rx_i_502: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 09e70b20ch mov ecx, r10d - call rx_read_dataset + call rx_read_dataset_r cmp rsp, rbp - je short not_taken_ret_506 - cmp r15d, 148394770 - jno short not_taken_ret_506 + je short not_taken_ret_502 xor rax, qword ptr [rsp + 8] mov rcx, rax mov eax, r9d @@ -7097,79 +6733,131 @@ rx_i_506: ;RET and eax, 2047 mov qword ptr [rsi + rax * 8], rcx ret 8 -not_taken_ret_506: +not_taken_ret_502: mov rcx, rax mov eax, r9d xor eax, 08d85312h and eax, 2047 mov qword ptr [rsi + rax * 8], rcx -rx_i_507: ;FPSUB +rx_i_503: ;FPSUB dec edi - js rx_finish + jz rx_finish xor r13, 0442e4850h mov eax, r13d and eax, 32767 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - subsd xmm0, xmm2 - movsd xmm9, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm2 + movaps xmm9, xmm0 + mov eax, r9d + xor eax, 080465282h + and eax, 2047 + movlpd qword ptr [rsi + rax * 8], xmm9 -rx_i_508: ;FPADD +rx_i_504: ;FPADD dec edi - js rx_finish + jz rx_finish xor r13, 099d48347h mov eax, r13d and eax, 2047 - mov rax, qword ptr [rsi + rax * 8] - and rax, -2048 - cvtsi2sd xmm0, rax - addsd xmm0, xmm9 - movsd xmm4, xmm0 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + addpd xmm0, xmm9 + movaps xmm4, xmm0 + mov eax, r12d + xor eax, 0be8cbb18h + and eax, 32767 + movhpd qword ptr [rsi + rax * 8], xmm4 -rx_i_509: 
;FPMUL +rx_i_505: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r12, 032c0a28ah mov ecx, r12d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm4 - movsd xmm8, xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm4 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm8, xmm0 + mov eax, r8d + xor eax, 021b54eaeh + and eax, 32767 + movlpd qword ptr [rsi + rax * 8], xmm8 -rx_i_510: ;FPMUL +rx_i_506: ;FPMUL dec edi - js rx_finish + jz rx_finish xor r9, 0a973d58ch mov ecx, r9d - call rx_read_dataset - or rax, 2048 - and rax, -2048 - cvtsi2sd xmm0, rax - mulsd xmm0, xmm9 - mov eax, r11d - xor eax, 05e890759h - and eax, 2047 - movd qword ptr [rsi + rax * 8], xmm0 + call rx_read_dataset_f + mulpd xmm0, xmm9 + movaps xmm1, xmm0 + cmpeqpd xmm1, xmm1 + andps xmm0, xmm1 + movaps xmm3, xmm0 -rx_i_511: ;RET +rx_i_507: ;RET dec edi - js rx_finish + jz rx_finish xor r10, 0d3b7165ch mov eax, r10d and eax, 2047 mov rax, qword ptr [rsi + rax * 8] cmp rsp, rbp - je short not_taken_ret_511 - cmp r11d, -260506265 - ja short not_taken_ret_511 + je short not_taken_ret_507 xor rax, qword ptr [rsp + 8] mov r14, rax ret 8 -not_taken_ret_511: +not_taken_ret_507: mov r14, rax +rx_i_508: ;RET + dec edi + jz rx_finish + xor r13, 0da34d818h + mov ecx, r13d + call rx_read_dataset_r + cmp rsp, rbp + je short not_taken_ret_508 + xor rax, qword ptr [rsp + 8] + mov r8, rax + ret 8 +not_taken_ret_508: + mov r8, rax + +rx_i_509: ;CALL + dec edi + jz rx_finish + xor r11, 01b2873f2h + mov eax, r11d + and eax, 2047 + mov rax, qword ptr [rsi + rax * 8] + cmp r8d, 1826115244 + jno short taken_call_509 + mov r10, rax + jmp rx_i_510 +taken_call_509: + push rax + call rx_i_42 + +rx_i_510: ;FPSUB + dec edi + jz rx_finish + xor r8, 0db65513ch + mov eax, r8d + and eax, 2047 + cvtdq2pd xmm0, qword ptr [rsi + rax * 8] + subpd xmm0, xmm2 + movaps xmm9, xmm0 + +rx_i_511: ;ROL_64 + dec edi + jz rx_finish + xor r11, 02bd79286h + mov ecx, r11d + call 
rx_read_dataset_r + mov rcx, r10 + rol rax, cl + mov r11, rax + jmp rx_i_0