diff --git a/makefile b/makefile
index 4b51084..21584cb 100644
--- a/makefile
+++ b/makefile
@@ -12,6 +12,9 @@ OBJDIR=obj
LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
+ifeq ($(PLATFORM),x86_64)
+ ROBJS += $(OBJDIR)/JitCompilerX86-static.o
+endif
all: release test
@@ -57,6 +60,9 @@ $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) |
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
+$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_r.inc read_f.inc)) | $(OBJDIR)
+ $(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
+
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp
index 0d61f43..bb0e106 100644
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@@ -54,7 +54,7 @@ namespace RandomX {
(this->*generator)(instr, i);
}
- void AssemblyGeneratorX86::gena(Instruction& instr) {
+ void AssemblyGeneratorX86::genar(Instruction& instr) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
switch (instr.loca & 7)
{
@@ -63,7 +63,7 @@ namespace RandomX {
case 2:
case 3:
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
- asmCode << "\tcall rx_read_dataset" << std::endl;
+ asmCode << "\tcall rx_read_dataset_r" << std::endl;
return;
case 4:
@@ -80,6 +80,33 @@ namespace RandomX {
}
}
+ // Emits the operand-A load used by the packed floating-point handlers (h_FPADD, h_FPSUB, h_FPMUL, h_FPDIV, h_FPSQRT).
+ void AssemblyGeneratorX86::genaf(Instruction& instr) {
+ asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl; // address register ^= addra, printed as a hex immediate
+ switch (instr.loca & 7) // the low three bits of loca select where the operand is read from
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl; // low 32 bits of the address register go to ecx
+ asmCode << "\tcall rx_read_dataset_f" << std::endl; // routine is defined outside this function; presumably leaves the operand in xmm0 -- TODO confirm
+ return;
+ // loca == 4: scratchpad read, index masked with ScratchpadL2 - 1
+ case 4:
+ asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
+ asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
+ asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl; // two packed int32 from the 8-byte slot -> two doubles in xmm0
+ return;
+ // loca 5-7: same scratchpad read, index masked with ScratchpadL1 - 1
+ default:
+ asmCode << "\tmov eax, " << regR32[instr.rega % RegistersCount] << std::endl;
+ asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
+ asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl; // two packed int32 from the 8-byte slot -> two doubles in xmm0
+ return;
+ }
+ }
+
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
switch (instr.locb & 7)
{
@@ -87,8 +114,6 @@ namespace RandomX {
case 1:
case 2:
case 3:
- case 4:
- case 5:
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
return;
@@ -133,26 +158,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::genbf(Instruction& instr, const char* instrx86) {
- asmCode << "\tand rax, -2048" << std::endl;
- asmCode << "\tcvtsi2sd xmm0, rax" << std::endl;
- switch (instr.locb & 7)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
- return;
- default:
- convertible_t bimm;
- bimm.f64 = (double)instr.imm32;
- asmCode << "\tmov rax, " << bimm.i64 << std::endl;
- asmCode << "\tmovd xmm1, rax" << std::endl;
- asmCode << "\t" << instrx86 << " xmm0, xmm1" << std::endl;
- return;
- }
+ asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
}
void AssemblyGeneratorX86::gencr(Instruction& instr) {
@@ -165,7 +171,7 @@ namespace RandomX {
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
if (trace) {
- asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
+ asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
}
return;
@@ -178,76 +184,75 @@ namespace RandomX {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
if (trace) {
- asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
+ asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
}
return;
default:
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
if (trace) {
- asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
+ asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
}
}
}
- void AssemblyGeneratorX86::gencf(Instruction& instr) {
+ void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) { // Emits the write-back of a floating-point result. NOTE(review): the default for alwaysLow is given on this definition while the header declares gencf(Instruction&, bool) without one -- consider moving the default to the declaration
+ if(!alwaysLow) // with alwaysLow the caller has already written the destination register itself (see h_FPROUND)
+ asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl; // copy the whole xmm0 result into the destination F register
+ const char* store = (!alwaysLow && (instr.locc & 8)) ? "movhpd" : "movlpd"; // bit 3 of locc picks the high half for the memory store, unless alwaysLow forces the low half
switch (instr.locc & 7)
{
- case 0:
- asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
- asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
- asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
- asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
- break;
+ case 4: // scratchpad store, index masked with ScratchpadL2 - 1
+ asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
+ asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; // index = low 32 bits of the R register ^ addrc
+ asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
+ asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; // store one 64-bit half of the F register
+ break;
- case 1:
- case 2:
- case 3:
- asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
- asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
- asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
- asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
- break;
-
- default:
- asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
- break;
+ case 5: // 5-7: scratchpad store, index masked with ScratchpadL1 - 1
+ case 6:
+ case 7:
+ asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
+ asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl; // index = low 32 bits of the R register ^ addrc
+ asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
+ asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl; // store one 64-bit half of the F register
+ break;
}
if (trace) {
- asmCode << "\tmovd qword ptr [rsi + rdi * 8 + 262144], xmm0" << std::endl;
+ asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl; // trace copy of the same half, indexed by rdi at a fixed byte offset from rsi
}
}
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tadd rax, ";
genbr1(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tadd eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tsub rax, ";
genbr1(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tsub eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\timul rax, ";
if ((instr.locb & 7) >= 6) {
asmCode << "rax, ";
@@ -257,7 +262,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\tmul rcx" << std::endl;
@@ -266,7 +271,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, ";
genbr132(instr);
@@ -275,7 +280,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmovsxd rcx, eax" << std::endl;
if ((instr.locb & 7) >= 6) {
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
@@ -288,7 +293,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\timul rcx" << std::endl;
@@ -297,7 +302,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
if ((instr.locb & 7) >= 6) {
if (instr.imm32 == 0) {
asmCode << "\tmov ecx, 1" << std::endl;
@@ -318,7 +323,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmov edx, ";
genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl;
@@ -339,123 +344,125 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tand rax, ";
genbr1(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tand eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tor rax, ";
genbr1(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tor eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\txor rax, ";
genbr1(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\txor eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
genbr0(instr, "shl");
gencr(instr);
}
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
genbr0(instr, "shr");
gencr(instr);
}
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
genbr0(instr, "sar");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
genbr0(instr, "rol");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
genbr0(instr, "ror");
gencr(instr);
}
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
- gena(instr);
- genbf(instr, "addsd");
+ genaf(instr);
+ genbf(instr, "addpd");
gencf(instr);
}
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
- gena(instr);
- genbf(instr, "subsd");
+ genaf(instr);
+ genbf(instr, "subpd");
gencf(instr);
}
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
- gena(instr);
- asmCode << "\tor rax, 2048" << std::endl;
- genbf(instr, "mulsd");
+ genaf(instr);
+ genbf(instr, "mulpd");
+ asmCode << "\tmovaps xmm1, xmm0" << std::endl;
+ asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
+ asmCode << "\tandps xmm0, xmm1" << std::endl;
gencf(instr);
}
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
- gena(instr);
- asmCode << "\tor rax, 2048" << std::endl;
- genbf(instr, "divsd");
+ genaf(instr);
+ genbf(instr, "divpd");
+ asmCode << "\tmovaps xmm1, xmm0" << std::endl;
+ asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
+ asmCode << "\tandps xmm0, xmm1" << std::endl;
gencf(instr);
}
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
- gena(instr);
- asmCode << "\tmov rcx, 9223372036854773760" << std::endl;
- asmCode << "\tand rax, rcx" << std::endl;
- asmCode << "\tcvtsi2sd xmm0, rax" << std::endl;
- asmCode << "\tsqrtsd xmm0, xmm0" << std::endl;
+ genaf(instr);
+ asmCode << "\tandps xmm0, xmm10" << std::endl;
+ asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
gencf(instr);
}
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tshl eax, 13" << std::endl;
asmCode << "\tand rcx, -2048" << std::endl;
asmCode << "\tand eax, 24576" << std::endl;
- asmCode << "\tcvtsi2sd xmm0, rcx" << std::endl;
+ asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl;
asmCode << "\tor eax, 40896" << std::endl;
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
- gencf(instr);
+ gencf(instr, true);
}
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
@@ -481,7 +488,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\t" << jumpCondition(instr);
asmCode << " short taken_call_" << i << std::endl;
@@ -489,14 +496,14 @@ namespace RandomX {
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
asmCode << "taken_call_" << i << ":" << std::endl;
if (trace) {
- asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
+ asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
}
asmCode << "\tpush rax" << std::endl;
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
}
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
- gena(instr);
+ genar(instr);
asmCode << "\tcmp rsp, rbp" << std::endl;
asmCode << "\tje short not_taken_ret_" << i << std::endl;
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp
index e61fa26..3097a94 100644
--- a/src/AssemblyGeneratorX86.hpp
+++ b/src/AssemblyGeneratorX86.hpp
@@ -38,13 +38,14 @@ namespace RandomX {
static InstructionGenerator engine[256];
std::stringstream asmCode;
- void gena(Instruction&);
+ void genar(Instruction&);
+ void genaf(Instruction&);
void genbr0(Instruction&, const char*);
void genbr1(Instruction&);
void genbr132(Instruction&);
void genbf(Instruction&, const char*);
void gencr(Instruction&);
- void gencf(Instruction&);
+ void gencf(Instruction&, bool);
void generateCode(Instruction&, int);
diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp
index 5ef3cd7..7803003 100644
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@@ -26,9 +26,7 @@ along with RandomX. If not, see.
namespace RandomX {
CompiledVirtualMachine::CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {
-#if !defined(_M_X64) && !defined(__x86_64__)
- throw std::runtime_error("Compiled VM only supports x86-64 CPUs");
-#endif
+
}
void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
@@ -51,7 +49,7 @@ namespace RandomX {
void CompiledVirtualMachine::execute() {
//executeProgram(reg, mem, scratchpad, readDataset);
compiler.getProgramFunc()(reg, mem, scratchpad);
-#ifdef TRACE
+#ifdef TRACEVM
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
std::cout << std::hex << tracepad[i].u64 << std::endl;
}
diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp
index e7e9299..0932cfe 100644
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@@ -18,7 +18,7 @@ along with RandomX. If not, see.
*/
#pragma once
-//#define TRACE
+//#define TRACEVM
#include "VirtualMachine.hpp"
#include "JitCompilerX86.hpp"
@@ -34,7 +34,7 @@ namespace RandomX {
return compiler.getCode();
}
private:
-#ifdef TRACE
+#ifdef TRACEVM
convertible_t tracepad[InstructionCount];
#endif
JitCompilerX86 compiler;
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index 2bb4d75..c436ef7 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -44,9 +44,11 @@ namespace RandomX {
*(((uint32_t*)&reg) + i) = gen();
}
FPINIT();
- for (int i = 0; i < 8; ++i) {
- reg.f[i].f64 = (double)reg.f[i].i64;
+ for (int i = 0; i < RegistersCount; ++i) {
+ reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
+ reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
}
+ //std::cout << reg;
p.initialize(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.mx = *(((uint32_t*)seed) + 5);
@@ -97,52 +99,36 @@ namespace RandomX {
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
switch (inst.locb & 7)
{
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- return reg.r[inst.regb % RegistersCount];
- case 6:
- case 7:
- convertible_t temp;
- temp.i64 = inst.imm32; //sign-extend imm32
- return temp;
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ return reg.r[inst.regb % RegistersCount];
+ case 6:
+ case 7:
+ convertible_t temp;
+ temp.i64 = inst.imm32; //sign-extend imm32
+ return temp;
}
}
convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) {
switch (inst.locb & 7)
{
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- return reg.r[inst.regb % RegistersCount];
- case 6:
- case 7:
- convertible_t temp;
- temp.u64 = inst.imm8;
- return temp;
- }
- }
-
- double InterpretedVirtualMachine::loadbf(Instruction& inst) {
- switch (inst.locb & 7)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- return reg.f[inst.regb % RegistersCount].f64;
- case 6:
- case 7:
- return (double)inst.imm32;
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ return reg.r[inst.regb % RegistersCount];
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ convertible_t temp;
+ temp.u64 = inst.imm8;
+ return temp;
}
}
@@ -150,43 +136,61 @@ namespace RandomX {
addr_t addr;
switch (inst.locc & 7)
{
- case 0:
- addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
- return scratchpad[addr % ScratchpadL2];
+ case 0:
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ return scratchpad[addr % ScratchpadL2];
- case 1:
- case 2:
- case 3:
- addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
- return scratchpad[addr % ScratchpadL1];
+ case 1:
+ case 2:
+ case 3:
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ return scratchpad[addr % ScratchpadL1];
- case 4:
- case 5:
- case 6:
- case 7:
- return reg.r[inst.regc % RegistersCount];
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return reg.r[inst.regc % RegistersCount];
}
}
- convertible_t& InterpretedVirtualMachine::getcf(Instruction& inst) {
+ void InterpretedVirtualMachine::writecf(Instruction& inst, fpu_reg_t& regc) { // Stores one half of regc to the scratchpad when locc selects a memory destination; bit 3 of locc picks hi over lo
addr_t addr;
switch (inst.locc & 7)
{
- case 0:
- addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
- return scratchpad[addr % ScratchpadL2];
+ case 4: // index reduced modulo ScratchpadL2
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ scratchpad[addr % ScratchpadL2] = (inst.locc & 8) ? regc.hi : regc.lo;
+ break;
- case 1:
- case 2:
- case 3:
- addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
- return scratchpad[addr % ScratchpadL1];
+ case 5: // 5-7: index reduced modulo ScratchpadL1
+ case 6:
+ case 7:
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ scratchpad[addr % ScratchpadL1] = (inst.locc & 8) ? regc.hi : regc.lo;
- case 4:
- case 5:
- case 6:
- case 7:
- return reg.f[inst.regc % RegistersCount];
+ break; // explicit break instead of falling through into default, matching case 4
+ default: break; // locc 0-3: nothing is written to the scratchpad
+ }
+ }
+
+ void InterpretedVirtualMachine::writecflo(Instruction& inst, fpu_reg_t& regc) { // Stores regc.lo to the scratchpad when locc selects a memory destination; the hi half is never stored
+ addr_t addr;
+ switch (inst.locc & 7)
+ {
+ case 4: // index reduced modulo ScratchpadL2
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ scratchpad[addr % ScratchpadL2] = regc.lo;
+ break;
+
+ case 5: // 5-7: index reduced modulo ScratchpadL1
+ case 6:
+ case 7:
+ addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
+ scratchpad[addr % ScratchpadL1] = regc.lo;
+ break; // explicit break instead of falling through into default, matching case 4
+ default: // locc 0-3: nothing is written to the scratchpad
+ break;
}
}
@@ -194,22 +198,18 @@ namespace RandomX {
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
#define FPU_RETIRE(x) x(a, b, c); \
+ writecf(inst, c); \
if(trace) { \
- convertible_t bc; \
- bc.f64 = b; \
- std::cout << std::hex << /*a.u64 << " " << bc.u64 << " " <<*/ c.u64 << std::endl; \
+ std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl; \
} \
if(fpuCheck) { \
- convertible_t bc; \
- if(c.f64 != c.f64) { \
+ if(c.hi.f64 != c.hi.f64 || c.lo.f64 != c.lo.f64) { \
std::stringstream ss; \
- bc.f64 = b; \
- ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \
+ ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
throw std::runtime_error(ss.str()); \
- } else if (std::fpclassify(c.f64) == FP_SUBNORMAL) {\
+ } else if (std::fpclassify(c.hi.f64) == FP_SUBNORMAL || std::fpclassify(c.lo.f64) == FP_SUBNORMAL) {\
std::stringstream ss; \
- bc.f64 = b; \
- ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \
+ ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
throw std::runtime_error(ss.str()); \
} \
}
@@ -220,8 +220,13 @@ namespace RandomX {
#define INC_COUNT(x)
#endif
-#define FPU_RETIRE_NB(x) x(a, b, c); \
- if(trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
+#define FPU_RETIRE_FPSQRT(x) FPSQRT(a, b, c); \
+ writecf(inst, c); \
+ if(trace) std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl;
+
+#define FPU_RETIRE_FPROUND(x) FPROUND(a, b, c); \
+ writecflo(inst, c); \
+ if(trace) std::cout << std::hex << c.lo.u64 << std::endl;
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
@@ -242,17 +247,17 @@ namespace RandomX {
#define FPU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
- double b = loadbf(inst); \
- convertible_t& c = getcf(inst); \
+ fpu_reg_t& b = reg.f[inst.regb % RegistersCount]; \
+ fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
FPU_RETIRE(x) \
}
#define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
- convertible_t b; \
- convertible_t& c = getcf(inst); \
- FPU_RETIRE_NB(x) \
+ fpu_reg_t b; \
+ fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
+ FPU_RETIRE_##x(x) \
}
ALU_INST(ADD_64)
diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp
index 5a6f49a..b8fd98f 100644
--- a/src/InterpretedVirtualMachine.hpp
+++ b/src/InterpretedVirtualMachine.hpp
@@ -18,7 +18,7 @@ along with RandomX. If not, see.
*/
#pragma once
-#define STATS
+//#define STATS
#include "VirtualMachine.hpp"
#include "Program.hpp"
#include
@@ -88,9 +88,9 @@ namespace RandomX {
convertible_t loada(Instruction&);
convertible_t loadbr0(Instruction&);
convertible_t loadbr1(Instruction&);
- double loadbf(Instruction&);
convertible_t& getcr(Instruction&);
- convertible_t& getcf(Instruction&);
+ void writecf(Instruction&, fpu_reg_t&);
+ void writecflo(Instruction&, fpu_reg_t&);
void stackPush(convertible_t& c) {
stack.push_back(c);
diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S
new file mode 100644
index 0000000..be156ef
--- /dev/null
+++ b/src/JitCompilerX86-static.S
@@ -0,0 +1,58 @@
+;# Copyright (c) 2018 tevador
+;#
+;# This file is part of RandomX.
+;#
+;# RandomX is free software: you can redistribute it and/or modify
+;# it under the terms of the GNU General Public License as published by
+;# the Free Software Foundation, either version 3 of the License, or
+;# (at your option) any later version.
+;#
+;# RandomX is distributed in the hope that it will be useful,
+;# but WITHOUT ANY WARRANTY; without even the implied warranty of
+;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;# GNU General Public License for more details.
+;#
+;# You should have received a copy of the GNU General Public License
+;# along with RandomX. If not, see https://www.gnu.org/licenses/.
+
+.intel_syntax noprefix
+#if defined(__APPLE__)
+.text
+#else
+.section .text
+#endif
+#if defined(__APPLE__) /* only Mach-O prefixes C symbols with '_'; this file is built for x86-64 only, where Windows and ELF use no prefix */
+#define DECL(x) _##x
+#else
+#define DECL(x) x /* symbol name exactly as declared in JitCompilerX86-static.hpp */
+#endif
+.global DECL(randomx_program_prologue)
+.global DECL(randomx_program_begin)
+.global DECL(randomx_program_epilogue)
+.global DECL(randomx_program_read_r)
+.global DECL(randomx_program_read_f)
+.global DECL(randomx_program_end)
+
+.align 64
+DECL(randomx_program_prologue):
+ #include "asm/program_prologue_linux.inc"
+
+.align 64
+DECL(randomx_program_begin):
+ nop
+
+.align 64
+DECL(randomx_program_epilogue):
+ #include "asm/program_epilogue_linux.inc"
+
+.align 64
+DECL(randomx_program_read_r):
+ #include "asm/program_read_r.inc"
+
+.align 64
+DECL(randomx_program_read_f):
+ #include "asm/program_read_f.inc"
+
+.align 64
+DECL(randomx_program_end):
+ nop
\ No newline at end of file
diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm
new file mode 100644
index 0000000..d7d3d4b
--- /dev/null
+++ b/src/JitCompilerX86-static.asm
@@ -0,0 +1,59 @@
+;# Copyright (c) 2018 tevador
+;#
+;# This file is part of RandomX.
+;#
+;# RandomX is free software: you can redistribute it and/or modify
+;# it under the terms of the GNU General Public License as published by
+;# the Free Software Foundation, either version 3 of the License, or
+;# (at your option) any later version.
+;#
+;# RandomX is distributed in the hope that it will be useful,
+;# but WITHOUT ANY WARRANTY; without even the implied warranty of
+;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;# GNU General Public License for more details.
+;#
+;# You should have received a copy of the GNU General Public License
+;# along with RandomX. If not, see https://www.gnu.org/licenses/.
+
+_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
+
+PUBLIC randomx_program_prologue
+PUBLIC randomx_program_begin
+PUBLIC randomx_program_epilogue
+PUBLIC randomx_program_read_r
+PUBLIC randomx_program_read_f
+PUBLIC randomx_program_end
+
+ALIGN 64
+randomx_program_prologue PROC
+ include asm/program_prologue_win64.inc
+randomx_program_prologue ENDP
+
+ALIGN 64
+randomx_program_begin PROC
+ nop
+randomx_program_begin ENDP
+
+ALIGN 64
+randomx_program_epilogue PROC
+ include asm/program_epilogue_win64.inc
+randomx_program_epilogue ENDP
+
+ALIGN 64
+randomx_program_read_r PROC
+ include asm/program_read_r.inc
+randomx_program_read_r ENDP
+
+ALIGN 64
+randomx_program_read_f PROC
+ include asm/program_read_f.inc
+randomx_program_read_f ENDP
+
+ALIGN 64
+randomx_program_end PROC
+ nop
+randomx_program_end ENDP
+
+_RANDOMX_JITX86_STATIC ENDS
+
+END
\ No newline at end of file
diff --git a/src/JitCompilerX86-static.hpp b/src/JitCompilerX86-static.hpp
new file mode 100644
index 0000000..6052283
--- /dev/null
+++ b/src/JitCompilerX86-static.hpp
@@ -0,0 +1,27 @@
+/*
+Copyright (c) 2018 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see https://www.gnu.org/licenses/.
+*/
+#pragma once
+extern "C" { // labels exported by JitCompilerX86-static.S / .asm; JitCompilerX86.cpp only takes their addresses to measure and copy the code between them
+ void randomx_program_prologue(); // start of the prologue code
+ void randomx_program_begin(); // end of the prologue (placeholder nop)
+ void randomx_program_epilogue(); // start of the epilogue code
+ void randomx_program_read_r(); // start of the included program_read_r.inc code
+ void randomx_program_read_f(); // start of the included program_read_f.inc code
+ void randomx_program_end(); // end marker after the last routine (placeholder nop)
+}
\ No newline at end of file
diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp
index fe10229..b03a330 100644
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@@ -34,6 +34,16 @@ along with RandomX. If not, see.
namespace RandomX {
+#if !defined(_M_X64) && !defined(__x86_64__) // taken when neither x86-64 target macro is defined
+ JitCompilerX86::JitCompilerX86() { // stub constructor for targets other than x86-64
+ throw std::runtime_error("JIT compiler only supports x86-64 CPUs"); // construction always fails on these targets
+ }
+
+ void JitCompilerX86::generateProgram(Pcg32& gen) { // stub definition; presumably kept so the class still links on these targets
+ // intentionally empty: the constructor above always throws, so no instance exists to call this on
+ }
+#else // x86-64: the real implementation follows
+
/*
REGISTER ALLOCATION:
@@ -41,7 +51,7 @@ namespace RandomX {
rbx -> MemoryRegisters& memory
rcx -> temporary
rdx -> temporary
- rsi -> convertible_t& scratchpad
+ rsi -> convertible_t* scratchpad
rdi -> "ic" (instruction counter)
rbp -> beginning of VM stack
rsp -> end of VM stack
@@ -63,6 +73,7 @@ namespace RandomX {
xmm7 -> "f7"
xmm8 -> "f0"
xmm9 -> "f1"
+ xmm10 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
STACK STRUCTURE:
@@ -81,127 +92,23 @@ namespace RandomX {
*/
- constexpr uint8_t ic3 = ((InstructionCount + 1) >> 24);
- constexpr uint8_t ic2 = ((InstructionCount + 1) >> 16);
- constexpr uint8_t ic1 = ((InstructionCount + 1) >> 8);
- constexpr uint8_t ic0 = ((InstructionCount + 1) >> 0);
+#include "JitCompilerX86-static.hpp"
- const uint8_t prologue[] = {
- 0x53, //push rbx
- 0x55, //push rbp
-#ifdef _WIN32
- 0x57, //push rdi
- 0x56, //push rsi
-#endif
- 0x41, 0x54, //push r12
- 0x41, 0x55, //push r13
- 0x41, 0x56, //push r14
- 0x41, 0x57, //push r15
-#ifdef _WIN32
- 0x48, 0x83, 0xec, 0x48, //sub rsp,0x48
- 0xf3, 0x0f, 0x7f, 0x74, 0x24, 0x30, //movdqu XMMWORD PTR[rsp + 0x30],xmm6
- 0xf3, 0x0f, 0x7f, 0x7c, 0x24, 0x20, //movdqu XMMWORD PTR[rsp + 0x20],xmm7
- 0xf3, 0x44, 0x0f, 0x7f, 0x44, 0x24, 0x10, //movdqu XMMWORD PTR[rsp + 0x10],xmm8
- 0xf3, 0x44, 0x0f, 0x7f, 0x0c, 0x24, //movdqu XMMWORD PTR[rsp],xmm9
- 0x51, //push rcx
- 0x48, 0x8b, 0xda, //mov rbx,rdx
- 0x49, 0x8b, 0xf0, //mov rsi,r8
-#else
- 0x57, //push rdi
- 0x48, 0x8b, 0xde, //mov rbx, rsi
- 0x48, 0x8b, 0xf2, //mov rsi, rdx
- 0x48, 0x8b, 0xcf, //mov rcx, rdi
-#endif
- 0x48, 0x8b, 0xec, //mov rbp,rsp
- 0x48, 0xc7, 0xc7, ic0, ic1, ic2, ic3, //mov rdi, "InstructionCount"
- 0x4c, 0x8b, 0x01, //mov r8,QWORD PTR[rcx]
- 0x4c, 0x8b, 0x49, 0x08, //mov r9,QWORD PTR[rcx+0x8]
- 0x4c, 0x8b, 0x51, 0x10, //mov r10,QWORD PTR[rcx+0x10]
- 0x4c, 0x8b, 0x59, 0x18, //mov r11,QWORD PTR[rcx+0x18]
- 0x4c, 0x8b, 0x61, 0x20, //mov r12,QWORD PTR[rcx+0x20]
- 0x4c, 0x8b, 0x69, 0x28, //mov r13,QWORD PTR[rcx+0x28]
- 0x4c, 0x8b, 0x71, 0x30, //mov r14,QWORD PTR[rcx+0x30]
- 0x4c, 0x8b, 0x79, 0x38, //mov r15,QWORD PTR[rcx+0x38]
- 0xc7, 0x44, 0x24, 0xf8, 0xc0, 0x9f, 0x00, //mov DWORD PTR[rsp-0x8],0x9fc0
- 0x00,
- 0x0f, 0xae, 0x54, 0x24, 0xf8, //ldmxcsr DWORD PTR[rsp-0x8]
- 0xf2, 0x4c, 0x0f, 0x2a, 0x41, 0x40, //cvtsi2sd xmm8,QWORD PTR[rcx+0x40]
- 0xf2, 0x4c, 0x0f, 0x2a, 0x49, 0x48, //cvtsi2sd xmm9,QWORD PTR[rcx+0x48]
- 0xf2, 0x48, 0x0f, 0x2a, 0x51, 0x50, //cvtsi2sd xmm2,QWORD PTR[rcx+0x50]
- 0xf2, 0x48, 0x0f, 0x2a, 0x59, 0x58, //cvtsi2sd xmm3,QWORD PTR[rcx+0x58]
- 0xf2, 0x48, 0x0f, 0x2a, 0x61, 0x60, //cvtsi2sd xmm4,QWORD PTR[rcx+0x60]
- 0xf2, 0x48, 0x0f, 0x2a, 0x69, 0x68, //cvtsi2sd xmm5,QWORD PTR[rcx+0x68]
- 0xf2, 0x48, 0x0f, 0x2a, 0x71, 0x70, //cvtsi2sd xmm6,QWORD PTR[rcx+0x70]
- 0xf2, 0x48, 0x0f, 0x2a, 0x79, 0x78, //cvtsi2sd xmm7,QWORD PTR[rcx+0x78]
- };
+ const uint8_t* codePrologue = reinterpret_cast<const uint8_t*>(&randomx_program_prologue); // byte views of the assembled code, one pointer per exported label
+ const uint8_t* codeProgramBegin = reinterpret_cast<const uint8_t*>(&randomx_program_begin);
+ const uint8_t* codeEpilogue = reinterpret_cast<const uint8_t*>(&randomx_program_epilogue);
+ const uint8_t* codeReadDatasetR = reinterpret_cast<const uint8_t*>(&randomx_program_read_r);
+ const uint8_t* codeReadDatasetF = reinterpret_cast<const uint8_t*>(&randomx_program_read_f);
+ const uint8_t* codeProgramEnd = reinterpret_cast<const uint8_t*>(&randomx_program_end);
- const uint8_t epilogue[] = {
- 0x48, 0x8b, 0xe5, //mov rsp,rbp
- 0x59, //pop rcx
- 0x4c, 0x89, 0x01, //mov QWORD PTR [rcx],r8
- 0x4c, 0x89, 0x49, 0x08, //mov QWORD PTR [rcx+0x8],r9
- 0x4c, 0x89, 0x51, 0x10, //mov QWORD PTR [rcx+0x10],r10
- 0x4c, 0x89, 0x59, 0x18, //mov QWORD PTR [rcx+0x18],r11
- 0x4c, 0x89, 0x61, 0x20, //mov QWORD PTR [rcx+0x20],r12
- 0x4c, 0x89, 0x69, 0x28, //mov QWORD PTR [rcx+0x28],r13
- 0x4c, 0x89, 0x71, 0x30, //mov QWORD PTR [rcx+0x30],r14
- 0x4c, 0x89, 0x79, 0x38, //mov QWORD PTR [rcx+0x38],r15
- 0x66, 0x4c, 0x0f, 0x7e, 0x41, 0x40, //movq QWORD PTR [rcx+0x40],xmm8
- 0x66, 0x4c, 0x0f, 0x7e, 0x49, 0x48, //movq QWORD PTR [rcx+0x48],xmm9
- 0x66, 0x48, 0x0f, 0x7e, 0x51, 0x50, //movq QWORD PTR [rcx+0x50],xmm2
- 0x66, 0x48, 0x0f, 0x7e, 0x59, 0x58, //movq QWORD PTR [rcx+0x58],xmm3
- 0x66, 0x48, 0x0f, 0x7e, 0x61, 0x60, //movq QWORD PTR [rcx+0x60],xmm4
- 0x66, 0x48, 0x0f, 0x7e, 0x69, 0x68, //movq QWORD PTR [rcx+0x68],xmm5
- 0x66, 0x48, 0x0f, 0x7e, 0x71, 0x70, //movq QWORD PTR [rcx+0x70],xmm6
- 0x66, 0x48, 0x0f, 0x7e, 0x79, 0x78, //movq QWORD PTR [rcx+0x78],xmm7
-#ifdef _WIN32
- 0xf3, 0x44, 0x0f, 0x6f, 0x0c, 0x24, //movdqu xmm9,XMMWORD PTR [rsp]
- 0xf3, 0x44, 0x0f, 0x6f, 0x44, 0x24, 0x10, //movdqu xmm8,XMMWORD PTR [rsp+0x10]
- 0xf3, 0x0f, 0x6f, 0x7c, 0x24, 0x20, //movdqu xmm7,XMMWORD PTR [rsp+0x20]
- 0xf3, 0x0f, 0x6f, 0x74, 0x24, 0x30, //movdqu xmm6,XMMWORD PTR [rsp+0x30]
- 0x48, 0x83, 0xc4, 0x48, //add rsp,0x48
-#endif
- 0x41, 0x5f, //pop r15
- 0x41, 0x5e, //pop r14
- 0x41, 0x5d, //pop r13
- 0x41, 0x5c, //pop r12
-#ifdef _WIN32
- 0x5e, //pop rsi
- 0x5f, //pop rdi
-#endif
- 0x5d, //pop rbp
- 0x5b, //pop rbx
- 0xc3, //ret
- };
+ const int32_t prologueSize = codeProgramBegin - codePrologue;
+ const int32_t epilogueSize = codeReadDatasetR - codeEpilogue;
+ const int32_t readDatasetRSize = codeReadDatasetF - codeReadDatasetR;
+ const int32_t readDatasetFSize = codeProgramEnd - codeReadDatasetF;
- //41 bytes -> 1 cache line
- const uint8_t readDatasetSub[] = {
- 0x8b, 0x13, //mov edx,DWORD PTR [rbx]
- 0x48, 0x8b, 0x43, 0x08, //mov rax,QWORD PTR [rbx+0x8]
- 0x48, 0x8b, 0x04, 0x10, //mov rax,QWORD PTR [rax+rdx*1]
- 0x83, 0x03, 0x08, //add DWORD PTR [rbx],0x8
- 0x33, 0x4b, 0x04, //xor ecx,DWORD PTR [rbx+0x4]
- 0x89, 0x4b, 0x04, //mov DWORD PTR [rbx+0x4],ecx
- 0xf7, 0xc1, 0xf8, 0xff, 0x00, 0x00, //test ecx,0xfff8
- 0x75, 0x0d, //jne
- 0x83, 0xe1, 0xf8, //and ecx,0xfffffff8
- 0x89, 0x0b, //mov DWORD PTR [rbx],ecx
- 0x48, 0x8b, 0x53, 0x08, //mov rdx,QWORD PTR [rbx+0x8]
- 0x0f, 0x18, 0x0c, 0x0a, //prefetcht0 BYTE PTR [rdx+rcx*1]
- 0xc3, //ret
- };
-
- constexpr int getNumCacheLines(size_t size) {
- return (size + (CacheLineSize - 1)) / CacheLineSize;
- }
-
- constexpr int32_t align(int32_t pos, int32_t align) {
- return ((pos - 1) / align + 1) * align;
- }
-
- constexpr int32_t readDatasetSubOffset = CodeSize - CacheLineSize * getNumCacheLines(sizeof(readDatasetSub));
- constexpr int32_t epilogueOffset = readDatasetSubOffset - CacheLineSize * getNumCacheLines(sizeof(epilogue));
- constexpr int32_t startOffsetAligned = align(sizeof(prologue), CacheLineSize);
+ const int32_t readDatasetFOffset = CodeSize - readDatasetFSize;
+ const int32_t readDatasetROffset = readDatasetFOffset - readDatasetRSize;
+ const int32_t epilogueOffset = readDatasetROffset - epilogueSize;
JitCompilerX86::JitCompilerX86() {
#ifdef _WIN32
@@ -213,24 +120,16 @@ namespace RandomX {
if (code == (uint8_t*)-1)
throw std::runtime_error("mmap failed");
#endif
- memcpy(code, prologue, sizeof(prologue));
- codePos = sizeof(prologue);
- if (startOffsetAligned - codePos > 4) {
- emitByte(0xeb);
- emitByte(startOffsetAligned - (codePos + 1));
- }
- else {
- while (codePos < startOffsetAligned)
- emitByte(0x90); //nop
- }
- memcpy(code + readDatasetSubOffset, readDatasetSub, sizeof(readDatasetSub));
- memcpy(code + epilogueOffset, epilogue, sizeof(epilogue));
+ memcpy(code, codePrologue, prologueSize);
+ memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize - epilogueSize, codeEpilogue, epilogueSize);
+ memcpy(code + CodeSize - readDatasetRSize - readDatasetFSize, codeReadDatasetR, readDatasetRSize);
+ memcpy(code + CodeSize - readDatasetFSize, codeReadDatasetF, readDatasetFSize);
}
void JitCompilerX86::generateProgram(Pcg32& gen) {
instructionOffsets.clear();
callOffsets.clear();
- codePos = startOffsetAligned;
+ codePos = prologueSize;
Instruction instr;
for (unsigned i = 0; i < ProgramLength; ++i) {
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
@@ -247,7 +146,6 @@ namespace RandomX {
instructionOffsets.push_back(codePos);
emit(0x840fcfff); //dec edx; jz
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
- gena(instr);
auto generator = engine[instr.opcode];
(this->*generator)(instr, i);
}
@@ -258,11 +156,10 @@ namespace RandomX {
}
}
- void JitCompilerX86::gena(Instruction& instr) {
+ void JitCompilerX86::genar(Instruction& instr) {
emit(uint16_t(0x8149)); //xor
emitByte(0xf0 + (instr.rega % RegistersCount));
emit(instr.addra);
- int32_t pc;
switch (instr.loca & 7)
{
case 0:
@@ -272,7 +169,7 @@ namespace RandomX {
emit(uint16_t(0x8b41)); //mov
emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega
emitByte(0xe8); //call
- emit(readDatasetSubOffset - (codePos + 4));
+ emit(readDatasetROffset - (codePos + 4));
return;
case 4:
@@ -293,8 +190,44 @@ namespace RandomX {
}
}
+ void JitCompilerX86::genaf(Instruction& instr) { // emits the operand-A load for floating point instructions; the loaded value ends up in xmm0
+ emit(uint16_t(0x8149)); //xor r64, imm32 (bytes 49 81: REX.W+B, opcode 81)
+ emitByte(0xf0 + (instr.rega % RegistersCount)); // ModRM /6 selecting r8..r15 by rega
+ emit(instr.addra); // immediate operand of the xor
+ switch (instr.loca & 7)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3: // loca & 7 in 0..3: go through the reader stub located at readDatasetFOffset
+ emit(uint16_t(0x8b41)); //mov
+ emitByte(0xc8 + (instr.rega % RegistersCount)); //ecx, rega
+ emitByte(0xe8); //call rel32
+ emit(readDatasetFOffset - (codePos + 4)); // displacement is relative to the end of this 4-byte field
+ return;
+
+ case 4:
+ emit(uint16_t(0x8b41)); //mov
+ emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
+ emitByte(0x25); //and eax, imm32
+ emit(ScratchpadL2 - 1); //whole scratchpad
+ emitByte(0xf3); // F3 prefix belonging to the cvtdq2pd encoding that follows
+ emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8]
+ return;
+
+ default: // same sequence as case 4 with the smaller address mask
+ emit(uint16_t(0x8b41)); //mov
+ emitByte(0xc0 + (instr.rega % RegistersCount)); //eax, rega
+ emitByte(0x25); //and eax, imm32
+ emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
+ emitByte(0xf3); // F3 prefix belonging to the cvtdq2pd encoding that follows
+ emit(0xc604e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rax*8]
+ return;
+ }
+ }
+
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
- if ((instr.locb & 7) <= 5) {
+ if ((instr.locb & 7) <= 3) {
emit(uint16_t(0x8b49)); //mov
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
emitByte(0x48); //REX.W
@@ -330,126 +263,117 @@ namespace RandomX {
}
void JitCompilerX86::genbf(Instruction& instr, uint8_t opcode) {
- emit(0x48f2fffff8002548); //and rax,0xfffffffffffff800; cvtsi2sd xmm0,rax
- emit(uint16_t(0x2a0f));
- emitByte(0xc0);
- if ((instr.locb & 7) <= 5) {
- int regb = (instr.regb % RegistersCount);
- emitByte(0xf2); //xxxsd xmm0,regb
- if (regb <= 1) {
- emitByte(0x41); //REX
- }
- emitByte(0x0f);
- emitByte(opcode);
- emitByte(0xc0 + regb);
- }
- else {
- convertible_t bimm;
- bimm.f64 = (double)instr.imm32;
- emit(uint16_t(0xb848)); //movabs rax,imm64
- emit(bimm.i64);
- emitByte(0x66); //movq xmm1,rax
- emit(0xc86e0f48);
- emit(uint16_t(0x0ff2)); //xxxsd xmm0,xmm1
- emitByte(opcode);
- emitByte(0xc1);
+ int regb = (instr.regb % RegistersCount); // operand B is now always a floating point register; the immediate form is gone
+ emitByte(0x66); //xxxpd xmm0,regb (66 prefix selects the packed-double form of the given opcode)
+ if (regb <= 1) {
+ emitByte(0x41); //REX.B: register indices 0 and 1 are kept in xmm8/xmm9 (see the epilogue store order)
}
+ emitByte(0x0f); // two-byte opcode escape
+ emitByte(opcode); // arithmetic opcode chosen by the caller (0x58, 0x5c, 0x59, 0x5e in the handlers)
+ emitByte(0xc0 + regb); // ModRM: destination xmm0, source regb
+ }
+
+
+ void JitCompilerX86::scratchpadStoreR(Instruction& instr, uint32_t scratchpadSize) { // emits a store of rax to scratchpad[(regc ^ addrc) & (scratchpadSize - 1)]
+ emit(0x41c88b48); //mov rcx, rax; REX (the trailing 0x41 is the prefix of the next instruction)
+ emitByte(0x8b); // mov
+ emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc
+ emitByte(0x35); // xor eax, imm32
+ emit(instr.addrc); // immediate of the xor
+ emitByte(0x25); //and eax, imm32
+ emit(scratchpadSize - 1); // mask; callers pass ScratchpadL1 or ScratchpadL2
+ emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx
+ }
void JitCompilerX86::gencr(Instruction& instr) {
switch (instr.locc & 7)
{
- case 0:
- emit(0x41c88b48); //mov rcx, rax; REX
- emitByte(0x8b); // mov
- emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc
- emitByte(0x35); // xor eax
- emit(instr.addrc);
- emitByte(0x25); //and
- emit(ScratchpadL2 - 1); //whole scratchpad
- emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx
- break;
+ case 0:
+ scratchpadStoreR(instr, ScratchpadL2); // store the result anywhere in the whole scratchpad
+ break;
- case 1:
- case 2:
- case 3:
- emit(0x41c88b48); //mov rcx, rax; REX
- emitByte(0x8b); // mov
- emitByte(0xc0 + (instr.regc % RegistersCount)); //eax, regc
- emitByte(0x35); // xor eax
- emit(instr.addrc);
- emitByte(0x25); //and
- emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
- emit(0xc60c8948); // mov QWORD PTR [rsi+rax*8],rcx
- break;
+ case 1:
+ case 2:
+ case 3:
+ scratchpadStoreR(instr, ScratchpadL1); // store the result within the first 16 KiB of the scratchpad
+ break;
- default:
- emit(uint16_t(0x8b4c)); //mov
- emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
- break;
+ default: // remaining locc values: keep the result in integer register regc
+ emit(uint16_t(0x8b4c)); //mov
+ emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
+ break;
}
}
- void JitCompilerX86::gencf(Instruction& instr) {
+ void JitCompilerX86::scratchpadStoreF(Instruction& instr, int regc, uint32_t scratchpadSize, bool storeHigh) { // emits a store of one 64-bit half of FP register regc to the scratchpad
+ emit(uint16_t(0x8b41)); //mov
+ emitByte(0xc0 + regc); //eax, regc (the integer register with the same index supplies the address)
+ emitByte(0x35); // xor eax, imm32
+ emit(instr.addrc); // immediate of the xor
+ emitByte(0x25); //and eax, imm32
+ emit(scratchpadSize - 1); // mask; callers pass ScratchpadL1 or ScratchpadL2
+ emitByte(0x66); //movhpd/movlpd QWORD PTR [rsi+rax*8], regc
+ if (regc <= 1) {
+ emitByte(0x44); //REX.R: register indices 0 and 1 are kept in xmm8/xmm9
+ }
+ emitByte(0x0f); // two-byte opcode escape
+ emitByte(storeHigh ? 0x17 : 0x13); // 0x17 = movhpd (upper half), 0x13 = movlpd (lower half)
+ emitByte(4 + 8 * regc); // ModRM: mod=00, reg=regc, rm=100 (SIB byte follows)
+ emitByte(0xc6); // SIB: [rsi + rax*8]
+ }
+
+ void JitCompilerX86::gencf(Instruction& instr, bool alwaysLow = false) { // writes the FP result back; NOTE(review): the default argument is given on this out-of-class definition while the header declaration has none, so one-argument calls only compile below this point in this file
int regc = (instr.regc % RegistersCount);
- switch (instr.locc & 7)
- {
- case 0:
- emit(uint16_t(0x8b41)); //mov
- emitByte(0xc0 + regc); //eax, regc
- emitByte(0x35); // xor eax
- emit(instr.addrc);
- emitByte(0x25); //and
- emit(ScratchpadL2 - 1); //whole scratchpad
- emit(uint16_t(0x4866)); //prefix
- emit(0xc6047e0f); // movq QWORD PTR [rsi+rax*8],xmm0
- break;
-
- case 1:
- case 2:
- case 3:
- emit(uint16_t(0x8b41)); //mov
- emitByte(0xc0 + regc); //eax, regc
- emitByte(0x35); // xor eax
- emit(instr.addrc);
- emitByte(0x25); //and
- emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
- emit(uint16_t(0x4866)); //prefix
- emit(0xc6047e0f); // movq QWORD PTR [rsi+rax*8],xmm0
- break;
-
- default:
- emitByte(0xf2);
+ if (!alwaysLow) { // alwaysLow callers have already placed the value in regc, so the copy from xmm0 is skipped
if (regc <= 1) {
emitByte(0x44); //REX
}
- emit(uint16_t(0x100f)); //movsd
+ emit(uint16_t(0x280f)); //movaps (copies the full 128-bit register instead of only the low double)
emitByte(0xc0 + 8 * regc); // regc, xmm0
- break;
+ }
+ switch (instr.locc & 7) // the register is always written; locc only decides whether a scratchpad store follows
+ {
+ case 4:
+ scratchpadStoreF(instr, regc, ScratchpadL2, !alwaysLow && (instr.locc & 8)); // bit 3 of locc picks the upper half unless alwaysLow is set
+ break;
+
+ case 5:
+ case 6:
+ case 7:
+ scratchpadStoreF(instr, regc, ScratchpadL1, !alwaysLow && (instr.locc & 8)); // same half selection, smaller address mask
+ break;
+
+ default: // locc & 7 in 0..3: no scratchpad store
+ break;
}
}
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
+ genar(instr); // the operand-A load is now emitted by each handler; generateProgram no longer emits it for every instruction
genbr1(instr, 0x0349, 0x0548);
gencr(instr);
}
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr132(instr, 0x0341, 0x05);
gencr(instr);
}
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr1(instr, 0x2b49, 0x2d48);
gencr(instr);
}
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr132(instr, 0x2b41, 0x2d);
gencr(instr);
}
void JitCompilerX86::h_MUL_64(Instruction& instr, int i) {
+ genar(instr);
if ((instr.locb & 7) <= 5) {
emitByte(0x49); //REX
emit(uint16_t(0xaf0f)); // imul rax, r64
@@ -464,6 +388,7 @@ namespace RandomX {
}
void JitCompilerX86::h_MULH_64(Instruction& instr, int i) {
+ genar(instr);
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8b49)); //mov rcx, r64
emitByte(0xc8 + (instr.regb % RegistersCount));
@@ -481,6 +406,7 @@ namespace RandomX {
}
void JitCompilerX86::h_MUL_32(Instruction& instr, int i) {
+ genar(instr);
emit(uint16_t(0xc88b)); //mov ecx, eax
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8b41)); // mov eax, r32
@@ -495,6 +421,7 @@ namespace RandomX {
}
void JitCompilerX86::h_IMUL_32(Instruction& instr, int i) {
+ genar(instr);
emitByte(0x48);
emit(uint16_t(0xc863)); //movsxd rcx,eax
if ((instr.locb & 7) <= 5) {
@@ -511,6 +438,7 @@ namespace RandomX {
}
void JitCompilerX86::h_IMULH_64(Instruction& instr, int i) {
+ genar(instr);
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8b49)); //mov rcx, r64
emitByte(0xc8 + (instr.regb % RegistersCount));
@@ -528,6 +456,7 @@ namespace RandomX {
}
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
+ genar(instr);
if ((instr.locb & 7) <= 5) {
emitByte(0xb9); //mov ecx, 1
emit(1);
@@ -546,6 +475,7 @@ namespace RandomX {
}
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
+ genar(instr);
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount));
@@ -563,100 +493,127 @@ namespace RandomX {
}
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
+ genar(instr); // the operand-A load is now emitted by each handler; generateProgram no longer emits it for every instruction
genbr1(instr, 0x2349, 0x2548);
gencr(instr);
}
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr132(instr, 0x2341, 0x25);
gencr(instr);
}
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr1(instr, 0x0b49, 0x0d48);
gencr(instr);
}
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr132(instr, 0x0b41, 0x0d);
gencr(instr);
}
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr1(instr, 0x3349, 0x3548);
gencr(instr);
}
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr132(instr, 0x3341, 0x35);
gencr(instr);
}
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr0(instr, 0xe0d3, 0xe0c1);
gencr(instr);
}
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr0(instr, 0xe8d3, 0xe8c1);
gencr(instr);
}
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr0(instr, 0xf8d3, 0xf8c1);
gencr(instr);
}
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr0(instr, 0xc0d3, 0xc0c1);
gencr(instr);
}
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
+ genar(instr); // integer operand-A load
genbr0(instr, 0xc8d3, 0xc8c1);
gencr(instr);
}
void JitCompilerX86::h_FPADD(Instruction& instr, int i) {
+ genaf(instr); // floating point operand-A load into xmm0
genbf(instr, 0x58);
gencf(instr);
}
void JitCompilerX86::h_FPSUB(Instruction& instr, int i) {
+ genaf(instr); // floating point operand-A load into xmm0
genbf(instr, 0x5c);
gencf(instr);
}
void JitCompilerX86::h_FPMUL(Instruction& instr, int i) {
- emit(uint16_t(0x0d48)); //or rax,0x800
- emit(0x00000800);
+ genaf(instr); // floating point operand-A load into xmm0
genbf(instr, 0x59);
+ emit(0x00c9c20f66c8280f); //movaps xmm1,xmm0; cmpeqpd xmm1,xmm1 (a lane compares equal to itself only when it is not NaN, so xmm1 becomes a per-lane not-NaN mask)
+ emit(uint16_t(0x540f)); //andps xmm0,xmm1 (NaN lanes are cleared to zero, other lanes pass through)
+ emitByte(0xc1); // ModRM byte of the andps
gencf(instr);
}
void JitCompilerX86::h_FPDIV(Instruction& instr, int i) {
- emit(uint16_t(0x0d48)); //or rax,0x800
- emit(0x00000800);
+ genaf(instr); // floating point operand-A load into xmm0
genbf(instr, 0x5e);
+ emit(0x00c9c20f66c8280f); //movaps xmm1,xmm0; cmpeqpd xmm1,xmm1 (a lane compares equal to itself only when it is not NaN, so xmm1 becomes a per-lane not-NaN mask)
+ emit(uint16_t(0x540f)); //andps xmm0,xmm1 (NaN lanes are cleared to zero, other lanes pass through)
+ emitByte(0xc1); // ModRM byte of the andps
gencf(instr);
}
void JitCompilerX86::h_FPSQRT(Instruction& instr, int i) {
- emit(uint16_t(0xb948)); //or movabs rcx, imm64
- emit(0x7ffffffffffff800);
- emit(0xc02a0f48f2c12348); //and rax,rcx; cvtsi2sd xmm0,rax
- emit(0xc0510ff2); //sqrtsd xmm0,xmm0
+ genaf(instr); // floating point operand-A load into xmm0
+ emit(0xc0510f66c2540f41); //andps xmm0,xmm10; sqrtpd xmm0,xmm0 (xmm10 is the absolute-value mask set up by the program prologue, so the sign bit is cleared before the square root)
gencf(instr);
}
void JitCompilerX86::h_FPROUND(Instruction& instr, int i) {
+ genar(instr); // operand A is loaded as an integer here
emit(0x81480de0c1c88b48);
emit(0x600025fffff800e1);
- emit(0x0dc12a0f48f20000);
+ emit(uint16_t(0x0000)); // upper two bytes of the imm32 begun by the previous emit (and eax, 0x6000)
+ emitByte(0xf2); // F2 prefix of cvtsi2sd
+ int regc = (instr.regc % RegistersCount);
+ if (regc <= 1) {
+ emitByte(0x4c); //REX.W + REX.R: destination is xmm8/xmm9
+ }
+ else {
+ emitByte(0x48); //REX.W only: destination is xmm2..xmm7
+ }
+ emit(uint16_t(0x2a0f)); // cvtsi2sd opcode bytes 0f 2a
+ emitByte(0xc1 + 8 * regc); // ModRM: destination regc, source rcx; the conversion now lands in regc directly instead of xmm0
+ emitByte(0x0d); // or eax, imm32; the immediate is the first four bytes of the next emit (0x9fc0)
emit(0xf824448900009fc0);
emit(0x2454ae0f); //ldmxcsr DWORD PTR [rsp-0x8]
emitByte(0xf8);
- gencf(instr);
+ gencf(instr, true); // value is already in regc: skip the copy from xmm0 and only ever store the low half
}
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
@@ -682,6 +639,7 @@ namespace RandomX {
}
void JitCompilerX86::h_CALL(Instruction& instr, int i) {
+ genar(instr);
emit(uint16_t(0x8141)); //cmp regb, imm32
emitByte(0xf8 + (instr.regb % RegistersCount));
emit(instr.imm32);
@@ -707,6 +665,7 @@ namespace RandomX {
}
void JitCompilerX86::h_RET(Instruction& instr, int i) {
+ genar(instr);
int crlen = 0;
if ((instr.locc & 7) <= 3) {
crlen = 17;
@@ -756,4 +715,6 @@ namespace RandomX {
INST_HANDLE(CALL)
INST_HANDLE(RET)
};
+
+#endif
}
\ No newline at end of file
diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp
index c453ba1..e2c432c 100644
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@@ -58,13 +58,16 @@ namespace RandomX {
std::vector instructionOffsets;
std::vector callOffsets;
- void gena(Instruction&);
+ void genar(Instruction&);
+ void genaf(Instruction&);
void genbr0(Instruction&, uint16_t, uint16_t);
void genbr1(Instruction&, uint16_t, uint16_t);
void genbr132(Instruction&, uint16_t, uint8_t);
void genbf(Instruction&, uint8_t);
+ void scratchpadStoreR(Instruction&, uint32_t);
+ void scratchpadStoreF(Instruction&, int, uint32_t, bool);
void gencr(Instruction&);
- void gencf(Instruction&);
+ void gencf(Instruction&, bool);
void generateCode(Instruction&, int);
void fixCallOffsets();
diff --git a/src/TestAluFpu.cpp b/src/TestAluFpu.cpp
index f2fe387..de90083 100644
--- a/src/TestAluFpu.cpp
+++ b/src/TestAluFpu.cpp
@@ -21,33 +21,36 @@ along with RandomX. If not, see.
#include
#include
#include "instructions.hpp"
-#include "Pcg32.hpp"
//#define DEBUG
using namespace RandomX;
-typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&);
-
-uint64_t rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) {
- convertible_t a, b, c;
- a.u64 = mode;
- FPROUND(a, b, c);
-#ifdef DEBUG
- a.f64 = convertToDouble(x);
- b.f64 = convertToDouble(y);
- std::cout << std::hex << (uint64_t)x << " -> " << a.u64 << std::endl;
- std::cout << std::hex << (uint64_t)y << " -> " << b.u64 << std::endl;
- std::cout << std::dec;
-#endif
- a.i64 = x;
- b.i64 = y;
- op(a, b, c);
- return c.u64;
-}
+typedef void(*FpuOperation)(convertible_t&, fpu_reg_t&, fpu_reg_t&);
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
+uint64_t rxRound(uint32_t mode, int64_t x, int64_t y, FpuOperation op, bool hiEqualsLo = true) { // test helper: applies mode through FPROUND, runs op(a, b, c) and returns the raw bits of the low half of c
+ convertible_t a;
+ fpu_reg_t b, c;
+ a.u64 = mode;
+ FPROUND(a, b, c); // this call is made with a = mode only; b is assigned afterwards and c is produced by op
+ if (hiEqualsLo) {
+ a.i32lo = x; // the same 32-bit value goes into both halves of a (x is narrowed to int32_t)
+ a.i32hi = x;
+ }
+ else {
+ a.i64 = x; // full 64-bit operand; the FPROUND checks use this path
+ }
+ b.lo.i64 = y; // both halves of b get the same bit pattern
+ b.hi.i64 = y;
+ op(a, b, c);
+ if (hiEqualsLo) {
+ CHECK(c.lo.u64 == c.hi.u64); // identical inputs in both halves are expected to give identical results
+ }
+ return c.lo.u64;
+}
+
#define RX_EXECUTE_U64(va, vb, INST) do { \
a.u64 = va; \
b.u64 = vb; \
@@ -273,118 +276,126 @@ TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") {
TEST_CASE("Denormal results are not produced", "[FTZ]") {
FPINIT();
- convertible_t a, b, c;
- a.i64 = 2048;
- FPDIV(a, DBL_MAX, c);
+ convertible_t a;
+ fpu_reg_t b; // NOTE(review): only b.lo is assigned before b is passed to FPDIV; confirm an uninitialized b.hi is harmless here
+ a.i64 = 1; // 1 / DBL_MAX is smaller than the smallest normal double
+ b.lo.f64 = DBL_MAX;
+ FPDIV(a, b, b); // the result overwrites b and is reused as the operand of the FPSUB check below
#ifdef DEBUG
- std::cout << a.i64 << " / " << DBL_MAX << " = " << std::hex << c.u64 << std::endl;
+ std::cout << a.i64 << " / " << DBL_MAX << " = " << std::hex << b.lo.u64 << std::endl;
#endif
- REQUIRE(std::fpclassify(c.f64) != FP_SUBNORMAL);
- b.f64 = c.f64;
+ CHECK(std::fpclassify(b.lo.f64) != FP_SUBNORMAL); // CHECK instead of REQUIRE: the test continues after a failure
a.i64 = 0;
- FPSUB_64(a, b, c);
+ FPSUB(a, b, b);
#ifdef DEBUG
- std::cout << a.i64 << " - " << b.f64 << " = " << std::hex << c.u64 << std::endl;
+ std::cout << a.i64 << " - " << b.lo.f64 << " = " << std::hex << b.lo.u64 << std::endl;
#endif
- CHECK(std::fpclassify(c.f64) != FP_SUBNORMAL);
+ CHECK(std::fpclassify(b.lo.f64) != FP_SUBNORMAL);
}
TEST_CASE("NaN results are not produced", "[NAN]") {
FPINIT();
- convertible_t a, c;
+ convertible_t a;
+ fpu_reg_t b;
a.i64 = 0;
- FPDIV(a, 0, c);
- CHECK(std::fpclassify(c.f64) != FP_NAN);
- FPMUL(a, std::numeric_limits<double>::infinity(), c);
- CHECK(std::fpclassify(c.f64) != FP_NAN);
+ b.lo.f64 = 0; // 0 / 0 is NaN under plain IEEE-754 arithmetic
+ FPDIV(a, b, b);
+ CHECK(std::fpclassify(b.lo.f64) != FP_NAN);
+ b.lo.f64 = std::numeric_limits<double>::infinity(); // 0 * infinity is NaN under plain IEEE-754 arithmetic
+ FPMUL(a, b, b);
+ CHECK(std::fpclassify(b.lo.f64) != FP_NAN);
}
-volatile int64_t fpAdda = 7379480244170225589;
-volatile int64_t fpAddb = -438072579179686797;
-volatile int64_t fpSuba = 2939258788088626026;
-volatile int64_t fpSubb = 4786131045320678734;
-volatile int64_t fpMula1 = 8399833736388895639;
-volatile int64_t fpMulb1 = 5671608020317594922;
-volatile int64_t fpMula2 = -7094299423744805450;
-volatile int64_t fpMulb2 = 4982086006202596504;
-volatile int64_t fpDiva1 = 8399833736388895639;
-volatile int64_t fpDivb1 = 5671608020317594922;
-volatile int64_t fpDiva2 = -7434878587645025912;
-volatile int64_t fpDivb2 = 5266243837734830806;
-volatile int64_t fpSqrta = -7594301562963134542;
+volatile int64_t fpRounda = 7379480244170225589;
+volatile int32_t fpAdda = -2110701072;
+volatile int64_t fpAddb = 5822431907862180274;
+volatile int32_t fpSuba = -1651770302;
+volatile int64_t fpSubb = 4982086006202596504;
+volatile int32_t fpMula1 = 122885310;
+volatile int64_t fpMulb1 = 6036690890763685020;
+volatile int32_t fpMula2 = -1952486466;
+volatile int64_t fpMulb2 = 5693689137909219638;
+volatile int32_t fpDiva1 = -1675630642;
+volatile int64_t fpDivb1 = -3959960229647489051;
+volatile int32_t fpDiva2 = -1651770302;
+volatile int64_t fpDivb2 = 4982086006202596504;
+volatile int32_t fpSqrta1 = 440505508;
+volatile int32_t fpSqrta2 = -2147483648;
TEST_CASE("IEEE-754 compliance", "[FPU]") {
FPINIT();
- convertible_t a, b, c;
+ convertible_t a;
+ fpu_reg_t b, c;
+ b.lo.f64 = 0.0; // divisor for the two division-by-zero checks below
- a.i64 = 2048;
- FPDIV(a, 0, c);
- CHECK(c.f64 == std::numeric_limits<double>::infinity());
+ a.i64 = 1;
+ FPDIV(a, b, c);
+ CHECK(c.lo.f64 == std::numeric_limits<double>::infinity()); // positive / zero
- a.i64 = -2048;
- FPDIV(a, 0, c);
- CHECK(c.f64 == -std::numeric_limits<double>::infinity());
+ a.i64 = -1;
+ FPDIV(a, b, c);
+ CHECK(c.lo.f64 == -std::numeric_limits<double>::infinity()); // negative / zero
#ifdef DEBUG
std::cout << "FPROUND" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU);
- CHECK(rxRound(RoundDown, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU);
- CHECK(rxRound(RoundUp, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU);
- CHECK(rxRound(RoundToZero, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU);
-
- CHECK(rxRound(RoundToNearest, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU);
- CHECK(rxRound(RoundDown, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU);
- CHECK(rxRound(RoundUp, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU);
- CHECK(rxRound(RoundToZero, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU);
+ CHECK(rxRound(RoundToNearest, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU); // hiEqualsLo = false: the full 64-bit operand is used and the hi == lo check is skipped
+ CHECK(rxRound(RoundDown, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU);
+ CHECK(rxRound(RoundUp, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU);
+ CHECK(rxRound(RoundToZero, fpRounda, 0, &FPROUND, false) == 0x43d99a4b8bc531dcU);
#ifdef DEBUG
std::cout << "FPADD" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U);
- CHECK(rxRound(RoundDown, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U);
- CHECK(rxRound(RoundUp, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U);
- CHECK(rxRound(RoundToZero, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U);
+ CHECK(rxRound(RoundToNearest, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b2U);
+ CHECK(rxRound(RoundDown, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b1U);
+ CHECK(rxRound(RoundUp, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b2U);
+ CHECK(rxRound(RoundToZero, fpAdda, fpAddb, &FPADD) == 0x50cd6ef8bd0671b1U);
#ifdef DEBUG
std::cout << "FPSUB" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U);
- CHECK(rxRound(RoundDown, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U);
- CHECK(rxRound(RoundUp, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U);
- CHECK(rxRound(RoundToZero, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U);
+ CHECK(rxRound(RoundToNearest, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c99U);
+ CHECK(rxRound(RoundDown, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c99U);
+ CHECK(rxRound(RoundUp, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c98U);
+ CHECK(rxRound(RoundToZero, fpSuba, fpSubb, &FPSUB) == 0xc523ecd390267c98U);
#ifdef DEBUG
std::cout << "FPMUL" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U);
- CHECK(rxRound(RoundDown, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U);
- CHECK(rxRound(RoundUp, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U);
- CHECK(rxRound(RoundToZero, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U);
+ CHECK(rxRound(RoundToNearest, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24542U);
+ CHECK(rxRound(RoundDown, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24541U);
+ CHECK(rxRound(RoundUp, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24542U);
+ CHECK(rxRound(RoundToZero, fpMula1, fpMulb1, &FPMUL) == 0x5574b924d2f24541U);
- CHECK(rxRound(RoundToNearest, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U);
- CHECK(rxRound(RoundDown, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U);
- CHECK(rxRound(RoundUp, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U);
- CHECK(rxRound(RoundToZero, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U);
+ CHECK(rxRound(RoundToNearest, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a7470U);
+ CHECK(rxRound(RoundDown, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a7470U);
+ CHECK(rxRound(RoundUp, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a746fU);
+ CHECK(rxRound(RoundToZero, fpMula2, fpMulb2, &FPMUL) == 0xd0f23a18891a746fU);
#ifdef DEBUG
std::cout << "FPDIV" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU);
- CHECK(rxRound(RoundDown, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU);
- CHECK(rxRound(RoundUp, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU);
- CHECK(rxRound(RoundToZero, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU);
+ CHECK(rxRound(RoundToNearest, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb0aU);
+ CHECK(rxRound(RoundDown, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb09U);
+ CHECK(rxRound(RoundUp, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb0aU);
+ CHECK(rxRound(RoundToZero, fpDiva1, fpDivb1, &FPDIV) == 0x38bd2a7732b5eb09U);
- CHECK(rxRound(RoundToNearest, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU);
- CHECK(rxRound(RoundDown, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU);
- CHECK(rxRound(RoundUp, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU);
- CHECK(rxRound(RoundToZero, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU);
+ CHECK(rxRound(RoundToNearest, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71cU);
+ CHECK(rxRound(RoundDown, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71cU);
+ CHECK(rxRound(RoundUp, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71bU);
+ CHECK(rxRound(RoundToZero, fpDiva2, fpDivb2, &FPDIV) == 0xbca3c3c039ccc71bU);
#ifdef DEBUG
std::cout << "FPSQRT" << std::endl;
#endif
- CHECK(rxRound(RoundToNearest, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU);
- CHECK(rxRound(RoundDown, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU);
- CHECK(rxRound(RoundUp, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU);
- CHECK(rxRound(RoundToZero, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU);
+ CHECK(rxRound(RoundToNearest, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19dU);
+ CHECK(rxRound(RoundDown, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19cU);
+ CHECK(rxRound(RoundUp, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19dU);
+ CHECK(rxRound(RoundToZero, fpSqrta1, 0, &FPSQRT) == 0x40d47f0e46ebc19cU);
+
+ CHECK(rxRound(RoundToNearest, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bcdU); // fpSqrta2 is a negative input
+ CHECK(rxRound(RoundDown, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bccU);
+ CHECK(rxRound(RoundUp, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bcdU);
+ CHECK(rxRound(RoundToZero, fpSqrta2, 0, &FPSQRT) == 0x40e6a09e667f3bccU);
}
diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp
index 21c52ac..103d245 100644
--- a/src/VirtualMachine.cpp
+++ b/src/VirtualMachine.cpp
@@ -24,8 +24,19 @@ along with RandomX. If not, see.
#include "t1ha/t1ha.h"
#include "blake2/blake2.h"
#include
+#include
+
+std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { // text dump of a register file: integer registers first, then both halves of each floating point register
+ for (int i = 0; i < RandomX::RegistersCount; ++i)
+ os << std::hex << "r" << i << " = " << rf.r[i].u64 << std::endl << std::dec; // raw value in hex; the stream is switched back to decimal after each line
+ for (int i = 0; i < RandomX::RegistersCount; ++i)
+ os << std::hex << "f" << i << " = " << rf.f[i].hi.u64 << " (" << rf.f[i].hi.f64 << ")" << std::endl // upper half: raw bits, then the same bits read as a double
+ << " = " << rf.f[i].lo.u64 << " (" << rf.f[i].lo.f64 << ")" << std::endl << std::dec; // lower half on its own line
+ return os;
+}
namespace RandomX {
+
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
mem.ds.dataset = nullptr;
}
@@ -83,9 +94,10 @@ namespace RandomX {
}
void VirtualMachine::getResult(void* out) {
- uint64_t smallState[sizeof(RegisterFile) / sizeof(uint64_t) + 2];
+ constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 2; // the register file as 64-bit words plus two extra words for the scratchpad hash
+ uint64_t smallState[smallStateLength];
memcpy(smallState, &reg, sizeof(RegisterFile));
- smallState[17] = t1ha2_atonce128(&smallState[16], scratchpad, ScratchpadSize, reg.r[0].u64);
+ smallState[smallStateLength - 1] = t1ha2_atonce128(&smallState[smallStateLength - 2], scratchpad, ScratchpadSize, reg.r[0].u64); // the two trailing words receive the t1ha2_atonce128 output; the indices follow the array length instead of the hard-coded 16/17
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
}
}
\ No newline at end of file
diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp
index 569718c..f7fdcd0 100644
--- a/src/VirtualMachine.hpp
+++ b/src/VirtualMachine.hpp
@@ -32,11 +32,14 @@ namespace RandomX {
virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0;
void getResult(void*);
+ const RegisterFile& getRegisterFile() { // read-only access to the register file of this VM
+ return reg;
+ }
protected:
bool softAes, lightClient;
- RegisterFile reg;
- MemoryRegisters mem;
DatasetReadFunc readDataset;
+ alignas(16) RegisterFile reg; // 16-byte aligned: the JIT epilogue writes the floating point registers into this struct with movdqa, which needs aligned addresses
+ MemoryRegisters mem;
alignas(16) convertible_t scratchpad[ScratchpadLength];
};
}
\ No newline at end of file
diff --git a/src/asm/program_epilogue_linux.inc b/src/asm/program_epilogue_linux.inc
new file mode 100644
index 0000000..414c9ba
--- /dev/null
+++ b/src/asm/program_epilogue_linux.inc
@@ -0,0 +1,12 @@
+ #include "program_epilogue_store.inc"
+
+ ;# restore callee-saved registers - System V AMD64 ABI
+ pop r15 ;# pops mirror the pushes of program_prologue_linux.inc in reverse order
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+
+ ;# program finished
+ ret 0
\ No newline at end of file
diff --git a/src/asm/program_epilogue_store.inc b/src/asm/program_epilogue_store.inc
new file mode 100644
index 0000000..b7b779b
--- /dev/null
+++ b/src/asm/program_epilogue_store.inc
@@ -0,0 +1,22 @@
+ ;# unroll VM stack
+ mov rsp, rbp ;# rbp was set to the stack pointer at the start of the load code, right after the register file pointer was pushed
+
+ ;# save VM register values
+ pop rcx ;# rcx = register file pointer pushed by the prologue
+ mov qword ptr [rcx+0], r8 ;# eight 64-bit integer registers, offsets 0..56
+ mov qword ptr [rcx+8], r9
+ mov qword ptr [rcx+16], r10
+ mov qword ptr [rcx+24], r11
+ mov qword ptr [rcx+32], r12
+ mov qword ptr [rcx+40], r13
+ mov qword ptr [rcx+48], r14
+ mov qword ptr [rcx+56], r15
+ movdqa xmmword ptr [rcx+64], xmm8 ;# floating point registers, 16 bytes each; the first two are kept in xmm8/xmm9
+ movdqa xmmword ptr [rcx+80], xmm9
+ movdqa xmmword ptr [rcx+96], xmm2 ;# the remaining six are kept in xmm2..xmm7
+ movdqa xmmword ptr [rcx+112], xmm3
+ lea rcx, [rcx+64] ;# rebase so that the offsets below are 128..176 relative to the original pointer
+ movdqa xmmword ptr [rcx+64], xmm4
+ movdqa xmmword ptr [rcx+80], xmm5
+ movdqa xmmword ptr [rcx+96], xmm6
+ movdqa xmmword ptr [rcx+112], xmm7
\ No newline at end of file
diff --git a/src/asm/program_epilogue_win64.inc b/src/asm/program_epilogue_win64.inc
new file mode 100644
index 0000000..220bed8
--- /dev/null
+++ b/src/asm/program_epilogue_win64.inc
@@ -0,0 +1,20 @@
+ include program_epilogue_store.inc
+
+ ;# restore callee-saved registers - Microsoft x64 calling convention
+ movdqu xmm10, xmmword ptr [rsp] ;# same slots that program_prologue_win64.inc filled after its sub rsp, 80
+ movdqu xmm9, xmmword ptr [rsp+16]
+ movdqu xmm8, xmmword ptr [rsp+32]
+ movdqu xmm7, xmmword ptr [rsp+48]
+ movdqu xmm6, xmmword ptr [rsp+64]
+ add rsp, 80 ;# release the 5 x 16 byte save area
+ pop r15 ;# pops mirror the prologue pushes in reverse order
+ pop r14
+ pop r13
+ pop r12
+ pop rsi
+ pop rdi
+ pop rbp
+ pop rbx
+
+ ;# program finished
+ ret 0
\ No newline at end of file
diff --git a/src/asm/program_prologue_linux.inc b/src/asm/program_prologue_linux.inc
new file mode 100644
index 0000000..8d09d88
--- /dev/null
+++ b/src/asm/program_prologue_linux.inc
@@ -0,0 +1,17 @@
+ ;# callee-saved registers - System V AMD64 ABI
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ ;# function arguments
+ push rdi ;# RegisterFile& registerFile (popped again by the epilogue store code)
+ mov rbx, rsi ;# MemoryRegisters& memory
+ mov rsi, rdx ;# convertible_t* scratchpad
+ mov rcx, rdi ;# the shared load code below reads the register file through rcx
+
+ #include "program_prologue_load.inc"
+
+ jmp randomx_program_begin
\ No newline at end of file
diff --git a/src/asm/program_prologue_load.inc b/src/asm/program_prologue_load.inc
new file mode 100644
index 0000000..df44c08
--- /dev/null
+++ b/src/asm/program_prologue_load.inc
@@ -0,0 +1,63 @@
+ mov rbp, rsp ;# beginning of VM stack
+ mov rdi, 1048577 ;# number of VM instructions to execute + 1 (2^20 + 1)
+
+ xorps xmm10, xmm10
+ cmpeqpd xmm10, xmm10 ;# zero compares equal to itself, so every bit of xmm10 becomes 1
+ psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
+
+ ;# reset rounding mode
+ mov dword ptr [rsp-8], 40896 ;# 0x9fc0: exception mask bits, flush-to-zero and denormals-are-zero set, rounding control bits 00
+ ldmxcsr dword ptr [rsp-8]
+
+ ;# load integer registers
+ mov r8, qword ptr [rcx+0]
+ mov r9, qword ptr [rcx+8]
+ mov r10, qword ptr [rcx+16]
+ mov r11, qword ptr [rcx+24]
+ mov r12, qword ptr [rcx+32]
+ mov r13, qword ptr [rcx+40]
+ mov r14, qword ptr [rcx+48]
+ mov r15, qword ptr [rcx+56]
+
+ ;# initialize floating point registers
+ xorps xmm8, xmm8
+ cvtsi2sd xmm8, qword ptr [rcx+72] ;# convert the second 64-bit integer of the slot to double
+ pslldq xmm8, 8 ;# move it into the upper half of the register
+ cvtsi2sd xmm8, qword ptr [rcx+64] ;# convert the first 64-bit integer of the slot into the lower half
+
+ xorps xmm9, xmm9 ;# the same three-step pattern repeats for every remaining register
+ cvtsi2sd xmm9, qword ptr [rcx+88]
+ pslldq xmm9, 8
+ cvtsi2sd xmm9, qword ptr [rcx+80]
+
+ xorps xmm2, xmm2
+ cvtsi2sd xmm2, qword ptr [rcx+104]
+ pslldq xmm2, 8
+ cvtsi2sd xmm2, qword ptr [rcx+96]
+
+ xorps xmm3, xmm3
+ cvtsi2sd xmm3, qword ptr [rcx+120]
+ pslldq xmm3, 8
+ cvtsi2sd xmm3, qword ptr [rcx+112]
+
+ lea rcx, [rcx+64] ;# rebase so that the offsets below are 128..184 relative to the original pointer
+
+ xorps xmm4, xmm4
+ cvtsi2sd xmm4, qword ptr [rcx+72]
+ pslldq xmm4, 8
+ cvtsi2sd xmm4, qword ptr [rcx+64]
+
+ xorps xmm5, xmm5
+ cvtsi2sd xmm5, qword ptr [rcx+88]
+ pslldq xmm5, 8
+ cvtsi2sd xmm5, qword ptr [rcx+80]
+
+ xorps xmm6, xmm6
+ cvtsi2sd xmm6, qword ptr [rcx+104]
+ pslldq xmm6, 8
+ cvtsi2sd xmm6, qword ptr [rcx+96]
+
+ xorps xmm7, xmm7
+ cvtsi2sd xmm7, qword ptr [rcx+120]
+ pslldq xmm7, 8
+ cvtsi2sd xmm7, qword ptr [rcx+112]
\ No newline at end of file
diff --git a/src/asm/program_prologue_win64.inc b/src/asm/program_prologue_win64.inc
new file mode 100644
index 0000000..6059904
--- /dev/null
+++ b/src/asm/program_prologue_win64.inc
@@ -0,0 +1,24 @@
+ ;# callee-saved registers - Microsoft x64 calling convention
+ push rbx
+ push rbp
+ push rdi
+ push rsi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 80 ;# room for five 16-byte xmm registers
+ movdqu xmmword ptr [rsp+64], xmm6
+ movdqu xmmword ptr [rsp+48], xmm7
+ movdqu xmmword ptr [rsp+32], xmm8
+ movdqu xmmword ptr [rsp+16], xmm9
+ movdqu xmmword ptr [rsp+0], xmm10
+
+ ;# function arguments
+ push rcx ;# RegisterFile& registerFile (already in rcx, which the shared load code reads from)
+ mov rbx, rdx ;# MemoryRegisters& memory
+ mov rsi, r8 ;# convertible_t* scratchpad
+
+ include program_prologue_load.inc
+
+ jmp randomx_program_begin
\ No newline at end of file
diff --git a/src/asm/program_read_f.inc b/src/asm/program_read_f.inc
new file mode 100644
index 0000000..1d70dab
--- /dev/null
+++ b/src/asm/program_read_f.inc
@@ -0,0 +1,13 @@
+ mov edx, dword ptr [rbx] ;# ma (dataset offset); in: ecx = value mixed into mx, rbx = memory registers; out: xmm0
+ mov rax, qword ptr [rbx+8] ;# dataset
+ cvtdq2pd xmm0, qword ptr [rax+rdx] ;# xmm0 = the two int32 values at dataset+ma converted to two doubles
+ add dword ptr [rbx], 8 ;# ma += 8
+ xor ecx, dword ptr [rbx+4] ;# mx
+ mov dword ptr [rbx+4], ecx ;# mx ^= ecx (as passed in by the caller)
+ test ecx, 65528 ;# 65528 = 0FFF8h, i.e. bits 3..15 of the new mx
+ jne short rx_read_dataset_f_ret ;# any of those bits set: done, ma keeps advancing sequentially
+ and ecx, -8 ;# otherwise clear the low 3 bits of mx ...
+ mov dword ptr [rbx], ecx ;# ... and use the result as the new ma
+ prefetcht0 byte ptr [rax+rcx] ;# prefetch the dataset location of the new ma
+rx_read_dataset_f_ret:
+ ret 0
\ No newline at end of file
diff --git a/src/asm/program_read_r.inc b/src/asm/program_read_r.inc
new file mode 100644
index 0000000..b3102dc
--- /dev/null
+++ b/src/asm/program_read_r.inc
@@ -0,0 +1,13 @@
+ mov eax, dword ptr [rbx] ;# ma (dataset offset); in: ecx = value mixed into mx, rbx = memory registers; out: rax
+ mov rdx, qword ptr [rbx+8] ;# dataset
+ mov rax, qword ptr [rdx+rax] ;# rax = the 64-bit value at dataset+ma
+ add dword ptr [rbx], 8 ;# ma += 8
+ xor ecx, dword ptr [rbx+4] ;# mx
+ mov dword ptr [rbx+4], ecx ;# mx ^= ecx (as passed in by the caller)
+ test ecx, 65528 ;# 65528 = 0FFF8h, i.e. bits 3..15 of the new mx
+ jne short rx_read_dataset_r_ret ;# any of those bits set: done, ma keeps advancing sequentially
+ and ecx, -8 ;# otherwise clear the low 3 bits of mx ...
+ mov dword ptr [rbx], ecx ;# ... and use the result as the new ma
+ prefetcht0 byte ptr [rdx+rcx] ;# prefetch the dataset location of the new ma
+rx_read_dataset_r_ret:
+ ret 0
\ No newline at end of file
diff --git a/src/common.hpp b/src/common.hpp
index 761d9f5..0bfc834 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -20,6 +20,7 @@ along with RandomX. If not, see.
#pragma once
#include
+#include <iostream>
namespace RandomX {
@@ -59,6 +60,15 @@ namespace RandomX {
uint64_t u64;
int32_t i32;
uint32_t u32;
+ struct {
+ int32_t i32lo;
+ int32_t i32hi;
+ };
+ };
+
+ struct fpu_reg_t { //Floating point register made of two convertible_t values
+ convertible_t lo; //first (lower-address) value
+ convertible_t hi; //second value, stored directly after lo
};
constexpr int ProgramLength = 512;
@@ -96,10 +106,10 @@ namespace RandomX {
struct RegisterFile {
convertible_t r[RegistersCount];
- convertible_t f[RegistersCount];
+ fpu_reg_t f[RegistersCount]; //each f register is a lo/hi pair, twice the size of an r register
};
- static_assert(sizeof(RegisterFile) == 2 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile");
+ static_assert(sizeof(RegisterFile) == 3 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile"); //1 unit per r register + 2 units per f register, no padding
typedef convertible_t(*DatasetReadFunc)(addr_t, MemoryRegisters&);
@@ -108,4 +118,6 @@ namespace RandomX {
extern "C" {
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, DatasetReadFunc);
}
-}
\ No newline at end of file
+}
+
+std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf);
diff --git a/src/executeProgram-win64.asm b/src/executeProgram-win64.asm
index e5ff87d..356428c 100644
--- a/src/executeProgram-win64.asm
+++ b/src/executeProgram-win64.asm
@@ -1,19 +1,19 @@
-; Copyright (c) 2018 tevador
-;
-; This file is part of RandomX.
-;
-; RandomX is free software: you can redistribute it and/or modify
-; it under the terms of the GNU General Public License as published by
-; the Free Software Foundation, either version 3 of the License, or
-; (at your option) any later version.
-;
-; RandomX is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; GNU General Public License for more details.
-;
-; You should have received a copy of the GNU General Public License
-; along with RandomX. If not, see.
+;# Copyright (c) 2018 tevador
+;#
+;# This file is part of RandomX.
+;#
+;# RandomX is free software: you can redistribute it and/or modify
+;# it under the terms of the GNU General Public License as published by
+;# the Free Software Foundation, either version 3 of the License, or
+;# (at your option) any later version.
+;#
+;# RandomX is distributed in the hope that it will be useful,
+;# but WITHOUT ANY WARRANTY; without even the implied warranty of
+;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;# GNU General Public License for more details.
+;#
+;# You should have received a copy of the GNU General Public License
+;# along with RandomX. If not, see<http://www.gnu.org/licenses/>.
PUBLIC executeProgram
@@ -47,6 +47,7 @@ executeProgram PROC
; xmm7 -> "f7"
; xmm8 -> "f0"
; xmm9 -> "f1"
+ ; xmm10 -> absolute value mask
; STACK STRUCTURE:
; |
@@ -71,11 +72,12 @@ executeProgram PROC
push r13
push r14
push r15
- sub rsp, 64
- movdqu xmmword ptr [rsp+48], xmm6
- movdqu xmmword ptr [rsp+32], xmm7
- movdqu xmmword ptr [rsp+16], xmm8
- movdqu xmmword ptr [rsp+0], xmm9
+ sub rsp, 80
+ movdqu xmmword ptr [rsp+64], xmm6
+ movdqu xmmword ptr [rsp+48], xmm7
+ movdqu xmmword ptr [rsp+32], xmm8
+ movdqu xmmword ptr [rsp+16], xmm9
+ movdqu xmmword ptr [rsp+0], xmm10
; function arguments
push rcx ; RegisterFile& registerFile
@@ -86,7 +88,15 @@ executeProgram PROC
mov rbp, rsp ; beginning of VM stack
mov rdi, 1048577 ; number of VM instructions to execute + 1
- ; load VM register values
+ xorps xmm10, xmm10
+ cmpeqpd xmm10, xmm10
+ psrlq xmm10, 1 ; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
+
+ ; reset rounding mode
+ mov dword ptr [rsp-8], 40896
+ ldmxcsr dword ptr [rsp-8]
+
+ ; load integer registers
mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8]
mov r10, qword ptr [rcx+16]
@@ -95,16 +105,56 @@ executeProgram PROC
mov r13, qword ptr [rcx+40]
mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56]
- mov dword ptr [rsp-8], 40896
- ldmxcsr dword ptr [rsp-8]
+
+ ; load register f0 hi, lo
+ xorps xmm8, xmm8
+ cvtsi2sd xmm8, qword ptr [rcx+72]
+ pslldq xmm8, 8
cvtsi2sd xmm8, qword ptr [rcx+64]
- cvtsi2sd xmm9, qword ptr [rcx+72]
- cvtsi2sd xmm2, qword ptr [rcx+80]
- cvtsi2sd xmm3, qword ptr [rcx+88]
- cvtsi2sd xmm4, qword ptr [rcx+96]
- cvtsi2sd xmm5, qword ptr [rcx+104]
- cvtsi2sd xmm6, qword ptr [rcx+112]
+
+ ; load register f1 hi, lo
+ xorps xmm9, xmm9
+ cvtsi2sd xmm9, qword ptr [rcx+88]
+ pslldq xmm9, 8
+ cvtsi2sd xmm9, qword ptr [rcx+80]
+
+ ; load register f2 hi, lo
+ xorps xmm2, xmm2
+ cvtsi2sd xmm2, qword ptr [rcx+104]
+ pslldq xmm2, 8
+ cvtsi2sd xmm2, qword ptr [rcx+96]
+
+ ; load register f3 hi, lo
+ xorps xmm3, xmm3
+ cvtsi2sd xmm3, qword ptr [rcx+120]
+ pslldq xmm3, 8
+ cvtsi2sd xmm3, qword ptr [rcx+112]
+
+ lea rcx, [rcx+64]
+
+ ; load register f4 hi, lo
+ xorps xmm4, xmm4
+ cvtsi2sd xmm4, qword ptr [rcx+72]
+ pslldq xmm4, 8
+ cvtsi2sd xmm4, qword ptr [rcx+64]
+
+ ; load register f5 hi, lo
+ xorps xmm5, xmm5
+ cvtsi2sd xmm5, qword ptr [rcx+88]
+ pslldq xmm5, 8
+ cvtsi2sd xmm5, qword ptr [rcx+80]
+
+ ; load register f6 hi, lo
+ xorps xmm6, xmm6
+ cvtsi2sd xmm6, qword ptr [rcx+104]
+ pslldq xmm6, 8
+ cvtsi2sd xmm6, qword ptr [rcx+96]
+
+ ; load register f7 hi, lo
+ xorps xmm7, xmm7
cvtsi2sd xmm7, qword ptr [rcx+120]
+ pslldq xmm7, 8
+ cvtsi2sd xmm7, qword ptr [rcx+112]
; program body
@@ -125,21 +175,23 @@ rx_finish:
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
- movd qword ptr [rcx+64], xmm8
- movd qword ptr [rcx+72], xmm9
- movd qword ptr [rcx+80], xmm2
- movd qword ptr [rcx+88], xmm3
- movd qword ptr [rcx+96], xmm4
- movd qword ptr [rcx+104], xmm5
- movd qword ptr [rcx+112], xmm6
- movd qword ptr [rcx+120], xmm7
+ movdqa xmmword ptr [rcx+64], xmm8
+ movdqa xmmword ptr [rcx+80], xmm9
+ movdqa xmmword ptr [rcx+96], xmm2
+ movdqa xmmword ptr [rcx+112], xmm3
+ lea rcx, [rcx+64]
+ movdqa xmmword ptr [rcx+64], xmm4
+ movdqa xmmword ptr [rcx+80], xmm5
+ movdqa xmmword ptr [rcx+96], xmm6
+ movdqa xmmword ptr [rcx+112], xmm7
; load callee-saved registers
- movdqu xmm9, xmmword ptr [rsp]
- movdqu xmm8, xmmword ptr [rsp+16]
- movdqu xmm7, xmmword ptr [rsp+32]
- movdqu xmm6, xmmword ptr [rsp+48]
- add rsp, 64
+ movdqu xmm10, xmmword ptr [rsp]
+ movdqu xmm9, xmmword ptr [rsp+16]
+ movdqu xmm8, xmmword ptr [rsp+32]
+ movdqu xmm7, xmmword ptr [rsp+48]
+ movdqu xmm6, xmmword ptr [rsp+64]
+ add rsp, 80
pop r15
pop r14
pop r13
@@ -171,7 +223,7 @@ rx_read_dataset:
pop r8
ret 0
-rx_read_dataset_full:
+rx_read_dataset_r:
mov edx, dword ptr [rbx] ; ma
mov rax, qword ptr [rbx+8] ; dataset
mov rax, qword ptr [rax+rdx]
@@ -179,12 +231,27 @@ rx_read_dataset_full:
xor ecx, dword ptr [rbx+4] ; mx
mov dword ptr [rbx+4], ecx
test ecx, 0FFF8h
- jne short rx_read_dataset_full_ret
+ jne short rx_read_dataset_r_ret
and ecx, -8
mov dword ptr [rbx], ecx
mov rdx, qword ptr [rbx+8]
prefetcht0 byte ptr [rdx+rcx]
-rx_read_dataset_full_ret:
+rx_read_dataset_r_ret:
+ ret 0
+
+rx_read_dataset_f: ; dataset read for floating point operands; in: ecx, rbx = memory registers; out: xmm0
+ mov edx, dword ptr [rbx] ; ma
+ mov rax, qword ptr [rbx+8] ; dataset
+ cvtdq2pd xmm0, qword ptr [rax+rdx] ; xmm0 = the two int32 values at dataset+ma converted to two doubles
+ add dword ptr [rbx], 8 ; ma += 8
+ xor ecx, dword ptr [rbx+4] ; mx
+ mov dword ptr [rbx+4], ecx ; mx ^= ecx (as passed in by the caller)
+ test ecx, 0FFF8h ; bits 3..15 of the new mx
+ jne short rx_read_dataset_f_ret ; any of those bits set: done
+ and ecx, -8 ; otherwise clear the low 3 bits of mx ...
+ mov dword ptr [rbx], ecx ; ... and use the result as the new ma
+ prefetcht0 byte ptr [rax+rcx] ; prefetch the dataset location of the new ma
+rx_read_dataset_f_ret:
ret 0
executeProgram ENDP
diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp
index e25b6cc..bb99ca7 100644
--- a/src/instructionWeights.hpp
+++ b/src/instructionWeights.hpp
@@ -19,15 +19,15 @@ along with RandomX. If not, see.
#pragma once
-#define WT_ADD_64 10
+#define WT_ADD_64 11
#define WT_ADD_32 2
-#define WT_SUB_64 10
+#define WT_SUB_64 11
#define WT_SUB_32 2
-#define WT_MUL_64 21
+#define WT_MUL_64 23
#define WT_MULH_64 10
#define WT_MUL_32 15
#define WT_IMUL_32 15
-#define WT_IMULH_64 10
+#define WT_IMULH_64 6
#define WT_DIV_64 1
#define WT_IDIV_64 1
#define WT_AND_64 4
@@ -47,8 +47,9 @@ along with RandomX. If not, see.
#define WT_FPDIV 8
#define WT_FPSQRT 6
#define WT_FPROUND 2
-#define WT_CALL 24
-#define WT_RET 18
+#define WT_CALL 20
+#define WT_RET 22
+
constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \
@@ -60,6 +61,7 @@ WT_SAR_64 + WT_ROL_64 + WT_ROR_64 + WT_FPADD + WT_FPSUB + WT_FPMUL \
static_assert(wtSum == 256,
"Sum of instruction weights must be 256");
+#define REP0(x) //zero repetitions: expands to nothing
#define REP1(x) x,
#define REP2(x) REP1(x) x,
#define REP3(x) REP2(x) x,
@@ -86,6 +88,16 @@ static_assert(wtSum == 256,
#define REP24(x) REP23(x) x,
#define REP25(x) REP24(x) x,
#define REP26(x) REP25(x) x,
+#define REP27(x) REP26(x) x, //REPn(x) = REP(n-1)(x) followed by one more "x,"
+#define REP28(x) REP27(x) x,
+#define REP29(x) REP28(x) x,
+#define REP30(x) REP29(x) x,
+#define REP31(x) REP30(x) x,
+#define REP32(x) REP31(x) x,
+#define REP33(x) REP32(x) x,
+#define REP40(x) REP32(x) REP8(x) //40 = 32 + 8
+#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) //128 = 4 * 32
+#define REP256(x) REP128(x) REP128(x) //256 = 2 * 128
#define REPNX(x,N) REP##N(x)
#define REPN(x,N) REPNX(x,N)
#define NUM(x) x
diff --git a/src/instructions.hpp b/src/instructions.hpp
index 7afb916..2321be6 100644
--- a/src/instructions.hpp
+++ b/src/instructions.hpp
@@ -22,16 +22,10 @@ along with RandomX. If not, see.
namespace RandomX {
- inline double convertToDouble(int64_t x) {
- return (double)(x &-2048L);
- }
-
- inline double convertToDoubleNonZero(int64_t x) {
- return (double)((x & -2048L) | 2048);
- }
-
- inline double convertToDoubleNonNegative(int64_t x) {
- return (double)(x & 9223372036854773760L);
+ //Converts a signed 64-bit integer to double after clearing its 11 least-significant bits.
+ //The remaining bits fit exactly into the double mantissa, so the conversion never rounds.
+ inline double convertSigned52(int64_t x) {
+ return (double)(x & -2048L); //-2048 = all bits set except the low 11
}
extern "C" {
@@ -59,27 +53,11 @@ namespace RandomX {
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
bool JMP_COND(uint8_t, convertible_t&, int32_t);
void FPINIT();
- void FPADD(convertible_t& a, double b, convertible_t& c);
- void FPSUB(convertible_t& a, double b, convertible_t& c);
- void FPMUL(convertible_t& a, double b, convertible_t& c);
- void FPDIV(convertible_t& a, double b, convertible_t& c);
- void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c);
- void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c);
-
- inline void FPADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
- FPADD(a, b.f64, c);
- }
-
- inline void FPSUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
- FPSUB(a, b.f64, c);
- }
-
- inline void FPMUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
- FPMUL(a, b.f64, c);
- }
-
- inline void FPDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
- FPDIV(a, b.f64, c);
- }
+ void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c = (double)a.i32{lo,hi} + b, per lane
+ void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c = (double)a.i32{lo,hi} - b, per lane
+ void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c = (double)a.i32{lo,hi} * b, NaN results replaced by 0
+ void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c = (double)a.i32{lo,hi} / b, NaN results replaced by 0
+ void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c = sqrt(|(double)a.i32{lo,hi}|), b is not read
+ void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c); //c.lo = convertSigned52(a.i64), then switches on (a.u64 & 3)
}
}
\ No newline at end of file
diff --git a/src/instructionsPortable.cpp b/src/instructionsPortable.cpp
index 5207c1e..790506b 100644
--- a/src/instructionsPortable.cpp
+++ b/src/instructionsPortable.cpp
@@ -17,7 +17,6 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
//#define DEBUG
-//#define FTZ
#include "instructions.hpp"
#include "intrinPortable.h"
#pragma STDC FENV_ACCESS on
@@ -154,19 +153,17 @@ static inline int32_t safeSub(int32_t a, int32_t b) {
#define subOverflow __subOverflow
#endif
-static double FlushDenormal(double x) {
- if (std::fpclassify(x) == FP_SUBNORMAL) {
- return 0;
+static inline double FlushDenormalNaN(double x) { //Returns 0.0 for subnormal or NaN input, otherwise x unchanged
+ int fpc = std::fpclassify(x); //classify once, compare twice
+ if (fpc == FP_SUBNORMAL || fpc == FP_NAN) {
+ return 0.0;
}
return x;
}
-#ifdef FTZ
-#undef FTZ
-#define FTZ(x) FlushDenormal(x)
-#else
-#define FTZ(x) x
-#endif
+static inline double FlushNaN(double x) { //Returns 0.0 for NaN input, otherwise x unchanged
+ return x != x ? 0.0 : x; //x != x is true only when x is NaN
+}
namespace RandomX {
@@ -286,37 +283,95 @@ namespace RandomX {
}
void FPINIT() {
- setRoundMode(FE_TONEAREST);
- }
-
- void FPADD(convertible_t& a, double b, convertible_t& c) {
- c.f64 = FTZ(convertToDouble(a.i64) + b);
- }
-
- void FPSUB(convertible_t& a, double b, convertible_t& c) {
- c.f64 = FTZ(convertToDouble(a.i64) - b);
- }
-
- void FPMUL(convertible_t& a, double b, convertible_t& c) {
- c.f64 = FTZ(convertToDoubleNonZero(a.i64) * b);
- }
-
- void FPDIV(convertible_t& a, double b, convertible_t& c) {
- c.f64 = FTZ(convertToDoubleNonZero(a.i64) / b);
- }
-
- void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
#ifdef __SSE2__
- double d = convertToDoubleNonNegative(a.i64);
- c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_pd(&d)));
+ _mm_setcsr(0x9FC0); //MXCSR = 0x9FC0 (40896): flush to zero, denormals are zero, round to nearest, all exceptions masked
#else
- c.f64 = FTZ(sqrt(convertToDoubleNonNegative(a.i64)));
+ setRoundMode(FE_TONEAREST); //no MXCSR without SSE2: only the rounding mode is reset to nearest
#endif
-
}
- void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
- c.f64 = convertToDouble(a.i64);
+ void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { //c.lo = (double)a.i32lo + b.lo, c.hi = (double)a.i32hi + b.hi
+#ifdef __SSE2__
+ __m128i ai = _mm_loadl_epi64((const __m128i*)&a); //load the 64 bits of a (two int32 lanes)
+ __m128d ad = _mm_cvtepi32_pd(ai); //convert both int32 lanes to double
+ __m128d bd = _mm_load_pd(&b.lo.f64); //aligned load of b.lo and b.hi; NOTE(review): needs a 16-byte aligned b - confirm at the allocation site
+ __m128d cd = _mm_add_pd(ad, bd);
+ _mm_store_pd(&c.lo.f64, cd); //aligned store to c.lo and c.hi (same alignment requirement as the load)
+#else
+ double alo = (double)a.i32lo; //portable path: the same computation one lane at a time
+ double ahi = (double)a.i32hi;
+ c.lo.f64 = alo + b.lo.f64;
+ c.hi.f64 = ahi + b.hi.f64;
+#endif
+ }
+
+ void FPSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { //c.lo = (double)a.i32lo - b.lo, c.hi = (double)a.i32hi - b.hi
+#ifdef __SSE2__
+ __m128i ai = _mm_loadl_epi64((const __m128i*)&a); //load the 64 bits of a (two int32 lanes)
+ __m128d ad = _mm_cvtepi32_pd(ai); //convert both int32 lanes to double
+ __m128d bd = _mm_load_pd(&b.lo.f64); //aligned load of b.lo and b.hi; NOTE(review): needs a 16-byte aligned b - confirm at the allocation site
+ __m128d cd = _mm_sub_pd(ad, bd); //operand order matters: a - b
+ _mm_store_pd(&c.lo.f64, cd); //aligned store to c.lo and c.hi (same alignment requirement as the load)
+#else
+ double alo = (double)a.i32lo; //portable path: the same computation one lane at a time
+ double ahi = (double)a.i32hi;
+ c.lo.f64 = alo - b.lo.f64;
+ c.hi.f64 = ahi - b.hi.f64;
+#endif
+ }
+
+ void FPMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { //c = (double)a.i32{lo,hi} * b per lane, NaN products replaced by 0
+#ifdef __SSE2__
+ __m128i ai = _mm_loadl_epi64((const __m128i*)&a); //load the 64 bits of a (two int32 lanes)
+ __m128d ad = _mm_cvtepi32_pd(ai); //convert both int32 lanes to double
+ __m128d bd = _mm_load_pd(&b.lo.f64); //aligned load of b.lo and b.hi; NOTE(review): needs a 16-byte aligned b - confirm at the allocation site
+ __m128d cd = _mm_mul_pd(ad, bd);
+ __m128d mask = _mm_cmpeq_pd(cd, cd); //a lane compares equal to itself unless it is NaN: all ones for numbers, zero for NaN
+ cd = _mm_and_pd(cd, mask); //NaN lanes become +0.0, other lanes are unchanged
+ _mm_store_pd(&c.lo.f64, cd); //aligned store to c.lo and c.hi (same alignment requirement as the load)
+#else
+ double alo = (double)a.i32lo; //portable path: the same computation one lane at a time
+ double ahi = (double)a.i32hi;
+ c.lo.f64 = FlushNaN(alo * b.lo.f64);
+ c.hi.f64 = FlushNaN(ahi * b.hi.f64);
+#endif
+ }
+
+ void FPDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { //c = (double)a.i32{lo,hi} / b per lane, NaN quotients replaced by 0
+#ifdef __SSE2__
+ __m128i ai = _mm_loadl_epi64((const __m128i*)&a); //load the 64 bits of a (two int32 lanes)
+ __m128d ad = _mm_cvtepi32_pd(ai); //convert both int32 lanes to double
+ __m128d bd = _mm_load_pd(&b.lo.f64); //aligned load of b.lo and b.hi; NOTE(review): needs a 16-byte aligned b - confirm at the allocation site
+ __m128d cd = _mm_div_pd(ad, bd);
+ __m128d mask = _mm_cmpeq_pd(cd, cd); //all ones for numeric lanes, zero for NaN lanes
+ cd = _mm_and_pd(cd, mask); //NaN lanes become +0.0; NOTE(review): denormals are not cleared here, presumably left to the flush-to-zero MXCSR bit - confirm
+ _mm_store_pd(&c.lo.f64, cd); //aligned store to c.lo and c.hi (same alignment requirement as the load)
+#else
+ double alo = (double)a.i32lo; //portable path: the same computation one lane at a time
+ double ahi = (double)a.i32hi;
+ c.lo.f64 = FlushDenormalNaN(alo / b.lo.f64); //explicit flush of denormal and NaN results
+ c.hi.f64 = FlushDenormalNaN(ahi / b.hi.f64);
+#endif
+ }
+
+ void FPSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) { //c = sqrt(|(double)a.i32{lo,hi}|) per lane; b is not read
+#ifdef __SSE2__
+ __m128i ai = _mm_loadl_epi64((const __m128i*)&a); //load the 64 bits of a (two int32 lanes)
+ __m128d ad = _mm_cvtepi32_pd(ai); //convert both int32 lanes to double
+ const __m128d absmask = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL)); //every bit except the sign bit; written as a literal to avoid the signed shift 1LL << 63
+ ad = _mm_and_pd(ad, absmask); //clear the sign bits: absolute value of both lanes
+ __m128d cd = _mm_sqrt_pd(ad);
+ _mm_store_pd(&c.lo.f64, cd); //aligned store to c.lo and c.hi; NOTE(review): needs a 16-byte aligned c - confirm at the allocation site
+#else
+ double alo = (double)a.i32lo; //portable path: the same computation one lane at a time
+ double ahi = (double)a.i32hi;
+ c.lo.f64 = sqrt(std::abs(alo));
+ c.hi.f64 = sqrt(std::abs(ahi));
+#endif
+ }
+
+ void FPROUND(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
+ c.lo.f64 = convertSigned52(a.i64);
switch (a.u64 & 3) {
case RoundDown:
#ifdef DEBUG
diff --git a/src/main.cpp b/src/main.cpp
index fc95c8b..8bb5492 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -79,14 +79,6 @@ void readInt(int argc, char** argv, int& out, int defaultValue) {
out = defaultValue;
}
-std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
- for (int i = 0; i < RandomX::RegistersCount; ++i)
- os << std::hex << "r" << i << " = " << rf.r[i].u64 << std::endl << std::dec;
- for (int i = 0; i < RandomX::RegistersCount; ++i)
- os << std::hex << "f" << i << " = " << rf.f[i].u64 << " (" << rf.f[i].f64 << ")" << std::endl << std::dec;
- return os;
-}
-
class AtomicHash {
public:
AtomicHash() {
@@ -282,7 +274,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(programCount == 1000)
- std::cout << "Reference result: f6bf06465d5fa1b1dc919140b9e9f9e210b07ae6d662988458a172e9a267eb3f" << std::endl;
+ std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
/*if (threadCount == 1 && !compiled) {
auto ivm = (RandomX::InterpretedVirtualMachine*)vms[0];
diff --git a/src/program.inc b/src/program.inc
index b41c7b5..081647f 100644
--- a/src/program.inc
+++ b/src/program.inc
@@ -1,82 +1,12 @@
-rx_i_0: ;SUB_64
+rx_i_0: ;RET
dec edi
- js rx_finish
- xor r14, 087d93944h
- mov eax, r14d
- and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- sub rax, r10
- mov r14, rax
-
-rx_i_1: ;IMULH_64
- dec edi
- js rx_finish
- xor r8, 0d7a5aadbh
- mov eax, r8d
- and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- mov rcx, -615634046
- imul rcx
- mov rax, rdx
- mov rcx, rax
- mov eax, r14d
- xor eax, 0db4e2b82h
- and eax, 2047
- mov qword ptr [rsi + rax * 8], rcx
-
-rx_i_2: ;RET
- dec edi
- js rx_finish
- xor r10, 06ffcedb1h
- mov eax, r10d
- and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- cmp rsp, rbp
- je short not_taken_ret_2
- cmp r8d, 330026357
- jo short not_taken_ret_2
- xor rax, qword ptr [rsp + 8]
- mov r13, rax
- ret 8
-not_taken_ret_2:
- mov r13, rax
-
-rx_i_3: ;RET
- dec edi
- js rx_finish
- xor r9, 0a35d739ch
- mov eax, r9d
- and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- cmp rsp, rbp
- je short not_taken_ret_3
- cmp r14d, 2014518380
- jno short not_taken_ret_3
- xor rax, qword ptr [rsp + 8]
- mov rcx, rax
- mov eax, r11d
- xor eax, 078131c6ch
- and eax, 2047
- mov qword ptr [rsi + rax * 8], rcx
- ret 8
-not_taken_ret_3:
- mov rcx, rax
- mov eax, r11d
- xor eax, 078131c6ch
- and eax, 2047
- mov qword ptr [rsi + rax * 8], rcx
-
-rx_i_4: ;RET
- dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0ca9788ah
mov eax, r9d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_4
- cmp r11d, 445530481
- ja short not_taken_ret_4
+ je short not_taken_ret_0
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r12d
@@ -84,117 +14,120 @@ rx_i_4: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_4:
+not_taken_ret_0:
mov rcx, rax
mov eax, r12d
xor eax, 01a8e4171h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_5: ;AND_64
+rx_i_1: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 06afc2fa4h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
and rax, r10
mov r12, rax
-rx_i_6: ;CALL
+rx_i_2: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 097210f7bh
mov eax, r15d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r11d, 1348521207
- jno short taken_call_6
+ jno short taken_call_2
mov rcx, rax
mov eax, r9d
xor eax, 05060ccf7h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_7
-taken_call_6:
+ jmp rx_i_3
+taken_call_2:
push rax
- call rx_i_51
+ call rx_i_47
-rx_i_7: ;FPROUND
+rx_i_3: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 082c73195h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, rax
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm8, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- movsd xmm8, xmm0
+ mov eax, r8d
+ xor eax, 06bb1a0b2h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_8: ;MUL_32
+rx_i_4: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 077daefb4h
mov eax, r14d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- mov ecx, eax
- mov eax, r14d
- imul rax, rcx
+ mov rcx, r14
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r9d
xor eax, 06ce10c20h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_9: ;IMUL_32
+rx_i_5: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0379f9ee0h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r12d
imul rax, rcx
mov r12, rax
-rx_i_10: ;MULH_64
+rx_i_6: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 03bae7272h
mov ecx, r8d
- call rx_read_dataset
- mov rcx, r15
- mul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ imul rax, r15
mov rcx, rax
mov eax, r9d
xor eax, 098a649d1h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_11: ;FPADD
+rx_i_7: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0e264ed81h
mov eax, r10d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- movsd xmm6, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm6
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 057c8c41bh
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_12: ;SHL_64
+rx_i_8: ;SHL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 068c1e5d2h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
shl rax, 47
mov rcx, rax
mov eax, r12d
@@ -202,9 +135,9 @@ rx_i_12: ;SHL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_13: ;AND_64
+rx_i_9: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 085121c54h
mov eax, r14d
and eax, 32767
@@ -212,78 +145,72 @@ rx_i_13: ;AND_64
and rax, 565870810
mov r10, rax
-rx_i_14: ;OR_64
+rx_i_10: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 052efde3eh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, -727859809
mov r13, rax
-rx_i_15: ;FPADD
+rx_i_11: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0a9bf8aa1h
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm5
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm5
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0852d40d8h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_16: ;CALL
+rx_i_12: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0db2691ch
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r8d, -1763940407
- jge short taken_call_16
+ jge short taken_call_12
mov r8, rax
- jmp rx_i_17
-taken_call_16:
+ jmp rx_i_13
+taken_call_12:
push rax
- call rx_i_39
+ call rx_i_35
-rx_i_17: ;FPSUB
+rx_i_13: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 061c0d34dh
mov ecx, r12d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- mov eax, r9d
- xor eax, 04f2f223ch
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm3
+ movaps xmm9, xmm0
-rx_i_18: ;SHR_64
+rx_i_14: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0e761d1beh
mov ecx, r10d
- call rx_read_dataset
- mov rcx, r9
- shr rax, cl
+ call rx_read_dataset_r
+ shr rax, 4
mov rcx, rax
mov eax, r10d
xor eax, 03c1a72f8h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_19: ;RET
+rx_i_15: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 074ddb688h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_19
- cmp r11d, 1183529144
- js short not_taken_ret_19
+ je short not_taken_ret_15
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r14d
@@ -291,16 +218,16 @@ rx_i_19: ;RET
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_19:
+not_taken_ret_15:
mov rcx, rax
mov eax, r14d
xor eax, 0468b38b8h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_20: ;ADD_64
+rx_i_16: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 06be90627h
mov eax, r14d
and eax, 2047
@@ -312,78 +239,78 @@ rx_i_20: ;ADD_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_21: ;FPMUL
+rx_i_17: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0fbc6fc35h
mov eax, r11d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm4
- movsd xmm4, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm4
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0f77ffe16h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_22: ;FPSUB
+rx_i_18: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0c28ca080h
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm4
- movsd xmm3, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm4
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0869baa81h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm3
-rx_i_23: ;FPSUB
+rx_i_19: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0ac009c30h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm8
- mov eax, r15d
- xor eax, 0e92dc022h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm8
+ movaps xmm7, xmm0
-rx_i_24: ;FPMUL
+rx_i_20: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0ecca967dh
mov ecx, r13d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0aad81365h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_25: ;FPADD
+rx_i_21: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0977f0284h
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r15d
- xor eax, 0db5e0aafh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm7, xmm0
-rx_i_26: ;ADD_32
+rx_i_22: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 080bdfefah
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
add eax, r8d
mov rcx, rax
mov eax, r10d
@@ -391,21 +318,21 @@ rx_i_26: ;ADD_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_27: ;MUL_64
+rx_i_23: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0e1e0d3c4h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r11
mov r8, rax
-rx_i_28: ;IMULH_64
+rx_i_24: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 070d3b8c7h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r15
imul rcx
mov rax, rdx
@@ -415,52 +342,53 @@ rx_i_28: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_29: ;FPMUL
+rx_i_25: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01cf77a04h
mov ecx, r12d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- movsd xmm6, xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 0baf5c2d4h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_30: ;IMULH_64
+rx_i_26: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0e311468ch
mov ecx, r11d
- call rx_read_dataset
- mov rcx, r13
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ movsxd rax, r13d
+ imul rax, rcx
mov rcx, rax
mov eax, r9d
xor eax, 0306ff9ech
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_31: ;FPMUL
+rx_i_27: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01fd9911ah
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm3
- mov eax, r14d
- xor eax, 04b5d4e80h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm6, xmm0
-rx_i_32: ;XOR_64
+rx_i_28: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 067df757eh
mov eax, r13d
and eax, 2047
@@ -468,56 +396,51 @@ rx_i_32: ;XOR_64
xor rax, r13
mov r14, rax
-rx_i_33: ;SUB_64
+rx_i_29: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0be2e7c42h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, 1944166515
mov r14, rax
-rx_i_34: ;FPADD
+rx_i_30: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 084d067f7h
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4485208588087721984
- movd xmm1, rax
- addsd xmm0, xmm1
- mov eax, r15d
- xor eax, 0dd52e4f9h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm7, xmm0
-rx_i_35: ;FPADD
+rx_i_31: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0d352ce37h
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm6, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 01e2da792h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_36: ;XOR_64
+rx_i_32: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0a1f248dah
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, -1936869641
mov r9, rax
-rx_i_37: ;MULH_64
+rx_i_33: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0554720fch
mov eax, r9d
and eax, 2047
@@ -527,70 +450,69 @@ rx_i_37: ;MULH_64
mov rax, rdx
mov r12, rax
-rx_i_38: ;CALL
+rx_i_34: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0665e91f1h
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r14d, -380224718
- js short taken_call_38
+ js short taken_call_34
mov r15, rax
- jmp rx_i_39
-taken_call_38:
+ jmp rx_i_35
+taken_call_34:
push rax
- call rx_i_112
+ call rx_i_108
-rx_i_39: ;CALL
+rx_i_35: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 05ef1be79h
mov eax, r15d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- cmp r9d, -2040787098
- js short taken_call_39
+ cmp rsp, rbp
+ je short not_taken_ret_35
+ xor rax, qword ptr [rsp + 8]
+ mov r8, rax
+ ret 8
+not_taken_ret_35:
mov r8, rax
- jmp rx_i_40
-taken_call_39:
- push rax
- call rx_i_62
-rx_i_40: ;FPMUL
+rx_i_36: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 012ec7e3ah
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm6
- mov eax, r15d
- xor eax, 07a07ae2ah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_41: ;FPMUL
+rx_i_37: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d0706601h
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4480846364313387008
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0bca81c78h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm9
-rx_i_42: ;SUB_64
+rx_i_38: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 064056913h
mov eax, r9d
and eax, 2047
@@ -598,26 +520,24 @@ rx_i_42: ;SUB_64
sub rax, r14
mov r10, rax
-rx_i_43: ;ADD_32
+rx_i_39: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 02c1f1eb0h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
add eax, r14d
mov r14, rax
-rx_i_44: ;RET
+rx_i_40: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 068fd9009h
mov eax, r10d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_44
- cmp r12d, -1297973554
- jns short not_taken_ret_44
+ je short not_taken_ret_40
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r9d
@@ -625,51 +545,44 @@ rx_i_44: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_44:
+not_taken_ret_40:
mov rcx, rax
mov eax, r9d
xor eax, 0b2a27eceh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_45: ;CALL
+rx_i_41: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 037a30933h
mov eax, r9d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp r14d, -1070581824
- jo short taken_call_45
+ jo short taken_call_41
mov r9, rax
- jmp rx_i_46
-taken_call_45:
+ jmp rx_i_42
+taken_call_41:
push rax
- call rx_i_131
+ call rx_i_127
-rx_i_46: ;FPSUB
+rx_i_42: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0bc1de9f6h
mov eax, r15d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4739074351570092032
- movd xmm1, rax
- subsd xmm0, xmm1
- mov eax, r14d
- xor eax, 029260733h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm6
+ movaps xmm6, xmm0
-rx_i_47: ;SUB_64
+rx_i_43: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 02b2a2eech
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, 1693705407
mov rcx, rax
mov eax, r11d
@@ -677,39 +590,32 @@ rx_i_47: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_48: ;ROL_64
+rx_i_44: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0685817abh
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r9
rol rax, cl
mov r15, rax
-rx_i_49: ;FPSUB
+rx_i_45: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 08cd244ebh
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4478227626472767488
- movd xmm1, rax
- subsd xmm0, xmm1
- mov eax, r13d
- xor eax, 0977132cdh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm2
+ movaps xmm5, xmm0
-rx_i_50: ;ADD_64
+rx_i_46: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 06d8f4254h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r9
mov rcx, rax
mov eax, r8d
@@ -717,56 +623,55 @@ rx_i_50: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_51: ;CALL
+rx_i_47: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 05ba232c6h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r10d, 119251505
- jbe short taken_call_51
+ jbe short taken_call_47
mov rcx, rax
mov eax, r13d
xor eax, 071ba231h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_52
-taken_call_51:
+ jmp rx_i_48
+taken_call_47:
push rax
- call rx_i_135
+ call rx_i_131
-rx_i_52: ;FPSQRT
+rx_i_48: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0aaed618fh
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 020e5d9e9h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm9
-rx_i_53: ;FPMUL
+rx_i_49: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0f96c6a45h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm3
- mov eax, r13d
- xor eax, 0c56b47bh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
-rx_i_54: ;OR_32
+rx_i_50: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0da3e4842h
mov eax, r9d
and eax, 32767
@@ -778,53 +683,51 @@ rx_i_54: ;OR_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_55: ;SUB_64
+rx_i_51: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0302b676ah
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, 419241919
mov r15, rax
-rx_i_56: ;CALL
+rx_i_52: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0fa88f48bh
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r13d, -534426193
- js short taken_call_56
+ js short taken_call_52
mov rcx, rax
mov eax, r15d
xor eax, 0e0254dafh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_57
-taken_call_56:
+ jmp rx_i_53
+taken_call_52:
push rax
- call rx_i_98
+ call rx_i_94
-rx_i_57: ;RET
+rx_i_53: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 03dff9b9eh
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_57
- cmp r8d, 2028798189
- jno short not_taken_ret_57
+ je short not_taken_ret_53
xor rax, qword ptr [rsp + 8]
mov r13, rax
ret 8
-not_taken_ret_57:
+not_taken_ret_53:
mov r13, rax
-rx_i_58: ;IMULH_64
+rx_i_54: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 060638de0h
mov eax, r11d
and eax, 2047
@@ -838,22 +741,26 @@ rx_i_58: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_59: ;FPMUL
+rx_i_55: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0dda983d4h
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm5
- movsd xmm3, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 07c79cddh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm3
-rx_i_60: ;AND_64
+rx_i_56: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0f1456b8eh
mov eax, r14d
and eax, 32767
@@ -865,9 +772,9 @@ rx_i_60: ;AND_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_61: ;MUL_64
+rx_i_57: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 010dc4571h
mov eax, r9d
and eax, 2047
@@ -879,55 +786,48 @@ rx_i_61: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_62: ;IDIV_64
+rx_i_58: ;IDIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0bcec0ebah
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
mov edx, r13d
cmp edx, -1
- jne short safe_idiv_62
+ jne short safe_idiv_58
mov rcx, rax
rol rcx, 1
dec rcx
- jz short result_idiv_62
-safe_idiv_62:
+ jz short result_idiv_58
+safe_idiv_58:
mov ecx, 1
test edx, edx
cmovne ecx, edx
movsxd rcx, ecx
cqo
idiv rcx
-result_idiv_62:
+result_idiv_58:
mov r8, rax
-rx_i_63: ;FPSUB
+rx_i_59: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0980dd402h
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm8
- mov eax, r15d
- xor eax, 04f4e2c91h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm8
+ movaps xmm7, xmm0
-rx_i_64: ;RET
+rx_i_60: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 03de14d1eh
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_64
- cmp r11d, 2075529029
- jo short not_taken_ret_64
+ je short not_taken_ret_60
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r11d
@@ -935,137 +835,133 @@ rx_i_64: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_64:
+not_taken_ret_60:
mov rcx, rax
mov eax, r11d
xor eax, 07bb60f45h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_65: ;CALL
+rx_i_61: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 05058ce64h
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r15d, 1933164545
- jns short taken_call_65
+ jns short taken_call_61
+ mov r11, rax
+ jmp rx_i_62
+taken_call_61:
+ push rax
+ call rx_i_120
+
+rx_i_62: ;FPMUL
+ dec edi
+ jz rx_finish
+ xor r15, 0c3089414h
+ mov ecx, r15d
+ call rx_read_dataset_f
+ mulpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 05c4789e3h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm2
+
+rx_i_63: ;FPMUL
+ dec edi
+ jz rx_finish
+ xor r9, 065cf272eh
+ mov eax, r9d
+ and eax, 2047
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm7
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
+
+rx_i_64: ;SUB_64
+ dec edi
+ jz rx_finish
+ xor r13, 0ae54dfbfh
+ mov ecx, r13d
+ call rx_read_dataset_r
+ sub rax, r15
+ mov r9, rax
+
+rx_i_65: ;CALL
+ dec edi
+ jz rx_finish
+ xor r13, 07b366ce6h
+ mov ecx, r13d
+ call rx_read_dataset_r
+ cmp r8d, 1498056607
+ js short taken_call_65
mov r11, rax
jmp rx_i_66
taken_call_65:
push rax
- call rx_i_124
+ call rx_i_129
-rx_i_66: ;FPMUL
+rx_i_66: ;FPSQRT
dec edi
- js rx_finish
- xor r15, 0c3089414h
- mov ecx, r15d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4744280396844236800
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm2, xmm0
-
-rx_i_67: ;FPMUL
- dec edi
- js rx_finish
- xor r9, 065cf272eh
- mov eax, r9d
- and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4480946344868970496
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r8d
- xor eax, 0be13d69eh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
-
-rx_i_68: ;SUB_64
- dec edi
- js rx_finish
- xor r13, 0ae54dfbfh
- mov ecx, r13d
- call rx_read_dataset
- sub rax, r15
- mov r9, rax
-
-rx_i_69: ;CALL
- dec edi
- js rx_finish
- xor r13, 07b366ce6h
- mov ecx, r13d
- call rx_read_dataset
- cmp r8d, 1498056607
- js short taken_call_69
- mov r11, rax
- jmp rx_i_70
-taken_call_69:
- push rax
- call rx_i_133
-
-rx_i_70: ;FPSQRT
- dec edi
- js rx_finish
+ jz rx_finish
xor r15, 015a1b689h
mov ecx, r15d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 07305e78h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_71: ;CALL
+rx_i_67: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 088393ba0h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r13d, 2031541081
- jns short taken_call_71
+ jns short taken_call_67
mov r9, rax
- jmp rx_i_72
-taken_call_71:
+ jmp rx_i_68
+taken_call_67:
push rax
- call rx_i_83
+ call rx_i_79
-rx_i_72: ;FPSUB
+rx_i_68: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 03aa5c3a4h
mov ecx, r13d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm2
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm2
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 03c51ef39h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_73: ;FPADD
+rx_i_69: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0376c9c27h
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm5
- mov eax, r8d
- xor eax, 098c2e84dh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm5
+ movaps xmm8, xmm0
-rx_i_74: ;MULH_64
+rx_i_70: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0bbbec3fah
mov eax, r8d
and eax, 2047
@@ -1075,67 +971,58 @@ rx_i_74: ;MULH_64
mov rax, rdx
mov r13, rax
-rx_i_75: ;FPMUL
+rx_i_71: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0e9efb350h
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4743866573565984768
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r15d
- xor eax, 056660eedh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_76: ;CALL
+rx_i_72: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0f4e51e28h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r9d, -631091751
- jno short taken_call_76
+ jno short taken_call_72
mov rcx, rax
mov eax, r11d
xor eax, 0da624dd9h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_77
-taken_call_76:
+ jmp rx_i_73
+taken_call_72:
push rax
- call rx_i_195
+ call rx_i_191
-rx_i_77: ;FPROUND
+rx_i_73: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0c24ddbd4h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, rax
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm2, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- mov eax, r10d
- xor eax, 040624270h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
-rx_i_78: ;MUL_64
+rx_i_74: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04c4b0c7fh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, rax, -1431647438
mov rcx, rax
mov eax, r9d
@@ -1143,44 +1030,44 @@ rx_i_78: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_79: ;CALL
+rx_i_75: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 03bcc02e3h
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- cmp r11d, -1160798683
- jo short taken_call_79
+ cmp rsp, rbp
+ je short not_taken_ret_75
+ xor rax, qword ptr [rsp + 8]
+ mov r13, rax
+ ret 8
+not_taken_ret_75:
mov r13, rax
- jmp rx_i_80
-taken_call_79:
- push rax
- call rx_i_206
-rx_i_80: ;FPADD
+rx_i_76: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04b0ff63eh
mov eax, r11d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm7, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 083bc0396h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm7
-rx_i_81: ;RET
+rx_i_77: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0b956b3e8h
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_81
- cmp r15d, 982695034
- jo short not_taken_ret_81
+ je short not_taken_ret_77
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r11d
@@ -1188,16 +1075,16 @@ rx_i_81: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_81:
+not_taken_ret_77:
mov rcx, rax
mov eax, r11d
xor eax, 03a92bc7ah
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_82: ;MUL_32
+rx_i_78: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0edeca680h
mov eax, r9d
and eax, 2047
@@ -1207,72 +1094,69 @@ rx_i_82: ;MUL_32
imul rax, rcx
mov r15, rax
-rx_i_83: ;CALL
+rx_i_79: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0fbdddcb5h
mov eax, r11d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- cmp r13d, 1800043331
- jbe short taken_call_83
+ cmp rsp, rbp
+ je short not_taken_ret_79
+ xor rax, qword ptr [rsp + 8]
+ mov rcx, rax
+ mov eax, r11d
+ xor eax, 06b4a7b43h
+ and eax, 2047
+ mov qword ptr [rsi + rax * 8], rcx
+ ret 8
+not_taken_ret_79:
mov rcx, rax
mov eax, r11d
xor eax, 06b4a7b43h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_84
-taken_call_83:
- push rax
- call rx_i_97
-rx_i_84: ;FPADD
+rx_i_80: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 09cec97a1h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4736212432215605248
- movd xmm1, rax
- addsd xmm0, xmm1
- mov eax, r11d
- xor eax, 01a681d13h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm3, xmm0
-rx_i_85: ;OR_64
+rx_i_81: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 078228167h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r13
mov r8, rax
-rx_i_86: ;CALL
+rx_i_82: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 078cae1ffh
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r12d, -68969733
- jo short taken_call_86
+ jo short taken_call_82
mov rcx, rax
mov eax, r10d
xor eax, 0fbe39afbh
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_87
-taken_call_86:
+ jmp rx_i_83
+taken_call_82:
push rax
- call rx_i_149
+ call rx_i_145
-rx_i_87: ;AND_64
+rx_i_83: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d9b6a533h
mov eax, r10d
and eax, 32767
@@ -1280,12 +1164,12 @@ rx_i_87: ;AND_64
and rax, r10
mov r12, rax
-rx_i_88: ;ROR_64
+rx_i_84: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0e9e75336h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r10
ror rax, cl
mov rcx, rax
@@ -1294,9 +1178,9 @@ rx_i_88: ;ROR_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_89: ;MUL_64
+rx_i_85: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 04c0d378ah
mov eax, r13d
and eax, 2047
@@ -1304,12 +1188,12 @@ rx_i_89: ;MUL_64
imul rax, r8
mov r10, rax
-rx_i_90: ;OR_64
+rx_i_86: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04386e368h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r8
mov rcx, rax
mov eax, r12d
@@ -1317,9 +1201,9 @@ rx_i_90: ;OR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_91: ;SUB_64
+rx_i_87: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0d75a0ecfh
mov eax, r9d
and eax, 2047
@@ -1327,24 +1211,26 @@ rx_i_91: ;SUB_64
sub rax, r12
mov r8, rax
-rx_i_92: ;FPADD
+rx_i_88: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 031bb7f7ah
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm6
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0c149906eh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_93: ;MUL_64
+rx_i_89: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 03b45ecebh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r8
mov rcx, rax
mov eax, r10d
@@ -1352,85 +1238,77 @@ rx_i_93: ;MUL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_94: ;FPADD
+rx_i_90: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0ee08e76bh
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- mov eax, r14d
- xor eax, 0b435cf2dh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm6, xmm0
-rx_i_95: ;FPMUL
+rx_i_91: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 042e28e94h
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- mov eax, r12d
- xor eax, 0b723c20bh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_96: ;CALL
+rx_i_92: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0729260e1h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r14d, 1288893603
- jge short taken_call_96
+ jge short taken_call_92
mov r12, rax
- jmp rx_i_97
-taken_call_96:
+ jmp rx_i_93
+taken_call_92:
push rax
- call rx_i_174
+ call rx_i_170
-rx_i_97: ;FPADD
+rx_i_93: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0bfcebaf4h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm2
- movsd xmm2, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm2
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 07e48a0d8h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm2
-rx_i_98: ;RET
+rx_i_94: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0ea326630h
mov eax, r13d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_98
- cmp r13d, -343122976
- jns short not_taken_ret_98
+ je short not_taken_ret_94
xor rax, qword ptr [rsp + 8]
mov r8, rax
ret 8
-not_taken_ret_98:
+not_taken_ret_94:
mov r8, rax
-rx_i_99: ;MUL_64
+rx_i_95: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0b5451a2dh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r10
mov rcx, rax
mov eax, r15d
@@ -1438,90 +1316,91 @@ rx_i_99: ;MUL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_100: ;IMUL_32
+rx_i_96: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04f912ef8h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
mov rax, -1354397081
imul rax, rcx
mov r11, rax
-rx_i_101: ;FPSQRT
+rx_i_97: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0acc45b3bh
mov ecx, r15d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0c477e850h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_102: ;SUB_64
+rx_i_98: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 09900a4e8h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r15
mov r14, rax
-rx_i_103: ;FPDIV
+rx_i_99: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0841b2984h
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4743144143516073984
- movd xmm1, rax
- divsd xmm0, xmm1
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 04c21df83h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_104: ;ADD_64
+rx_i_100: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 07ebea48fh
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r9
mov r14, rax
-rx_i_105: ;SUB_32
+rx_i_101: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0631209d3h
mov eax, r10d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- sub eax, r8d
+ sub rax, r8
mov r11, rax
-rx_i_106: ;FPDIV
+rx_i_102: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0e50bf07ah
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm3
- mov eax, r15d
- xor eax, 03ec98420h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_107: ;MUL_64
+rx_i_103: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 02b7096f1h
mov eax, r10d
and eax, 32767
@@ -1533,9 +1412,9 @@ rx_i_107: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_108: ;IMULH_64
+rx_i_104: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 075deaf71h
mov eax, r11d
and eax, 32767
@@ -1549,12 +1428,12 @@ rx_i_108: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_109: ;MUL_32
+rx_i_105: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 036a51f72h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, r15d
imul rax, rcx
@@ -1564,70 +1443,73 @@ rx_i_109: ;MUL_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_110: ;FPMUL
+rx_i_106: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 07b512986h
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm3
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 03cb2505h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_111: ;CALL
+rx_i_107: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0f1d2e50h
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r11d, 1917037441
- jl short taken_call_111
+ jl short taken_call_107
mov rcx, rax
mov eax, r14d
xor eax, 07243ab81h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_112
-taken_call_111:
+ jmp rx_i_108
+taken_call_107:
push rax
- call rx_i_147
+ call rx_i_143
-rx_i_112: ;FPDIV
+rx_i_108: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 07327ba60h
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm5
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0678b65beh
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_113: ;FPADD
+rx_i_109: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0594e37deh
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm2
- mov eax, r11d
- xor eax, 094ab5a5ch
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm2
+ movaps xmm3, xmm0
-rx_i_114: ;ROL_64
+rx_i_110: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 04cdf5ebah
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r9
rol rax, cl
mov rcx, rax
@@ -1636,27 +1518,31 @@ rx_i_114: ;ROL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_115: ;CALL
+rx_i_111: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 02e16c97ch
mov ecx, r8d
- call rx_read_dataset
- cmp r14d, 1562606859
- jge short taken_call_115
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_111
+ xor rax, qword ptr [rsp + 8]
+ mov rcx, rax
+ mov eax, r12d
+ xor eax, 05d237d0bh
+ and eax, 32767
+ mov qword ptr [rsi + rax * 8], rcx
+ ret 8
+not_taken_ret_111:
mov rcx, rax
mov eax, r12d
xor eax, 05d237d0bh
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_116
-taken_call_115:
- push rax
- call rx_i_216
-rx_i_116: ;SUB_64
+rx_i_112: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d42ddbd4h
mov eax, r12d
and eax, 2047
@@ -1668,43 +1554,43 @@ rx_i_116: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_117: ;MUL_32
+rx_i_113: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 07a4f8cbbh
mov ecx, r10d
- call rx_read_dataset
- mov ecx, eax
- mov eax, r9d
- imul rax, rcx
+ call rx_read_dataset_r
+ mov rcx, r9
+ mul rcx
+ mov rax, rdx
mov r13, rax
-rx_i_118: ;IMULH_64
+rx_i_114: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 06e83e2cdh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r15
imul rcx
mov rax, rdx
mov r14, rax
-rx_i_119: ;OR_64
+rx_i_115: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0336c980eh
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r10
mov r14, rax
-rx_i_120: ;IMULH_64
+rx_i_116: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d122702eh
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, -1850776691
imul rcx
mov rax, rdx
@@ -1714,9 +1600,9 @@ rx_i_120: ;IMULH_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_121: ;AND_64
+rx_i_117: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 015f2012bh
mov eax, r11d
and eax, 2047
@@ -1728,177 +1614,160 @@ rx_i_121: ;AND_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_122: ;FPSUB
+rx_i_118: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 037ddf43dh
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm5
- mov eax, r14d
- xor eax, 0d0b219d0h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm5
+ movaps xmm6, xmm0
-rx_i_123: ;FPSUB
+rx_i_119: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0bba475f3h
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- mov eax, r13d
- xor eax, 02401488h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm3
+ movaps xmm5, xmm0
-rx_i_124: ;FPADD
+rx_i_120: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0e5561e3eh
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm4
- mov eax, r8d
- xor eax, 04d46f867h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm4
+ movaps xmm8, xmm0
-rx_i_125: ;FPMUL
+rx_i_121: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 03ab8f73h
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm5
- mov eax, r8d
- xor eax, 0808a2d8bh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_126: ;CALL
+rx_i_122: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 04e0dbd40h
mov ecx, r10d
- call rx_read_dataset
- cmp r11d, 2029448233
- jo short taken_call_126
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_122
+ xor rax, qword ptr [rsp + 8]
+ mov rcx, rax
+ mov eax, r14d
+ xor eax, 078f6ec29h
+ and eax, 2047
+ mov qword ptr [rsi + rax * 8], rcx
+ ret 8
+not_taken_ret_122:
mov rcx, rax
mov eax, r14d
xor eax, 078f6ec29h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_127
-taken_call_126:
- push rax
- call rx_i_196
-rx_i_127: ;SUB_64
+rx_i_123: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 073e9f58ah
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- sub rax, r15
+ add eax, r15d
mov r13, rax
-rx_i_128: ;CALL
+rx_i_124: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0e3fa3670h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r11d, 1719505436
- jns short taken_call_128
+ jns short taken_call_124
mov rcx, rax
mov eax, r11d
xor eax, 0667d921ch
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_129
-taken_call_128:
+ jmp rx_i_125
+taken_call_124:
push rax
- call rx_i_241
+ call rx_i_237
-rx_i_129: ;IMUL_32
+rx_i_125: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0ebec27cdh
mov ecx, r8d
- call rx_read_dataset
- movsxd rcx, eax
- movsxd rax, r14d
+ call rx_read_dataset_r
+ mov ecx, eax
+ mov eax, r14d
imul rax, rcx
mov r14, rax
-rx_i_130: ;FPDIV
+rx_i_126: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 01feb5264h
mov eax, r8d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm6
- mov eax, r10d
- xor eax, 04b88e021h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm2, xmm0
-rx_i_131: ;IMULH_64
+rx_i_127: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0405f500fh
mov ecx, r9d
- call rx_read_dataset
- mov rcx, r10
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ movsxd rax, r10d
+ imul rax, rcx
mov r8, rax
-rx_i_132: ;MUL_64
+rx_i_128: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0459f1154h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r9
mov r9, rax
-rx_i_133: ;CALL
+rx_i_129: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 081918b4ch
mov eax, r9d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r13d, -590624856
- jge short taken_call_133
+ jge short taken_call_129
mov r9, rax
- jmp rx_i_134
-taken_call_133:
+ jmp rx_i_130
+taken_call_129:
push rax
- call rx_i_158
+ call rx_i_154
-rx_i_134: ;OR_64
+rx_i_130: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 077c3b332h
mov eax, r9d
and eax, 2047
@@ -1910,17 +1779,15 @@ rx_i_134: ;OR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_135: ;RET
+rx_i_131: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 05792310bh
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_135
- cmp r15d, -537890955
- jns short not_taken_ret_135
+ je short not_taken_ret_131
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r15d
@@ -1928,33 +1795,28 @@ rx_i_135: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_135:
+not_taken_ret_131:
mov rcx, rax
mov eax, r15d
xor eax, 0dff06f75h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_136: ;FPADD
+rx_i_132: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0ebc6e10h
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- mov eax, r15d
- xor eax, 0b0c38959h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm6
+ movaps xmm7, xmm0
-rx_i_137: ;XOR_64
+rx_i_133: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0822f8b60h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, -1000526796
mov rcx, rax
mov eax, r15d
@@ -1962,45 +1824,44 @@ rx_i_137: ;XOR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_138: ;ADD_64
+rx_i_134: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d0f18593h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, 1516102347
mov r13, rax
-rx_i_139: ;FPMUL
+rx_i_135: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 088212ef9h
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm3
- mov eax, r8d
- xor eax, 0b29f3d2ah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_140: ;FPSQRT
+rx_i_136: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 01ae56e03h
mov ecx, r8d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0efd7799dh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_141: ;ROL_64
+rx_i_137: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 015a24231h
mov eax, r11d
and eax, 32767
@@ -2009,25 +1870,23 @@ rx_i_141: ;ROL_64
rol rax, cl
mov r11, rax
-rx_i_142: ;RET
+rx_i_138: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 02fd380c5h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_142
- cmp r9d, -1910517416
- jbe short not_taken_ret_142
+ je short not_taken_ret_138
xor rax, qword ptr [rsp + 8]
mov r10, rax
ret 8
-not_taken_ret_142:
+not_taken_ret_138:
mov r10, rax
-rx_i_143: ;ADD_64
+rx_i_139: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 093172470h
mov eax, r9d
and eax, 2047
@@ -2039,9 +1898,9 @@ rx_i_143: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_144: ;IMUL_32
+rx_i_140: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 052543553h
mov eax, r14d
and eax, 2047
@@ -2051,40 +1910,40 @@ rx_i_144: ;IMUL_32
imul rax, rcx
mov r14, rax
-rx_i_145: ;FPADD
+rx_i_141: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 02f636da1h
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4478407513863094272
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm2
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 099ff9ffdh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_146: ;CALL
+rx_i_142: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0b11a4f2ch
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r12d, 1365939282
- js short taken_call_146
+ js short taken_call_142
mov rcx, rax
mov eax, r10d
xor eax, 0516a9452h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_147
-taken_call_146:
+ jmp rx_i_143
+taken_call_142:
push rax
- call rx_i_261
+ call rx_i_257
-rx_i_147: ;IMUL_32
+rx_i_143: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 037f4b5d0h
mov eax, r15d
and eax, 2047
@@ -2094,9 +1953,9 @@ rx_i_147: ;IMUL_32
imul rax, rcx
mov r9, rax
-rx_i_148: ;IMULH_64
+rx_i_144: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 02e59e00ah
mov eax, r10d
and eax, 2047
@@ -2106,12 +1965,12 @@ rx_i_148: ;IMULH_64
mov rax, rdx
mov r15, rax
-rx_i_149: ;IMULH_64
+rx_i_145: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 08d5c798h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r11
imul rcx
mov rax, rdx
@@ -2121,21 +1980,21 @@ rx_i_149: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_150: ;IMULH_64
+rx_i_146: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 02327e6e2h
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, r12
- imul rcx
- mov rax, rdx
+ movsxd rcx, eax
+ movsxd rax, r12d
+ imul rax, rcx
mov r10, rax
-rx_i_151: ;MULH_64
+rx_i_147: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 03a7df043h
mov eax, r13d
and eax, 2047
@@ -2149,12 +2008,12 @@ rx_i_151: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_152: ;SUB_64
+rx_i_148: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0783e5c4eh
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r14
mov rcx, rax
mov eax, r10d
@@ -2162,9 +2021,9 @@ rx_i_152: ;SUB_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_153: ;MUL_32
+rx_i_149: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0aa0f5b2fh
mov eax, r12d
and eax, 2047
@@ -2178,9 +2037,9 @@ rx_i_153: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_154: ;DIV_64
+rx_i_150: ;DIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 01504ca7ah
mov eax, r9d
and eax, 2047
@@ -2197,9 +2056,9 @@ rx_i_154: ;DIV_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_155: ;OR_32
+rx_i_151: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0ea72a7cfh
mov eax, r9d
and eax, 2047
@@ -2211,31 +2070,35 @@ rx_i_155: ;OR_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_156: ;ROR_64
+rx_i_152: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0ad0e7a88h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r10
ror rax, cl
mov r10, rax
-rx_i_157: ;FPDIV
+rx_i_153: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0fd95ab87h
mov ecx, r15d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm2
- movsd xmm8, xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
+ mov eax, r8d
+ xor eax, 09111c981h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_158: ;MUL_32
+rx_i_154: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0256697b0h
mov eax, r10d
and eax, 2047
@@ -2245,12 +2108,12 @@ rx_i_158: ;MUL_32
imul rax, rcx
mov r10, rax
-rx_i_159: ;ROR_64
+rx_i_155: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0d23f3b78h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r10
ror rax, cl
mov rcx, rax
@@ -2259,29 +2122,29 @@ rx_i_159: ;ROR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_160: ;IMUL_32
+rx_i_156: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 098917533h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r15d
imul rax, rcx
mov r15, rax
-rx_i_161: ;ADD_64
+rx_i_157: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0dfac3efch
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r12
mov r14, rax
-rx_i_162: ;ADD_64
+rx_i_158: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0a64de090h
mov eax, r15d
and eax, 2047
@@ -2289,30 +2152,34 @@ rx_i_162: ;ADD_64
add rax, 1233402159
mov r10, rax
-rx_i_163: ;CALL
+rx_i_159: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0952a3abbh
mov ecx, r13d
- call rx_read_dataset
- cmp r15d, -8571241
- jbe short taken_call_163
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_159
+ xor rax, qword ptr [rsp + 8]
+ mov rcx, rax
+ mov eax, r13d
+ xor eax, 0ff7d3697h
+ and eax, 2047
+ mov qword ptr [rsi + rax * 8], rcx
+ ret 8
+not_taken_ret_159:
mov rcx, rax
mov eax, r13d
xor eax, 0ff7d3697h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_164
-taken_call_163:
- push rax
- call rx_i_185
-rx_i_164: ;SUB_64
+rx_i_160: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0b1685b90h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, 1518778665
mov rcx, rax
mov eax, r10d
@@ -2320,18 +2187,18 @@ rx_i_164: ;SUB_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_165: ;OR_64
+rx_i_161: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0ea992531h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r14
mov r8, rax
-rx_i_166: ;SAR_64
+rx_i_162: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 01fd57a4ah
mov eax, r9d
and eax, 2047
@@ -2340,12 +2207,12 @@ rx_i_166: ;SAR_64
sar rax, cl
mov r13, rax
-rx_i_167: ;SUB_64
+rx_i_163: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0e3486c0ah
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, -2101130488
mov rcx, rax
mov eax, r14d
@@ -2353,9 +2220,9 @@ rx_i_167: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_168: ;MUL_32
+rx_i_164: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01f0c2737h
mov eax, r12d
and eax, 2047
@@ -2369,17 +2236,15 @@ rx_i_168: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_169: ;RET
+rx_i_165: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0debb493eh
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_169
- cmp r8d, 1682991196
- jno short not_taken_ret_169
+ je short not_taken_ret_165
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r9d
@@ -2387,16 +2252,16 @@ rx_i_169: ;RET
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_169:
+not_taken_ret_165:
mov rcx, rax
mov eax, r9d
xor eax, 06450685ch
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_170: ;ROL_64
+rx_i_166: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0fe684081h
mov eax, r9d
and eax, 2047
@@ -2409,47 +2274,42 @@ rx_i_170: ;ROL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_171: ;FPMUL
+rx_i_167: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0d10371ch
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4739242735460941824
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm2, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm4
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 02a58510fh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm2
-rx_i_172: ;FPSQRT
+rx_i_168: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 071b15effh
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- mov eax, r15d
- xor eax, 08d1a76f8h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm7, xmm0
-rx_i_173: ;RET
+rx_i_169: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 072790347h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_173
- cmp r10d, -1286357107
- ja short not_taken_ret_173
+ je short not_taken_ret_169
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r14d
@@ -2457,31 +2317,31 @@ rx_i_173: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_173:
+not_taken_ret_169:
mov rcx, rax
mov eax, r14d
xor eax, 0b353bf8dh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_174: ;CALL
+rx_i_170: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04ae8a020h
mov eax, r8d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r10d, -1541051751
- jl short taken_call_174
+ jl short taken_call_170
mov r14, rax
- jmp rx_i_175
-taken_call_174:
+ jmp rx_i_171
+taken_call_170:
push rax
- call rx_i_208
+ call rx_i_204
-rx_i_175: ;IMULH_64
+rx_i_171: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 09901e05bh
mov eax, r15d
and eax, 2047
@@ -2491,9 +2351,9 @@ rx_i_175: ;IMULH_64
mov rax, rdx
mov r12, rax
-rx_i_176: ;SUB_64
+rx_i_172: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 050e8c510h
mov eax, r13d
and eax, 2047
@@ -2501,25 +2361,23 @@ rx_i_176: ;SUB_64
sub rax, r11
mov r12, rax
-rx_i_177: ;MULH_64
+rx_i_173: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 05422cf8fh
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, r12
- mul rcx
- mov rax, rdx
+ imul rax, r12
mov rcx, rax
mov eax, r12d
xor eax, 0ad60ae9ch
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_178: ;FPROUND
+rx_i_174: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0a025c3dbh
mov eax, r12d
and eax, 2047
@@ -2528,37 +2386,40 @@ rx_i_178: ;FPROUND
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm6, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- movsd xmm6, xmm0
+ mov eax, r14d
+ xor eax, 02be6989fh
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_179: ;SAR_64
+rx_i_175: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 08f74c11h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
sar rax, cl
mov r8, rax
-rx_i_180: ;SUB_64
+rx_i_176: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 01f2ed5f1h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r14
mov r10, rax
-rx_i_181: ;ADD_64
+rx_i_177: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d2072c79h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r10
mov rcx, rax
mov eax, r13d
@@ -2566,16 +2427,14 @@ rx_i_181: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_182: ;RET
+rx_i_178: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0a8e51933h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_182
- cmp r12d, -1016679819
- js short not_taken_ret_182
+ je short not_taken_ret_178
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r12d
@@ -2583,31 +2442,26 @@ rx_i_182: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_182:
+not_taken_ret_178:
mov rcx, rax
mov eax, r12d
xor eax, 0c366b275h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_183: ;FPADD
+rx_i_179: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0934ad492h
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm2
- mov eax, r8d
- xor eax, 01a997aebh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm2
+ movaps xmm8, xmm0
-rx_i_184: ;XOR_64
+rx_i_180: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 01cb3ce1fh
mov eax, r15d
and eax, 2047
@@ -2619,39 +2473,32 @@ rx_i_184: ;XOR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_185: ;CALL
+rx_i_181: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 023c7845fh
mov ecx, r10d
- call rx_read_dataset
- cmp r12d, -1612576918
- ja short taken_call_185
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_181
+ xor rax, qword ptr [rsp + 8]
+ mov r10, rax
+ ret 8
+not_taken_ret_181:
mov r10, rax
- jmp rx_i_186
-taken_call_185:
- push rax
- call rx_i_215
-rx_i_186: ;FPSUB
+rx_i_182: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0f8884327h
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4746551310076084224
- movd xmm1, rax
- subsd xmm0, xmm1
- mov eax, r14d
- xor eax, 07c8d12a5h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm7
+ movaps xmm6, xmm0
-rx_i_187: ;ADD_64
+rx_i_183: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 013070461h
mov eax, r13d
and eax, 2047
@@ -2659,40 +2506,40 @@ rx_i_187: ;ADD_64
add rax, 137260710
mov r10, rax
-rx_i_188: ;SAR_64
+rx_i_184: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 04764cdf7h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
sar rax, 40
mov r12, rax
-rx_i_189: ;CALL
+rx_i_185: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 03c41026fh
mov eax, r10d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp r15d, -1510284125
- jbe short taken_call_189
+ jbe short taken_call_185
mov rcx, rax
mov eax, r9d
xor eax, 0a5fae4a3h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_190
-taken_call_189:
+ jmp rx_i_186
+taken_call_185:
push rax
- call rx_i_250
+ call rx_i_246
-rx_i_190: ;XOR_32
+rx_i_186: ;XOR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0cded414bh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
xor eax, r15d
mov rcx, rax
mov eax, r10d
@@ -2700,113 +2547,99 @@ rx_i_190: ;XOR_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_191: ;FPDIV
+rx_i_187: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 05c6d64a8h
mov ecx, r13d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm6
- mov eax, r13d
- xor eax, 020dcdd88h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
-rx_i_192: ;FPMUL
+rx_i_188: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 04659becbh
mov eax, r9d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4746030173290233856
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r12d
- xor eax, 075253031h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_193: ;FPROUND
+rx_i_189: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0c52741d5h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, rax
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm5, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- mov eax, r13d
- xor eax, 0e6f1a3b7h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
-rx_i_194: ;RET
+rx_i_190: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0217bf5f3h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_194
- cmp r8d, -1901851954
- jns short not_taken_ret_194
+ je short not_taken_ret_190
xor rax, qword ptr [rsp + 8]
mov r13, rax
ret 8
-not_taken_ret_194:
+not_taken_ret_190:
mov r13, rax
-rx_i_195: ;CALL
+rx_i_191: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0884f3526h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r11d, 1687119072
- jno short taken_call_195
+ jno short taken_call_191
mov rcx, rax
mov eax, r14d
xor eax, 0648f64e0h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_196
-taken_call_195:
+ jmp rx_i_192
+taken_call_191:
push rax
- call rx_i_279
+ call rx_i_275
-rx_i_196: ;CALL
+rx_i_192: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0d76edad3h
mov eax, r8d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r14d, -117628864
- jns short taken_call_196
+ jns short taken_call_192
mov r8, rax
- jmp rx_i_197
-taken_call_196:
+ jmp rx_i_193
+taken_call_192:
push rax
- call rx_i_309
+ call rx_i_305
-rx_i_197: ;MUL_32
+rx_i_193: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0e9939ach
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, r12d
imul rax, rcx
@@ -2816,35 +2649,39 @@ rx_i_197: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_198: ;FPMUL
+rx_i_194: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0f21ca520h
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- movsd xmm5, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 040eb9f47h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_199: ;ROL_64
+rx_i_195: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 09405152ch
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
rol rax, cl
mov r9, rax
-rx_i_200: ;SUB_64
+rx_i_196: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0c2a9f41bh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, -1907903895
mov rcx, rax
mov eax, r13d
@@ -2852,20 +2689,18 @@ rx_i_200: ;SUB_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_201: ;MULH_64
+rx_i_197: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0229208efh
mov ecx, r12d
- call rx_read_dataset
- mov rcx, r15
- mul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ imul rax, r15
mov r11, rax
-rx_i_202: ;MULH_64
+rx_i_198: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0c8d95bbbh
mov eax, r14d
and eax, 32767
@@ -2879,12 +2714,12 @@ rx_i_202: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_203: ;MULH_64
+rx_i_199: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 050049e2eh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r10
mul rcx
mov rax, rdx
@@ -2894,60 +2729,59 @@ rx_i_203: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_204: ;FPSUB
+rx_i_200: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0c63b99e8h
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm2
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm2
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0b05ce8abh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_205: ;FPADD
+rx_i_201: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0cdda801dh
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- movsd xmm4, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 040cfe68eh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_206: ;FPSUB
+rx_i_202: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0fa44b04ah
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm9
- mov eax, r13d
- xor eax, 0b44dbc71h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm9
+ movaps xmm5, xmm0
-rx_i_207: ;FPSUB
+rx_i_203: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d73e472ch
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4478539544748097536
- movd xmm1, rax
- subsd xmm0, xmm1
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm2
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 09bdff355h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_208: ;MUL_64
+rx_i_204: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 01af8ab1dh
mov eax, r9d
and eax, 32767
@@ -2959,45 +2793,37 @@ rx_i_208: ;MUL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_209: ;FPDIV
+rx_i_205: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 094e997c5h
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm8
- mov eax, r13d
- xor eax, 0a1f8d8c7h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
-rx_i_210: ;FPMUL
+rx_i_206: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0e836a177h
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4483350864878108672
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r12d
- xor eax, 0d01fb731h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm7
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_211: ;AND_32
+rx_i_207: ;AND_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 039ccdd30h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
and eax, r12d
mov rcx, rax
mov eax, r9d
@@ -3005,18 +2831,18 @@ rx_i_211: ;AND_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_212: ;MUL_64
+rx_i_208: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f4f126c5h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r12
mov r10, rax
-rx_i_213: ;SHR_64
+rx_i_209: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0b84811f1h
mov eax, r8d
and eax, 2047
@@ -3028,12 +2854,12 @@ rx_i_213: ;SHR_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_214: ;MUL_32
+rx_i_210: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0c5efc90ah
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, -1027162400
imul rax, rcx
@@ -3043,27 +2869,22 @@ rx_i_214: ;MUL_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_215: ;FPADD
+rx_i_211: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0ce533072h
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r11d
- xor eax, 0212e615h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm3, xmm0
-rx_i_216: ;MUL_64
+rx_i_212: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 06b465fdbh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r13
mov rcx, rax
mov eax, r15d
@@ -3071,9 +2892,9 @@ rx_i_216: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_217: ;IMUL_32
+rx_i_213: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 02dd1d503h
mov eax, r13d
and eax, 2047
@@ -3083,9 +2904,9 @@ rx_i_217: ;IMUL_32
imul rax, rcx
mov r14, rax
-rx_i_218: ;ROL_64
+rx_i_214: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0a159f313h
mov eax, r9d
and eax, 2047
@@ -3094,21 +2915,21 @@ rx_i_218: ;ROL_64
rol rax, cl
mov r14, rax
-rx_i_219: ;SUB_64
+rx_i_215: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 08359265eh
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r12
mov r10, rax
-rx_i_220: ;MUL_64
+rx_i_216: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 080696de3h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r13
mov rcx, rax
mov eax, r15d
@@ -3116,9 +2937,9 @@ rx_i_220: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_221: ;IMUL_32
+rx_i_217: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 040d5b526h
mov eax, r8d
and eax, 2047
@@ -3132,26 +2953,26 @@ rx_i_221: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_222: ;CALL
+rx_i_218: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 083c0bd93h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r8d, -585552250
- jge short taken_call_222
+ jge short taken_call_218
mov r11, rax
- jmp rx_i_223
-taken_call_222:
+ jmp rx_i_219
+taken_call_218:
push rax
- call rx_i_244
+ call rx_i_240
-rx_i_223: ;XOR_64
+rx_i_219: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0ca37f668h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, -740915304
mov rcx, rax
mov eax, r15d
@@ -3159,24 +2980,24 @@ rx_i_223: ;XOR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_224: ;IMULH_64
+rx_i_220: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0bb44c384h
mov ecx, r9d
- call rx_read_dataset
- mov rcx, r11
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ movsxd rax, r11d
+ imul rax, rcx
mov rcx, rax
mov eax, r11d
xor eax, 0903fd173h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_225: ;IMULH_64
+rx_i_221: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0a3deb512h
mov eax, r9d
and eax, 2047
@@ -3190,37 +3011,43 @@ rx_i_225: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_226: ;FPMUL
+rx_i_222: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 084a02d64h
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm5
- movsd xmm7, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0d7601963h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_227: ;FPSUB
+rx_i_223: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 01e5cc085h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- movsd xmm2, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm3
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 07fca59eeh
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm2
-rx_i_228: ;SAR_64
+rx_i_224: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 053982440h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r14
sar rax, cl
mov rcx, rax
@@ -3229,12 +3056,12 @@ rx_i_228: ;SAR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_229: ;DIV_64
+rx_i_225: ;DIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0c558367eh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, 1
mov edx, r10d
test edx, edx
@@ -3247,98 +3074,100 @@ rx_i_229: ;DIV_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_230: ;CALL
+rx_i_226: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 040139b65h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r8d, -1752488808
- jno short taken_call_230
+ jno short taken_call_226
mov rcx, rax
mov eax, r8d
xor eax, 0978b2498h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_231
-taken_call_230:
+ jmp rx_i_227
+taken_call_226:
push rax
- call rx_i_332
+ call rx_i_328
-rx_i_231: ;FPDIV
+rx_i_227: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0fa312dbdh
mov eax, r11d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4479585788803153920
- movd xmm1, rax
- divsd xmm0, xmm1
- movsd xmm3, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm7
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0aabe2a0ah
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm3
-rx_i_232: ;CALL
+rx_i_228: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0b64246c0h
mov eax, r11d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r10d, -2099304
- jns short taken_call_232
+ jns short taken_call_228
mov rcx, rax
mov eax, r15d
xor eax, 0ffdff798h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_233
-taken_call_232:
+ jmp rx_i_229
+taken_call_228:
push rax
- call rx_i_287
+ call rx_i_283
-rx_i_233: ;IMULH_64
+rx_i_229: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 05c535836h
mov ecx, r11d
- call rx_read_dataset
- mov rcx, r12
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ movsxd rax, r12d
+ imul rax, rcx
mov rcx, rax
mov eax, r13d
xor eax, 013e8b2e0h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_234: ;FPMUL
+rx_i_230: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0f394972eh
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4737156465540726784
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm5, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 01dc2b4f6h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_235: ;RET
+rx_i_231: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0bb56428dh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_235
- cmp r9d, -422974038
- js short not_taken_ret_235
+ je short not_taken_ret_231
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r9d
@@ -3346,46 +3175,43 @@ rx_i_235: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_235:
+not_taken_ret_231:
mov rcx, rax
mov eax, r9d
xor eax, 0e6c9edaah
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_236: ;FPDIV
+rx_i_232: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 09ab46ab3h
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm3
- mov eax, r15d
- xor eax, 07e732935h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_237: ;CALL
+rx_i_233: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 08eb2cd76h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r12d, 392389867
- jo short taken_call_237
+ jo short taken_call_233
mov r14, rax
- jmp rx_i_238
-taken_call_237:
+ jmp rx_i_234
+taken_call_233:
push rax
- call rx_i_272
+ call rx_i_268
-rx_i_238: ;FPROUND
+rx_i_234: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0ba687578h
mov eax, r15d
and eax, 2047
@@ -3394,21 +3220,17 @@ rx_i_238: ;FPROUND
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm4, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- mov eax, r12d
- xor eax, 04d2e9e7dh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
-rx_i_239: ;IMUL_32
+rx_i_235: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0b6cb9ff2h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r12d
imul rax, rcx
@@ -3418,48 +3240,45 @@ rx_i_239: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_240: ;FPADD
+rx_i_236: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 03ad196ach
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm4
- mov eax, r11d
- xor eax, 0b2ab82cdh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm4
+ movaps xmm3, xmm0
-rx_i_241: ;CALL
+rx_i_237: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0fab4600h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r12d, -121899164
- jge short taken_call_241
+ jge short taken_call_237
mov r11, rax
- jmp rx_i_242
-taken_call_241:
+ jmp rx_i_238
+taken_call_237:
push rax
- call rx_i_299
+ call rx_i_295
-rx_i_242: ;FPADD
+rx_i_238: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0158f119fh
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm6
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0331bbf8h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm7
-rx_i_243: ;ADD_64
+rx_i_239: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 044f30b3fh
mov eax, r13d
and eax, 2047
@@ -3467,47 +3286,49 @@ rx_i_243: ;ADD_64
add rax, r10
mov r10, rax
-rx_i_244: ;IMULH_64
+rx_i_240: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0d65d29f9h
mov ecx, r9d
- call rx_read_dataset
- mov rcx, -423830277
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ mov rax, -423830277
+ imul rax, rcx
mov r8, rax
-rx_i_245: ;FPADD
+rx_i_241: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0ce5260adh
mov ecx, r11d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm3
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0bc2423ebh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_246: ;MUL_32
+rx_i_242: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01119b0f9h
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov ecx, eax
- mov eax, 319324914
- imul rax, rcx
+ mov rcx, 319324914
+ mul rcx
+ mov rax, rdx
mov rcx, rax
mov eax, r10d
xor eax, 0130882f2h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_247: ;XOR_64
+rx_i_243: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d6c2ce3dh
mov eax, r12d
and eax, 2047
@@ -3515,27 +3336,22 @@ rx_i_247: ;XOR_64
xor rax, 1198180774
mov r14, rax
-rx_i_248: ;FPADD
+rx_i_244: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0c6a6248h
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- mov eax, r9d
- xor eax, 0b4a1fad6h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm6
+ movaps xmm9, xmm0
-rx_i_249: ;XOR_64
+rx_i_245: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 084505739h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, -1546539637
mov rcx, rax
mov eax, r12d
@@ -3543,9 +3359,9 @@ rx_i_249: ;XOR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_250: ;AND_64
+rx_i_246: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 027eeaa2eh
mov eax, r15d
and eax, 2047
@@ -3553,12 +3369,12 @@ rx_i_250: ;AND_64
and rax, r9
mov r12, rax
-rx_i_251: ;IMUL_32
+rx_i_247: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0c4de0296h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r14d
imul rax, rcx
@@ -3568,9 +3384,9 @@ rx_i_251: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_252: ;MUL_32
+rx_i_248: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0649df46fh
mov eax, r8d
and eax, 2047
@@ -3584,24 +3400,24 @@ rx_i_252: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_253: ;IMULH_64
+rx_i_249: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0499552cch
mov ecx, r15d
- call rx_read_dataset
- mov rcx, r11
- imul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ movsxd rcx, eax
+ movsxd rax, r11d
+ imul rax, rcx
mov rcx, rax
mov eax, r13d
xor eax, 0e1afcff9h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_254: ;MUL_64
+rx_i_250: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 083eafe6fh
mov eax, r13d
and eax, 2047
@@ -3613,75 +3429,83 @@ rx_i_254: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_255: ;FPMUL
+rx_i_251: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0a25a4d8ah
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- movsd xmm4, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 05ed767a3h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_256: ;ROL_64
+rx_i_252: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 08a75ad41h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
rol rax, cl
mov r14, rax
-rx_i_257: ;CALL
+rx_i_253: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 057f3f596h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r15d, 1699431947
- jns short taken_call_257
+ jns short taken_call_253
mov rcx, rax
mov eax, r13d
xor eax, 0654b460bh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_258
-taken_call_257:
+ jmp rx_i_254
+taken_call_253:
push rax
- call rx_i_371
+ call rx_i_367
-rx_i_258: ;FPSUB
+rx_i_254: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 04cfb709eh
mov ecx, r14d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm4
- movsd xmm8, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm4
+ movaps xmm8, xmm0
+ mov eax, r8d
+ xor eax, 0c251872eh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_259: ;FPADD
+rx_i_255: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0b96ec9ech
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm5
- movsd xmm6, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm5
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 0ae781d10h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_260: ;MULH_64
+rx_i_256: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 08375472ch
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r15
mul rcx
mov rax, rdx
@@ -3691,28 +3515,28 @@ rx_i_260: ;MULH_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_261: ;FPADD
+rx_i_257: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d75a8c3fh
mov ecx, r12d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4741056264732147712
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm3, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm5
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0373b1b6fh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm3
-rx_i_262: ;IMUL_32
+rx_i_258: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 064fdbda0h
mov eax, r11d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- movsxd rcx, eax
- movsxd rax, r14d
+ mov ecx, eax
+ mov eax, r14d
imul rax, rcx
mov rcx, rax
mov eax, r9d
@@ -3720,54 +3544,46 @@ rx_i_262: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_263: ;FPADD
+rx_i_259: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 02e36a073h
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r11d
- xor eax, 06c1856f0h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm3, xmm0
-rx_i_264: ;FPMUL
+rx_i_260: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0f94e9fa9h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4743938178866479104
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r9d
- xor eax, 0576a8e8fh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm9, xmm0
-rx_i_265: ;FPSQRT
+rx_i_261: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 02346171ch
mov ecx, r14d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm3, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0745a48e9h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm3
-rx_i_266: ;OR_32
+rx_i_262: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 01c42baa6h
mov eax, r10d
and eax, 2047
@@ -3779,98 +3595,91 @@ rx_i_266: ;OR_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_267: ;FPDIV
+rx_i_263: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0b39b140h
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm8
- mov eax, r14d
- xor eax, 0d8823dc5h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm6, xmm0
-rx_i_268: ;FPMUL
+rx_i_264: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 01a07d201h
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm3
- mov eax, r15d
- xor eax, 0df89f274h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_269: ;FPADD
+rx_i_265: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 07a3eb340h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm8
- movsd xmm2, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm8
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 04c559414h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm2
-rx_i_270: ;RET
+rx_i_266: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 03d0a3a89h
mov eax, r13d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_270
- cmp r12d, 136160027
- jbe short not_taken_ret_270
+ je short not_taken_ret_266
xor rax, qword ptr [rsp + 8]
mov r10, rax
ret 8
-not_taken_ret_270:
+not_taken_ret_266:
mov r10, rax
-rx_i_271: ;ROR_64
+rx_i_267: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0c6c7b37h
mov eax, r8d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, r10
- ror rax, cl
+ ror rax, 56
mov r11, rax
-rx_i_272: ;CALL
+rx_i_268: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0c2510cebh
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r15d, -2062812966
- jl short taken_call_272
+ jl short taken_call_268
mov r13, rax
- jmp rx_i_273
-taken_call_272:
+ jmp rx_i_269
+taken_call_268:
push rax
- call rx_i_385
+ call rx_i_381
-rx_i_273: ;ROR_64
+rx_i_269: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0c80cc899h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
ror rax, cl
mov rcx, rax
@@ -3879,27 +3688,24 @@ rx_i_273: ;ROR_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_274: ;FPMUL
+rx_i_270: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0eb355caah
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- mov eax, r15d
- xor eax, 03981662bh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
-rx_i_275: ;MUL_32
+rx_i_271: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0c6f12299h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, -2032281772
imul rax, rcx
@@ -3909,9 +3715,9 @@ rx_i_275: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_276: ;OR_32
+rx_i_272: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0695a5dd2h
mov eax, r12d
and eax, 2047
@@ -3919,71 +3725,71 @@ rx_i_276: ;OR_32
or eax, r12d
mov r13, rax
-rx_i_277: ;CALL
+rx_i_273: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0d315e4dch
mov eax, r9d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r12d, 1670848568
- jl short taken_call_277
+ jl short taken_call_273
mov rcx, rax
mov eax, r13d
xor eax, 063972038h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_278
-taken_call_277:
+ jmp rx_i_274
+taken_call_273:
push rax
- call rx_i_376
+ call rx_i_372
-rx_i_278: ;FPSUB
+rx_i_274: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0b66ca7e0h
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4745257761179172864
- movd xmm1, rax
- subsd xmm0, xmm1
- movsd xmm6, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm4
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 06a2b2b5bh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_279: ;OR_64
+rx_i_275: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0788eceb7h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r11
mov r13, rax
-rx_i_280: ;CALL
+rx_i_276: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0c6ac5edah
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r11d, -1236180570
- jns short taken_call_280
+ jns short taken_call_276
mov rcx, rax
mov eax, r12d
xor eax, 0b65161a6h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_281
-taken_call_280:
+ jmp rx_i_277
+taken_call_276:
push rax
- call rx_i_408
+ call rx_i_404
-rx_i_281: ;IMUL_32
+rx_i_277: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0c9549789h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r10d
imul rax, rcx
@@ -3993,33 +3799,35 @@ rx_i_281: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_282: ;FPSUB
+rx_i_278: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0a2bc66c9h
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm7
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm7
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 02d00ad10h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_283: ;FPSUB
+rx_i_279: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0f1a91458h
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4742807966216880128
- movd xmm1, rax
- subsd xmm0, xmm1
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm5
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0475ade01h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm9
-rx_i_284: ;AND_64
+rx_i_280: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 066246b43h
mov eax, r12d
and eax, 2047
@@ -4031,12 +3839,12 @@ rx_i_284: ;AND_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_285: ;SUB_64
+rx_i_281: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 05a762727h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r10
mov rcx, rax
mov eax, r11d
@@ -4044,18 +3852,18 @@ rx_i_285: ;SUB_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_286: ;MUL_64
+rx_i_282: ;SUB_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0de1ab603h
mov ecx, r15d
- call rx_read_dataset
- imul rax, rax, 1367326224
+ call rx_read_dataset_r
+ sub eax, 1367326224
mov r11, rax
-rx_i_287: ;ADD_32
+rx_i_283: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0df4d084fh
mov eax, r9d
and eax, 32767
@@ -4067,48 +3875,43 @@ rx_i_287: ;ADD_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_288: ;FPSUB
+rx_i_284: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0e68f36ach
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4477945591619387392
- movd xmm1, rax
- subsd xmm0, xmm1
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm6
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0936f2960h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_289: ;IMUL_32
+rx_i_285: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 09adb333bh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r8d
imul rax, rcx
mov r14, rax
-rx_i_290: ;FPADD
+rx_i_286: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 082f5e36ch
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r15d
- xor eax, 0546e75d1h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm7, xmm0
-rx_i_291: ;OR_64
+rx_i_287: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 049547c9ch
mov eax, r11d
and eax, 2047
@@ -4120,9 +3923,9 @@ rx_i_291: ;OR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_292: ;MUL_64
+rx_i_288: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 08716ac8bh
mov eax, r10d
and eax, 2047
@@ -4134,48 +3937,40 @@ rx_i_292: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_293: ;FPDIV
+rx_i_289: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0efef52b5h
mov eax, r14d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm9
- mov eax, r8d
- xor eax, 046affb49h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_294: ;FPMUL
+rx_i_290: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 060665748h
mov ecx, r15d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm8
- mov eax, r9d
- xor eax, 02f4d18d7h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm9, xmm0
-rx_i_295: ;RET
+rx_i_291: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0ddf4bd1ah
mov eax, r13d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_295
- cmp r14d, 1988795765
- js short not_taken_ret_295
+ je short not_taken_ret_291
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r14d
@@ -4183,16 +3978,16 @@ rx_i_295: ;RET
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_295:
+not_taken_ret_291:
mov rcx, rax
mov eax, r14d
xor eax, 0768a9d75h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_296: ;ROR_64
+rx_i_292: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 05a87cc3dh
mov eax, r13d
and eax, 32767
@@ -4200,30 +3995,23 @@ rx_i_296: ;ROR_64
ror rax, 23
mov r10, rax
-rx_i_297: ;FPSUB
+rx_i_293: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0c61f4279h
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm5
- mov eax, r8d
- xor eax, 014844990h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm5
+ movaps xmm8, xmm0
-rx_i_298: ;RET
+rx_i_294: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0f3b9d85h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_298
- cmp r15d, -276467273
- jo short not_taken_ret_298
+ je short not_taken_ret_294
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r8d
@@ -4231,48 +4019,43 @@ rx_i_298: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_298:
+not_taken_ret_294:
mov rcx, rax
mov eax, r8d
xor eax, 0ef8571b7h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_299: ;FPSUB
+rx_i_295: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f42798fdh
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm8
- mov eax, r15d
- xor eax, 08a66e69fh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm8
+ movaps xmm7, xmm0
-rx_i_300: ;CALL
+rx_i_296: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 018738758h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r9d, -207252278
- jns short taken_call_300
+ jns short taken_call_296
mov rcx, rax
mov eax, r8d
xor eax, 0f3a594cah
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_301
-taken_call_300:
+ jmp rx_i_297
+taken_call_296:
push rax
- call rx_i_399
+ call rx_i_395
-rx_i_301: ;ADD_64
+rx_i_297: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0de3b9d9bh
mov eax, r15d
and eax, 2047
@@ -4280,24 +4063,19 @@ rx_i_301: ;ADD_64
add rax, r10
mov r14, rax
-rx_i_302: ;FPSUB
+rx_i_298: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 084f53637h
mov eax, r14d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm7
- mov eax, r14d
- xor eax, 0d10f7c42h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm7
+ movaps xmm6, xmm0
-rx_i_303: ;ADD_64
+rx_i_299: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 042f4897h
mov eax, r12d
and eax, 2047
@@ -4309,59 +4087,58 @@ rx_i_303: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_304: ;FPSUB
+rx_i_300: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 095765693h
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4478628807791607808
- movd xmm1, rax
- subsd xmm0, xmm1
- mov eax, r10d
- xor eax, 09d24b005h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm8
+ movaps xmm2, xmm0
-rx_i_305: ;FPMUL
+rx_i_301: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0a0ec5eech
mov ecx, r8d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm5
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm5
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0433cf2d6h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm7
-rx_i_306: ;ADD_64
+rx_i_302: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0f6f8c345h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r10
mov r11, rax
-rx_i_307: ;FPADD
+rx_i_303: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 082a3e965h
mov eax, r14d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0bb9ee490h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_308: ;MUL_64
+rx_i_304: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 04940c652h
mov eax, r12d
and eax, 2047
@@ -4369,37 +4146,37 @@ rx_i_308: ;MUL_64
imul rax, r15
mov r13, rax
-rx_i_309: ;MUL_64
+rx_i_305: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 03c6c62b8h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, rax, -65873120
mov r10, rax
-rx_i_310: ;ADD_32
+rx_i_306: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 08b34cdfch
mov ecx, r15d
- call rx_read_dataset
- add eax, r15d
+ call rx_read_dataset_r
+ add rax, r15
mov r13, rax
-rx_i_311: ;SAR_64
+rx_i_307: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 04c36adb1h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
sar rax, cl
mov r10, rax
-rx_i_312: ;MUL_64
+rx_i_308: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0a4213b21h
mov eax, r11d
and eax, 2047
@@ -4407,9 +4184,9 @@ rx_i_312: ;MUL_64
imul rax, r13
mov r15, rax
-rx_i_313: ;IMULH_64
+rx_i_309: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 090c42304h
mov eax, r9d
and eax, 32767
@@ -4423,72 +4200,62 @@ rx_i_313: ;IMULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_314: ;FPMUL
+rx_i_310: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f78e1c8ch
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4746554338141274112
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm7, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 07c9816c0h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_315: ;FPMUL
+rx_i_311: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0ff8848cfh
mov ecx, r8d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4744327281034395648
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r12d
- xor eax, 05cf21a31h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm4
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_316: ;MUL_32
+rx_i_312: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0b18904cdh
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, -1147928648
imul rax, rcx
mov r10, rax
-rx_i_317: ;FPADD
+rx_i_313: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0a0d0befh
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4478056002024898560
- movd xmm1, rax
- addsd xmm0, xmm1
- mov eax, r14d
- xor eax, 09500d514h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm5
+ movaps xmm6, xmm0
-rx_i_318: ;IMUL_32
+rx_i_314: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 01e3c65f7h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r9d
imul rax, rcx
@@ -4498,9 +4265,9 @@ rx_i_318: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_319: ;SHR_64
+rx_i_315: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 02e36ddafh
mov eax, r9d
and eax, 32767
@@ -4509,16 +4276,14 @@ rx_i_319: ;SHR_64
shr rax, cl
mov r9, rax
-rx_i_320: ;RET
+rx_i_316: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 05b0cb5bbh
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_320
- cmp r10d, 906151187
- jl short not_taken_ret_320
+ je short not_taken_ret_316
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r8d
@@ -4526,47 +4291,37 @@ rx_i_320: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_320:
+not_taken_ret_316:
mov rcx, rax
mov eax, r8d
xor eax, 03602c513h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_321: ;FPADD
+rx_i_317: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0c74e7415h
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm7
- mov eax, r13d
- xor eax, 0b5bc8h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm7
+ movaps xmm5, xmm0
-rx_i_322: ;FPADD
+rx_i_318: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 057621d9ah
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- mov eax, r15d
- xor eax, 061cb9db8h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm3
+ movaps xmm7, xmm0
-rx_i_323: ;ROL_64
+rx_i_319: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 08ee02d99h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r15
rol rax, cl
mov rcx, rax
@@ -4575,20 +4330,22 @@ rx_i_323: ;ROL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_324: ;FPADD
+rx_i_320: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 013461188h
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm4
- movsd xmm2, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm4
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 02bdc7349h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm2
-rx_i_325: ;IMUL_32
+rx_i_321: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0a7bae383h
mov eax, r11d
and eax, 32767
@@ -4602,27 +4359,31 @@ rx_i_325: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_326: ;CALL
+rx_i_322: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 08215399bh
mov ecx, r14d
- call rx_read_dataset
- cmp r11d, 1411981860
- jo short taken_call_326
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_322
+ xor rax, qword ptr [rsp + 8]
+ mov rcx, rax
+ mov eax, r11d
+ xor eax, 054292224h
+ and eax, 2047
+ mov qword ptr [rsi + rax * 8], rcx
+ ret 8
+not_taken_ret_322:
mov rcx, rax
mov eax, r11d
xor eax, 054292224h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_327
-taken_call_326:
- push rax
- call rx_i_347
-rx_i_327: ;MULH_64
+rx_i_323: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 07b07664bh
mov eax, r14d
and eax, 32767
@@ -4636,22 +4397,24 @@ rx_i_327: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_328: ;FPSQRT
+rx_i_324: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f956baffh
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0944856d4h
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_329: ;SHL_64
+rx_i_325: ;SHL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0708ab9d1h
mov eax, r11d
and eax, 2047
@@ -4659,12 +4422,12 @@ rx_i_329: ;SHL_64
shl rax, 24
mov r13, rax
-rx_i_330: ;MULH_64
+rx_i_326: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0d1b27540h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r8
mul rcx
mov rax, rdx
@@ -4674,9 +4437,9 @@ rx_i_330: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_331: ;AND_64
+rx_i_327: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 09665f98dh
mov eax, r9d
and eax, 2047
@@ -4684,9 +4447,9 @@ rx_i_331: ;AND_64
and rax, r15
mov r12, rax
-rx_i_332: ;ROL_64
+rx_i_328: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0fb9c32adh
mov eax, r12d
and eax, 2047
@@ -4695,31 +4458,29 @@ rx_i_332: ;ROL_64
rol rax, cl
mov r9, rax
-rx_i_333: ;RET
+rx_i_329: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0e1110623h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_333
- cmp r8d, 842221018
- jl short not_taken_ret_333
+ je short not_taken_ret_329
xor rax, qword ptr [rsp + 8]
mov r11, rax
ret 8
-not_taken_ret_333:
+not_taken_ret_329:
mov r11, rax
-rx_i_334: ;IMUL_32
+rx_i_330: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f6a93f19h
mov eax, r9d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- movsxd rcx, eax
- movsxd rax, r13d
+ mov ecx, eax
+ mov eax, r13d
imul rax, rcx
mov rcx, rax
mov eax, r11d
@@ -4727,36 +4488,33 @@ rx_i_334: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_335: ;FPADD
+rx_i_331: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0bc9bbe4ah
mov eax, r9d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- mov eax, r9d
- xor eax, 0ba4d4c0fh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm9, xmm0
-rx_i_336: ;FPADD
+rx_i_332: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0f253cd4eh
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- movsd xmm3, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm6
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0116c919eh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm3
-rx_i_337: ;XOR_64
+rx_i_333: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0f009758bh
mov eax, r14d
and eax, 2047
@@ -4764,18 +4522,18 @@ rx_i_337: ;XOR_64
xor rax, -175125848
mov r11, rax
-rx_i_338: ;ADD_32
+rx_i_334: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0dda04168h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
add eax, r13d
mov r8, rax
-rx_i_339: ;SUB_64
+rx_i_335: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 03e6cfb73h
mov eax, r15d
and eax, 32767
@@ -4787,27 +4545,22 @@ rx_i_339: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_340: ;FPADD
+rx_i_336: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0aea0a435h
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm2
- mov eax, r11d
- xor eax, 02644c5ah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm2
+ movaps xmm3, xmm0
-rx_i_341: ;ADD_32
+rx_i_337: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 03d6c4ab2h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
add eax, r12d
mov rcx, rax
mov eax, r13d
@@ -4815,9 +4568,9 @@ rx_i_341: ;ADD_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_342: ;MUL_64
+rx_i_338: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d428a742h
mov eax, r12d
and eax, 2047
@@ -4825,38 +4578,28 @@ rx_i_342: ;MUL_64
imul rax, r12
mov r11, rax
-rx_i_343: ;FPADD
+rx_i_339: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 04596ef73h
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- mov eax, r10d
- xor eax, 07c8317fah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm6
+ movaps xmm2, xmm0
-rx_i_344: ;FPSUB
+rx_i_340: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0e51629cch
mov ecx, r15d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm5
- mov eax, r13d
- xor eax, 038b653beh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm5
+ movaps xmm5, xmm0
-rx_i_345: ;MUL_32
+rx_i_341: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 019eb9ea5h
mov eax, r12d
and eax, 2047
@@ -4870,60 +4613,51 @@ rx_i_345: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_346: ;FPMUL
+rx_i_342: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 09ccc7abah
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- mov eax, r11d
- xor eax, 0319de2d3h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm3, xmm0
-rx_i_347: ;SHR_64
+rx_i_343: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 056f6cf0bh
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, r13
- shr rax, cl
+ shr rax, 48
mov rcx, rax
mov eax, r15d
xor eax, 0d9a469a9h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_348: ;FPMUL
+rx_i_344: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 03ef9bcc4h
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4744717476367958016
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r13d
- xor eax, 0627d9feah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
-rx_i_349: ;MULH_64
+rx_i_345: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0bbbcdbach
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r13
mul rcx
mov rax, rdx
@@ -4933,12 +4667,12 @@ rx_i_349: ;MULH_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_350: ;XOR_64
+rx_i_346: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0ae9d1e96h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, r15
mov rcx, rax
mov eax, r13d
@@ -4946,9 +4680,9 @@ rx_i_350: ;XOR_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_351: ;ADD_64
+rx_i_347: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 070c34d69h
mov eax, r14d
and eax, 2047
@@ -4956,23 +4690,23 @@ rx_i_351: ;ADD_64
add rax, r10
mov r13, rax
-rx_i_352: ;FPSUB
+rx_i_348: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0523ff904h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4741412628788674560
- movd xmm1, rax
- subsd xmm0, xmm1
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm3
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 039c35461h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_353: ;XOR_32
+rx_i_349: ;XOR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 018e0e5ddh
mov eax, r8d
and eax, 2047
@@ -4980,77 +4714,81 @@ rx_i_353: ;XOR_32
xor eax, r15d
mov r13, rax
-rx_i_354: ;CALL
+rx_i_350: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 09bd050f0h
mov eax, r9d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp r9d, -980411581
- jbe short taken_call_354
+ jbe short taken_call_350
mov rcx, rax
mov eax, r12d
xor eax, 0c5901b43h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_355
-taken_call_354:
+ jmp rx_i_351
+taken_call_350:
push rax
- call rx_i_356
+ call rx_i_352
-rx_i_355: ;MULH_64
+rx_i_351: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0a3a5906fh
mov ecx, r11d
- call rx_read_dataset
- mov rcx, r10
+ call rx_read_dataset_r
+ imul rax, r10
+ mov r13, rax
+
+rx_i_352: ;FPADD
+ dec edi
+ jz rx_finish
+ xor r10, 0afc9af2bh
+ mov ecx, r10d
+ call rx_read_dataset_f
+ addpd xmm0, xmm6
+ movaps xmm2, xmm0
+ mov eax, r10d
+ xor eax, 03bf686f2h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm2
+
+rx_i_353: ;FPMUL
+ dec edi
+ jz rx_finish
+ xor r13, 02e65278bh
+ mov eax, r13d
+ and eax, 2047
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0b3c9f7aeh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
+
+rx_i_354: ;MULH_64
+ dec edi
+ jz rx_finish
+ xor r13, 02412fc10h
+ mov ecx, r13d
+ call rx_read_dataset_r
+ mov rcx, r13
mul rcx
mov rax, rdx
mov r13, rax
-rx_i_356: ;FPADD
+rx_i_355: ;MUL_64
dec edi
- js rx_finish
- xor r10, 0afc9af2bh
- mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm6
- movsd xmm2, xmm0
-
-rx_i_357: ;FPMUL
- dec edi
- js rx_finish
- xor r13, 02e65278bh
- mov eax, r13d
- and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- movsd xmm7, xmm0
-
-rx_i_358: ;MUL_32
- dec edi
- js rx_finish
- xor r13, 02412fc10h
- mov ecx, r13d
- call rx_read_dataset
- mov ecx, eax
- mov eax, r13d
- imul rax, rcx
- mov r13, rax
-
-rx_i_359: ;MUL_64
- dec edi
- js rx_finish
+ jz rx_finish
xor r10, 06bd6e65fh
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r14
mov rcx, rax
mov eax, r8d
@@ -5058,18 +4796,18 @@ rx_i_359: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_360: ;MUL_64
+rx_i_356: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 01cd85d80h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r10
mov r11, rax
-rx_i_361: ;ADD_64
+rx_i_357: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0f7daed36h
mov eax, r10d
and eax, 2047
@@ -5077,9 +4815,9 @@ rx_i_361: ;ADD_64
add rax, 820073637
mov r11, rax
-rx_i_362: ;DIV_64
+rx_i_358: ;DIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 088fa6e5ah
mov eax, r13d
and eax, 2047
@@ -5092,48 +4830,49 @@ rx_i_362: ;DIV_64
div rcx
mov r9, rax
-rx_i_363: ;FPSUB
+rx_i_359: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0714fc2cdh
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm9
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm9
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0f16b9be3h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_364: ;FPMUL
+rx_i_360: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0c2d110b5h
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm8
- mov eax, r8d
- xor eax, 0c41a4103h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_365: ;FPSQRT
+rx_i_361: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 01d125a7fh
mov ecx, r15d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm6, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 0ad0b81f5h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_366: ;SUB_64
+rx_i_362: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0ed8954bdh
mov eax, r9d
and eax, 2047
@@ -5145,36 +4884,33 @@ rx_i_366: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_367: ;FPMUL
+rx_i_363: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 09f75887bh
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm6
- mov eax, r11d
- xor eax, 05415334dh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm6
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm3, xmm0
-rx_i_368: ;MUL_32
+rx_i_364: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0badaf867h
mov ecx, r11d
- call rx_read_dataset
- mov ecx, eax
- mov eax, r8d
- imul rax, rcx
+ call rx_read_dataset_r
+ mov rcx, r8
+ mul rcx
+ mov rax, rdx
mov r8, rax
-rx_i_369: ;IMUL_32
+rx_i_365: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 02db4444ah
mov eax, r15d
and eax, 2047
@@ -5188,9 +4924,9 @@ rx_i_369: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_370: ;IMUL_32
+rx_i_366: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0bff7218fh
mov eax, r12d
and eax, 2047
@@ -5204,29 +4940,31 @@ rx_i_370: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_371: ;FPADD
+rx_i_367: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 04d14cb3ah
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- movsd xmm4, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0ad9b92e8h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm4
-rx_i_372: ;MUL_64
+rx_i_368: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0a14836bah
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r10
mov r8, rax
-rx_i_373: ;AND_64
+rx_i_369: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 053fe22e2h
mov eax, r9d
and eax, 32767
@@ -5234,32 +4972,36 @@ rx_i_373: ;AND_64
and rax, r13
mov r9, rax
-rx_i_374: ;FPSUB
+rx_i_370: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 010e1fb24h
mov eax, r15d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm6
- movsd xmm6, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm6
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 0a120e0edh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_375: ;FPADD
+rx_i_371: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0ebbd5cc9h
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0c40fe413h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_376: ;ROL_64
+rx_i_372: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 098ab79d7h
mov eax, r10d
and eax, 2047
@@ -5268,43 +5010,37 @@ rx_i_376: ;ROL_64
rol rax, cl
mov r9, rax
-rx_i_377: ;FPDIV
+rx_i_373: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 056438b3h
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm8
- mov eax, r12d
- xor eax, 05655fac9h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm8
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_378: ;FPMUL
+rx_i_374: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0dbcce604h
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm2
- mov eax, r10d
- xor eax, 03507e810h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm2, xmm0
-rx_i_379: ;ADD_64
+rx_i_375: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0edea6200h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r15
mov rcx, rax
mov eax, r12d
@@ -5312,9 +5048,9 @@ rx_i_379: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_380: ;ADD_64
+rx_i_376: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 05e61b279h
mov eax, r14d
and eax, 2047
@@ -5326,47 +5062,42 @@ rx_i_380: ;ADD_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_381: ;FPSUB
+rx_i_377: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0fc1fb433h
mov ecx, r14d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- mov eax, r15d
- xor eax, 0d822f28fh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm3
+ movaps xmm7, xmm0
-rx_i_382: ;MUL_32
+rx_i_378: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 082aa21ach
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, 547725353
imul rax, rcx
mov r15, rax
-rx_i_383: ;FPADD
+rx_i_379: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 05dba41fbh
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4741471142953353216
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 03a2dc429h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_384: ;MUL_64
+rx_i_380: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0229e3d6eh
mov eax, r11d
and eax, 32767
@@ -5378,45 +5109,49 @@ rx_i_384: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_385: ;SAR_64
+rx_i_381: ;SAR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 019816ff9h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r14
sar rax, cl
mov r9, rax
-rx_i_386: ;FPADD
+rx_i_382: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 036b5b81fh
mov ecx, r14d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm3, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm3
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0a6a2e0b1h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm3
-rx_i_387: ;FPSUB
+rx_i_383: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 05f798ec3h
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm4
- movsd xmm5, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm4
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0c9f5cc22h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_388: ;SHR_64
+rx_i_384: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 05b459fd7h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r11
shr rax, cl
mov rcx, rax
@@ -5425,12 +5160,12 @@ rx_i_388: ;SHR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_389: ;MUL_64
+rx_i_385: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0c91749bbh
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r12
mov rcx, rax
mov eax, r13d
@@ -5438,40 +5173,33 @@ rx_i_389: ;MUL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_390: ;FPADD
+rx_i_386: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0575b4bdch
mov ecx, r9d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm8
- mov eax, r9d
- xor eax, 05702d58dh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm8
+ movaps xmm9, xmm0
-rx_i_391: ;MUL_64
+rx_i_387: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0d4f7bc6ah
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r15
mov r9, rax
-rx_i_392: ;RET
+rx_i_388: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 08a949356h
mov eax, r8d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_392
- cmp r13d, -1600627518
- jo short not_taken_ret_392
+ je short not_taken_ret_388
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r11d
@@ -5479,61 +5207,51 @@ rx_i_392: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_392:
+not_taken_ret_388:
mov rcx, rax
mov eax, r11d
xor eax, 0a0985cc2h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_393: ;CALL
+rx_i_389: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 06531ad2eh
mov eax, r11d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp r9d, -350609584
- jge short taken_call_393
+ jge short taken_call_389
mov r14, rax
- jmp rx_i_394
-taken_call_393:
+ jmp rx_i_390
+taken_call_389:
push rax
- call rx_i_425
+ call rx_i_421
-rx_i_394: ;FPADD
+rx_i_390: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 02914abeah
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm4
- mov eax, r11d
- xor eax, 0e5c5acbbh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm4
+ movaps xmm3, xmm0
-rx_i_395: ;FPADD
+rx_i_391: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0473a41f0h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- mov eax, r14d
- xor eax, 0aba2155fh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm6, xmm0
-rx_i_396: ;ROR_64
+rx_i_392: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 01ebc1f0dh
mov eax, r14d
and eax, 2047
@@ -5545,9 +5263,9 @@ rx_i_396: ;ROR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_397: ;OR_32
+rx_i_393: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0742e95b1h
mov eax, r14d
and eax, 2047
@@ -5559,67 +5277,55 @@ rx_i_397: ;OR_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_398: ;FPADD
+rx_i_394: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0db885c2ch
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r14d
- xor eax, 0910e8628h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm6, xmm0
-rx_i_399: ;IDIV_64
+rx_i_395: ;IDIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04ae4fe8ch
mov eax, r8d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
mov edx, r13d
cmp edx, -1
- jne short safe_idiv_399
+ jne short safe_idiv_395
mov rcx, rax
rol rcx, 1
dec rcx
- jz short result_idiv_399
-safe_idiv_399:
+ jz short result_idiv_395
+safe_idiv_395:
mov ecx, 1
test edx, edx
cmovne ecx, edx
movsxd rcx, ecx
cqo
idiv rcx
-result_idiv_399:
+result_idiv_395:
mov r8, rax
-rx_i_400: ;FPADD
+rx_i_396: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 07b41862bh
mov ecx, r10d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4737472108072796160
- movd xmm1, rax
- addsd xmm0, xmm1
- mov eax, r12d
- xor eax, 01ee1c837h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm7
+ movaps xmm4, xmm0
-rx_i_401: ;MUL_64
+rx_i_397: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0916f3819h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r12
mov rcx, rax
mov eax, r10d
@@ -5627,39 +5333,35 @@ rx_i_401: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_402: ;ROL_64
+rx_i_398: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04eb6fd2ah
mov eax, r8d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, r8
- rol rax, cl
+ rol rax, 44
mov rcx, rax
mov eax, r11d
xor eax, 0724e7136h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_403: ;FPDIV
+rx_i_399: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0899a98cfh
mov ecx, r11d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm2
- mov eax, r14d
- xor eax, 0fb6f7016h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm6, xmm0
-rx_i_404: ;OR_32
+rx_i_400: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0aae75db6h
mov eax, r13d
and eax, 32767
@@ -5671,86 +5373,84 @@ rx_i_404: ;OR_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_405: ;FPMUL
+rx_i_401: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 032e81f25h
mov eax, r13d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4742100199122403328
- movd xmm1, rax
- mulsd xmm0, xmm1
- movsd xmm6, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm4
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 03ea60344h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm6
-rx_i_406: ;RET
+rx_i_402: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0fa1a07ffh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_406
- cmp r8d, -1580915312
- jl short not_taken_ret_406
+ je short not_taken_ret_402
xor rax, qword ptr [rsp + 8]
mov r14, rax
ret 8
-not_taken_ret_406:
+not_taken_ret_402:
mov r14, rax
-rx_i_407: ;IDIV_64
+rx_i_403: ;IDIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0e59500f7h
mov eax, r9d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
mov edx, r12d
cmp edx, -1
- jne short safe_idiv_407
+ jne short safe_idiv_403
mov rcx, rax
rol rcx, 1
dec rcx
- jz short result_idiv_407
-safe_idiv_407:
+ jz short result_idiv_403
+safe_idiv_403:
mov ecx, 1
test edx, edx
cmovne ecx, edx
movsxd rcx, ecx
cqo
idiv rcx
-result_idiv_407:
+result_idiv_403:
mov rcx, rax
mov eax, r11d
xor eax, 01ff394a0h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_408: ;MUL_32
+rx_i_404: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 05b8ceb2fh
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, r8d
imul rax, rcx
mov r15, rax
-rx_i_409: ;RET
+rx_i_405: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0f61082a3h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_409
- cmp r10d, 1795880641
- ja short not_taken_ret_409
+ je short not_taken_ret_405
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r12d
@@ -5758,16 +5458,16 @@ rx_i_409: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_409:
+not_taken_ret_405:
mov rcx, rax
mov eax, r12d
xor eax, 06b0af6c1h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_410: ;FPROUND
+rx_i_406: ;FPROUND
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0af6886b7h
mov eax, r9d
and eax, 2047
@@ -5776,33 +5476,33 @@ rx_i_410: ;FPROUND
shl eax, 13
and rcx, -2048
and eax, 24576
- cvtsi2sd xmm0, rcx
+ cvtsi2sd xmm9, rcx
or eax, 40896
mov dword ptr [rsp - 8], eax
ldmxcsr dword ptr [rsp - 8]
- movsd xmm9, xmm0
+ mov eax, r9d
+ xor eax, 09862adefh
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_411: ;FPMUL
+rx_i_407: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 09699566fh
mov ecx, r14d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- mov eax, r8d
- xor eax, 0904eec66h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_412: ;MUL_64
+rx_i_408: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 066e79fa6h
mov ecx, r15d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r9
mov rcx, rax
mov eax, r10d
@@ -5810,86 +5510,79 @@ rx_i_412: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_413: ;MULH_64
+rx_i_409: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04b6caa9ah
mov ecx, r11d
- call rx_read_dataset
- mov rcx, r15
- mul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ imul rax, r15
mov r8, rax
-rx_i_414: ;RET
+rx_i_410: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0d17f245eh
mov eax, r15d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_414
- cmp r12d, -1371608768
- jl short not_taken_ret_414
+ je short not_taken_ret_410
xor rax, qword ptr [rsp + 8]
mov r8, rax
ret 8
-not_taken_ret_414:
+not_taken_ret_410:
mov r8, rax
-rx_i_415: ;RET
+rx_i_411: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0364f10e7h
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_415
- cmp r13d, -1265436998
- jbe short not_taken_ret_415
+ je short not_taken_ret_411
xor rax, qword ptr [rsp + 8]
mov r12, rax
ret 8
-not_taken_ret_415:
+not_taken_ret_411:
mov r12, rax
-rx_i_416: ;FPSQRT
+rx_i_412: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0ac90e7ah
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm3, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm3, xmm0
+ mov eax, r11d
+ xor eax, 0bbd2640ah
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm3
-rx_i_417: ;FPDIV
+rx_i_413: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04b6037abh
mov eax, r11d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm2
- mov eax, r12d
- xor eax, 043989376h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_418: ;OR_64
+rx_i_414: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 06c01554dh
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r8
mov rcx, rax
mov eax, r10d
@@ -5897,55 +5590,57 @@ rx_i_418: ;OR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_419: ;DIV_64
+rx_i_415: ;DIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 08c3e59a1h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, -538093385
xor edx, edx
div rcx
mov r9, rax
-rx_i_420: ;FPSUB
+rx_i_416: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0f3fafde9h
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- movsd xmm5, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm3
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0f84b5382h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_421: ;SUB_64
+rx_i_417: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 03c6481fah
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r12
mov r10, rax
-rx_i_422: ;MULH_64
+rx_i_418: ;MULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 02bd61c5fh
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r11
mul rcx
mov rax, rdx
mov r10, rax
-rx_i_423: ;XOR_64
+rx_i_419: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0b6ab9d32h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, r14
mov rcx, rax
mov eax, r14d
@@ -5953,50 +5648,52 @@ rx_i_423: ;XOR_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_424: ;FPADD
+rx_i_420: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0f9690ceah
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm3
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm3
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 08f7bb3ech
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_425: ;CALL
+rx_i_421: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01ada0f39h
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- cmp r8d, -1600409762
- jno short taken_call_425
+ cmp rsp, rbp
+ je short not_taken_ret_421
+ xor rax, qword ptr [rsp + 8]
+ mov r10, rax
+ ret 8
+not_taken_ret_421:
mov r10, rax
- jmp rx_i_426
-taken_call_425:
- push rax
- call rx_i_35
-rx_i_426: ;IMUL_32
+rx_i_422: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 04dd16ca4h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r10d
imul rax, rcx
mov r13, rax
-rx_i_427: ;MUL_64
+rx_i_423: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 04df5ce05h
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r10
mov rcx, rax
mov eax, r15d
@@ -6004,31 +5701,33 @@ rx_i_427: ;MUL_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_428: ;FPADD
+rx_i_424: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 01ad12ce2h
mov ecx, r13d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm7
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm7
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 0565ae8aah
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm9
-rx_i_429: ;IMUL_32
+rx_i_425: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0a3c5391dh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
movsxd rcx, eax
movsxd rax, r10d
imul rax, rcx
mov r14, rax
-rx_i_430: ;AND_64
+rx_i_426: ;AND_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 09dd55ba0h
mov eax, r12d
and eax, 2047
@@ -6040,9 +5739,9 @@ rx_i_430: ;AND_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_431: ;MUL_32
+rx_i_427: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0d6cae9aeh
mov eax, r11d
and eax, 2047
@@ -6056,17 +5755,15 @@ rx_i_431: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_432: ;RET
+rx_i_428: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0f807a961h
mov eax, r11d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_432
- cmp r12d, -474453201
- jl short not_taken_ret_432
+ je short not_taken_ret_428
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r8d
@@ -6074,86 +5771,88 @@ rx_i_432: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_432:
+not_taken_ret_428:
mov rcx, rax
mov eax, r8d
xor eax, 0e3b86b2fh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_433: ;MULH_64
+rx_i_429: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0650a4102h
mov eax, r12d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, 1990438276
- mul rcx
- mov rax, rdx
+ imul rax, rax, 1990438276
mov r15, rax
-rx_i_434: ;FPADD
+rx_i_430: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 019cc0e5h
mov ecx, r14d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4744016937443393536
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm8
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 058891433h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_435: ;FPADD
+rx_i_431: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0ed17ab58h
mov ecx, r12d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm5
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm5
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 019fe4aadh
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_436: ;SUB_64
+rx_i_432: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 01c3b321fh
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r10
mov r8, rax
-rx_i_437: ;SUB_64
+rx_i_433: ;ADD_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0bbb88499h
mov ecx, r13d
- call rx_read_dataset
- sub rax, r12
+ call rx_read_dataset_r
+ add eax, r12d
mov rcx, rax
mov eax, r12d
xor eax, 04722b36fh
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_438: ;FPSQRT
+rx_i_434: ;FPSQRT
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0167edabdh
mov ecx, r13d
- call rx_read_dataset
- mov rcx, 9223372036854773760
- and rax, rcx
- cvtsi2sd xmm0, rax
- sqrtsd xmm0, xmm0
- movsd xmm9, xmm0
+ call rx_read_dataset_f
+ andps xmm0, xmm10
+ sqrtpd xmm0, xmm0
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 08c1cfc74h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_439: ;MUL_64
+rx_i_435: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0b940480ah
mov eax, r15d
and eax, 2047
@@ -6165,56 +5864,50 @@ rx_i_439: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_440: ;FPADD
+rx_i_436: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0bfc3ca8bh
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4481057281345060864
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm7, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm2
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0bfa76c43h
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_441: ;FPDIV
+rx_i_437: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 098a6bcf7h
mov ecx, r8d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm3
- mov eax, r8d
- xor eax, 025dac800h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ divpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_442: ;FPMUL
+rx_i_438: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0325b38ebh
mov ecx, r10d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- mov eax, r12d
- xor eax, 0b7c490eeh
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm4, xmm0
-rx_i_443: ;XOR_32
+rx_i_439: ;XOR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 05e807e81h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
xor eax, r15d
mov rcx, rax
mov eax, r10d
@@ -6222,29 +5915,27 @@ rx_i_443: ;XOR_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_444: ;RET
+rx_i_440: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 062f83728h
mov eax, r10d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_444
- cmp r12d, 2127765370
- jns short not_taken_ret_444
+ je short not_taken_ret_440
xor rax, qword ptr [rsp + 8]
mov r9, rax
ret 8
-not_taken_ret_444:
+not_taken_ret_440:
mov r9, rax
-rx_i_445: ;ADD_64
+rx_i_441: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0d18ec075h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, 529736748
mov rcx, rax
mov eax, r9d
@@ -6252,35 +5943,33 @@ rx_i_445: ;ADD_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_446: ;CALL
+rx_i_442: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0a53dd1bh
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r15d, 799523062
- jbe short taken_call_446
+ jbe short taken_call_442
mov rcx, rax
mov eax, r11d
xor eax, 02fa7c0f6h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_447
-taken_call_446:
+ jmp rx_i_443
+taken_call_442:
push rax
- call rx_i_13
+ call rx_i_9
-rx_i_447: ;RET
+rx_i_443: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0232d1285h
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_447
- cmp r12d, 1332855833
- jno short not_taken_ret_447
+ je short not_taken_ret_443
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r9d
@@ -6288,32 +5977,36 @@ rx_i_447: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_447:
+not_taken_ret_443:
mov rcx, rax
mov eax, r9d
xor eax, 04f71c419h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_448: ;FPMUL
+rx_i_444: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 042455dd8h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm7
- movsd xmm5, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm7
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0ce416070h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm5
-rx_i_449: ;ADD_64
+rx_i_445: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 09ae009b2h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, r11
mov rcx, rax
mov eax, r9d
@@ -6321,12 +6014,12 @@ rx_i_449: ;ADD_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_450: ;MUL_32
+rx_i_446: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 01734708eh
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, r15d
imul rax, rcx
@@ -6336,35 +6029,32 @@ rx_i_450: ;MUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_451: ;FPSUB
+rx_i_447: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 01596d0e8h
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm7
- movsd xmm5, xmm0
+ call rx_read_dataset_f
+ subpd xmm0, xmm7
+ movaps xmm5, xmm0
+ mov eax, r13d
+ xor eax, 0b384d4afh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm5
-rx_i_452: ;FPSUB
+rx_i_448: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0390cfdb0h
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm3
- mov eax, r9d
- xor eax, 0a700e3f3h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm3
+ movaps xmm9, xmm0
-rx_i_453: ;ROR_64
+rx_i_449: ;ROR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04f27744bh
mov eax, r8d
and eax, 2047
@@ -6372,12 +6062,12 @@ rx_i_453: ;ROR_64
ror rax, 28
mov r8, rax
-rx_i_454: ;ROL_64
+rx_i_450: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 04e2c76ffh
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r12
rol rax, cl
mov rcx, rax
@@ -6386,25 +6076,23 @@ rx_i_454: ;ROL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_455: ;ADD_64
+rx_i_451: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0c4d99ac9h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
add rax, -287502157
mov r8, rax
-rx_i_456: ;RET
+rx_i_452: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 040130b88h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_456
- cmp r11d, -495064539
- jl short not_taken_ret_456
+ je short not_taken_ret_452
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r11d
@@ -6412,41 +6100,41 @@ rx_i_456: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_456:
+not_taken_ret_452:
mov rcx, rax
mov eax, r11d
xor eax, 0e27dea25h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_457: ;IMULH_64
+rx_i_453: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0a2096aa4h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r14
imul rcx
mov rax, rdx
mov r8, rax
-rx_i_458: ;FPADD
+rx_i_454: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 081314291h
mov eax, r13d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4746671349487894528
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm4, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 07e41c60fh
+ and eax, 2047
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_459: ;XOR_64
+rx_i_455: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 059263cdbh
mov eax, r8d
and eax, 2047
@@ -6454,12 +6142,12 @@ rx_i_459: ;XOR_64
xor rax, r9
mov r8, rax
-rx_i_460: ;OR_32
+rx_i_456: ;OR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 010e8fe6h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
or eax, r11d
mov rcx, rax
mov eax, r9d
@@ -6467,12 +6155,12 @@ rx_i_460: ;OR_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_461: ;SUB_64
+rx_i_457: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 09de1a3efh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r10
mov rcx, rax
mov eax, r10d
@@ -6480,21 +6168,21 @@ rx_i_461: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_462: ;ROL_64
+rx_i_458: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 05c79df6eh
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
rol rax, 22
mov r14, rax
-rx_i_463: ;MUL_64
+rx_i_459: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0346f46adh
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, rax, 381354340
mov rcx, rax
mov eax, r13d
@@ -6502,12 +6190,12 @@ rx_i_463: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_464: ;SUB_64
+rx_i_460: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 098ab71fch
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, r14
mov rcx, rax
mov eax, r12d
@@ -6515,12 +6203,12 @@ rx_i_464: ;SUB_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_465: ;SHR_64
+rx_i_461: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0c814e926h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r13
shr rax, cl
mov rcx, rax
@@ -6529,9 +6217,9 @@ rx_i_465: ;SHR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_466: ;ADD_64
+rx_i_462: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0c64b4a9eh
mov eax, r10d
and eax, 2047
@@ -6539,9 +6227,9 @@ rx_i_466: ;ADD_64
add rax, -1734323376
mov r15, rax
-rx_i_467: ;SUB_64
+rx_i_463: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 08c29341h
mov eax, r9d
and eax, 2047
@@ -6549,12 +6237,12 @@ rx_i_467: ;SUB_64
sub rax, r15
mov r10, rax
-rx_i_468: ;MUL_64
+rx_i_464: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 06ff587fdh
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
imul rax, r15
mov rcx, rax
mov eax, r13d
@@ -6562,24 +6250,19 @@ rx_i_468: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_469: ;FPADD
+rx_i_465: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0b62c0003h
mov eax, r12d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm5
- mov eax, r10d
- xor eax, 0d11c1242h
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm5
+ movaps xmm2, xmm0
-rx_i_470: ;IMUL_32
+rx_i_466: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 05c541c42h
mov eax, r13d
and eax, 2047
@@ -6589,56 +6272,51 @@ rx_i_470: ;IMUL_32
imul rax, rcx
mov r9, rax
-rx_i_471: ;FPADD
+rx_i_467: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0cbb33f81h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r8d
- xor eax, 0ad38e588h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm8, xmm0
-rx_i_472: ;IDIV_64
+rx_i_468: ;IDIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 091044dc3h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov edx, -13394825
cmp edx, -1
- jne short safe_idiv_472
+ jne short safe_idiv_468
mov rcx, rax
rol rcx, 1
dec rcx
- jz short result_idiv_472
-safe_idiv_472:
+ jz short result_idiv_468
+safe_idiv_468:
mov ecx, 1
test edx, edx
cmovne ecx, edx
movsxd rcx, ecx
cqo
idiv rcx
-result_idiv_472:
+result_idiv_468:
mov rcx, rax
mov eax, r8d
xor eax, 0ff339c77h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_473: ;IMUL_32
+rx_i_469: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0c0186beh
mov ecx, r9d
- call rx_read_dataset
- movsxd rcx, eax
- mov rax, 294019485
+ call rx_read_dataset_r
+ mov ecx, eax
+ mov eax, 294019485
imul rax, rcx
mov rcx, rax
mov eax, r9d
@@ -6646,12 +6324,12 @@ rx_i_473: ;IMUL_32
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_474: ;XOR_32
+rx_i_470: ;XOR_32
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 090849e3eh
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
xor eax, r11d
mov rcx, rax
mov eax, r14d
@@ -6659,9 +6337,9 @@ rx_i_474: ;XOR_32
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_475: ;IMUL_32
+rx_i_471: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0cedba9b6h
mov eax, r14d
and eax, 32767
@@ -6671,105 +6349,91 @@ rx_i_475: ;IMUL_32
imul rax, rcx
mov r14, rax
-rx_i_476: ;CALL
+rx_i_472: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 038f4b9d6h
mov eax, r9d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r10d, 1738497427
- jl short taken_call_476
+ jl short taken_call_472
mov r10, rax
- jmp rx_i_477
-taken_call_476:
+ jmp rx_i_473
+taken_call_472:
push rax
- call rx_i_12
+ call rx_i_8
-rx_i_477: ;MULH_64
+rx_i_473: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 01fb7637dh
mov eax, r14d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
- mov rcx, -751043211
- mul rcx
- mov rax, rdx
+ imul rax, rax, -751043211
mov r12, rax
-rx_i_478: ;CALL
+rx_i_474: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0b5c0b4d4h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r15d, -233120543
- jo short taken_call_478
+ jo short taken_call_474
mov r15, rax
- jmp rx_i_479
-taken_call_478:
+ jmp rx_i_475
+taken_call_474:
push rax
- call rx_i_73
+ call rx_i_69
-rx_i_479: ;FPSUB
+rx_i_475: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0910dcdeeh
mov eax, r10d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm9
- mov eax, r15d
- xor eax, 04a936216h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm9
+ movaps xmm7, xmm0
-rx_i_480: ;FPSUB
+rx_i_476: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 07ab3b5a4h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm2
- mov eax, r9d
- xor eax, 0b01bb14ch
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm2
+ movaps xmm9, xmm0
-rx_i_481: ;FPADD
+rx_i_477: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 07a29ec63h
mov eax, r12d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, -4487871971018670080
- movd xmm1, rax
- addsd xmm0, xmm1
- movsd xmm6, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm6, xmm0
+ mov eax, r14d
+ xor eax, 0e81fc7a6h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm6
-rx_i_482: ;MULH_64
+rx_i_478: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 02d3d7e7fh
mov ecx, r14d
- call rx_read_dataset
- mov rcx, r10
- mul rcx
- mov rax, rdx
+ call rx_read_dataset_r
+ imul rax, r10
mov r12, rax
-rx_i_483: ;MUL_64
+rx_i_479: ;MUL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 09b49c793h
mov eax, r12d
and eax, 2047
@@ -6781,24 +6445,19 @@ rx_i_483: ;MUL_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_484: ;FPSUB
+rx_i_480: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0a9cc4f01h
mov eax, r9d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm4
- mov eax, r14d
- xor eax, 0d8750eeh
- and eax, 32767
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm4
+ movaps xmm6, xmm0
-rx_i_485: ;DIV_64
+rx_i_481: ;DIV_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0225ba1f9h
mov eax, r14d
and eax, 2047
@@ -6811,61 +6470,54 @@ rx_i_485: ;DIV_64
div rcx
mov r12, rax
-rx_i_486: ;XOR_64
+rx_i_482: ;XOR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 044a0f592h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
xor rax, r12
mov r11, rax
-rx_i_487: ;FPADD
+rx_i_483: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 07f71f219h
mov ecx, r11d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4743722256075587584
- movd xmm1, rax
- addsd xmm0, xmm1
- mov eax, r14d
- xor eax, 0545908cah
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm6
+ movaps xmm6, xmm0
-rx_i_488: ;ROL_64
+rx_i_484: ;ROL_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 07027bacdh
mov ecx, r12d
- call rx_read_dataset
+ call rx_read_dataset_r
rol rax, 37
mov r11, rax
-rx_i_489: ;CALL
+rx_i_485: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 03a04647h
mov ecx, r13d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp r8d, 554879918
- jno short taken_call_489
+ jno short taken_call_485
mov rcx, rax
mov eax, r15d
xor eax, 02112cbaeh
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_490
-taken_call_489:
+ jmp rx_i_486
+taken_call_485:
push rax
- call rx_i_62
+ call rx_i_58
-rx_i_490: ;ADD_64
+rx_i_486: ;ADD_64
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0ad072937h
mov eax, r15d
and eax, 2047
@@ -6877,18 +6529,18 @@ rx_i_490: ;ADD_64
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
-rx_i_491: ;SUB_64
+rx_i_487: ;SUB_64
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 07f78ad34h
mov ecx, r11d
- call rx_read_dataset
+ call rx_read_dataset_r
sub rax, -333279706
mov r11, rax
-rx_i_492: ;IMULH_64
+rx_i_488: ;IMULH_64
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 0d8b1788eh
mov eax, r12d
and eax, 32767
@@ -6898,146 +6550,132 @@ rx_i_492: ;IMULH_64
mov rax, rdx
mov r12, rax
-rx_i_493: ;CALL
+rx_i_489: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0b2ec9f3ah
mov eax, r10d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp r15d, -1127175870
- jge short taken_call_493
+ jge short taken_call_489
mov rcx, rax
mov eax, r8d
xor eax, 0bcd0a942h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_494
-taken_call_493:
+ jmp rx_i_490
+taken_call_489:
push rax
- call rx_i_79
+ call rx_i_75
-rx_i_494: ;FPADD
+rx_i_490: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 015c7f598h
mov ecx, r11d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- mov eax, r15d
- xor eax, 0ab8bd68h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm7, xmm0
-rx_i_495: ;FPADD
+rx_i_491: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 0902da6bdh
mov ecx, r8d
- call rx_read_dataset
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- movsd xmm7, xmm0
+ call rx_read_dataset_f
+ addpd xmm0, xmm9
+ movaps xmm7, xmm0
+ mov eax, r15d
+ xor eax, 0b0f0fca4h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm7
-rx_i_496: ;OR_64
+rx_i_492: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0491090d9h
mov ecx, r9d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, r9
mov r12, rax
-rx_i_497: ;FPSUB
+rx_i_493: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 09de81282h
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4740027165670637568
- movd xmm1, rax
- subsd xmm0, xmm1
- mov eax, r12d
- xor eax, 02feb2fd7h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm9
+ movaps xmm4, xmm0
-rx_i_498: ;MUL_32
+rx_i_494: ;MUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0b0d50e46h
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
mov ecx, eax
mov eax, r11d
imul rax, rcx
mov r14, rax
-rx_i_499: ;FPMUL
+rx_i_495: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r11, 0e276cad1h
mov eax, r11d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mov rax, 4739626697148596224
- movd xmm1, rax
- mulsd xmm0, xmm1
- mov eax, r8d
- xor eax, 02d12bd27h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm2
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_500: ;OR_64
+rx_i_496: ;OR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r14, 0fe757b73h
mov ecx, r14d
- call rx_read_dataset
+ call rx_read_dataset_r
or rax, -359802064
mov r9, rax
-rx_i_501: ;FPDIV
+rx_i_497: ;FPDIV
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 08d25742eh
mov eax, r8d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- divsd xmm0, xmm3
- mov eax, r8d
- xor eax, 0a800c041h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ divpd xmm0, xmm3
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
-rx_i_502: ;FPMUL
+rx_i_498: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r15, 0e066fd15h
mov eax, r15d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- movsd xmm8, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
+ mov eax, r8d
+ xor eax, 09dc5a1f9h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_503: ;IMUL_32
+rx_i_499: ;IMUL_32
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 08925556bh
mov eax, r12d
and eax, 2047
@@ -7047,31 +6685,31 @@ rx_i_503: ;IMUL_32
imul rax, rcx
mov r8, rax
-rx_i_504: ;CALL
+rx_i_500: ;CALL
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 04bc870ebh
mov eax, r10d
and eax, 32767
mov rax, qword ptr [rsi + rax * 8]
cmp r13d, 1243939650
- jl short taken_call_504
+ jl short taken_call_500
mov rcx, rax
mov eax, r10d
xor eax, 04a250342h
and eax, 32767
mov qword ptr [rsi + rax * 8], rcx
- jmp rx_i_505
-taken_call_504:
+ jmp rx_i_501
+taken_call_500:
push rax
- call rx_i_3
+ call rx_i_511
-rx_i_505: ;SHR_64
+rx_i_501: ;SHR_64
dec edi
- js rx_finish
+ jz rx_finish
xor r8, 07d46c503h
mov ecx, r8d
- call rx_read_dataset
+ call rx_read_dataset_r
mov rcx, r10
shr rax, cl
mov rcx, rax
@@ -7080,16 +6718,14 @@ rx_i_505: ;SHR_64
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_506: ;RET
+rx_i_502: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 09e70b20ch
mov ecx, r10d
- call rx_read_dataset
+ call rx_read_dataset_r
cmp rsp, rbp
- je short not_taken_ret_506
- cmp r15d, 148394770
- jno short not_taken_ret_506
+ je short not_taken_ret_502
xor rax, qword ptr [rsp + 8]
mov rcx, rax
mov eax, r9d
@@ -7097,79 +6733,131 @@ rx_i_506: ;RET
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
ret 8
-not_taken_ret_506:
+not_taken_ret_502:
mov rcx, rax
mov eax, r9d
xor eax, 08d85312h
and eax, 2047
mov qword ptr [rsi + rax * 8], rcx
-rx_i_507: ;FPSUB
+rx_i_503: ;FPSUB
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 0442e4850h
mov eax, r13d
and eax, 32767
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- subsd xmm0, xmm2
- movsd xmm9, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm2
+ movaps xmm9, xmm0
+ mov eax, r9d
+ xor eax, 080465282h
+ and eax, 2047
+ movlpd qword ptr [rsi + rax * 8], xmm9
-rx_i_508: ;FPADD
+rx_i_504: ;FPADD
dec edi
- js rx_finish
+ jz rx_finish
xor r13, 099d48347h
mov eax, r13d
and eax, 2047
- mov rax, qword ptr [rsi + rax * 8]
- and rax, -2048
- cvtsi2sd xmm0, rax
- addsd xmm0, xmm9
- movsd xmm4, xmm0
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ addpd xmm0, xmm9
+ movaps xmm4, xmm0
+ mov eax, r12d
+ xor eax, 0be8cbb18h
+ and eax, 32767
+ movhpd qword ptr [rsi + rax * 8], xmm4
-rx_i_509: ;FPMUL
+rx_i_505: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r12, 032c0a28ah
mov ecx, r12d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm4
- movsd xmm8, xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm4
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm8, xmm0
+ mov eax, r8d
+ xor eax, 021b54eaeh
+ and eax, 32767
+ movlpd qword ptr [rsi + rax * 8], xmm8
-rx_i_510: ;FPMUL
+rx_i_506: ;FPMUL
dec edi
- js rx_finish
+ jz rx_finish
xor r9, 0a973d58ch
mov ecx, r9d
- call rx_read_dataset
- or rax, 2048
- and rax, -2048
- cvtsi2sd xmm0, rax
- mulsd xmm0, xmm9
- mov eax, r11d
- xor eax, 05e890759h
- and eax, 2047
- movd qword ptr [rsi + rax * 8], xmm0
+ call rx_read_dataset_f
+ mulpd xmm0, xmm9
+ movaps xmm1, xmm0
+ cmpeqpd xmm1, xmm1
+ andps xmm0, xmm1
+ movaps xmm3, xmm0
-rx_i_511: ;RET
+rx_i_507: ;RET
dec edi
- js rx_finish
+ jz rx_finish
xor r10, 0d3b7165ch
mov eax, r10d
and eax, 2047
mov rax, qword ptr [rsi + rax * 8]
cmp rsp, rbp
- je short not_taken_ret_511
- cmp r11d, -260506265
- ja short not_taken_ret_511
+ je short not_taken_ret_507
xor rax, qword ptr [rsp + 8]
mov r14, rax
ret 8
-not_taken_ret_511:
+not_taken_ret_507:
mov r14, rax
+rx_i_508: ;RET
+ dec edi
+ jz rx_finish
+ xor r13, 0da34d818h
+ mov ecx, r13d
+ call rx_read_dataset_r
+ cmp rsp, rbp
+ je short not_taken_ret_508
+ xor rax, qword ptr [rsp + 8]
+ mov r8, rax
+ ret 8
+not_taken_ret_508:
+ mov r8, rax
+
+rx_i_509: ;CALL
+ dec edi
+ jz rx_finish
+ xor r11, 01b2873f2h
+ mov eax, r11d
+ and eax, 2047
+ mov rax, qword ptr [rsi + rax * 8]
+ cmp r8d, 1826115244
+ jno short taken_call_509
+ mov r10, rax
+ jmp rx_i_510
+taken_call_509:
+ push rax
+ call rx_i_42
+
+rx_i_510: ;FPSUB
+ dec edi
+ jz rx_finish
+ xor r8, 0db65513ch
+ mov eax, r8d
+ and eax, 2047
+ cvtdq2pd xmm0, qword ptr [rsi + rax * 8]
+ subpd xmm0, xmm2
+ movaps xmm9, xmm0
+
+rx_i_511: ;ROL_64
+ dec edi
+ jz rx_finish
+ xor r11, 02bd79286h
+ mov ecx, r11d
+ call rx_read_dataset_r
+ mov rcx, r10
+ rol rax, cl
+ mov r11, rax
+
jmp rx_i_0