/*
Copyright (c) 2018 tevador

This file is part of RandomX.

RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/

//#define DEBUG
#include "intrinPortable.h"
//inform the compiler that the floating point environment (rounding mode) may change at runtime
#pragma STDC FENV_ACCESS on
#include <cfenv>
#include <cmath>
#ifdef DEBUG
#include <iostream>
#endif

#if defined(__SIZEOF_INT128__)
typedef unsigned __int128 uint128_t;
typedef __int128 int128_t;

//the compiler supports a native 128-bit integer type, so the upper
//64 bits of the product can be taken directly from a full 128-bit multiply
uint64_t mulh(uint64_t a, uint64_t b) {
	return ((uint128_t)a * b) >> 64;
}

int64_t smulh(int64_t a, int64_t b) {
	return ((int128_t)a * b) >> 64;
}

#define HAVE_MULH
#define HAVE_SMULH
#endif
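
//Worked examples (illustrative, not part of the original source):
//  mulh(0xFFFFFFFFFFFFFFFF, 2) == 1, because the full 128-bit product is
//  0x1FFFFFFFFFFFFFFFE and only the upper quadword is returned;
//  smulh(-1, 1) == -1, because the 128-bit product -1 has all upper bits set.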

#if defined(_MSC_VER)
//MSVC's intrinsic headers define __MACHINEX64(X)/__MACHINEARM64_X64(X) to
//expand to X on matching targets and to nothing otherwise; pasting a trailing
//0 turns that into "10" (true) or "0" (false), which can be tested in an #if
#define HAS_VALUE(X) X ## 0
#define EVAL_DEFINE(X) HAS_VALUE(X)
#include <intrin.h>
#include <stdlib.h>

uint64_t rotl(uint64_t x, int c) {
	return _rotl64(x, c);
}
uint64_t rotr(uint64_t x, int c) {
	return _rotr64(x, c);
}
#define HAVE_ROTL
#define HAVE_ROTR

#if EVAL_DEFINE(__MACHINEARM64_X64(1))
uint64_t mulh(uint64_t a, uint64_t b) {
	return __umulh(a, b);
}
#define HAVE_MULH
#endif

#if EVAL_DEFINE(__MACHINEX64(1))
int64_t smulh(int64_t a, int64_t b) {
	int64_t hi;
	//_mul128 returns the low 64 bits of the product and stores the high 64 bits through the pointer
	_mul128(a, b, &hi);
	return hi;
}
#define HAVE_SMULH
#endif

static void setRoundMode__(uint32_t mode) {
	_controlfp(mode, _MCW_RC);
}
#define HAVE_SETROUNDMODE_IMPL
#endif

#ifndef HAVE_SETROUNDMODE_IMPL
static void setRoundMode__(uint32_t mode) {
	fesetround(mode);
}
#endif

#ifndef HAVE_ROTR
uint64_t rotr(uint64_t a, int b) {
	//mask the complement so that b == 0 doesn't shift by 64 bits (undefined behavior)
	return (a >> b) | (a << ((-b) & 63));
}
#define HAVE_ROTR
#endif

#ifndef HAVE_ROTL
uint64_t rotl(uint64_t a, int b) {
	//mask the complement so that b == 0 doesn't shift by 64 bits (undefined behavior)
	return (a << b) | (a >> ((-b) & 63));
}
#define HAVE_ROTL
#endif

#ifndef HAVE_MULH
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)
//portable schoolbook multiplication from four 32x32->64 bit partial products:
//a*b = (ah*2^32 + al)*(bh*2^32 + bl) = ah*bh*2^64 + (ah*bl + al*bh)*2^32 + al*bl
uint64_t mulh(uint64_t a, uint64_t b) {
	uint64_t ah = HI(a), al = LO(a);
	uint64_t bh = HI(b), bl = LO(b);
	uint64_t x00 = al * bl;
	uint64_t x01 = al * bh;
	uint64_t x10 = ah * bl;
	uint64_t x11 = ah * bh;
	uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
	uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
	uint64_t m3 = HI(x11) + HI(m2);

	return (m3 << 32) + LO(m2);
}
#define HAVE_MULH
#endif
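
//Example (illustrative): mulh(1ULL << 32, 1ULL << 32) == 1, since the
//product 2^64 is exactly one unit in the upper quadword.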

#ifndef HAVE_SMULH
int64_t smulh(int64_t a, int64_t b) {
	int64_t hi = mulh(a, b);
	if (a < 0LL) hi -= b;
	if (b < 0LL) hi -= a;
	return hi;
}
#define HAVE_SMULH
#endif
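
//Why the correction above works (sketch): reinterpreting a negative a as
//unsigned adds 2^64, so mulh effectively multiplies (a + 2^64*[a < 0]) by
//(b + 2^64*[b < 0]). Expanding that product and keeping bits 64..127 of the
//true signed result leaves mulh(a, b) - [a < 0]*b - [b < 0]*a, which is
//exactly what the two conditional subtractions restore.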

// avoid undefined behavior of signed overflow
static inline int32_t safeSub(int32_t a, int32_t b) {
	return int32_t(uint32_t(a) - uint32_t(b));
}
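
//Unsigned subtraction wraps modulo 2^32 instead of overflowing, and converting
//back to int32_t yields the expected two's complement value on all supported
//compilers, so safeSub(INT32_MIN, 1) is INT32_MAX rather than undefined behavior.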

//GCC has provided __builtin_sub_overflow since version 5, but gained
//__has_builtin much later, so pretend every builtin is available
#if __GNUC__ >= 5
#undef __has_builtin
#define __has_builtin(x) 1
#endif

#if defined(__has_builtin)
#if __has_builtin(__builtin_sub_overflow)
static inline bool subOverflow__(int32_t a, int32_t b) {
	int32_t temp;
	return __builtin_sub_overflow(a, b, &temp);
}
#define HAVE_SUB_OVERFLOW
#endif
#endif

#ifndef HAVE_SUB_OVERFLOW
static inline bool subOverflow__(int32_t a, int32_t b) {
	auto c = safeSub(a, b);
	return (c < a) != (b > 0);
}
#define HAVE_SUB_OVERFLOW
#endif
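
//Rationale (sketch): without overflow, subtracting a positive b always yields
//c < a and subtracting a non-positive b never does, so the two comparisons
//agree exactly when the wrapped result is correct. Example:
//subOverflow__(INT32_MIN, 1) wraps to INT32_MAX, making (c < a) false while
//(b > 0) is true, which correctly reports overflow.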

static inline double FlushDenormalNaN(double x) {
	int fpc = std::fpclassify(x);
	if (fpc == FP_SUBNORMAL || fpc == FP_NAN) {
		return 0.0;
	}
	return x;
}

static inline double FlushNaN(double x) {
	return x != x ? 0.0 : x;
}
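
//NaN is the only value that compares unequal to itself (IEEE 754), so
//"x != x" is a cheap NaN test; it mirrors what the commented-out SSE2
//code below does with an _mm_cmpeq_pd mask.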

void setRoundMode(uint32_t rcflag) {
	switch (rcflag & 3) {
	case RoundDown:
		setRoundMode__(FE_DOWNWARD);
		break;
	case RoundUp:
		setRoundMode__(FE_UPWARD);
		break;
	case RoundToZero:
		setRoundMode__(FE_TOWARDZERO);
		break;
	default:
		setRoundMode__(FE_TONEAREST);
		break;
	}
}
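
//The rcflag is masked to two bits, so any input selects one of the four
//IEEE 754 rounding modes; RoundDown, RoundUp and RoundToZero are assumed
//to be the 2-bit codes declared in intrinPortable.h.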

bool condition(uint32_t type, int32_t value, int32_t imm32) {
	switch (type & 7)
	{
	case 0: //below or equal (unsigned)
		return (uint32_t)value <= (uint32_t)imm32;
	case 1: //above (unsigned)
		return (uint32_t)value > (uint32_t)imm32;
	case 2: //sign of the difference
		return safeSub(value, imm32) < 0;
	case 3:
		return safeSub(value, imm32) >= 0;
	case 4: //signed overflow of the subtraction
		return subOverflow__(value, imm32);
	case 5:
		return !subOverflow__(value, imm32);
	case 6: //less than (signed)
		return value < imm32;
	case 7: //greater than or equal (signed)
		return value >= imm32;
	}
	return false; //unreachable ("type & 7" covers all cases); silences missing-return warnings
}
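
//Example (illustrative): condition(4, INT32_MIN, 1) is true because
//INT32_MIN - 1 overflows, while condition(6, INT32_MIN, 1) is also true
//since INT32_MIN < 1 in a plain signed comparison.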

void initFpu() {
#ifdef __SSE2__
	_mm_setcsr(0x9FC0); //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
#else
	setRoundMode(FE_TONEAREST);
#endif
}
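
//MXCSR 0x9FC0 decoded (x86 SSE control/status register): bit 15 (FZ) and
//bit 6 (DAZ) flush denormal outputs and inputs to zero, bits 14-13 (RC) are
//00 for round-to-nearest, and bits 12-7 mask all six floating point exceptions.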

namespace RandomX {

extern "C" {

	/*void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
		c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
	}

	void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
		if (a.i64 == INT64_MIN && b.i32 == -1)
			c.i64 = INT64_MIN;
		else
			c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
	}

	void FADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
#ifdef __SSE2__
		__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
		__m128d ad = _mm_cvtepi32_pd(ai);
		__m128d bd = _mm_load_pd(&b.lo.f64);
		__m128d cd = _mm_add_pd(ad, bd);
		_mm_store_pd(&c.lo.f64, cd);
#else
		double alo = (double)a.i32lo;
		double ahi = (double)a.i32hi;
		c.lo.f64 = alo + b.lo.f64;
		c.hi.f64 = ahi + b.hi.f64;
#endif
	}

	void FSUB(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
#ifdef __SSE2__
		__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
		__m128d ad = _mm_cvtepi32_pd(ai);
		__m128d bd = _mm_load_pd(&b.lo.f64);
		__m128d cd = _mm_sub_pd(ad, bd);
		_mm_store_pd(&c.lo.f64, cd);
#else
		double alo = (double)a.i32lo;
		double ahi = (double)a.i32hi;
		c.lo.f64 = alo - b.lo.f64;
		c.hi.f64 = ahi - b.hi.f64;
#endif
	}

	void FMUL(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
#ifdef __SSE2__
		__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
		__m128d ad = _mm_cvtepi32_pd(ai);
		__m128d bd = _mm_load_pd(&b.lo.f64);
		__m128d cd = _mm_mul_pd(ad, bd);
		__m128d mask = _mm_cmpeq_pd(cd, cd);
		cd = _mm_and_pd(cd, mask);
		_mm_store_pd(&c.lo.f64, cd);
#else
		double alo = (double)a.i32lo;
		double ahi = (double)a.i32hi;
		c.lo.f64 = FlushNaN(alo * b.lo.f64);
		c.hi.f64 = FlushNaN(ahi * b.hi.f64);
#endif
	}

	void FDIV(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
#ifdef __SSE2__
		__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
		__m128d ad = _mm_cvtepi32_pd(ai);
		__m128d bd = _mm_load_pd(&b.lo.f64);
		__m128d cd = _mm_div_pd(ad, bd);
		__m128d mask = _mm_cmpeq_pd(cd, cd);
		cd = _mm_and_pd(cd, mask);
		_mm_store_pd(&c.lo.f64, cd);
#else
		double alo = (double)a.i32lo;
		double ahi = (double)a.i32hi;
		c.lo.f64 = FlushDenormalNaN(alo / b.lo.f64);
		c.hi.f64 = FlushDenormalNaN(ahi / b.hi.f64);
#endif
	}

	void FSQRT(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
#ifdef __SSE2__
		__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
		__m128d ad = _mm_cvtepi32_pd(ai);
		const __m128d absmask = _mm_castsi128_pd(_mm_set1_epi64x(~(1LL << 63)));
		ad = _mm_and_pd(ad, absmask);
		__m128d cd = _mm_sqrt_pd(ad);
		_mm_store_pd(&c.lo.f64, cd);
#else
		double alo = (double)a.i32lo;
		double ahi = (double)a.i32hi;
		c.lo.f64 = sqrt(std::abs(alo));
		c.hi.f64 = sqrt(std::abs(ahi));
#endif
	}*/

} //extern "C"
} //namespace RandomX