mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
ALU and FPU tests
This commit is contained in:
parent
ec2d378fce
commit
f19995d4c5
12
README.md
12
README.md
@ -205,7 +205,7 @@ The shift/rotate instructions use just the bottom 6 bits of the `B` operand (`im
|
||||
|22|FSUB|A - B|
|
||||
|22|FMUL|A * B|
|
||||
|8|FDIV|A / B|
|
||||
|6|FSQRT|sqrt(A)|
|
||||
|6|FABSQRT|sqrt(abs(A))|
|
||||
|2|FROUND|A|
|
||||
|
||||
FPU instructions conform to the IEEE-754 specification, so they must give correctly rounded results. The initial rounding mode is RN (Round to Nearest). No operation may produce a denormal value; denormal results are flushed to zero.
|
||||
@ -214,8 +214,8 @@ FPU instructions conform to the IEEE-754 specification, so they must give correc
|
||||
|
||||
Operands loaded from memory are treated as signed 64-bit integers and converted to double precision floating point format. Operands loaded from floating point registers are used directly.
|
||||
|
||||
##### FSQRT
|
||||
The sign bit of the FSQRT operand is always cleared first, so only non-negative values are used.
|
||||
##### FABSQRT
|
||||
The sign bit of the FABSQRT operand is always cleared first, so only non-negative values are used.
|
||||
|
||||
*In x86, the `SQRTSD` instruction must be used. The legacy `FSQRT` instruction doesn't produce correctly rounded results in all cases.*
|
||||
|
||||
@ -225,11 +225,11 @@ The FROUND instruction changes the rounding mode for all subsequent FPU operatio
|
||||
|A[1:0]|rounding mode|
|
||||
|-------|------------|
|
||||
|00|Round to Nearest (RN) mode|
|
||||
|01|Round towards Plus Infinity (RP) mode
|
||||
|10|Round towards Minus Infinity (RM) mode
|
||||
|01|Round towards Minus Infinity (RM) mode|
|
||||
|10|Round towards Plus Infinity (RP) mode|
|
||||
|11|Round towards Zero (RZ) mode|
|
||||
|
||||
*The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 22-23 of the ARM `FPSCR` register.*
|
||||
*The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 23 and 22 (reversed) of the ARM `FPSCR` register.*
|
||||
|
||||
### Control flow instructions
|
||||
The following 2 control flow instructions are supported:
|
||||
|
69
tests/test_alu_fpu/Instructions.h
Normal file
69
tests/test_alu_fpu/Instructions.h
Normal file
@ -0,0 +1,69 @@
|
||||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
// Include guard: this header defines constants, a union type and inline
// functions, so it must be safe to include more than once per translation unit.
#pragma once

#include <cstdint>

namespace RandomX {

	// Rounding-mode selectors understood by FROUND (low two bits of operand A).
	constexpr int RoundToNearest = 0;
	constexpr int RoundDown = 1;
	constexpr int RoundUp = 2;
	constexpr int RoundToZero = 3;

	// One 64-bit operand/result slot that can be viewed as a double or as a
	// signed/unsigned integer of 64 or 32 bits.
	typedef union {
		double f64;
		int64_t i64;
		uint64_t u64;
		int32_t i32;
		uint32_t u32;
	} convertible_t;

	// All instructions share C linkage so that the same declarations can be
	// satisfied either by InstructionsPortable.cpp or by InstructionsX64.asm.
	// Convention: a and b are the inputs, c receives the result.
	extern "C" {
		// Integer addition / subtraction.
		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c);

		// Integer multiplication (low half, high half, 32x32 -> 64).
		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c);

		// Integer division (64-bit dividend, 32-bit divisor).
		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c);

		// Bitwise logic.
		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c);

		// Shifts and rotates.
		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);

		// Floating point. FPINIT sets up the FPU state before the first FPU
		// instruction. The *_64 variants take operand B already converted to
		// double.
		void FPINIT();
		void FADD_64(convertible_t& a, double b, convertible_t& c);
		void FSUB_64(convertible_t& a, double b, convertible_t& c);
		void FMUL_64(convertible_t& a, double b, convertible_t& c);
		void FDIV_64(convertible_t& a, double b, convertible_t& c);
		void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c);
		void FROUND(convertible_t& a, convertible_t& b, convertible_t& c);

		// Three-operand wrappers: convert B from signed 64-bit integer to
		// double, then forward to the matching *_64 instruction.
		inline void FADD(convertible_t& a, convertible_t& b, convertible_t& c) {
			FADD_64(a, (double)b.i64, c);
		}

		inline void FSUB(convertible_t& a, convertible_t& b, convertible_t& c) {
			FSUB_64(a, (double)b.i64, c);
		}

		inline void FMUL(convertible_t& a, convertible_t& b, convertible_t& c) {
			FMUL_64(a, (double)b.i64, c);
		}

		inline void FDIV(convertible_t& a, convertible_t& b, convertible_t& c) {
			FDIV_64(a, (double)b.i64, c);
		}
	}
}
|
248
tests/test_alu_fpu/InstructionsPortable.cpp
Normal file
248
tests/test_alu_fpu/InstructionsPortable.cpp
Normal file
@ -0,0 +1,248 @@
|
||||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
#include "Instructions.h"
|
||||
#include <cfenv>
|
||||
#include <cmath>
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
// The compiler provides a native 128-bit integer: compute the full product
// and keep its upper 64 bits.
typedef unsigned __int128 uint128_t;
typedef __int128 int128_t;

// Upper 64 bits of the unsigned 128-bit product a * b.
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
	return (uint64_t)(((uint128_t)a * b) >> 64);
}

// Upper 64 bits of the signed 128-bit product a * b.
// Returns int64_t (the result is a signed quantity), matching the generic
// fallback implementation of imulhi64.
static inline int64_t __imulhi64(int64_t a, int64_t b) {
	return (int64_t)(((int128_t)a * b) >> 64);
}

#define umulhi64 __umulhi64
#define imulhi64 __imulhi64
#endif
|
||||
|
||||
#ifdef _MSC_VER
// MSVC build: map the helper names used below onto compiler intrinsics.
// Helpers left undefined here are supplied by the generic fallbacks that follow.
#include <intrin.h>
#include <stdlib.h>

// 64-bit rotates.
#define ror64 _rotr64
#define rol64 _rotl64

// NOTE(review): the __MACHINE* macros are presumably provided by <intrin.h>
// for the architectures on which the corresponding intrinsic exists - confirm.
#if defined(__MACHINEARM64_X64)
#define umulhi64 __umulh
#endif

#if defined(__MACHINEX64)
// Upper 64 bits of the signed product, obtained through _mul128.
static inline uint64_t __imulhi64(int64_t a, int64_t b) {
	int64_t upper;
	_mul128(a, b, &upper);
	return upper;
}
#define imulhi64 __imulhi64
#endif

#if defined(__MACHINEX86_X64)
#define sar64 __ll_rshift
#endif

#endif
|
||||
|
||||
#ifndef ror64
// Generic 64-bit rotate right by b bits (only the low 6 bits of b are used).
// The complementary shift count is computed as (-b & 63) rather than (64 - b)
// so that b == 0 never shifts by the full width of the type, which would be
// undefined behaviour.
static inline uint64_t __ror64(uint64_t a, int b) {
	return (a >> (b & 63)) | (a << (-b & 63));
}
#define ror64 __ror64
#endif
|
||||
|
||||
#ifndef rol64
// Generic 64-bit rotate left by b bits (only the low 6 bits of b are used).
// The complementary shift count is computed as (-b & 63) rather than (64 - b)
// so that b == 0 never shifts by the full width of the type, which would be
// undefined behaviour.
static inline uint64_t __rol64(uint64_t a, int b) {
	return (a << (b & 63)) | (a >> (-b & 63));
}
#define rol64 __rol64
#endif
|
||||
|
||||
#ifndef sar64
#include <type_traits>

// Plain built-in right shift of a signed value; constexpr so that its
// behaviour on negative operands can be probed at compile time.
constexpr int64_t builtintShr64(int64_t value, int shift) noexcept {
	return value >> shift;
}

// True when the compiler's >> on a negative int64_t replicates the sign bit.
struct usesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> {};

// Arithmetic (sign-propagating) right shift of a by b bits.
static inline int64_t __sar64(int64_t a, int b) {
	if (usesArithmeticShift::value) {
		return builtintShr64(a, b);
	}
	if (a >= 0) {
		return a >> b;
	}
	// Negative value on a logical-shift compiler: shift the complement
	// (which is non-negative) and complement the result again.
	return ~(~a >> b);
}
#define sar64 __sar64
#endif
|
||||
|
||||
#ifndef umulhi64
// Low / high 32-bit halves of a 64-bit value.
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)

// Upper 64 bits of the unsigned product a * b, built from four 32x32 -> 64
// partial products with the carries propagated column by column.
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
	const uint64_t aLo = LO(a), aHi = HI(a);
	const uint64_t bLo = LO(b), bHi = HI(b);

	const uint64_t loLo = aLo * bLo;
	const uint64_t loHi = aLo * bHi;
	const uint64_t hiLo = aHi * bLo;
	const uint64_t hiHi = aHi * bHi;

	// Column for product bits 32..63; only its carry is needed.
	const uint64_t column1 = LO(hiLo) + LO(loHi) + HI(loLo);
	// Column for product bits 64..95.
	const uint64_t column2 = HI(hiLo) + HI(loHi) + LO(hiHi) + HI(column1);
	// Column for product bits 96..127.
	const uint64_t column3 = HI(hiHi) + HI(column2);

	return (column3 << 32) + LO(column2);
}
#define umulhi64 __umulhi64
#endif
|
||||
|
||||
#ifndef imulhi64
|
||||
static inline int64_t __imulhi64(int64_t a, int64_t b) {
|
||||
int64_t hi = umulhi64(a, b);
|
||||
if (a < 0LL) hi -= b;
|
||||
if (b < 0LL) hi -= a;
|
||||
return hi;
|
||||
}
|
||||
#define imulhi64 __imulhi64
|
||||
#endif
|
||||
|
||||
// Software emulation of flush-to-zero: a subnormal value is replaced by a
// zero that carries the same sign, every other value is returned unchanged.
// Keeping the sign matters for bit-exact agreement with the x86 build, which
// enables hardware flush-to-zero in MXCSR; that mode produces a zero with the
// sign of the true result.
static double FlushDenormal(double x) {
	if (std::fpclassify(x) == FP_SUBNORMAL) {
		return std::copysign(0.0, x);
	}
	return x;
}

#define FTZ(x) FlushDenormal(x)
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
extern "C" {
|
||||
|
||||
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 + b.u64;
|
||||
}
|
||||
|
||||
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 + b.u32;
|
||||
}
|
||||
|
||||
void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 - b.u64;
|
||||
}
|
||||
|
||||
void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 - b.u32;
|
||||
}
|
||||
|
||||
void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 * b.u64;
|
||||
}
|
||||
|
||||
void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = umulhi64(a.u64, b.u64);
|
||||
}
|
||||
|
||||
void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = (uint64_t)a.u32 * b.u32;
|
||||
}
|
||||
|
||||
void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.i64 = (int64_t)a.i32 * b.i32;
|
||||
}
|
||||
|
||||
void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.i64 = imulhi64(a.i64, b.i64);
|
||||
}
|
||||
|
||||
void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
|
||||
}
|
||||
|
||||
void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
if (a.i64 == INT64_MIN && b.i64 == -1)
|
||||
c.i64 = INT64_MIN;
|
||||
else
|
||||
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
||||
}
|
||||
|
||||
void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 & b.u64;
|
||||
}
|
||||
|
||||
void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 & b.u32;
|
||||
}
|
||||
|
||||
void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 | b.u64;
|
||||
}
|
||||
|
||||
void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 | b.u32;
|
||||
}
|
||||
|
||||
void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 ^ b.u64;
|
||||
}
|
||||
|
||||
void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 ^ b.u32;
|
||||
}
|
||||
|
||||
void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 << (b.u64 & 63);
|
||||
}
|
||||
|
||||
void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 >> (b.u64 & 63);
|
||||
}
|
||||
|
||||
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = sar64(a.i64, b.u64 & 63);
|
||||
}
|
||||
|
||||
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = rol64(a.u64, (b.u64 & 63));
|
||||
}
|
||||
|
||||
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = ror64(a.u64, (b.u64 & 63));
|
||||
}
|
||||
|
||||
void FPINIT() {
|
||||
fesetround(FE_TONEAREST);
|
||||
}
|
||||
|
||||
void FADD_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 + b);
|
||||
}
|
||||
|
||||
void FSUB_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 - b);
|
||||
}
|
||||
|
||||
void FMUL_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 * b);
|
||||
}
|
||||
|
||||
void FDIV_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 / b);
|
||||
}
|
||||
|
||||
void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
double d = fabs((double)a.i64);
|
||||
c.f64 = FTZ(sqrt(d));
|
||||
}
|
||||
|
||||
void FROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.f64 = (double)a.i64;
|
||||
switch (a.u64 & 3) {
|
||||
case RoundDown:
|
||||
fesetround(FE_DOWNWARD);
|
||||
break;
|
||||
case RoundUp:
|
||||
fesetround(FE_UPWARD);
|
||||
break;
|
||||
case RoundToZero:
|
||||
fesetround(FE_TOWARDZERO);
|
||||
break;
|
||||
default:
|
||||
fesetround(FE_TONEAREST);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
276
tests/test_alu_fpu/InstructionsX64.asm
Normal file
276
tests/test_alu_fpu/InstructionsX64.asm
Normal file
@ -0,0 +1,276 @@
|
||||
;RandomX ALU + FPU test
|
||||
;https://github.com/tevador/RandomX
|
||||
;License: GPL v3
|
||||
|
||||
PUBLIC ADD_64
|
||||
PUBLIC ADD_32
|
||||
PUBLIC SUB_64
|
||||
PUBLIC SUB_32
|
||||
PUBLIC MUL_64
|
||||
PUBLIC MULH_64
|
||||
PUBLIC MUL_32
|
||||
PUBLIC IMUL_32
|
||||
PUBLIC IMULH_64
|
||||
PUBLIC DIV_64
|
||||
PUBLIC IDIV_64
|
||||
PUBLIC AND_64
|
||||
PUBLIC AND_32
|
||||
PUBLIC OR_64
|
||||
PUBLIC OR_32
|
||||
PUBLIC XOR_64
|
||||
PUBLIC XOR_32
|
||||
PUBLIC SHL_64
|
||||
PUBLIC SHR_64
|
||||
PUBLIC SAR_64
|
||||
PUBLIC ROL_64
|
||||
PUBLIC ROR_64
|
||||
PUBLIC FPINIT
|
||||
PUBLIC FADD_64
|
||||
PUBLIC FSUB_64
|
||||
PUBLIC FMUL_64
|
||||
PUBLIC FDIV_64
|
||||
PUBLIC FABSQRT
|
||||
PUBLIC FROUND
|
||||
|
||||
CONST SEGMENT
; 128-bit mask with every bit set except the sign bit of each 64-bit lane;
; ANDing a double with it clears the sign (absolute value).
__XMMABS DQ 07fffffffffffffffH, 07fffffffffffffffH
CONST ENDS
|
||||
|
||||
.code
|
||||
|
||||
; Integer instructions. Arguments arrive as in the Microsoft x64 convention:
; rcx = &a, rdx = &b, r8 = &c. Each procedure reads a and b and stores c.

; c = a + b (64-bit)
ADD_64 PROC
	mov	rax, qword ptr [rcx]
	add	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
ADD_64 ENDP

; c = low32(a) + low32(b); the 32-bit write to eax zero-extends into rax
ADD_32 PROC
	mov	eax, dword ptr [rcx]
	add	eax, dword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
ADD_32 ENDP

; c = a - b (64-bit)
SUB_64 PROC
	mov	rax, qword ptr [rcx]
	sub	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
SUB_64 ENDP

; c = low32(a) - low32(b), zero-extended
SUB_32 PROC
	mov	eax, dword ptr [rcx]
	sub	eax, dword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
SUB_32 ENDP

; c = low 64 bits of a * b
MUL_64 PROC
	mov	rax, qword ptr [rcx]
	imul	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
MUL_64 ENDP

; c = high 64 bits of the unsigned product (mul leaves them in rdx)
MULH_64 PROC
	mov	rax, qword ptr [rdx]
	mul	qword ptr [rcx]
	mov	qword ptr [r8], rdx
	ret
MULH_64 ENDP

; c = low32(a) * low32(b), unsigned 32x32 -> 64 (operands zero-extended)
MUL_32 PROC
	mov	r9d, dword ptr [rcx]
	mov	eax, dword ptr [rdx]
	imul	r9, rax
	mov	qword ptr [r8], r9
	ret
MUL_32 ENDP

; c = low32(a) * low32(b), signed 32x32 -> 64 (operands sign-extended)
IMUL_32 PROC
	movsxd	r9, dword ptr [rcx]
	movsxd	rax, dword ptr [rdx]
	imul	r9, rax
	mov	qword ptr [r8], r9
	ret
IMUL_32 ENDP

; c = high 64 bits of the signed product (imul leaves them in rdx)
IMULH_64 PROC
	mov	rax, qword ptr [rdx]
	imul	qword ptr [rcx]
	mov	qword ptr [r8], rdx
	ret
IMULH_64 ENDP

; c = a / low32(b), unsigned; a zero divisor is replaced by 1
DIV_64 PROC
	mov	r9d, dword ptr [rdx]		; r9d = 32-bit divisor
	mov	eax, 1
	test	r9d, r9d
	cmovne	eax, r9d			; eax = divisor, or 1 if it was 0
	xor	edx, edx			; clear the upper half of the dividend (rdx:rax)
	mov	r9d, eax
	mov	rax, qword ptr [rcx]
	div	r9
	mov	qword ptr [r8], rax
	ret
DIV_64 ENDP
|
||||
|
||||
; c = a / low32(b), signed; a zero divisor is replaced by 1.
; rcx = &a, rdx = &b, r8 = &c.
; INT64_MIN / -1 overflows and would raise a divide error in idiv, so that
; case is answered with INT64_MIN directly. The guard compares the 32-bit
; divisor that is actually used below: comparing the whole QWORD at [rdx]
; would miss inputs whose low dword is -1 while the high dword is not.
IDIV_64 PROC
	mov	rax, QWORD PTR [rcx]
	mov	rcx, -9223372036854775808	; INT64_MIN
	cmp	rax, rcx
	jne	SHORT SAFE_IDIV_64
	cmp	DWORD PTR [rdx], -1
	jne	SHORT SAFE_IDIV_64
	mov	QWORD PTR [r8], rcx
	ret	0
SAFE_IDIV_64:
	mov	ecx, DWORD PTR [rdx]		; ecx = 32-bit divisor
	test	ecx, ecx
	mov	edx, 1				; mov does not alter the flags set by test
	cmovne	edx, ecx			; edx = divisor, or 1 if it was 0
	movsxd	rcx, edx			; sign-extend the divisor to 64 bits
	cqo					; sign-extend rax into rdx:rax
	idiv	rcx
	mov	QWORD PTR [r8], rax
	ret	0
IDIV_64 ENDP
|
||||
|
||||
; Bitwise, shift/rotate and floating point instructions.
; rcx = &a, rdx = &b, r8 = &c; for the F*_64 procedures the second argument
; is a double passed in xmm1.

; c = a & b (64-bit)
AND_64 PROC
	mov	rax, qword ptr [rcx]
	and	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
AND_64 ENDP

; c = low32(a) & low32(b), zero-extended
AND_32 PROC
	mov	eax, dword ptr [rcx]
	and	eax, dword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
AND_32 ENDP

; c = a | b (64-bit)
OR_64 PROC
	mov	rax, qword ptr [rcx]
	or	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
OR_64 ENDP

; c = low32(a) | low32(b), zero-extended
OR_32 PROC
	mov	eax, dword ptr [rcx]
	or	eax, dword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
OR_32 ENDP

; c = a ^ b (64-bit)
XOR_64 PROC
	mov	rax, qword ptr [rcx]
	xor	rax, qword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
XOR_64 ENDP

; c = low32(a) ^ low32(b), zero-extended
XOR_32 PROC
	mov	eax, dword ptr [rcx]
	xor	eax, dword ptr [rdx]
	mov	qword ptr [r8], rax
	ret
XOR_32 ENDP

; Shifts and rotates take their count from cl, the low byte of b.

; c = a << b
SHL_64 PROC
	mov	rax, qword ptr [rcx]
	mov	rcx, qword ptr [rdx]
	shl	rax, cl
	mov	qword ptr [r8], rax
	ret
SHL_64 ENDP

; c = a >> b (logical)
SHR_64 PROC
	mov	rax, qword ptr [rcx]
	mov	rcx, qword ptr [rdx]
	shr	rax, cl
	mov	qword ptr [r8], rax
	ret
SHR_64 ENDP

; c = a >> b (arithmetic)
SAR_64 PROC
	mov	rax, qword ptr [rcx]
	mov	rcx, qword ptr [rdx]
	sar	rax, cl
	mov	qword ptr [r8], rax
	ret
SAR_64 ENDP

; c = a rotated left by b
ROL_64 PROC
	mov	rax, qword ptr [rcx]
	mov	rcx, qword ptr [rdx]
	rol	rax, cl
	mov	qword ptr [r8], rax
	ret
ROL_64 ENDP

; c = a rotated right by b
ROR_64 PROC
	mov	rax, qword ptr [rcx]
	mov	rcx, qword ptr [rdx]
	ror	rax, cl
	mov	qword ptr [r8], rax
	ret
ROR_64 ENDP

; Load MXCSR with 40896 (09FC0H): flush-to-zero, denormals-are-zero and all
; exception masks set, rounding control = round to nearest.
; The value is staged at [rsp+8], above the return address.
FPINIT PROC
	mov	dword ptr [rsp+8], 40896
	ldmxcsr	dword ptr [rsp+8]
	ret
FPINIT ENDP

; c = (double)a + b   (a converted from signed 64-bit integer, b in xmm1)
FADD_64 PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	addsd	xmm0, xmm1
	movsd	qword ptr [r8], xmm0
	ret
FADD_64 ENDP

; c = (double)a - b
FSUB_64 PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	subsd	xmm0, xmm1
	movsd	qword ptr [r8], xmm0
	ret
FSUB_64 ENDP

; c = (double)a * b
FMUL_64 PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	mulsd	xmm0, xmm1
	movsd	qword ptr [r8], xmm0
	ret
FMUL_64 ENDP

; c = (double)a / b
FDIV_64 PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	divsd	xmm0, xmm1
	movsd	qword ptr [r8], xmm0
	ret
FDIV_64 ENDP

; c = sqrt(|(double)a|); the sign bit is cleared with the __XMMABS mask
FABSQRT PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	andps	xmm0, xmmword ptr __XMMABS
	sqrtsd	xmm1, xmm0
	movsd	qword ptr [r8], xmm1
	ret
FABSQRT ENDP

; c = (double)a, then set the rounding mode from the low two bits of a.
; The conversion runs before MXCSR is reloaded, i.e. under the previous
; rounding mode.
FROUND PROC
	cvtsi2sd xmm0, qword ptr [rcx]
	movsd	qword ptr [r8], xmm0
	mov	rax, qword ptr [rcx]
	shl	rax, 13				; move bits 0-1 of a to bits 13-14
	and	eax, 24576			; 06000H: keep only those two bits
	or	eax, 40896			; same base MXCSR value as FPINIT
	mov	dword ptr [rsp+8], eax
	ldmxcsr	dword ptr [rsp+8]
	ret
FROUND ENDP
|
||||
|
||||
END
|
283
tests/test_alu_fpu/TestAluFpu.cpp
Normal file
283
tests/test_alu_fpu/TestAluFpu.cpp
Normal file
@ -0,0 +1,283 @@
|
||||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
#include "Instructions.h"
|
||||
|
||||
using namespace RandomX;
|
||||
|
||||
typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&);
|
||||
|
||||
double rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) {
|
||||
convertible_t a, b, c;
|
||||
a.u64 = mode;
|
||||
FROUND(a, b, c);
|
||||
a.i64 = x;
|
||||
b.i64 = y;
|
||||
op(a, b, c);
|
||||
return c.f64;
|
||||
}
|
||||
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include "catch.hpp"
|
||||
|
||||
// Loads va and vb as unsigned 64-bit operands into the caller's local
// variables a and b, then executes INST(a, b, c). The caller must have
// a, b and c in scope.
#define RX_EXECUTE_U64(va, vb, INST) \
	do { \
		a.u64 = (va); \
		b.u64 = (vb); \
		INST(a, b, c); \
	} while (false)
|
||||
|
||||
// Loads va and vb as signed 64-bit operands into the caller's local
// variables a and b, then executes INST(a, b, c). The caller must have
// a, b and c in scope.
#define RX_EXECUTE_I64(va, vb, INST) \
	do { \
		a.i64 = (va); \
		b.i64 = (vb); \
		INST(a, b, c); \
	} while (false)
|
||||
|
||||
// ---- Integer instructions --------------------------------------------------
// Each case loads operands A and B through RX_EXECUTE_*, runs one instruction
// and compares the result C with a precomputed value.

TEST_CASE("Integer addition (64-bit)", "[ADD_64]") {
	convertible_t a, b, c;

	// Carry out of the low 32 bits is kept.
	RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_64);
	REQUIRE(c.u64 == 0x100000000);

	// Carry out of bit 63 is discarded.
	RX_EXECUTE_U64(0x8000000000000000, 0x8000000000000000, ADD_64);
	REQUIRE(c.u64 == 0x0);
}

TEST_CASE("Integer addition (32-bit)", "[ADD_32]") {
	convertible_t a, b, c;

	// The sum wraps at 32 bits.
	RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_32);
	REQUIRE(c.u64 == 0);

	// The upper halves of the operands are ignored.
	RX_EXECUTE_U64(0xFF00000000000001, 0x0000000100000001, ADD_32);
	REQUIRE(c.u64 == 2);
}

TEST_CASE("Integer subtraction (64-bit)", "[SUB_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_64);
	REQUIRE(c.u64 == 0xFFFFFFFF00000002);
}

TEST_CASE("Integer subtraction (32-bit)", "[SUB_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_32);
	REQUIRE(c.u64 == 2);
}

// The five multiplication cases share the same pair of operands.

TEST_CASE("Unsigned multiplication (64-bit, low half)", "[MUL_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_64);
	REQUIRE(c.u64 == 0x28723424A9108E51);
}

TEST_CASE("Unsigned multiplication (64-bit, high half)", "[MULH_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MULH_64);
	REQUIRE(c.u64 == 0xB4676D31D2B34883);
}

TEST_CASE("Unsigned multiplication (32-bit x 32-bit -> 64-bit)", "[MUL_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_32);
	REQUIRE(c.u64 == 0xB001AA5FA9108E51);
}

TEST_CASE("Signed multiplication (32-bit x 32-bit -> 64-bit)", "[IMUL_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMUL_32);
	REQUIRE(c.u64 == 0x03EBA0C1A9108E51);
}

TEST_CASE("Signed multiplication (64-bit, high half)", "[IMULH_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMULH_64);
	REQUIRE(c.u64 == 0x02D93EF1269D3EE5);
}

TEST_CASE("Unsigned division (64-bit / 32-bit -> 32-bit)", "[DIV_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(8774217225983458895, 3014068202, DIV_64);
	REQUIRE(c.u64 == 2911087818);

	// Divisor 0: the dividend comes back unchanged.
	RX_EXECUTE_U64(8774217225983458895, 0, DIV_64);
	REQUIRE(c.u64 == 8774217225983458895);

	// Divisor wider than 32 bits.
	RX_EXECUTE_U64(3014068202, 8774217225983458895, DIV_64);
	REQUIRE(c.u64 == 2);
}

TEST_CASE("Signed division (64-bit / 32-bit -> 32-bit)", "[IDIV_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(8774217225983458895, 3014068202, IDIV_64);
	REQUIRE(c.u64 == 0xFFFFFFFE67B4994E);

	// Divisor 0: the dividend comes back unchanged.
	RX_EXECUTE_U64(8774217225983458895, 0, IDIV_64);
	REQUIRE(c.u64 == 8774217225983458895);

	// INT64_MIN / -1 yields INT64_MIN.
	RX_EXECUTE_U64(0x8000000000000000, 0xFFFFFFFFFFFFFFFF, IDIV_64);
	REQUIRE(c.u64 == 0x8000000000000000);

	// Divisor wider than 32 bits.
	RX_EXECUTE_U64(0xFFFFFFFFB3A707EA, 8774217225983458895, IDIV_64);
	REQUIRE(c.u64 == 0xFFFFFFFFFFFFFFFF);
}

TEST_CASE("Bitwise AND (64-bit)", "[AND_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_64);
	REQUIRE(c.u64 == 0x8888888888888888);
}

TEST_CASE("Bitwise AND (32-bit)", "[AND_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_32);
	REQUIRE(c.u64 == 0x88888888);
}

TEST_CASE("Bitwise OR (64-bit)", "[OR_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_64);
	REQUIRE(c.u64 == 0xEEEEEEEEEEEEEEEE);
}

TEST_CASE("Bitwise OR (32-bit)", "[OR_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_32);
	REQUIRE(c.u64 == 0xEEEEEEEE);
}

TEST_CASE("Bitwise XOR (64-bit)", "[XOR_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_64);
	REQUIRE(c.u64 == 0x2222222222222222);
}

TEST_CASE("Bitwise XOR (32-bit)", "[XOR_32]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_32);
	REQUIRE(c.u64 == 0x22222222);
}

// Shift and rotate cases: a small count, a count wider than 6 bits, and a
// value with only the top bit set.

TEST_CASE("Logical left shift (64-bit)", "[SHL_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x3, 52, SHL_64);
	REQUIRE(c.u64 == 0x30000000000000);

	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHL_64);
	REQUIRE(c.u64 == 6978065200108797952);

	RX_EXECUTE_U64(0x8000000000000000, 1, SHL_64);
	REQUIRE(c.u64 == 0);
}

TEST_CASE("Logical right shift (64-bit)", "[SHR_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x3, 52, SHR_64);
	REQUIRE(c.u64 == 0);

	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHR_64);
	REQUIRE(c.u64 == 110985711);

	RX_EXECUTE_U64(0x8000000000000000, 1, SHR_64);
	REQUIRE(c.u64 == 0x4000000000000000);
}

TEST_CASE("Arithmetic right shift (64-bit)", "[SAR_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_I64(-9, 2, SAR_64);
	REQUIRE(c.i64 == -3);

	RX_EXECUTE_I64(INT64_MIN, 63, SAR_64);
	REQUIRE(c.i64 == -1);

	RX_EXECUTE_I64(INT64_MAX, 163768499474606398, SAR_64);
	REQUIRE(c.i64 == 1);
}

TEST_CASE("Circular left shift (64-bit)", "[ROL_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x3, 52, ROL_64);
	REQUIRE(c.u64 == 0x30000000000000);

	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROL_64);
	REQUIRE(c.u64 == 6978065200552740799);

	RX_EXECUTE_U64(0x8000000000000000, 1, ROL_64);
	REQUIRE(c.u64 == 1);
}

TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") {
	convertible_t a, b, c;

	RX_EXECUTE_U64(0x3, 52, ROR_64);
	REQUIRE(c.u64 == 12288);

	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROR_64);
	REQUIRE(c.u64 == 0xD835C455069D81EF);

	RX_EXECUTE_U64(0x8000000000000000, 1, ROR_64);
	REQUIRE(c.u64 == 0x4000000000000000);
}
|
||||
|
||||
// Dividing 1 by the largest finite double gives a quotient below the smallest
// normal double; the instruction must return zero instead of a denormal.
TEST_CASE("Denormal numbers are flushed to zero", "[FTZ]") {
	FPINIT();

	convertible_t a, c;
	a.i64 = 1;

	FDIV_64(a, std::numeric_limits<double>::max(), c);
	REQUIRE(c.f64 == 0.0);
}
|
||||
|
||||
// Checks division by zero and the result of FADD, FSUB, FDIV and FABSQRT
// under each of the four rounding modes.
TEST_CASE("IEEE-754 compliance", "[FPU]") {
	FPINIT();

	convertible_t a, c;

	// Division by zero gives an infinity with the sign of the dividend.
	a.i64 = 1;
	FDIV_64(a, 0, c);
	REQUIRE(c.f64 == std::numeric_limits<double>::infinity());

	a.i64 = -1;
	FDIV_64(a, 0, c);
	REQUIRE(c.f64 == -std::numeric_limits<double>::infinity());

	// Addition.
	REQUIRE(rxRound(RoundToNearest, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
	REQUIRE(rxRound(RoundDown, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
	REQUIRE(rxRound(RoundUp, 33073499373184121, -37713516328519941, &FADD) == -4640016955335812.0);
	REQUIRE(rxRound(RoundToZero, 33073499373184121, -37713516328519941, &FADD) == -4640016955335816.0);

	// Subtraction.
	REQUIRE(rxRound(RoundToNearest, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107858e+18);
	REQUIRE(rxRound(RoundDown, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107868e+18);
	REQUIRE(rxRound(RoundUp, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);
	REQUIRE(rxRound(RoundToZero, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);

	// Division.
	REQUIRE(rxRound(RoundToNearest, 1, -10, &FDIV) == -0.10000000000000001);
	REQUIRE(rxRound(RoundDown, 1, -10, &FDIV) == -0.10000000000000001);
	REQUIRE(rxRound(RoundUp, 1, -10, &FDIV) == -0.099999999999999992);
	REQUIRE(rxRound(RoundToZero, 1, -10, &FDIV) == -0.099999999999999992);

	// Square root of the absolute value (operand B is not used).
	REQUIRE(rxRound(RoundToNearest, -2, 0, &FABSQRT) == 1.4142135623730951);
	REQUIRE(rxRound(RoundDown, -2, 0, &FABSQRT) == 1.4142135623730949);
	REQUIRE(rxRound(RoundUp, -2, 0, &FABSQRT) == 1.4142135623730951);
	REQUIRE(rxRound(RoundToZero, -2, 0, &FABSQRT) == 1.4142135623730949);
}
|
14075
tests/test_alu_fpu/catch.hpp
Normal file
14075
tests/test_alu_fpu/catch.hpp
Normal file
File diff suppressed because it is too large
Load Diff
10
tests/test_alu_fpu/makefile
Normal file
10
tests/test_alu_fpu/makefile
Normal file
@ -0,0 +1,10 @@
|
||||
# Builds the ALU/FPU test executable from the portable C++ implementation.
CXXFLAGS=-Wall -std=c++17 -O0

# 'clean' names an action, not a file.
.PHONY: clean

TestAluFpu: TestAluFpu.o InstructionsPortable.o
	$(CXX) TestAluFpu.o InstructionsPortable.o -o $@

# Object files are compiled by make's built-in %.o: %.cpp rule. The headers
# are listed as prerequisites so that editing them triggers a rebuild.
TestAluFpu.o: TestAluFpu.cpp Instructions.h catch.hpp
InstructionsPortable.o: InstructionsPortable.cpp Instructions.h

clean:
	rm -f TestAluFpu TestAluFpu.o InstructionsPortable.o
|
Loading…
Reference in New Issue
Block a user