2018-12-31 19:06:45 +01:00
|
|
|
;# Copyright (c) 2018 tevador
|
|
|
|
;#
|
|
|
|
;# This file is part of RandomX.
|
|
|
|
;#
|
|
|
|
;# RandomX is free software: you can redistribute it and/or modify
|
|
|
|
;# it under the terms of the GNU General Public License as published by
|
|
|
|
;# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
;# (at your option) any later version.
|
|
|
|
;#
|
|
|
|
;# RandomX is distributed in the hope that it will be useful,
|
|
|
|
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;# GNU General Public License for more details.
|
|
|
|
;#
|
|
|
|
;# You should have received a copy of the GNU General Public License
|
|
|
|
;# along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
|
2018-12-13 23:11:55 +01:00
|
|
|
|
2019-01-04 19:44:15 +01:00
|
|
|
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
2018-12-13 23:11:55 +01:00
|
|
|
|
2019-01-04 19:44:15 +01:00
|
|
|
PUBLIC executeProgram
|
2018-12-13 23:11:55 +01:00
|
|
|
|
|
|
|
executeProgram PROC

	; Runs the VM program from program.inc against the supplied register file.
	;
	; Arguments, as read by the code below (the argument registers and the set of
	; preserved registers correspond to the Microsoft x64 calling convention):
	;   rcx -> RegisterFile& registerFile (r0-r7 at +0..+56, f0-f7 at +64..+191)
	;   rdx -> block holding "mx"/"ma" in the qword at [rdx] and uint8_t* dataset at [rdx+8]
	;   r8  -> convertible_t* scratchpad
	; On exit (rx_finish) r0-r7 and f0-f7 are written back to registerFile.
	;
	; NOTE(review): MXCSR is loaded with a fixed value below and is not restored
	; before returning. The Microsoft x64 ABI treats the MXCSR control bits as
	; nonvolatile - confirm whether callers rely on their own MXCSR surviving.

	; REGISTER ALLOCATION:
	; rax	-> temporary
	; rbx	-> "ic"
	; rcx	-> temporary
	; rdx	-> temporary
	; rsi	-> convertible_t* scratchpad
	; rdi	-> beginning of VM stack
	; rbp	-> "ma", "mx"
	; rsp	-> end of VM stack
	; r8	-> "r0"
	; r9	-> "r1"
	; r10	-> "r2"
	; r11	-> "r3"
	; r12	-> "r4"
	; r13	-> "r5"
	; r14	-> "r6"
	; r15	-> "r7"
	; xmm0	-> temporary
	; xmm1	-> temporary
	; xmm2	-> "f2"
	; xmm3	-> "f3"
	; xmm4	-> "f4"
	; xmm5	-> "f5"
	; xmm6	-> "f6"
	; xmm7	-> "f7"
	; xmm8	-> "f0"
	; xmm9	-> "f1"
	; xmm10	-> absolute value mask

	; STACK STRUCTURE:
	;   |
	;   |
	;   | saved registers (rbx, rbp, rdi, rsi, r12-r15, then xmm6-xmm10)
	;   |
	;   v
	; [rdi+8] RegisterFile& registerFile
	; [rdi+0] uint8_t* dataset
	;   |
	;   |
	;   | VM stack
	;   |
	;   v
	; [rsp] last element of VM stack

	; store callee-saved registers
	push rbx
	push rbp
	push rdi
	push rsi
	push r12
	push r13
	push r14
	push r15
	sub rsp, 80
	; movdqu is used because these slots are not guaranteed to be 16-byte aligned
	movdqu xmmword ptr [rsp+64], xmm6
	movdqu xmmword ptr [rsp+48], xmm7
	movdqu xmmword ptr [rsp+32], xmm8
	movdqu xmmword ptr [rsp+16], xmm9
	movdqu xmmword ptr [rsp+0], xmm10

	; function arguments
	push rcx				; RegisterFile& registerFile (ends up at [rdi+8])
	mov rbp, qword ptr [rdx]		; "mx", "ma"
	mov rax, qword ptr [rdx+8]		; uint8_t* dataset
	push rax				; dataset pointer (ends up at [rdi+0])
	mov rsi, r8				; convertible_t* scratchpad

	mov rdi, rsp				; beginning of VM stack
	mov ebx, 1048577			; number of VM instructions to execute + 1

	xorps xmm10, xmm10
	cmpeqpd xmm10, xmm10			; 0.0 == 0.0 in both lanes -> all bits set
	psrlq xmm10, 1				; mask for absolute value = 0x7fffffffffffffff7fffffffffffffff

	; reset rounding mode
	; 40896 = 9FC0h: flush-to-zero, denormals-are-zero, all exceptions masked,
	; rounding control = round to nearest.
	; The value is pushed instead of being stored at [rsp-8]: this calling
	; convention has no red zone, so memory below rsp may be overwritten before
	; ldmxcsr reads it. lea releases the slot without changing flags or registers.
	push 40896
	ldmxcsr dword ptr [rsp]
	lea rsp, [rsp+8]

	; load integer registers
	mov r8, qword ptr [rcx+0]
	mov r9, qword ptr [rcx+8]
	mov r10, qword ptr [rcx+16]
	mov r11, qword ptr [rcx+24]
	mov r12, qword ptr [rcx+32]
	mov r13, qword ptr [rcx+40]
	mov r14, qword ptr [rcx+48]
	mov r15, qword ptr [rcx+56]

	; Each f register is built from two signed 64-bit integers in registerFile:
	; the "hi" value is converted first, shifted into the upper lane, and the
	; "lo" value is then converted into the lower lane.

	; load register f0 hi, lo
	xorps xmm8, xmm8
	cvtsi2sd xmm8, qword ptr [rcx+72]
	pslldq xmm8, 8
	cvtsi2sd xmm8, qword ptr [rcx+64]

	; load register f1 hi, lo
	xorps xmm9, xmm9
	cvtsi2sd xmm9, qword ptr [rcx+88]
	pslldq xmm9, 8
	cvtsi2sd xmm9, qword ptr [rcx+80]

	; load register f2 hi, lo
	xorps xmm2, xmm2
	cvtsi2sd xmm2, qword ptr [rcx+104]
	pslldq xmm2, 8
	cvtsi2sd xmm2, qword ptr [rcx+96]

	; load register f3 hi, lo
	xorps xmm3, xmm3
	cvtsi2sd xmm3, qword ptr [rcx+120]
	pslldq xmm3, 8
	cvtsi2sd xmm3, qword ptr [rcx+112]

	; advance the base so f4-f7 reuse the same displacements (64..120);
	; presumably done to keep the displacements within one signed byte
	lea rcx, [rcx+64]

	; load register f4 hi, lo
	xorps xmm4, xmm4
	cvtsi2sd xmm4, qword ptr [rcx+72]
	pslldq xmm4, 8
	cvtsi2sd xmm4, qword ptr [rcx+64]

	; load register f5 hi, lo
	xorps xmm5, xmm5
	cvtsi2sd xmm5, qword ptr [rcx+88]
	pslldq xmm5, 8
	cvtsi2sd xmm5, qword ptr [rcx+80]

	; load register f6 hi, lo
	xorps xmm6, xmm6
	cvtsi2sd xmm6, qword ptr [rcx+104]
	pslldq xmm6, 8
	cvtsi2sd xmm6, qword ptr [rcx+96]

	; load register f7 hi, lo
	xorps xmm7, xmm7
	cvtsi2sd xmm7, qword ptr [rcx+120]
	pslldq xmm7, 8
	cvtsi2sd xmm7, qword ptr [rcx+112]

	jmp program_begin			; skip the alignment padding

	; program body
	ALIGN 64
program_begin:
	include program.inc

	ALIGN 64
rx_finish:
	; unroll the stack
	mov rsp, rdi

	; save VM register values
	pop rcx					; discard the dataset pointer ([rdi+0])
	pop rcx					; rcx = registerFile ([rdi+8])
	mov qword ptr [rcx+0], r8
	mov qword ptr [rcx+8], r9
	mov qword ptr [rcx+16], r10
	mov qword ptr [rcx+24], r11
	mov qword ptr [rcx+32], r12
	mov qword ptr [rcx+40], r13
	mov qword ptr [rcx+48], r14
	mov qword ptr [rcx+56], r15
	; f registers are stored as raw 128-bit values; movdqa faults on an
	; unaligned address, so registerFile has to be 16-byte aligned
	movdqa xmmword ptr [rcx+64], xmm8
	movdqa xmmword ptr [rcx+80], xmm9
	movdqa xmmword ptr [rcx+96], xmm2
	movdqa xmmword ptr [rcx+112], xmm3
	lea rcx, [rcx+64]			; same base adjustment as in the load sequence
	movdqa xmmword ptr [rcx+64], xmm4
	movdqa xmmword ptr [rcx+80], xmm5
	movdqa xmmword ptr [rcx+96], xmm6
	movdqa xmmword ptr [rcx+112], xmm7

	; load callee-saved registers
	movdqu xmm10, xmmword ptr [rsp]
	movdqu xmm9, xmmword ptr [rsp+16]
	movdqu xmm8, xmmword ptr [rsp+32]
	movdqu xmm7, xmmword ptr [rsp+48]
	movdqu xmm6, xmmword ptr [rsp+64]
	add rsp, 80
	pop r15
	pop r14
	pop r13
	pop r12
	pop rsi
	pop rdi
	pop rbp
	pop rbx

	; return
	ret
|
|
|
|
|
|
|
|
TransformAddress MACRO reg32, reg64
	;# Rewrites the address in reg32 so that the result falls into a different
	;# cache line than the input (mod 2^N). The purpose is to avoid a
	;# load-store dependency between the two addresses.
	;#
	;# Three kinds of transformation are possible; only the ADD form is active,
	;# the other two are kept here for reference:
	;#   x -> 9*x+C :  lea reg32, [reg64+reg64*8+127]
	;#                 C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127
	;#   x -> x+C   :  add reg32, -39     (C = all except -7 to +7)
	;#   x -> x^C   :  xor reg32, -8      (C = all except 0 to 7)
	;#
	;# reg64 is only referenced by the LEA form and is unused by the active code.
	;# Writing reg32 clears the upper half of the matching 64-bit register.

	;# Raw byte 40h placed directly in front of the ADD. In 64-bit mode this is a
	;# REX prefix with no bits set; presumably it pads the ADD to the same length
	;# as the LEA form - TODO confirm. NOTE(review): only meaningful when reg32
	;# needs no REX prefix of its own (the visible caller passes eax).
	db 40h
	add reg32, -39
ENDM
|
|
|
|
|
2019-01-07 17:44:43 +01:00
|
|
|
ReadMemoryRandom MACRO spmask
	;# Body of a scratchpad refresh routine: XORs one 64-byte dataset line into
	;# one 64-byte scratchpad line and prefetches the next dataset line.
	;# The expansion ends with RET, so it must be entered with CALL.
	;#
	;# IN     eax = random 32-bit address (restored before returning)
	;# GLOBAL rdi = address of the dataset address (the pointer is read from [rdi])
	;# GLOBAL rsi = address of the scratchpad
	;# GLOBAL rbp = low 32 bits = "mx", high 32 bits = "ma"; updated and swapped here
	;# MODIFY rcx, rdx, flags
	;# PARAM  spmask = scratchpad index mask in 8-byte units; spmask-7 also clears
	;#                 the low 3 index bits, selecting a whole 64-byte line

	push rax				;# keep the caller's eax
	TransformAddress eax, rax		;# eax = transformed address, upper half of rax = 0
	mov rcx, qword ptr [rdi]		;# rcx = dataset base pointer
	xor rbp, rax				;# mix the address into "mx" ("ma" in the high half is untouched)

	;# prefetch cacheline "mx"
	and rbp, -64				;# round "mx" down to a cache line start (mask is sign-extended, "ma" kept)
	mov edx, ebp				;# edx = mx
	prefetchnta byte ptr [rcx+rdx]

	;# read cacheline "ma"
	ror rbp, 32				;# exchange the "ma" and "mx" halves
	mov edx, ebp				;# edx = ma
	and eax, spmask-7			;# bound the index to the scratchpad size, multiple of 8 elements
	lea rax, [rsi+rax*8]			;# rax = address of the scratchpad cache line
	lea rcx, [rcx+rdx]			;# rcx = address of the dataset cache line

	;# XOR the dataset line into the scratchpad line, one quadword at a time.
	;# The first load relies on the line having been prefetched earlier
	;# (presumably as "mx" by the previous invocation).
	FOR qoff, <0, 8, 16, 24, 32, 40, 48, 56>
	mov rdx, qword ptr [rcx+qoff]
	xor qword ptr [rax+qoff], rdx
	ENDM

	pop rax					;# give the caller's eax back
	ret
ENDM
|
|
|
|
|
|
|
|
;# Scratchpad read routines: two expansions of ReadMemoryRandom that differ only
;# in the index mask. The mask counts 8-byte elements, so 7FFh (2047) covers
;# 2048 quadwords = 16 KiB and 7FFFh (32767) covers 32768 quadwords = 256 KiB.
;# The l1/l2 suffixes presumably refer to the cache level each size targets.
	ALIGN 64
rx_read_l1:
	ReadMemoryRandom 7FFh

	ALIGN 64
rx_read_l2:
	ReadMemoryRandom 7FFFh

executeProgram ENDP
|
|
|
|
|
2019-01-04 19:44:15 +01:00
|
|
|
_RANDOMX_EXECUTE_PROGRAM ENDS
|
|
|
|
|
2018-12-13 23:11:55 +01:00
|
|
|
END
|