From 28ed776fbe513e52cfdd6ef7ec7023702678e67c Mon Sep 17 00:00:00 2001
From: tevador <tevador@gmail.com>
Date: Fri, 22 Mar 2019 11:00:21 +0100
Subject: [PATCH] Light JIT compiler - Linux

---
 makefile                              |  16 ++-
 src/JitCompilerX86-static.S           |  11 ++
 src/asm/program_transform_address.inc | 154 --------------------------
 3 files changed, 23 insertions(+), 158 deletions(-)
 delete mode 100644 src/asm/program_transform_address.inc

diff --git a/makefile b/makefile
index de8c830..fa21852 100644
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@ OBJDIR=obj
 LDFLAGS=-lpthread
 CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp  src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
 TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
-ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
+ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
 ifeq ($(PLATFORM),amd64)
     ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
     CXXFLAGS += -maes
@@ -23,8 +23,13 @@ all: release
 
 release: CXXFLAGS += -march=native -O3 -flto
 release: CCFLAGS += -march=native -O3 -flto
+release: LDFLAGS += -flto
 release: $(BINDIR)/randomx
 
+nolto: CXXFLAGS += -march=native -O3
+nolto: CCFLAGS += -march=native -O3
+nolto: $(BINDIR)/randomx
+
 debug: CXXFLAGS += -g
 debug: CCFLAGS += -g
 debug: LDFLAGS += -g
@@ -59,8 +64,11 @@ $(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cp
 $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
 	$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
 
-$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h) | $(OBJDIR)
+$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h JitCompilerX86.hpp) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
+
+$(OBJDIR)/CompiledLightVirtualMachine.o: $(SRCDIR)/CompiledLightVirtualMachine.cpp $(addprefix $(SRCDIR)/,CompiledLightVirtualMachine.hpp JitCompilerX86.hpp common.hpp configuration.h) | $(OBJDIR) # source first so $< names it; $(OBJDIR) is order-only
+	$(CXX) $(CXXFLAGS) -c $< -o $@
   
 $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
@@ -74,10 +82,10 @@ $(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrin
 $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
 
-$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR)
+$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc read_dataset_light.inc read_dataset_light_sub.inc) asm/squareHash.inc) | $(OBJDIR) # asm/squareHash.inc is included under squareHashSub in the .S, so it must trigger a rebuild too
 	$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
 
-$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc) configuration.h)  | $(OBJDIR)
+$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc initBlock.inc) configuration.h)  | $(OBJDIR)
 	$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
 
 $(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)
diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S
index b5fdab5..9ccdb16 100644
--- a/src/JitCompilerX86-static.S
+++ b/src/JitCompilerX86-static.S
@@ -31,8 +31,10 @@
 .global DECL(randomx_program_loop_load)
 .global DECL(randomx_program_start)
 .global DECL(randomx_program_read_dataset)
+.global DECL(randomx_program_read_dataset_light)
 .global DECL(randomx_program_loop_store)
 .global DECL(randomx_program_loop_end)
+.global DECL(randomx_program_read_dataset_light_sub)
 .global DECL(randomx_program_epilogue)
 .global DECL(randomx_program_end)
 
@@ -58,12 +60,21 @@ DECL(randomx_program_start):
 DECL(randomx_program_read_dataset):
 	#include "asm/program_read_dataset.inc"
 
+DECL(randomx_program_read_dataset_light):
+	#include "asm/program_read_dataset_light.inc"
+
 DECL(randomx_program_loop_store):
 	#include "asm/program_loop_store.inc"
 
 DECL(randomx_program_loop_end):
 	nop
 
+.balign 64
+DECL(randomx_program_read_dataset_light_sub):
+	#include "asm/program_read_dataset_light_sub.inc"
+squareHashSub:
+	#include "asm/squareHash.inc"
+
 .balign 64
 DECL(randomx_program_epilogue):
 	#include "asm/program_epilogue_linux.inc"
diff --git a/src/asm/program_transform_address.inc b/src/asm/program_transform_address.inc
deleted file mode 100644
index 0815e29..0000000
--- a/src/asm/program_transform_address.inc
+++ /dev/null
@@ -1,154 +0,0 @@
-	;# 90 address transformations
-	;# forced REX prefix is used to make all transformations 4 bytes long
-	lea eax, [rax+rax*8+109]
-	db 64
-	xor eax, 96
-	lea eax, [rax+rax*8-19]
-	db 64
-	add eax, -98
-	db 64
-	add eax, -21
-	db 64
-	xor eax, -80
-	lea eax, [rax+rax*8-92]
-	db 64
-	add eax, 113
-	lea eax, [rax+rax*8+100]
-	db 64
-	add eax, -39
-	db 64
-	xor eax, 120
-	lea eax, [rax+rax*8-119]
-	db 64
-	add eax, -113
-	db 64
-	add eax, 111
-	db 64
-	xor eax, 104
-	lea eax, [rax+rax*8-83]
-	lea eax, [rax+rax*8+127]
-	db 64
-	xor eax, -112
-	db 64
-	add eax, 89
-	db 64
-	add eax, -32
-	db 64
-	add eax, 104
-	db 64
-	xor eax, -120
-	db 64
-	xor eax, 24
-	lea eax, [rax+rax*8+9]
-	db 64
-	add eax, -31
-	db 64
-	xor eax, -16
-	db 64
-	add eax, 68
-	lea eax, [rax+rax*8-110]
-	db 64
-	xor eax, 64
-	db 64
-	xor eax, -40
-	db 64
-	xor eax, -8
-	db 64
-	add eax, -10
-	db 64
-	xor eax, -32
-	db 64
-	add eax, 14
-	lea eax, [rax+rax*8-46]
-	db 64
-	xor eax, -104
-	lea eax, [rax+rax*8+36]
-	db 64
-	add eax, 100
-	lea eax, [rax+rax*8-65]
-	lea eax, [rax+rax*8+27]
-	lea eax, [rax+rax*8+91]
-	db 64
-	add eax, -101
-	db 64
-	add eax, -94
-	lea eax, [rax+rax*8-10]
-	db 64
-	xor eax, 80
-	db 64
-	add eax, -108
-	db 64
-	add eax, -58
-	db 64
-	xor eax, 48
-	lea eax, [rax+rax*8+73]
-	db 64
-	xor eax, -48
-	db 64
-	xor eax, 32
-	db 64
-	xor eax, -96
-	db 64
-	add eax, 118
-	db 64
-	add eax, 91
-	lea eax, [rax+rax*8+18]
-	db 64
-	add eax, -11
-	lea eax, [rax+rax*8+63]
-	db 64
-	add eax, 114
-	lea eax, [rax+rax*8+45]
-	db 64
-	add eax, -67
-	db 64
-	add eax, 53
-	lea eax, [rax+rax*8-101]
-	lea eax, [rax+rax*8-1]
-	db 64
-	xor eax, 16
-	lea eax, [rax+rax*8-37]
-	lea eax, [rax+rax*8-28]
-	lea eax, [rax+rax*8-55]
-	db 64
-	xor eax, -88
-	db 64
-	xor eax, -72
-	db 64
-	add eax, 36
-	db 64
-	xor eax, -56
-	db 64
-	add eax, 116
-	db 64
-	xor eax, 88
-	db 64
-	xor eax, -128
-	db 64
-	add eax, 50
-	db 64
-	add eax, 105
-	db 64
-	add eax, -37
-	db 64
-	xor eax, 112
-	db 64
-	xor eax, 8
-	db 64
-	xor eax, -24
-	lea eax, [rax+rax*8+118]
-	db 64
-	xor eax, 72
-	db 64
-	xor eax, -64
-	db 64
-	add eax, 40
-	lea eax, [rax+rax*8-74]
-	lea eax, [rax+rax*8+82]
-	lea eax, [rax+rax*8+54]
-	db 64
-	xor eax, 56
-	db 64
-	xor eax, 40
-	db 64
-	add eax, 87
\ No newline at end of file