# Patch overview (text ahead of the first "diff --git" header is commentary
# and is skipped by patch tools):
#   * makefile: adds CompiledLightVirtualMachine.o to ROBJS together with a
#     compile rule for it, passes -flto to the linker for "release", adds a
#     "nolto" target with the same -march=native -O3 flags but without -flto,
#     and extends the prerequisite lists of CompiledVirtualMachine.o,
#     JitCompilerX86-static.o and squareHash.o.
#   * src/JitCompilerX86-static.S: exports and defines
#     randomx_program_read_dataset_light and
#     randomx_program_read_dataset_light_sub, and adds the local label
#     squareHashSub, which includes asm/squareHash.inc.
#   * src/asm/program_transform_address.inc: deleted.
#
# Review fix: JitCompilerX86-static.S now includes asm/squareHash.inc, so that
# file is listed as a prerequisite of $(OBJDIR)/JitCompilerX86-static.o;
# without it, editing squareHash.inc would not rebuild the object.
#
# NOTE(review): the line breaks of this patch were reconstructed from the hunk
# headers, and every hunk matches its declared line counts. Leading whitespace
# was not recoverable: recipe lines use the tab that make requires, all other
# indentation is a best guess - verify against the tree before applying.
# NOTE(review): the post-image blob id on the makefile "index" line predates
# the prerequisite fix above.
diff --git a/makefile b/makefile
index de8c830..fa21852 100644
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@ OBJDIR=obj
 LDFLAGS=-lpthread
 CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
 TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
-ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
+ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
 ifeq ($(PLATFORM),amd64)
     ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
     CXXFLAGS += -maes
@@ -23,8 +23,13 @@ all: release
 
 release: CXXFLAGS += -march=native -O3 -flto
 release: CCFLAGS += -march=native -O3 -flto
+release: LDFLAGS += -flto
 release: $(BINDIR)/randomx
 
+nolto: CXXFLAGS += -march=native -O3
+nolto: CCFLAGS += -march=native -O3
+nolto: $(BINDIR)/randomx
+
 debug: CXXFLAGS += -g
 debug: CCFLAGS += -g
 debug: LDFLAGS += -g
@@ -59,8 +64,11 @@ $(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cp
 $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
 	$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
 
-$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h) | $(OBJDIR)
+$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h JitCompilerX86.hpp) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
+
+$(OBJDIR)/CompiledLightVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledLightVirtualMachine.cpp CompiledLightVirtualMachine.hpp common.hpp configuration.h JitCompilerX86.hpp) | $(OBJDIR)
+	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledLightVirtualMachine.cpp -o $@
 
 $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
@@ -74,10 +82,10 @@ $(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrin
 $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR)
 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
 
-$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR)
+$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc read_dataset_light.inc read_dataset_light_sub.inc) asm/squareHash.inc) | $(OBJDIR)
 	$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
 
-$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc) configuration.h) | $(OBJDIR)
+$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc initBlock.inc) configuration.h) | $(OBJDIR)
 	$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
 
 $(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)
diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S
index b5fdab5..9ccdb16 100644
--- a/src/JitCompilerX86-static.S
+++ b/src/JitCompilerX86-static.S
@@ -31,8 +31,10 @@
 .global DECL(randomx_program_loop_load)
 .global DECL(randomx_program_start)
 .global DECL(randomx_program_read_dataset)
+.global DECL(randomx_program_read_dataset_light)
 .global DECL(randomx_program_loop_store)
 .global DECL(randomx_program_loop_end)
+.global DECL(randomx_program_read_dataset_light_sub)
 .global DECL(randomx_program_epilogue)
 .global DECL(randomx_program_end)
 
@@ -58,12 +60,21 @@ DECL(randomx_program_start):
 DECL(randomx_program_read_dataset):
 	#include "asm/program_read_dataset.inc"
 
+DECL(randomx_program_read_dataset_light):
+	#include "asm/program_read_dataset_light.inc"
+
 DECL(randomx_program_loop_store):
 	#include "asm/program_loop_store.inc"
 
 DECL(randomx_program_loop_end):
 	nop
 
+.balign 64
+DECL(randomx_program_read_dataset_light_sub):
+	#include "asm/program_read_dataset_light_sub.inc"
+squareHashSub:
+	#include "asm/squareHash.inc"
+
 .balign 64
 DECL(randomx_program_epilogue):
 	#include "asm/program_epilogue_linux.inc"
diff --git a/src/asm/program_transform_address.inc b/src/asm/program_transform_address.inc
deleted file mode 100644
index 0815e29..0000000
--- a/src/asm/program_transform_address.inc
+++ /dev/null
@@ -1,154 +0,0 @@
-	;# 90 address transformations
-	;# forced REX prefix is used to make all transformations 4 bytes long
-	lea eax, [rax+rax*8+109]
-	db 64
-	xor eax, 96
-	lea eax, [rax+rax*8-19]
-	db 64
-	add eax, -98
-	db 64
-	add eax, -21
-	db 64
-	xor eax, -80
-	lea eax, [rax+rax*8-92]
-	db 64
-	add eax, 113
-	lea eax, [rax+rax*8+100]
-	db 64
-	add eax, -39
-	db 64
-	xor eax, 120
-	lea eax, [rax+rax*8-119]
-	db 64
-	add eax, -113
-	db 64
-	add eax, 111
-	db 64
-	xor eax, 104
-	lea eax, [rax+rax*8-83]
-	lea eax, [rax+rax*8+127]
-	db 64
-	xor eax, -112
-	db 64
-	add eax, 89
-	db 64
-	add eax, -32
-	db 64
-	add eax, 104
-	db 64
-	xor eax, -120
-	db 64
-	xor eax, 24
-	lea eax, [rax+rax*8+9]
-	db 64
-	add eax, -31
-	db 64
-	xor eax, -16
-	db 64
-	add eax, 68
-	lea eax, [rax+rax*8-110]
-	db 64
-	xor eax, 64
-	db 64
-	xor eax, -40
-	db 64
-	xor eax, -8
-	db 64
-	add eax, -10
-	db 64
-	xor eax, -32
-	db 64
-	add eax, 14
-	lea eax, [rax+rax*8-46]
-	db 64
-	xor eax, -104
-	lea eax, [rax+rax*8+36]
-	db 64
-	add eax, 100
-	lea eax, [rax+rax*8-65]
-	lea eax, [rax+rax*8+27]
-	lea eax, [rax+rax*8+91]
-	db 64
-	add eax, -101
-	db 64
-	add eax, -94
-	lea eax, [rax+rax*8-10]
-	db 64
-	xor eax, 80
-	db 64
-	add eax, -108
-	db 64
-	add eax, -58
-	db 64
-	xor eax, 48
-	lea eax, [rax+rax*8+73]
-	db 64
-	xor eax, -48
-	db 64
-	xor eax, 32
-	db 64
-	xor eax, -96
-	db 64
-	add eax, 118
-	db 64
-	add eax, 91
-	lea eax, [rax+rax*8+18]
-	db 64
-	add eax, -11
-	lea eax, [rax+rax*8+63]
-	db 64
-	add eax, 114
-	lea eax, [rax+rax*8+45]
-	db 64
-	add eax, -67
-	db 64
-	add eax, 53
-	lea eax, [rax+rax*8-101]
-	lea eax, [rax+rax*8-1]
-	db 64
-	xor eax, 16
-	lea eax, [rax+rax*8-37]
-	lea eax, [rax+rax*8-28]
-	lea eax, [rax+rax*8-55]
-	db 64
-	xor eax, -88
-	db 64
-	xor eax, -72
-	db 64
-	add eax, 36
-	db 64
-	xor eax, -56
-	db 64
-	add eax, 116
-	db 64
-	xor eax, 88
-	db 64
-	xor eax, -128
-	db 64
-	add eax, 50
-	db 64
-	add eax, 105
-	db 64
-	add eax, -37
-	db 64
-	xor eax, 112
-	db 64
-	xor eax, 8
-	db 64
-	xor eax, -24
-	lea eax, [rax+rax*8+118]
-	db 64
-	xor eax, 72
-	db 64
-	xor eax, -64
-	db 64
-	add eax, 40
-	lea eax, [rax+rax*8-74]
-	lea eax, [rax+rax*8+82]
-	lea eax, [rax+rax*8+54]
-	db 64
-	xor eax, 56
-	db 64
-	xor eax, 40
-	db 64
-	add eax, 87
\ No newline at end of file