From 41b51a485832064a10cec7669ebd3525ce41628f Mon Sep 17 00:00:00 2001 From: tevador Date: Sun, 21 Apr 2019 14:07:32 +0200 Subject: [PATCH] Cleaned up legacy code --- src/allocator.cpp | 8 +- src/asm/initBlock.inc | 155 ------------------- src/asm/program_prologue_load.inc | 21 --- src/asm/program_read_dataset_light_sub.inc | 171 --------------------- src/asm/squareHash.inc | 87 ----------- src/common.hpp | 7 +- src/configuration.h | 6 - src/dataset.cpp | 19 --- src/jit_compiler_x86.cpp | 8 - src/jit_compiler_x86_static.S | 10 +- src/jit_compiler_x86_static.asm | 8 - src/jit_compiler_x86_static.hpp | 1 - src/squareHash.S | 41 ----- src/squareHash.asm | 43 ------ src/squareHash.h | 76 --------- src/superscalar.cpp | 2 +- 16 files changed, 6 insertions(+), 657 deletions(-) delete mode 100644 src/asm/initBlock.inc delete mode 100644 src/asm/program_prologue_load.inc delete mode 100644 src/asm/program_read_dataset_light_sub.inc delete mode 100644 src/asm/squareHash.inc delete mode 100644 src/squareHash.S delete mode 100644 src/squareHash.asm delete mode 100644 src/squareHash.h diff --git a/src/allocator.cpp b/src/allocator.cpp index 456767f..2626147 100644 --- a/src/allocator.cpp +++ b/src/allocator.cpp @@ -17,8 +17,6 @@ You should have received a copy of the GNU General Public License along with RandomX. If not, see. */ -#pragma once - #include "allocator.hpp" #include "intrin_portable.h" #include "virtual_memory.hpp" @@ -36,10 +34,8 @@ namespace randomx { _mm_free(ptr); } - template void* AlignedAllocator::allocMemory(size_t count); - template void AlignedAllocator::freeMemory(void* ptr, size_t count); - template void* AlignedAllocator::allocMemory(size_t count); - template void AlignedAllocator::freeMemory(void* ptr, size_t count); + template class AlignedAllocator; + template class AlignedAllocator;; void* LargePageAllocator::allocMemory(size_t count) { return allocLargePagesMemory(count); diff --git a/src/asm/initBlock.inc b/src/asm/initBlock.inc deleted file mode 100644 index 61b06b6..0000000 --- a/src/asm/initBlock.inc +++ /dev/null @@ -1,155 +0,0 @@ - prefetcht0 byte ptr [rbp] - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -initBlock_loop: - ;# c0 - mov rbx, r8 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r8+r9] - call squareHash - mov r9, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c1 - mov rbx, r9 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r9+r10] - call squareHash - mov r10, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c2 - mov rbx, r10 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r10+r11] - call squareHash - mov r11, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c3 - mov rbx, r11 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r11+r12] - call squareHash - mov r12, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c4 - mov rbx, r12 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r12+r13] - call squareHash - mov r13, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c5 - mov rbx, r13 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r13+r14] - call squareHash - mov r14, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c6 - mov rbx, r14 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r14+r15] - call squareHash - mov r15, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c7 - mov rbx, r15 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r15+r8] - call squareHash - mov r8, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - sub rsi, 1 - jnz initBlock_loop - mov qword ptr [rbp+0], r8 - mov qword ptr [rbp+8], r9 - mov qword ptr [rbp+16], r10 - mov qword ptr [rbp+24], r11 - mov qword ptr [rbp+32], r12 - mov qword ptr [rbp+40], r13 - mov qword ptr [rbp+48], r14 - mov qword ptr [rbp+56], r15 \ No newline at end of file diff --git a/src/asm/program_prologue_load.inc b/src/asm/program_prologue_load.inc deleted file mode 100644 index 757cf10..0000000 --- a/src/asm/program_prologue_load.inc +++ /dev/null @@ -1,21 +0,0 @@ - mov rax, rbp - - ;# zero integer registers - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - - ;# load constant registers - lea rcx, [rcx+120] - movapd xmm8, xmmword ptr [rcx+72] - movapd xmm9, xmmword ptr [rcx+88] - movapd xmm10, xmmword ptr [rcx+104] - movapd xmm11, xmmword ptr [rcx+120] - movapd xmm13, xmmword ptr [minDbl] - movapd xmm14, xmmword ptr [absMask] - movapd xmm15, xmmword ptr [signMask] diff --git a/src/asm/program_read_dataset_light_sub.inc b/src/asm/program_read_dataset_light_sub.inc deleted file mode 100644 index 6fe07f0..0000000 --- a/src/asm/program_read_dataset_light_sub.inc +++ /dev/null @@ -1,171 +0,0 @@ - ;# rdi -> Cache pointer - ;# rcx -> Dataset block number - ;# rax, rbx, rcx, rdx -> scratch registers - sub rsp, 72 - mov qword ptr [rsp+64], rbx - mov qword ptr [rsp+56], r8 - mov qword ptr [rsp+48], r9 - mov qword ptr [rsp+40], r10 - mov qword ptr [rsp+32], r11 - mov qword ptr [rsp+24], r12 - mov qword ptr [rsp+16], r13 - mov qword ptr [rsp+8], r14 - mov qword ptr [rsp+0], r15 - mov r8, rcx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - ;# iteration 0 - ;# c0 - mov rbx, r8 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r8+r9] - call squareHashSub - mov r9, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c1 - mov rbx, r9 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r9+r10] - call squareHashSub - mov r10, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c2 - mov rbx, r10 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r10+r11] - call squareHashSub - mov r11, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c3 - mov rbx, r11 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r11+r12] - call squareHashSub - mov r12, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c4 - mov rbx, r12 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r12+r13] - call squareHashSub - mov r13, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c5 - mov rbx, r13 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r13+r14] - call squareHashSub - mov r14, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c6 - mov rbx, r14 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r14+r15] - call squareHashSub - mov r15, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c7 - mov rbx, r15 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r15+r8] - call squareHashSub - mov r8, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# -------------------------- - mov rbx, qword ptr [rsp+64] - xor r8, qword ptr [rsp+56] - xor r9, qword ptr [rsp+48] - xor r10, qword ptr [rsp+40] - xor r11, qword ptr [rsp+32] - xor r12, qword ptr [rsp+24] - xor r13, qword ptr [rsp+16] - xor r14, qword ptr [rsp+8] - xor r15, qword ptr [rsp+0] - add rsp, 72 - ;# xor eax, eax - ret \ No newline at end of file diff --git a/src/asm/squareHash.inc b/src/asm/squareHash.inc deleted file mode 100644 index a7ec605..0000000 --- a/src/asm/squareHash.inc +++ /dev/null @@ -1,87 +0,0 @@ - mov rax, 9507361525245169745 - add rax, rcx - mul rax - sub rax, rdx ;# 1 - mul rax - sub rax, rdx ;# 2 - mul rax - sub rax, rdx ;# 3 - mul rax - sub rax, rdx ;# 4 - mul rax - sub rax, rdx ;# 5 - mul rax - sub rax, rdx ;# 6 - mul rax - sub rax, rdx ;# 7 - mul rax - sub rax, rdx ;# 8 - mul rax - sub rax, rdx ;# 9 - mul rax - sub rax, rdx ;# 10 - mul rax - sub rax, rdx ;# 11 - mul rax - sub rax, rdx ;# 12 - mul rax - sub rax, rdx ;# 13 - mul rax - sub rax, rdx ;# 14 - mul rax - sub rax, rdx ;# 15 - mul rax - sub rax, rdx ;# 16 - mul rax - sub rax, rdx ;# 17 - mul rax - sub rax, rdx ;# 18 - mul rax - sub rax, rdx ;# 19 - mul rax - sub rax, rdx ;# 20 - mul rax - sub rax, rdx ;# 21 - mul rax - sub rax, rdx ;# 22 - mul rax - sub rax, rdx ;# 23 - mul rax - sub rax, rdx ;# 24 - mul rax - sub rax, rdx ;# 25 - mul rax - sub rax, rdx ;# 26 - mul rax - sub rax, rdx ;# 27 - mul rax - sub rax, rdx ;# 28 - mul rax - sub rax, rdx ;# 29 - mul rax - sub rax, rdx ;# 30 - mul rax - sub rax, rdx ;# 31 - mul rax - sub rax, rdx ;# 32 - mul rax - sub rax, rdx ;# 33 - mul rax - sub rax, rdx ;# 34 - mul rax - sub rax, rdx ;# 35 - mul rax - sub rax, rdx ;# 36 - mul rax - sub rax, rdx ;# 37 - mul rax - sub rax, rdx ;# 38 - mul rax - sub rax, rdx ;# 39 - mul rax - sub rax, rdx ;# 40 - mul rax - sub rax, rdx ;# 41 - mul rax - sub rax, rdx ;# 42 - ret \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index 4de76b2..00c3356 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -30,8 +30,6 @@ namespace randomx { static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296."); - static_assert(RANDOMX_DS_GROWTH % 64 == 0, "RANDOMX_DS_GROWTH must be divisible by 64."); - static_assert(RANDOMX_ARGON_GROWTH >= 0, "RANDOMX_ARGON_GROWTH must be greater than or equal to 0."); static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); @@ -54,15 +52,14 @@ namespace randomx { using addr_t = uint32_t; - constexpr int SeedSize = 32; - constexpr int ResultSize = 64; constexpr int ArgonBlockSize = 1024; constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; constexpr int CacheLineSize = 64; constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024; - constexpr int CacheBlockCount = CacheSize / CacheLineSize; + + static_assert(RANDOMX_DATASET_BLOCKS == RANDOMX_DATASET_SIZE / CacheLineSize, "Invalid value of RANDOMX_DATASET_BLOCKS"); #ifdef TRACE constexpr bool trace = true; diff --git a/src/configuration.h b/src/configuration.h index a266cb9..44db843 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -22,9 +22,6 @@ along with RandomX. If not, see. //Cache size in KiB. Must be a power of 2. #define RANDOMX_ARGON_MEMORY (256 * 1024) -//Cache growth per epoch in KiB. -#define RANDOMX_ARGON_GROWTH 0 - //Number of Argon2d iterations for Cache initialization #define RANDOMX_ARGON_ITERATIONS 3 @@ -43,9 +40,6 @@ along with RandomX. If not, see. //Dataset size in bytes. Must be a power of 2. #define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024) -//Dataset growth per epoch in bytes. Must be divisible by 64. -#define RANDOMX_DS_GROWTH 0 - //Number of blocks per epoch #define RANDOMX_EPOCH_BLOCKS 2048 diff --git a/src/dataset.cpp b/src/dataset.cpp index 4f608f9..7aa2e51 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -211,23 +211,4 @@ namespace randomx { for (uint32_t blockNumber = startBlock; blockNumber < endBlock; ++blockNumber, dataset += CacheLineSize) initDatasetBlock(cache, dataset, blockNumber); } - - /*void datasetAlloc(dataset_t& ds, bool largePages) { - if (std::numeric_limits::max() < RANDOMX_DATASET_SIZE) - throw std::runtime_error("Platform doesn't support enough memory for the dataset"); - if (largePages) { - ds.dataset.memory = (uint8_t*)allocLargePagesMemory(ds.dataset.size); - } - else { - ds.dataset.memory = (uint8_t*)_mm_malloc(ds.dataset.size, 64); - if (ds.dataset.memory == nullptr) { - throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed."); - } - } - } - - void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) { - ds.cache.memory = allocCache(ds.cache.size, largePages); - argonFill(ds.cache, seed, SeedSize); - }*/ } diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index abd698a..8989c14 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -105,7 +105,6 @@ namespace randomx { const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store; const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end; - const uint8_t* codeReadDatasetLightSub = (uint8_t*)&randomx_program_read_dataset_light_sub; const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue; const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end; const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load; @@ -120,7 +119,6 @@ namespace randomx { const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; - const int32_t readDatasetLightSubSize = codeDatasetInit - codeReadDatasetLightSub; const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; const int32_t epilogueSize = codeShhLoad - codeEpilogue; const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad; @@ -128,7 +126,6 @@ namespace randomx { const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; const int32_t epilogueOffset = CodeSize - epilogueSize; - const int32_t readDatasetLightSubOffset = epilogueOffset - readDatasetLightSubSize; constexpr int32_t superScalarHashOffset = 32768; static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; @@ -226,7 +223,6 @@ namespace randomx { code = (uint8_t*)allocExecutableMemory(CodeSize); memcpy(code, codePrologue, prologueSize); memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); - memcpy(code + readDatasetLightSubOffset, codeReadDatasetLightSub, readDatasetLightSubSize); } JitCompilerX86::~JitCompilerX86() { @@ -241,10 +237,6 @@ namespace randomx { } void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { - if (RANDOMX_CACHE_ACCESSES != 8) - throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); - if (RANDOMX_ARGON_GROWTH != 0) - throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); generateProgramPrologue(prog, pcfg); //if (superscalar) { emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S index 233eac6..bd3ff1e 100644 --- a/src/jit_compiler_x86_static.S +++ b/src/jit_compiler_x86_static.S @@ -34,11 +34,9 @@ .global DECL(randomx_program_read_dataset_light) .global DECL(randomx_program_read_dataset_sshash_init) .global DECL(randomx_program_read_dataset_sshash_fin) -.global DECL(randomx_program_read_dataset_light_sub) -.global DECL(randomx_dataset_init) .global DECL(randomx_program_loop_store) .global DECL(randomx_program_loop_end) -.global DECL(randomx_program_read_dataset_light_sub) +.global DECL(randomx_dataset_init) .global DECL(randomx_program_epilogue) .global DECL(randomx_sshash_load) .global DECL(randomx_sshash_prefetch) @@ -83,12 +81,6 @@ DECL(randomx_program_loop_store): DECL(randomx_program_loop_end): nop -.balign 64 -DECL(randomx_program_read_dataset_light_sub): - #include "asm/program_read_dataset_light_sub.inc" -squareHashSub: - #include "asm/squareHash.inc" - .balign 64 DECL(randomx_dataset_init): push rbx diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm index 3e66398..de4dbb8 100644 --- a/src/jit_compiler_x86_static.asm +++ b/src/jit_compiler_x86_static.asm @@ -27,7 +27,6 @@ PUBLIC randomx_program_read_dataset PUBLIC randomx_program_read_dataset_light PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_fin -PUBLIC randomx_program_read_dataset_light_sub PUBLIC randomx_dataset_init PUBLIC randomx_program_loop_store PUBLIC randomx_program_loop_end @@ -83,13 +82,6 @@ randomx_program_loop_end PROC nop randomx_program_loop_end ENDP -ALIGN 64 -randomx_program_read_dataset_light_sub PROC - include asm/program_read_dataset_light_sub.inc - squareHashSub: - include asm/squareHash.inc -randomx_program_read_dataset_light_sub ENDP - ALIGN 64 randomx_dataset_init PROC push rbx diff --git a/src/jit_compiler_x86_static.hpp b/src/jit_compiler_x86_static.hpp index 5e8d0ce..a3ce44f 100644 --- a/src/jit_compiler_x86_static.hpp +++ b/src/jit_compiler_x86_static.hpp @@ -30,7 +30,6 @@ extern "C" { void randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); void randomx_program_loop_end(); - void randomx_program_read_dataset_light_sub(); void randomx_dataset_init(); void randomx_program_epilogue(); void randomx_sshash_load(); diff --git a/src/squareHash.S b/src/squareHash.S deleted file mode 100644 index 11c6cda..0000000 --- a/src/squareHash.S +++ /dev/null @@ -1,41 +0,0 @@ -.intel_syntax noprefix -#if defined(__APPLE__) -.text -#else -.section .text -#endif -#if defined(__WIN32__) || defined(__APPLE__) -#define DECL(x) _##x -#else -#define DECL(x) x -#endif - -#include "configuration.h" - -.global DECL(squareHash) -.global DECL(initBlock) - -DECL(squareHash): - mov rcx, rdi - #include "asm/squareHash.inc" - -DECL(initBlock): - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - mov rdi, qword ptr [rdi] - mov rbp, rsi - mov r8, rdx - mov rsi, rcx - #define squareHash DECL(squareHash) - #include "asm/initBlock.inc" - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - ret \ No newline at end of file diff --git a/src/squareHash.asm b/src/squareHash.asm deleted file mode 100644 index 8f591d0..0000000 --- a/src/squareHash.asm +++ /dev/null @@ -1,43 +0,0 @@ -IFDEF RAX - -PUBLIC squareHash -PUBLIC initBlock - -.code - -squareHash PROC - include asm/squareHash.inc -squareHash ENDP - -; rcx = cache -; rdx = out -; r8 = blockNumber -; r9 = iterations -initBlock PROC - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - mov rdi, qword ptr [rcx] - mov rbp, rdx - ; r8 = blockNumber - mov rsi, r9 - include asm/initBlock.inc - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -initBlock ENDP - -ENDIF - -END \ No newline at end of file diff --git a/src/squareHash.h b/src/squareHash.h deleted file mode 100644 index 1128627..0000000 --- a/src/squareHash.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2019 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. -*/ - -/* - Based on the original idea by SChernykh: - https://github.com/SChernykh/xmr-stak-cpu/issues/1#issuecomment-414336613 -*/ - -#include - -#if !defined(_M_X64) && !defined(__x86_64__) - -typedef struct { - uint64_t lo; - uint64_t hi; -} uint128_t; - -#define LO(x) ((x)&0xffffffff) -#define HI(x) ((x)>>32) -static inline uint128_t square128(uint64_t x) { - uint64_t xh = HI(x), xl = LO(x); - uint64_t xll = xl * xl; - uint64_t xlh = xl * xh; - uint64_t xhh = xh * xh; - uint64_t m1 = 2 * LO(xlh) + HI(xll); - uint64_t m2 = 2 * HI(xlh) + LO(xhh) + HI(m1); - uint64_t m3 = HI(xhh) + HI(m2); - - uint128_t x2; - - x2.lo = (m1 << 32) + LO(xll); - x2.hi = (m3 << 32) + LO(m2); - - return x2; -} -#undef LO -#undef HI - -inline uint64_t squareHash(uint64_t x) { - x += 9507361525245169745ULL; - for (int i = 0; i < 42; ++i) { - uint128_t x2 = square128(x); - x = x2.lo - x2.hi; - } - return x; -} - -#else - -#if defined(__cplusplus) -extern "C" { -#endif - -uint64_t squareHash(uint64_t); - -#if defined(__cplusplus) -} -#endif - -#endif \ No newline at end of file diff --git a/src/superscalar.cpp b/src/superscalar.cpp index 3537f35..f71c1fe 100644 --- a/src/superscalar.cpp +++ b/src/superscalar.cpp @@ -18,7 +18,7 @@ along with RandomX. If not, see. */ #include "configuration.h" -#include "Program.hpp" +#include "program.hpp" #include "blake2/endian.h" #include #include