mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-03 13:48:54 +00:00
Implemented cache shift
Fixed assembly code generator Fixed an error in the interpreter Updated specification: sign-extended immediates
This commit is contained in:
parent
4fc4b840f5
commit
6332831ec1
@ -33,7 +33,7 @@ The first operand is read from memory. The location is determined by the `loc(a)
|
|||||||
|
|
||||||
Flag `reg(a)` encodes an integer register `r0`-`r7`. The read address is calculated as:
|
Flag `reg(a)` encodes an integer register `r0`-`r7`. The read address is calculated as:
|
||||||
```
|
```
|
||||||
reg(a)[31:0] = reg(a)[31:0] XOR addr0
|
reg(a) = reg(a) XOR signExtend(addr0)
|
||||||
addr(a) = reg(a)[W-1:0]
|
addr(a) = reg(a)[W-1:0]
|
||||||
```
|
```
|
||||||
`W` is the address width from the above table. For reading from the scratchpad, `addr(a)` is multiplied by 8 for 8-byte aligned access.
|
`W` is the address width from the above table. For reading from the scratchpad, `addr(a)` is multiplied by 8 for 8-byte aligned access.
|
||||||
@ -54,7 +54,7 @@ The second operand is loaded either from a register or from an immediate value e
|
|||||||
|
|
||||||
`imm0` is an 8-bit immediate value, which is used for shift and rotate ALU operations.
|
`imm0` is an 8-bit immediate value, which is used for shift and rotate ALU operations.
|
||||||
|
|
||||||
`imm1` is a 32-bit immediate value which is used for most operations. For operands larger than 32 bits, the value is zero-extended for unsigned instructions and sign-extended for signed instructions. For FPU instructions, the value is considered a signed 32-bit integer and then converted to a double precision floating point format.
|
`imm1` is a 32-bit immediate value which is used for most operations. For operands larger than 32 bits, the value is sign-extended. For FPU instructions, the value is considered a signed 32-bit integer and then converted to a double precision floating point format.
|
||||||
|
|
||||||
#### Operand C
|
#### Operand C
|
||||||
The third operand is the location where the result is stored.
|
The third operand is the location where the result is stored.
|
||||||
@ -80,7 +80,7 @@ addr(c) = 8 * (addr1 XOR reg(c)[31:0])[W-1:0]
|
|||||||
An 8-bit immediate value that is used as the shift/rotate count by some ALU instructions and as the jump offset of the CALL instruction.
|
An 8-bit immediate value that is used as the shift/rotate count by some ALU instructions and as the jump offset of the CALL instruction.
|
||||||
|
|
||||||
#### addr0
|
#### addr0
|
||||||
A 32-bit address mask that is used to calculate the read address for the A operand.
|
A 32-bit address mask that is used to calculate the read address for the A operand. It's sign-extended to 64 bits.
|
||||||
|
|
||||||
#### addr1
|
#### addr1
|
||||||
A 32-bit address mask that is used to calculate the write address for the C operand. `addr1` is equal to `imm1`.
|
A 32-bit address mask that is used to calculate the write address for the C operand. `addr1` is equal to `imm1`.
|
||||||
|
@ -16,7 +16,7 @@ GNU General Public License for more details.
|
|||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
//#define TRACE
|
||||||
#include "AssemblyGeneratorX86.hpp"
|
#include "AssemblyGeneratorX86.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
@ -164,6 +164,9 @@ namespace RandomX {
|
|||||||
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
@ -174,10 +177,16 @@ namespace RandomX {
|
|||||||
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -189,7 +198,7 @@ namespace RandomX {
|
|||||||
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
|
||||||
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
|
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
|
||||||
return;
|
break;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
@ -198,10 +207,14 @@ namespace RandomX {
|
|||||||
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
asmCode << "\txor eax, 0" << std::hex << instr.addr1 << "h" << std::dec << std::endl;
|
||||||
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
|
||||||
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
|
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
|
||||||
return;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmovd qword ptr [rsi + rdi * 8 + 262144], xmm0" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -466,8 +479,11 @@ namespace RandomX {
|
|||||||
asmCode << "\tjmp rx_i_" << wrapi(i + 1) << std::endl;
|
asmCode << "\tjmp rx_i_" << wrapi(i + 1) << std::endl;
|
||||||
asmCode << "taken_call_" << i << ":" << std::endl;
|
asmCode << "taken_call_" << i << ":" << std::endl;
|
||||||
}
|
}
|
||||||
|
if (trace) {
|
||||||
|
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
|
||||||
|
}
|
||||||
asmCode << "\tpush rax" << std::endl;
|
asmCode << "\tpush rax" << std::endl;
|
||||||
asmCode << "\tcall rx_i_" << wrapi(i + (instr.imm0 & 127) + 1) << std::endl;
|
asmCode << "\tcall rx_i_" << wrapi(i + (instr.imm0 & 127) + 2) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
||||||
|
@ -43,5 +43,11 @@ namespace RandomX {
|
|||||||
void CompiledVirtualMachine::execute() {
|
void CompiledVirtualMachine::execute() {
|
||||||
FPINIT();
|
FPINIT();
|
||||||
executeProgram(reg, mem, readDataset, scratchpad);
|
executeProgram(reg, mem, readDataset, scratchpad);
|
||||||
|
#ifdef TRACE
|
||||||
|
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||||
|
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -18,7 +18,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
//#define TRACE
|
||||||
#include "VirtualMachine.hpp"
|
#include "VirtualMachine.hpp"
|
||||||
#include "Program.hpp"
|
#include "Program.hpp"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
@ -30,5 +30,9 @@ namespace RandomX {
|
|||||||
CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {}
|
CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {}
|
||||||
virtual void initializeProgram(const void* seed) override;
|
virtual void initializeProgram(const void* seed) override;
|
||||||
virtual void execute() override;
|
virtual void execute() override;
|
||||||
|
private:
|
||||||
|
#ifdef TRACE
|
||||||
|
convertible_t tracepad[InstructionCount];
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
}
|
}
|
@ -34,7 +34,7 @@ namespace RandomX {
|
|||||||
uint8_t locc;
|
uint8_t locc;
|
||||||
uint8_t regc;
|
uint8_t regc;
|
||||||
uint8_t imm0;
|
uint8_t imm0;
|
||||||
uint32_t addr0;
|
int32_t addr0;
|
||||||
union {
|
union {
|
||||||
uint32_t addr1;
|
uint32_t addr1;
|
||||||
int32_t imm1;
|
int32_t imm1;
|
||||||
|
@ -65,7 +65,7 @@ namespace RandomX {
|
|||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loada(Instruction& inst) {
|
convertible_t InterpretedVirtualMachine::loada(Instruction& inst) {
|
||||||
convertible_t& rega = reg.r[inst.rega % RegistersCount];
|
convertible_t& rega = reg.r[inst.rega % RegistersCount];
|
||||||
rega.u64 ^= inst.addr0;
|
rega.i64 ^= inst.addr0; //sign-extend addr0
|
||||||
addr_t addr = rega.u32;
|
addr_t addr = rega.u32;
|
||||||
switch (inst.loca & 7)
|
switch (inst.loca & 7)
|
||||||
{
|
{
|
||||||
@ -86,7 +86,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
|
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
|
||||||
switch (inst.loca & 7)
|
switch (inst.locb & 7)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
@ -98,7 +98,7 @@ namespace RandomX {
|
|||||||
case 6:
|
case 6:
|
||||||
case 7:
|
case 7:
|
||||||
convertible_t temp;
|
convertible_t temp;
|
||||||
temp.i64 = inst.imm1;
|
temp.i64 = inst.imm1; //sign-extend imm1
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -182,13 +182,13 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define ALU_RETIRE(x) x(a, b, c); \
|
#define ALU_RETIRE(x) x(a, b, c); \
|
||||||
if(trace) std::cout << std::hex << a.u64 << " " << b.u64 << " " << c.u64 << std::endl;
|
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
|
||||||
|
|
||||||
#define FPU_RETIRE(x) x(a, b, c); \
|
#define FPU_RETIRE(x) x(a, b, c); \
|
||||||
if(trace) { \
|
if(trace) { \
|
||||||
convertible_t bc; \
|
convertible_t bc; \
|
||||||
bc.f64 = b; \
|
bc.f64 = b; \
|
||||||
std::cout << std::hex << a.u64 << " " << bc.u64 << " " << c.u64 << std::endl; \
|
std::cout << std::hex << /*a.u64 << " " << bc.u64 << " " <<*/ c.u64 << std::endl; \
|
||||||
} \
|
} \
|
||||||
if(fpuCheck) { \
|
if(fpuCheck) { \
|
||||||
convertible_t bc; \
|
convertible_t bc; \
|
||||||
@ -206,7 +206,7 @@ namespace RandomX {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define FPU_RETIRE_NB(x) x(a, b, c); \
|
#define FPU_RETIRE_NB(x) x(a, b, c); \
|
||||||
if(trace) std::cout << std::hex << a.u64 << " " << c.u64 << std::endl;
|
if(trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||||
|
|
||||||
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
|
||||||
convertible_t a = loada(inst); \
|
convertible_t a = loada(inst); \
|
||||||
@ -277,9 +277,11 @@ namespace RandomX {
|
|||||||
stackPush(pc);
|
stackPush(pc);
|
||||||
pc += (inst.imm0 & 127) + 1;
|
pc += (inst.imm0 & 127) + 1;
|
||||||
pc = pc % ProgramLength;
|
pc = pc % ProgramLength;
|
||||||
|
if (trace) std::cout << std::hex << a.u64 << std::endl;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c.u64 = a.u64;
|
c.u64 = a.u64;
|
||||||
|
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,6 +298,7 @@ namespace RandomX {
|
|||||||
else {
|
else {
|
||||||
c.u64 = a.u64;
|
c.u64 = a.u64;
|
||||||
}
|
}
|
||||||
|
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
|
@ -58,16 +58,14 @@ namespace RandomX {
|
|||||||
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
||||||
if (lightClient) {
|
if (lightClient) {
|
||||||
if (softAes) {
|
if (softAes) {
|
||||||
initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys);
|
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||||
initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys);
|
initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
||||||
initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys);
|
}
|
||||||
initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys);
|
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||||
initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys);
|
initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
||||||
initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys);
|
}
|
||||||
initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -35,6 +35,7 @@ namespace RandomX {
|
|||||||
constexpr int SeedSize = 32;
|
constexpr int SeedSize = 32;
|
||||||
|
|
||||||
constexpr int CacheBlockSize = 1024;
|
constexpr int CacheBlockSize = 1024;
|
||||||
|
constexpr int CacheShift = CacheBlockSize / 2;
|
||||||
constexpr int BlockExpansionRatio = 64;
|
constexpr int BlockExpansionRatio = 64;
|
||||||
constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize;
|
constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize;
|
||||||
constexpr uint32_t DatasetBlockCount = 65536;
|
constexpr uint32_t DatasetBlockCount = 65536;
|
||||||
|
@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include <new>
|
#include <new>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
||||||
@ -237,7 +238,7 @@ namespace RandomX {
|
|||||||
convertible_t data;
|
convertible_t data;
|
||||||
auto blockNumber = memory.ma / DatasetBlockSize;
|
auto blockNumber = memory.ma / DatasetBlockSize;
|
||||||
if (memory.lcm->blockNumber != blockNumber) {
|
if (memory.lcm->blockNumber != blockNumber) {
|
||||||
initBlock<softAes>(memory.lcm->cache, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
|
initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
|
||||||
memory.lcm->blockNumber = blockNumber;
|
memory.lcm->blockNumber = blockNumber;
|
||||||
}
|
}
|
||||||
data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
|
data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
|
||||||
@ -263,15 +264,16 @@ namespace RandomX {
|
|||||||
if (dataset == nullptr) {
|
if (dataset == nullptr) {
|
||||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
|
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
|
||||||
}
|
}
|
||||||
uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i));
|
uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
||||||
if (dataset == nullptr) {
|
if (cache == nullptr) {
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
}
|
}
|
||||||
initializeCache(seed, SeedSize, cache);
|
initializeCache(seed, SeedSize, cache);
|
||||||
|
memcpy(cache + CacheSize, cache, CacheShift);
|
||||||
alignas(16) __m128i keys[10];
|
alignas(16) __m128i keys[10];
|
||||||
expandAesKeys<softAes>((const __m128i*)seed, keys);
|
expandAesKeys<softAes>((const __m128i*)seed, keys);
|
||||||
for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
|
for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
|
||||||
initBlock<softAes>(cache, dataset + i * DatasetBlockSize, i, keys);
|
initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
|
||||||
}
|
}
|
||||||
_mm_free(cache);
|
_mm_free(cache);
|
||||||
}
|
}
|
||||||
@ -285,11 +287,12 @@ namespace RandomX {
|
|||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
|
void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
|
||||||
lcm = new LightClientMemory();
|
lcm = new LightClientMemory();
|
||||||
lcm->cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i));
|
lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
||||||
if (lcm->cache == nullptr) {
|
if (lcm->cache == nullptr) {
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
}
|
}
|
||||||
initializeCache(seed, SeedSize, lcm->cache);
|
initializeCache(seed, SeedSize, lcm->cache);
|
||||||
|
memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
|
||||||
expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
|
expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
|
||||||
lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||||
if (lcm->block == nullptr) {
|
if (lcm->block == nullptr) {
|
||||||
|
@ -151,19 +151,40 @@ rx_finish:
|
|||||||
; return
|
; return
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
rx_read_dataset:
|
rx_read_dataset_light:
|
||||||
push rdx
|
push rdx
|
||||||
push r9
|
push r9
|
||||||
push r10
|
push r10
|
||||||
push r11
|
push r11
|
||||||
sub rsp, 32
|
movd qword ptr [rsp - 8], xmm1
|
||||||
|
movd qword ptr [rsp - 16], xmm2
|
||||||
|
sub rsp, 48
|
||||||
call qword ptr [rbp]
|
call qword ptr [rbp]
|
||||||
add rsp, 32
|
add rsp, 48
|
||||||
|
movd xmm2, qword ptr [rsp - 16]
|
||||||
|
movd xmm1, qword ptr [rsp - 8]
|
||||||
pop r11
|
pop r11
|
||||||
pop r10
|
pop r10
|
||||||
pop r9
|
pop r9
|
||||||
pop rdx
|
pop rdx
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
rx_read_dataset:
|
||||||
|
mov r8d, dword ptr [rdx] ; ma
|
||||||
|
mov rax, qword ptr [rdx+8] ; dataset
|
||||||
|
mov rax, qword ptr [rax+r8]
|
||||||
|
add dword ptr [rdx], 8
|
||||||
|
mov r8d, dword ptr [rdx+4] ; mx
|
||||||
|
xor ecx, r8d
|
||||||
|
mov dword ptr [rdx+4], ecx
|
||||||
|
test ecx, 0FFF8h
|
||||||
|
jne short rx_read_dataset_full_ret
|
||||||
|
and ecx, -8
|
||||||
|
mov dword ptr [rdx], ecx
|
||||||
|
mov r8, qword ptr [rdx+8]
|
||||||
|
prefetcht0 byte ptr [r8+rcx]
|
||||||
|
rx_read_dataset_full_ret:
|
||||||
|
ret 0
|
||||||
executeProgram ENDP
|
executeProgram ENDP
|
||||||
|
|
||||||
END
|
END
|
||||||
|
@ -123,7 +123,7 @@ rx_i_6: ;CALL
|
|||||||
jmp rx_i_7
|
jmp rx_i_7
|
||||||
taken_call_6:
|
taken_call_6:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_50
|
call rx_i_51
|
||||||
|
|
||||||
rx_i_7: ;FPDIV
|
rx_i_7: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -538,7 +538,7 @@ rx_i_38: ;CALL
|
|||||||
jmp rx_i_39
|
jmp rx_i_39
|
||||||
taken_call_38:
|
taken_call_38:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_111
|
call rx_i_112
|
||||||
|
|
||||||
rx_i_39: ;CALL
|
rx_i_39: ;CALL
|
||||||
dec edi
|
dec edi
|
||||||
@ -553,7 +553,7 @@ rx_i_39: ;CALL
|
|||||||
jmp rx_i_40
|
jmp rx_i_40
|
||||||
taken_call_39:
|
taken_call_39:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_61
|
call rx_i_62
|
||||||
|
|
||||||
rx_i_40: ;FPMUL
|
rx_i_40: ;FPMUL
|
||||||
dec edi
|
dec edi
|
||||||
@ -621,7 +621,7 @@ rx_i_44: ;CALL
|
|||||||
jmp rx_i_45
|
jmp rx_i_45
|
||||||
taken_call_44:
|
taken_call_44:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_93
|
call rx_i_94
|
||||||
|
|
||||||
rx_i_45: ;FPROUND
|
rx_i_45: ;FPROUND
|
||||||
dec edi
|
dec edi
|
||||||
@ -726,7 +726,7 @@ rx_i_51: ;CALL
|
|||||||
jmp rx_i_52
|
jmp rx_i_52
|
||||||
taken_call_51:
|
taken_call_51:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_134
|
call rx_i_135
|
||||||
|
|
||||||
rx_i_52: ;FPDIV
|
rx_i_52: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -943,7 +943,7 @@ rx_i_65: ;CALL
|
|||||||
jmp rx_i_66
|
jmp rx_i_66
|
||||||
taken_call_65:
|
taken_call_65:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_123
|
call rx_i_124
|
||||||
|
|
||||||
rx_i_66: ;FPSUB
|
rx_i_66: ;FPSUB
|
||||||
dec edi
|
dec edi
|
||||||
@ -996,7 +996,7 @@ rx_i_69: ;CALL
|
|||||||
jmp rx_i_70
|
jmp rx_i_70
|
||||||
taken_call_69:
|
taken_call_69:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_132
|
call rx_i_133
|
||||||
|
|
||||||
rx_i_70: ;FPDIV
|
rx_i_70: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -1022,7 +1022,7 @@ rx_i_71: ;CALL
|
|||||||
jmp rx_i_72
|
jmp rx_i_72
|
||||||
taken_call_71:
|
taken_call_71:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_82
|
call rx_i_83
|
||||||
|
|
||||||
rx_i_72: ;FPADD
|
rx_i_72: ;FPADD
|
||||||
dec edi
|
dec edi
|
||||||
@ -1093,7 +1093,7 @@ rx_i_76: ;CALL
|
|||||||
jmp rx_i_77
|
jmp rx_i_77
|
||||||
taken_call_76:
|
taken_call_76:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_194
|
call rx_i_195
|
||||||
|
|
||||||
rx_i_77: ;FPDIV
|
rx_i_77: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -1138,7 +1138,7 @@ rx_i_79: ;CALL
|
|||||||
jmp rx_i_80
|
jmp rx_i_80
|
||||||
taken_call_79:
|
taken_call_79:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_205
|
call rx_i_206
|
||||||
|
|
||||||
rx_i_80: ;FPADD
|
rx_i_80: ;FPADD
|
||||||
dec edi
|
dec edi
|
||||||
@ -1208,7 +1208,7 @@ rx_i_83: ;CALL
|
|||||||
jmp rx_i_84
|
jmp rx_i_84
|
||||||
taken_call_83:
|
taken_call_83:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_96
|
call rx_i_97
|
||||||
|
|
||||||
rx_i_84: ;ROR_64
|
rx_i_84: ;ROR_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -1249,7 +1249,7 @@ rx_i_86: ;CALL
|
|||||||
jmp rx_i_87
|
jmp rx_i_87
|
||||||
taken_call_86:
|
taken_call_86:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_148
|
call rx_i_149
|
||||||
|
|
||||||
rx_i_87: ;DIV_64
|
rx_i_87: ;DIV_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -1376,7 +1376,7 @@ rx_i_96: ;CALL
|
|||||||
mov ecx, ebx
|
mov ecx, ebx
|
||||||
call rx_read_dataset
|
call rx_read_dataset
|
||||||
push rax
|
push rax
|
||||||
call rx_i_173
|
call rx_i_174
|
||||||
|
|
||||||
rx_i_97: ;ROR_64
|
rx_i_97: ;ROR_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -1402,7 +1402,7 @@ rx_i_98: ;CALL
|
|||||||
jmp rx_i_99
|
jmp rx_i_99
|
||||||
taken_call_98:
|
taken_call_98:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_160
|
call rx_i_161
|
||||||
|
|
||||||
rx_i_99: ;MUL_64
|
rx_i_99: ;MUL_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -1567,7 +1567,7 @@ rx_i_111: ;CALL
|
|||||||
and eax, 2047
|
and eax, 2047
|
||||||
mov rax, qword ptr [rsi + rax * 8]
|
mov rax, qword ptr [rsi + rax * 8]
|
||||||
push rax
|
push rax
|
||||||
call rx_i_146
|
call rx_i_147
|
||||||
|
|
||||||
rx_i_112: ;FPMUL
|
rx_i_112: ;FPMUL
|
||||||
dec edi
|
dec edi
|
||||||
@ -1617,7 +1617,7 @@ rx_i_115: ;CALL
|
|||||||
mov ecx, ebx
|
mov ecx, ebx
|
||||||
call rx_read_dataset
|
call rx_read_dataset
|
||||||
push rax
|
push rax
|
||||||
call rx_i_215
|
call rx_i_216
|
||||||
|
|
||||||
rx_i_116: ;ADD_32
|
rx_i_116: ;ADD_32
|
||||||
dec edi
|
dec edi
|
||||||
@ -1778,7 +1778,7 @@ rx_i_126: ;CALL
|
|||||||
jmp rx_i_127
|
jmp rx_i_127
|
||||||
taken_call_126:
|
taken_call_126:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_195
|
call rx_i_196
|
||||||
|
|
||||||
rx_i_127: ;ADD_64
|
rx_i_127: ;ADD_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -1806,7 +1806,7 @@ rx_i_128: ;CALL
|
|||||||
jmp rx_i_129
|
jmp rx_i_129
|
||||||
taken_call_128:
|
taken_call_128:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_240
|
call rx_i_241
|
||||||
|
|
||||||
rx_i_129: ;MUL_32
|
rx_i_129: ;MUL_32
|
||||||
dec edi
|
dec edi
|
||||||
@ -1863,7 +1863,7 @@ rx_i_133: ;CALL
|
|||||||
and eax, 2047
|
and eax, 2047
|
||||||
mov rax, qword ptr [rsi + rax * 8]
|
mov rax, qword ptr [rsi + rax * 8]
|
||||||
push rax
|
push rax
|
||||||
call rx_i_157
|
call rx_i_158
|
||||||
|
|
||||||
rx_i_134: ;AND_64
|
rx_i_134: ;AND_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -2049,7 +2049,7 @@ rx_i_146: ;CALL
|
|||||||
jmp rx_i_147
|
jmp rx_i_147
|
||||||
taken_call_146:
|
taken_call_146:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_260
|
call rx_i_261
|
||||||
|
|
||||||
rx_i_147: ;IMUL_32
|
rx_i_147: ;IMUL_32
|
||||||
dec edi
|
dec edi
|
||||||
@ -2277,7 +2277,7 @@ rx_i_163: ;CALL
|
|||||||
jmp rx_i_164
|
jmp rx_i_164
|
||||||
taken_call_163:
|
taken_call_163:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_184
|
call rx_i_185
|
||||||
|
|
||||||
rx_i_164: ;ADD_32
|
rx_i_164: ;ADD_32
|
||||||
dec edi
|
dec edi
|
||||||
@ -2430,7 +2430,7 @@ rx_i_173: ;CALL
|
|||||||
jmp rx_i_174
|
jmp rx_i_174
|
||||||
taken_call_173:
|
taken_call_173:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_200
|
call rx_i_201
|
||||||
|
|
||||||
rx_i_174: ;FPSQRT
|
rx_i_174: ;FPSQRT
|
||||||
dec edi
|
dec edi
|
||||||
@ -2593,7 +2593,7 @@ rx_i_185: ;CALL
|
|||||||
jmp rx_i_186
|
jmp rx_i_186
|
||||||
taken_call_185:
|
taken_call_185:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_214
|
call rx_i_215
|
||||||
|
|
||||||
rx_i_186: ;FPADD
|
rx_i_186: ;FPADD
|
||||||
dec edi
|
dec edi
|
||||||
@ -2647,7 +2647,7 @@ rx_i_189: ;CALL
|
|||||||
jmp rx_i_190
|
jmp rx_i_190
|
||||||
taken_call_189:
|
taken_call_189:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_249
|
call rx_i_250
|
||||||
|
|
||||||
rx_i_190: ;XOR_64
|
rx_i_190: ;XOR_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -3209,7 +3209,7 @@ rx_i_230: ;CALL
|
|||||||
jmp rx_i_231
|
jmp rx_i_231
|
||||||
taken_call_230:
|
taken_call_230:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_331
|
call rx_i_332
|
||||||
|
|
||||||
rx_i_231: ;FPMUL
|
rx_i_231: ;FPMUL
|
||||||
dec edi
|
dec edi
|
||||||
@ -3323,7 +3323,7 @@ rx_i_237: ;CALL
|
|||||||
jmp rx_i_238
|
jmp rx_i_238
|
||||||
taken_call_237:
|
taken_call_237:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_271
|
call rx_i_272
|
||||||
|
|
||||||
rx_i_238: ;FPDIV
|
rx_i_238: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -3379,7 +3379,7 @@ rx_i_241: ;CALL
|
|||||||
mov ecx, r15d
|
mov ecx, r15d
|
||||||
call rx_read_dataset
|
call rx_read_dataset
|
||||||
push rax
|
push rax
|
||||||
call rx_i_298
|
call rx_i_299
|
||||||
|
|
||||||
rx_i_242: ;ROR_64
|
rx_i_242: ;ROR_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -3597,7 +3597,7 @@ rx_i_257: ;CALL
|
|||||||
jmp rx_i_258
|
jmp rx_i_258
|
||||||
taken_call_257:
|
taken_call_257:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_370
|
call rx_i_371
|
||||||
|
|
||||||
rx_i_258: ;FPADD
|
rx_i_258: ;FPADD
|
||||||
dec edi
|
dec edi
|
||||||
@ -3776,7 +3776,7 @@ rx_i_270: ;CALL
|
|||||||
jmp rx_i_271
|
jmp rx_i_271
|
||||||
taken_call_270:
|
taken_call_270:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_298
|
call rx_i_299
|
||||||
|
|
||||||
rx_i_271: ;ROL_64
|
rx_i_271: ;ROL_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -3868,7 +3868,7 @@ rx_i_277: ;CALL
|
|||||||
and eax, 2047
|
and eax, 2047
|
||||||
mov rax, qword ptr [rsi + rax * 8]
|
mov rax, qword ptr [rsi + rax * 8]
|
||||||
push rax
|
push rax
|
||||||
call rx_i_375
|
call rx_i_376
|
||||||
|
|
||||||
rx_i_278: ;FPADD
|
rx_i_278: ;FPADD
|
||||||
dec edi
|
dec edi
|
||||||
@ -4548,7 +4548,7 @@ rx_i_326: ;CALL
|
|||||||
jmp rx_i_327
|
jmp rx_i_327
|
||||||
taken_call_326:
|
taken_call_326:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_346
|
call rx_i_347
|
||||||
|
|
||||||
rx_i_327: ;MUL_64
|
rx_i_327: ;MUL_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -4922,7 +4922,7 @@ rx_i_354: ;CALL
|
|||||||
jmp rx_i_355
|
jmp rx_i_355
|
||||||
taken_call_354:
|
taken_call_354:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_355
|
call rx_i_356
|
||||||
|
|
||||||
rx_i_355: ;MUL_64
|
rx_i_355: ;MUL_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -5659,7 +5659,7 @@ rx_i_409: ;CALL
|
|||||||
jmp rx_i_410
|
jmp rx_i_410
|
||||||
taken_call_409:
|
taken_call_409:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_497
|
call rx_i_498
|
||||||
|
|
||||||
rx_i_410: ;FPDIV
|
rx_i_410: ;FPDIV
|
||||||
dec edi
|
dec edi
|
||||||
@ -5866,7 +5866,7 @@ rx_i_425: ;CALL
|
|||||||
jmp rx_i_426
|
jmp rx_i_426
|
||||||
taken_call_425:
|
taken_call_425:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_34
|
call rx_i_35
|
||||||
|
|
||||||
rx_i_426: ;IMUL_32
|
rx_i_426: ;IMUL_32
|
||||||
dec edi
|
dec edi
|
||||||
@ -6556,7 +6556,7 @@ rx_i_476: ;CALL
|
|||||||
and eax, 2047
|
and eax, 2047
|
||||||
mov rax, qword ptr [rsi + rax * 8]
|
mov rax, qword ptr [rsi + rax * 8]
|
||||||
push rax
|
push rax
|
||||||
call rx_i_11
|
call rx_i_12
|
||||||
|
|
||||||
rx_i_477: ;MUL_64
|
rx_i_477: ;MUL_64
|
||||||
dec edi
|
dec edi
|
||||||
@ -6580,7 +6580,7 @@ rx_i_478: ;CALL
|
|||||||
jmp rx_i_479
|
jmp rx_i_479
|
||||||
taken_call_478:
|
taken_call_478:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_72
|
call rx_i_73
|
||||||
|
|
||||||
rx_i_479: ;FPSUB
|
rx_i_479: ;FPSUB
|
||||||
dec edi
|
dec edi
|
||||||
@ -6721,7 +6721,7 @@ rx_i_489: ;CALL
|
|||||||
jmp rx_i_490
|
jmp rx_i_490
|
||||||
taken_call_489:
|
taken_call_489:
|
||||||
push rax
|
push rax
|
||||||
call rx_i_61
|
call rx_i_62
|
||||||
|
|
||||||
rx_i_490: ;ADD_64
|
rx_i_490: ;ADD_64
|
||||||
dec edi
|
dec edi
|
||||||
|
Loading…
Reference in New Issue
Block a user