mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
Big endian bug fixes
This commit is contained in:
parent
1c3666aa98
commit
018c1a5222
@ -41,11 +41,15 @@ static FORCE_INLINE uint32_t load32(const void *src) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCE_INLINE uint64_t load64(const void *src) {
|
static FORCE_INLINE uint64_t load64_native(const void *src) {
|
||||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
|
||||||
uint64_t w;
|
uint64_t w;
|
||||||
memcpy(&w, src, sizeof w);
|
memcpy(&w, src, sizeof w);
|
||||||
return w;
|
return w;
|
||||||
|
}
|
||||||
|
|
||||||
|
static FORCE_INLINE uint64_t load64(const void *src) {
|
||||||
|
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||||
|
return load64_native(src);
|
||||||
#else
|
#else
|
||||||
const uint8_t *p = (const uint8_t *)src;
|
const uint8_t *p = (const uint8_t *)src;
|
||||||
uint64_t w = *p++;
|
uint64_t w = *p++;
|
||||||
@ -75,9 +79,13 @@ static FORCE_INLINE void store32(void *dst, uint32_t w) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
|
||||||
|
memcpy(dst, &w, sizeof w);
|
||||||
|
}
|
||||||
|
|
||||||
static FORCE_INLINE void store64(void *dst, uint64_t w) {
|
static FORCE_INLINE void store64(void *dst, uint64_t w) {
|
||||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||||
memcpy(dst, &w, sizeof w);
|
store64_native(dst, w);
|
||||||
#else
|
#else
|
||||||
uint8_t *p = (uint8_t *)dst;
|
uint8_t *p = (uint8_t *)dst;
|
||||||
*p++ = (uint8_t)w;
|
*p++ = (uint8_t)w;
|
||||||
|
@ -192,7 +192,7 @@ namespace randomx {
|
|||||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||||
|
|
||||||
for (unsigned q = 0; q < 8; ++q)
|
for (unsigned q = 0; q < 8; ++q)
|
||||||
rl[q] ^= load64(mixBlock + 8 * q);
|
rl[q] ^= load64_native(mixBlock + 8 * q);
|
||||||
|
|
||||||
registerValue = rl[prog.getAddressRegister()];
|
registerValue = rl[prog.getAddressRegister()];
|
||||||
}
|
}
|
||||||
|
@ -295,7 +295,8 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) {
|
|||||||
|
|
||||||
inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
|
inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
|
||||||
__m128i x;
|
__m128i x;
|
||||||
x.u64[0] = load64(mem_addr);
|
x.u32[0] = load32((uint8_t*)mem_addr + 0);
|
||||||
|
x.u32[1] = load32((uint8_t*)mem_addr + 4);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -573,14 +573,6 @@ namespace randomx {
|
|||||||
constexpr int LOOK_FORWARD_CYCLES = 4;
|
constexpr int LOOK_FORWARD_CYCLES = 4;
|
||||||
constexpr int MAX_THROWAWAY_COUNT = 256;
|
constexpr int MAX_THROWAWAY_COUNT = 256;
|
||||||
|
|
||||||
#ifndef _DEBUG
|
|
||||||
constexpr bool TRACE = false;
|
|
||||||
constexpr bool INFO = false;
|
|
||||||
#else
|
|
||||||
constexpr bool TRACE = true;
|
|
||||||
constexpr bool INFO = true;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<bool commit>
|
template<bool commit>
|
||||||
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
|
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
|
||||||
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
|
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
|
||||||
@ -588,21 +580,21 @@ namespace randomx {
|
|||||||
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
|
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
|
||||||
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
|
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
|
||||||
if (commit) {
|
if (commit) {
|
||||||
if (TRACE) std::cout << "; P5 at cycle " << cycle << std::endl;
|
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
|
||||||
portBusy[cycle][2] = uop;
|
portBusy[cycle][2] = uop;
|
||||||
}
|
}
|
||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
|
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
|
||||||
if (commit) {
|
if (commit) {
|
||||||
if (TRACE) std::cout << "; P0 at cycle " << cycle << std::endl;
|
if (trace) std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||||
portBusy[cycle][0] = uop;
|
portBusy[cycle][0] = uop;
|
||||||
}
|
}
|
||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
|
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
|
||||||
if (commit) {
|
if (commit) {
|
||||||
if (TRACE) std::cout << "; P1 at cycle " << cycle << std::endl;
|
if (trace) std::cout << "; P1 at cycle " << cycle << std::endl;
|
||||||
portBusy[cycle][1] = uop;
|
portBusy[cycle][1] = uop;
|
||||||
}
|
}
|
||||||
return cycle;
|
return cycle;
|
||||||
@ -621,7 +613,7 @@ namespace randomx {
|
|||||||
//move instructions are eliminated and don't need an execution unit
|
//move instructions are eliminated and don't need an execution unit
|
||||||
if (mop.isEliminated()) {
|
if (mop.isEliminated()) {
|
||||||
if (commit)
|
if (commit)
|
||||||
if (TRACE) std::cout << "; (eliminated)" << std::endl;
|
if (trace) std::cout << "; (eliminated)" << std::endl;
|
||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
else if (mop.isSimple()) {
|
else if (mop.isSimple()) {
|
||||||
@ -677,7 +669,7 @@ namespace randomx {
|
|||||||
|
|
||||||
//select a decode configuration
|
//select a decode configuration
|
||||||
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
|
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
|
||||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
||||||
|
|
||||||
int bufferIndex = 0;
|
int bufferIndex = 0;
|
||||||
|
|
||||||
@ -692,15 +684,15 @@ namespace randomx {
|
|||||||
//select an instruction so that the first macro-op fits into the current slot
|
//select an instruction so that the first macro-op fits into the current slot
|
||||||
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
|
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
|
||||||
macroOpIndex = 0;
|
macroOpIndex = 0;
|
||||||
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||||
}
|
}
|
||||||
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
|
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
|
||||||
if (TRACE) std::cout << mop.getName() << " ";
|
if (trace) std::cout << mop.getName() << " ";
|
||||||
|
|
||||||
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
||||||
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
|
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
|
||||||
if (scheduleCycle < 0) {
|
if (scheduleCycle < 0) {
|
||||||
if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
|
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
|
||||||
//__debugbreak();
|
//__debugbreak();
|
||||||
portsSaturated = true;
|
portsSaturated = true;
|
||||||
break;
|
break;
|
||||||
@ -711,7 +703,7 @@ namespace randomx {
|
|||||||
int forward;
|
int forward;
|
||||||
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
|
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
|
||||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
|
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
|
||||||
if (TRACE) std::cout << "; src STALL at cycle " << cycle << std::endl;
|
if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
|
||||||
++scheduleCycle;
|
++scheduleCycle;
|
||||||
++cycle;
|
++cycle;
|
||||||
}
|
}
|
||||||
@ -720,22 +712,22 @@ namespace randomx {
|
|||||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||||
throwAwayCount++;
|
throwAwayCount++;
|
||||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||||
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||||
//cycle = topCycle;
|
//cycle = topCycle;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
//abort this decode buffer
|
//abort this decode buffer
|
||||||
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
|
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
|
||||||
currentInstruction = SuperscalarInstruction::Null;
|
currentInstruction = SuperscalarInstruction::Null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
|
if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
|
||||||
}
|
}
|
||||||
//find a destination register that will be ready when this instruction executes
|
//find a destination register that will be ready when this instruction executes
|
||||||
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
|
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
|
||||||
int forward;
|
int forward;
|
||||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
|
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
|
||||||
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
|
if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
|
||||||
++scheduleCycle;
|
++scheduleCycle;
|
||||||
++cycle;
|
++cycle;
|
||||||
}
|
}
|
||||||
@ -743,16 +735,16 @@ namespace randomx {
|
|||||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||||
throwAwayCount++;
|
throwAwayCount++;
|
||||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||||
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||||
//cycle = topCycle;
|
//cycle = topCycle;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
//abort this decode buffer
|
//abort this decode buffer
|
||||||
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
|
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
|
||||||
currentInstruction = SuperscalarInstruction::Null;
|
currentInstruction = SuperscalarInstruction::Null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
|
if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
|
||||||
}
|
}
|
||||||
throwAwayCount = 0;
|
throwAwayCount = 0;
|
||||||
|
|
||||||
@ -773,7 +765,7 @@ namespace randomx {
|
|||||||
ri.latency = retireCycle;
|
ri.latency = retireCycle;
|
||||||
ri.lastOpGroup = currentInstruction.getGroup();
|
ri.lastOpGroup = currentInstruction.getGroup();
|
||||||
ri.lastOpPar = currentInstruction.getGroupPar();
|
ri.lastOpPar = currentInstruction.getGroupPar();
|
||||||
if (TRACE) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
|
if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
|
||||||
}
|
}
|
||||||
codeSize += mop.getSize();
|
codeSize += mop.getSize();
|
||||||
bufferIndex++;
|
bufferIndex++;
|
||||||
|
@ -37,14 +37,6 @@ const uint8_t blockTemplate_[] = {
|
|||||||
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr char hexmap[] = "0123456789abcdef";
|
|
||||||
void outputHex(std::ostream& os, const char* data, int length) {
|
|
||||||
for (int i = 0; i < length; ++i) {
|
|
||||||
os << hexmap[(data[i] & 0xF0) >> 4];
|
|
||||||
os << hexmap[data[i] & 0x0F];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class AtomicHash {
|
class AtomicHash {
|
||||||
public:
|
public:
|
||||||
AtomicHash() {
|
AtomicHash() {
|
||||||
@ -101,7 +93,8 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
|
|||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
bool softAes, miningMode, verificationMode, help, largePages, jit;
|
bool softAes, miningMode, verificationMode, help, largePages, jit;
|
||||||
int noncesCount, threadCount, initThreadCount;
|
int noncesCount, threadCount, initThreadCount;
|
||||||
int32_t seed;
|
int32_t seedValue;
|
||||||
|
char seed[4];
|
||||||
|
|
||||||
readOption("--softAes", argc, argv, softAes);
|
readOption("--softAes", argc, argv, softAes);
|
||||||
readOption("--mine", argc, argv, miningMode);
|
readOption("--mine", argc, argv, miningMode);
|
||||||
@ -109,11 +102,13 @@ int main(int argc, char** argv) {
|
|||||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||||
readIntOption("--nonces", argc, argv, noncesCount, 1000);
|
readIntOption("--nonces", argc, argv, noncesCount, 1000);
|
||||||
readIntOption("--init", argc, argv, initThreadCount, 1);
|
readIntOption("--init", argc, argv, initThreadCount, 1);
|
||||||
readIntOption("--seed", argc, argv, seed, 0);
|
readIntOption("--seed", argc, argv, seedValue, 0);
|
||||||
readOption("--largePages", argc, argv, largePages);
|
readOption("--largePages", argc, argv, largePages);
|
||||||
readOption("--jit", argc, argv, jit);
|
readOption("--jit", argc, argv, jit);
|
||||||
readOption("--help", argc, argv, help);
|
readOption("--help", argc, argv, help);
|
||||||
|
|
||||||
|
store32(&seed, seedValue);
|
||||||
|
|
||||||
std::cout << "RandomX benchmark" << std::endl;
|
std::cout << "RandomX benchmark" << std::endl;
|
||||||
|
|
||||||
if (help || (!miningMode && !verificationMode)) {
|
if (help || (!miningMode && !verificationMode)) {
|
||||||
@ -229,7 +224,7 @@ int main(int argc, char** argv) {
|
|||||||
double elapsed = sw.getElapsed();
|
double elapsed = sw.getElapsed();
|
||||||
std::cout << "Calculated result: ";
|
std::cout << "Calculated result: ";
|
||||||
result.print(std::cout);
|
result.print(std::cout);
|
||||||
if (noncesCount == 1000 && seed == 0)
|
if (noncesCount == 1000 && seedValue == 0)
|
||||||
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
|
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
|
||||||
if (!miningMode) {
|
if (!miningMode) {
|
||||||
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
|
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
|
||||||
|
@ -24,6 +24,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
|
constexpr char hexmap[] = "0123456789abcdef";
|
||||||
|
inline void outputHex(std::ostream& os, const char* data, int length) {
|
||||||
|
for (int i = 0; i < length; ++i) {
|
||||||
|
os << hexmap[(data[i] & 0xF0) >> 4];
|
||||||
|
os << hexmap[data[i] & 0x0F];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline void dump(const char* buffer, uint64_t count, const char* name) {
|
inline void dump(const char* buffer, uint64_t count, const char* name) {
|
||||||
std::ofstream fout(name, std::ios::out | std::ios::binary);
|
std::ofstream fout(name, std::ios::out | std::ios::binary);
|
||||||
fout.write(buffer, count);
|
fout.write(buffer, count);
|
||||||
|
@ -114,7 +114,7 @@ namespace randomx {
|
|||||||
template<class Allocator, bool softAes>
|
template<class Allocator, bool softAes>
|
||||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||||
auto& ibc = byteCode[ic];
|
auto& ibc = byteCode[ic];
|
||||||
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||||
switch (ibc.type)
|
switch (ibc.type)
|
||||||
{
|
{
|
||||||
case InstructionType::IADD_RS: {
|
case InstructionType::IADD_RS: {
|
||||||
@ -270,7 +270,7 @@ namespace randomx {
|
|||||||
default:
|
default:
|
||||||
UNREACHABLE;
|
UNREACHABLE;
|
||||||
}
|
}
|
||||||
if (trace) {
|
if (trace && ibc.type != InstructionType::NOP) {
|
||||||
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
|
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
|
||||||
print(*ibc.idst);
|
print(*ibc.idst);
|
||||||
else //if(ibc.type >= 20 && ibc.type <= 30)
|
else //if(ibc.type >= 20 && ibc.type <= 30)
|
||||||
|
Loading…
Reference in New Issue
Block a user