mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-12-22 07:48:54 +00:00
Big endian bug fixes
This commit is contained in:
parent
1c3666aa98
commit
018c1a5222
@ -41,11 +41,15 @@ static FORCE_INLINE uint32_t load32(const void *src) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
// Reads sizeof(uint64_t) bytes starting at src and returns them as a uint64_t.
// The bytes are copied as-is with memcpy, so the value is interpreted in the
// host's own byte order; no endianness conversion happens in this function.
static FORCE_INLINE uint64_t load64_native(const void *src) {
|
||||
uint64_t w;
|
||||
// Byte-for-byte copy into w instead of dereferencing src through a cast pointer.
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return load64_native(src);
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
@ -75,9 +79,13 @@ static FORCE_INLINE void store32(void *dst, uint32_t w) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Writes the sizeof(uint64_t) bytes of w to dst exactly as they are laid out in
// host memory (plain memcpy); no endianness conversion happens in this function.
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
|
||||
// Byte-for-byte copy of w's object representation into dst.
memcpy(dst, &w, sizeof w);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store64(void *dst, uint64_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
store64_native(dst, w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
|
@ -192,7 +192,7 @@ namespace randomx {
|
||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
rl[q] ^= load64(mixBlock + 8 * q);
|
||||
rl[q] ^= load64_native(mixBlock + 8 * q);
|
||||
|
||||
registerValue = rl[prog.getAddressRegister()];
|
||||
}
|
||||
|
@ -295,7 +295,8 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) {
|
||||
|
||||
// Definition of _mm_loadl_epi64 for the __m128i type used here (a type exposing
// u64[] and u32[] members): fills the first 8 bytes' worth of x from the memory
// at mem_addr and returns x.
// Only u64[0] / u32[0..1] are assigned in this function; whether the rest of x is
// initialized depends on __m128i's definition, which is not visible here - TODO confirm.
// NOTE(review): this text comes from a rendered diff whose hunk header reports 7 old
// lines vs 8 new lines, so the load64 assignment is presumably the removed line and
// the two load32 assignments its replacement - confirm against the real file.
inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
|
||||
__m128i x;
|
||||
// Whole low half loaded with a single 64-bit load.
x.u64[0] = load64(mem_addr);
|
||||
// Same 8 bytes loaded as two 32-bit words, at byte offsets 0 and 4.
x.u32[0] = load32((uint8_t*)mem_addr + 0);
|
||||
x.u32[1] = load32((uint8_t*)mem_addr + 4);
|
||||
return x;
|
||||
}
|
||||
|
||||
|
@ -573,14 +573,6 @@ namespace randomx {
|
||||
// Upper bound on the "forward" retry loops used when selecting a source or
// destination register: each failed attempt advances scheduleCycle and cycle by
// one, and the loop stops once forward reaches this value.
constexpr int LOOK_FORWARD_CYCLES = 4;
|
||||
// Limit for throwAwayCount: while the counter is below this value an instruction
// whose operands could not be selected is thrown away and generation continues;
// once the limit is reached the current decode buffer is aborted instead.
constexpr int MAX_THROWAWAY_COUNT = 256;
|
||||
|
||||
#ifndef _DEBUG
|
||||
constexpr bool TRACE = false;
|
||||
constexpr bool INFO = false;
|
||||
#else
|
||||
constexpr bool TRACE = true;
|
||||
constexpr bool INFO = true;
|
||||
#endif
|
||||
|
||||
template<bool commit>
|
||||
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
|
||||
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
|
||||
@ -588,21 +580,21 @@ namespace randomx {
|
||||
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
|
||||
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
|
||||
if (commit) {
|
||||
if (TRACE) std::cout << "; P5 at cycle " << cycle << std::endl;
|
||||
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][2] = uop;
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
|
||||
if (commit) {
|
||||
if (TRACE) std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||
if (trace) std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][0] = uop;
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
|
||||
if (commit) {
|
||||
if (TRACE) std::cout << "; P1 at cycle " << cycle << std::endl;
|
||||
if (trace) std::cout << "; P1 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][1] = uop;
|
||||
}
|
||||
return cycle;
|
||||
@ -621,7 +613,7 @@ namespace randomx {
|
||||
//move instructions are eliminated and don't need an execution unit
|
||||
if (mop.isEliminated()) {
|
||||
if (commit)
|
||||
if (TRACE) std::cout << "; (eliminated)" << std::endl;
|
||||
if (trace) std::cout << "; (eliminated)" << std::endl;
|
||||
return cycle;
|
||||
}
|
||||
else if (mop.isSimple()) {
|
||||
@ -677,7 +669,7 @@ namespace randomx {
|
||||
|
||||
//select a decode configuration
|
||||
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
|
||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
||||
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
||||
|
||||
int bufferIndex = 0;
|
||||
|
||||
@ -692,15 +684,15 @@ namespace randomx {
|
||||
//select an instruction so that the first macro-op fits into the current slot
|
||||
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
|
||||
macroOpIndex = 0;
|
||||
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||
if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||
}
|
||||
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
|
||||
if (TRACE) std::cout << mop.getName() << " ";
|
||||
if (trace) std::cout << mop.getName() << " ";
|
||||
|
||||
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
||||
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
|
||||
if (scheduleCycle < 0) {
|
||||
if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
|
||||
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
|
||||
//__debugbreak();
|
||||
portsSaturated = true;
|
||||
break;
|
||||
@ -711,7 +703,7 @@ namespace randomx {
|
||||
int forward;
|
||||
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
|
||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
|
||||
if (TRACE) std::cout << "; src STALL at cycle " << cycle << std::endl;
|
||||
if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
|
||||
++scheduleCycle;
|
||||
++cycle;
|
||||
}
|
||||
@ -720,22 +712,22 @@ namespace randomx {
|
||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||
throwAwayCount++;
|
||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
//cycle = topCycle;
|
||||
continue;
|
||||
}
|
||||
//abort this decode buffer
|
||||
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
|
||||
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
|
||||
currentInstruction = SuperscalarInstruction::Null;
|
||||
break;
|
||||
}
|
||||
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
|
||||
if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
|
||||
}
|
||||
//find a destination register that will be ready when this instruction executes
|
||||
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
|
||||
int forward;
|
||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
|
||||
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
|
||||
if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
|
||||
++scheduleCycle;
|
||||
++cycle;
|
||||
}
|
||||
@ -743,16 +735,16 @@ namespace randomx {
|
||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||
throwAwayCount++;
|
||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
//cycle = topCycle;
|
||||
continue;
|
||||
}
|
||||
//abort this decode buffer
|
||||
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
|
||||
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
|
||||
currentInstruction = SuperscalarInstruction::Null;
|
||||
break;
|
||||
}
|
||||
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
|
||||
if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
|
||||
}
|
||||
throwAwayCount = 0;
|
||||
|
||||
@ -773,7 +765,7 @@ namespace randomx {
|
||||
ri.latency = retireCycle;
|
||||
ri.lastOpGroup = currentInstruction.getGroup();
|
||||
ri.lastOpPar = currentInstruction.getGroupPar();
|
||||
if (TRACE) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
|
||||
if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
|
||||
}
|
||||
codeSize += mop.getSize();
|
||||
bufferIndex++;
|
||||
|
@ -37,14 +37,6 @@ const uint8_t blockTemplate_[] = {
|
||||
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
||||
};
|
||||
|
||||
constexpr char hexmap[] = "0123456789abcdef";
|
||||
// Writes `length` bytes starting at `data` to `os` as hexadecimal text.
// Each byte becomes two characters looked up in hexmap: the high nibble first,
// then the low nibble. Nothing else (separator, prefix, newline) is written.
// A `length` of zero or less writes nothing, because the loop body never runs.
void outputHex(std::ostream& os, const char* data, int length) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
// Upper four bits of the byte, shifted down to index 0..15.
os << hexmap[(data[i] & 0xF0) >> 4];
|
||||
// Lower four bits of the byte.
os << hexmap[data[i] & 0x0F];
|
||||
}
|
||||
}
|
||||
|
||||
class AtomicHash {
|
||||
public:
|
||||
AtomicHash() {
|
||||
@ -101,7 +93,8 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, miningMode, verificationMode, help, largePages, jit;
|
||||
int noncesCount, threadCount, initThreadCount;
|
||||
int32_t seed;
|
||||
int32_t seedValue;
|
||||
char seed[4];
|
||||
|
||||
readOption("--softAes", argc, argv, softAes);
|
||||
readOption("--mine", argc, argv, miningMode);
|
||||
@ -109,11 +102,13 @@ int main(int argc, char** argv) {
|
||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||
readIntOption("--nonces", argc, argv, noncesCount, 1000);
|
||||
readIntOption("--init", argc, argv, initThreadCount, 1);
|
||||
readIntOption("--seed", argc, argv, seed, 0);
|
||||
readIntOption("--seed", argc, argv, seedValue, 0);
|
||||
readOption("--largePages", argc, argv, largePages);
|
||||
readOption("--jit", argc, argv, jit);
|
||||
readOption("--help", argc, argv, help);
|
||||
|
||||
store32(&seed, seedValue);
|
||||
|
||||
std::cout << "RandomX benchmark" << std::endl;
|
||||
|
||||
if (help || (!miningMode && !verificationMode)) {
|
||||
@ -229,7 +224,7 @@ int main(int argc, char** argv) {
|
||||
double elapsed = sw.getElapsed();
|
||||
std::cout << "Calculated result: ";
|
||||
result.print(std::cout);
|
||||
if (noncesCount == 1000 && seed == 0)
|
||||
if (noncesCount == 1000 && seedValue == 0)
|
||||
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
|
||||
if (!miningMode) {
|
||||
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
|
||||
|
@ -24,6 +24,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
constexpr char hexmap[] = "0123456789abcdef";
|
||||
// Writes `length` bytes starting at `data` to `os` as hexadecimal text.
// Each byte becomes two characters looked up in hexmap: the high nibble first,
// then the low nibble. Nothing else (separator, prefix, newline) is written.
// A `length` of zero or less writes nothing, because the loop body never runs.
inline void outputHex(std::ostream& os, const char* data, int length) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
// Upper four bits of the byte, shifted down to index 0..15.
os << hexmap[(data[i] & 0xF0) >> 4];
|
||||
// Lower four bits of the byte.
os << hexmap[data[i] & 0x0F];
|
||||
}
|
||||
}
|
||||
|
||||
inline void dump(const char* buffer, uint64_t count, const char* name) {
|
||||
std::ofstream fout(name, std::ios::out | std::ios::binary);
|
||||
fout.write(buffer, count);
|
||||
|
@ -114,7 +114,7 @@ namespace randomx {
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[ic];
|
||||
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||
switch (ibc.type)
|
||||
{
|
||||
case InstructionType::IADD_RS: {
|
||||
@ -270,7 +270,7 @@ namespace randomx {
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
if (trace) {
|
||||
if (trace && ibc.type != InstructionType::NOP) {
|
||||
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
|
||||
print(*ibc.idst);
|
||||
else //if(ibc.type >= 20 && ibc.type <= 30)
|
||||
|
Loading…
Reference in New Issue
Block a user