mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-06 23:28:54 +00:00
Merge pull request #284 from SChernykh/opt-rcp
Optimized randomx_reciprocal
This commit is contained in:
commit
7db92b73f7
@ -445,7 +445,7 @@ namespace randomx {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||||
uint64_t divisor = instr.getImm32();
|
const uint32_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
registerUsage[instr.dst] = i;
|
registerUsage[instr.dst] = i;
|
||||||
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
|
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
|
||||||
|
@ -243,7 +243,7 @@ namespace randomx {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (opcode < ceil_IMUL_RCP) {
|
if (opcode < ceil_IMUL_RCP) {
|
||||||
uint64_t divisor = instr.getImm32();
|
const uint32_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
ibc.type = InstructionType::IMUL_R;
|
ibc.type = InstructionType::IMUL_R;
|
||||||
|
@ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
|
|||||||
|
|
||||||
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
||||||
{
|
{
|
||||||
const uint64_t divisor = instr.getImm32();
|
const uint32_t divisor = instr.getImm32();
|
||||||
if (isZeroOrPowerOf2(divisor))
|
if (isZeroOrPowerOf2(divisor))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -695,22 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
|||||||
constexpr uint32_t tmp_reg = 20;
|
constexpr uint32_t tmp_reg = 20;
|
||||||
const uint32_t dst = IntRegMap[instr.dst];
|
const uint32_t dst = IntRegMap[instr.dst];
|
||||||
|
|
||||||
constexpr uint64_t N = 1ULL << 63;
|
|
||||||
const uint64_t q = N / divisor;
|
|
||||||
const uint64_t r = N % divisor;
|
|
||||||
#ifdef __GNUC__
|
|
||||||
const uint64_t shift = 64 - __builtin_clzll(divisor);
|
|
||||||
#else
|
|
||||||
uint64_t shift = 32;
|
|
||||||
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
|
|
||||||
--shift;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
|
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
|
||||||
|
|
||||||
literalPos -= sizeof(uint64_t);
|
literalPos -= sizeof(uint64_t);
|
||||||
const uint64_t randomx_reciprocal = (q << shift) + ((r << shift) / divisor);
|
|
||||||
memcpy(code + literalPos, &randomx_reciprocal, sizeof(randomx_reciprocal));
|
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
||||||
|
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
|
||||||
|
|
||||||
if (literal_id < 12)
|
if (literal_id < 12)
|
||||||
{
|
{
|
||||||
|
@ -776,7 +776,7 @@ namespace randomx {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void v1_IMUL_RCP(HANDLER_ARGS) {
|
static void v1_IMUL_RCP(HANDLER_ARGS) {
|
||||||
uint64_t divisor = isn.getImm32();
|
const uint32_t divisor = isn.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
state.registerUsage[isn.dst] = i;
|
state.registerUsage[isn.dst] = i;
|
||||||
if (state.rcpCount < 4) {
|
if (state.rcpCount < 4) {
|
||||||
|
@ -618,7 +618,7 @@ namespace randomx {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||||
uint64_t divisor = instr.getImm32();
|
const uint32_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
registerUsage[instr.dst] = i;
|
registerUsage[instr.dst] = i;
|
||||||
emit(MOV_RAX_I);
|
emit(MOV_RAX_I);
|
||||||
|
@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
ret
|
ret
|
||||||
|
|
||||||
*/
|
*/
|
||||||
uint64_t randomx_reciprocal(uint64_t divisor) {
|
uint64_t randomx_reciprocal(uint32_t divisor) {
|
||||||
|
|
||||||
assert(divisor != 0);
|
assert(divisor != 0);
|
||||||
|
|
||||||
const uint64_t p2exp63 = 1ULL << 63;
|
const uint64_t p2exp63 = 1ULL << 63;
|
||||||
|
const uint64_t q = p2exp63 / divisor;
|
||||||
|
const uint64_t r = p2exp63 % divisor;
|
||||||
|
|
||||||
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
|
#ifdef __GNUC__
|
||||||
|
const uint32_t shift = 64 - __builtin_clzll(divisor);
|
||||||
|
#else
|
||||||
|
uint32_t shift = 32;
|
||||||
|
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
|
||||||
|
--shift;
|
||||||
|
#endif
|
||||||
|
|
||||||
unsigned bsr = 0; //highest set bit in divisor
|
return (q << shift) + ((r << shift) / divisor);
|
||||||
|
|
||||||
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
|
|
||||||
bsr++;
|
|
||||||
|
|
||||||
for (unsigned shift = 0; shift < bsr; shift++) {
|
|
||||||
if (remainder >= divisor - remainder) {
|
|
||||||
quotient = quotient * 2 + 1;
|
|
||||||
remainder = remainder * 2 - divisor;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
quotient = quotient * 2;
|
|
||||||
remainder = remainder * 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return quotient;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
||||||
|
|
||||||
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
|
||||||
return randomx_reciprocal(divisor);
|
return randomx_reciprocal(divisor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint64_t randomx_reciprocal(uint64_t);
|
uint64_t randomx_reciprocal(uint32_t);
|
||||||
uint64_t randomx_reciprocal_fast(uint64_t);
|
uint64_t randomx_reciprocal_fast(uint32_t);
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
}
|
}
|
||||||
|
@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (opcode < randomx::ceil_IMUL_RCP) {
|
if (opcode < randomx::ceil_IMUL_RCP) {
|
||||||
uint64_t divisor = instr.getImm32();
|
const uint32_t divisor = instr.getImm32();
|
||||||
if (!randomx::isZeroOrPowerOf2(divisor)) {
|
if (!randomx::isZeroOrPowerOf2(divisor)) {
|
||||||
instr.dst = instr.dst % randomx::RegistersCount;
|
instr.dst = instr.dst % randomx::RegistersCount;
|
||||||
instr.opcode |= DST_INT;
|
instr.opcode |= DST_INT;
|
||||||
|
Loading…
Reference in New Issue
Block a user