Optimized randomx_reciprocal

Also limited its argument to 32 bits because, according to the specs, it is only supposed to work with 32-bit values.
This commit is contained in:
SChernykh 2023-10-20 10:54:25 +02:00
parent 5fc512e71c
commit 5c49ab12a0
8 changed files with 23 additions and 42 deletions

View File

@ -445,7 +445,7 @@ namespace randomx {
} }
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32(); const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

View File

@ -243,7 +243,7 @@ namespace randomx {
} }
if (opcode < ceil_IMUL_RCP) { if (opcode < ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32(); const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R; ibc.type = InstructionType::IMUL_R;

View File

@ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
{ {
const uint64_t divisor = instr.getImm32(); const uint32_t divisor = instr.getImm32();
if (isZeroOrPowerOf2(divisor)) if (isZeroOrPowerOf2(divisor))
return; return;
@ -695,22 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg = 20; constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
const uint64_t q = N / divisor;
const uint64_t r = N % divisor;
#ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll(divisor);
#else
uint64_t shift = 32;
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t); const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
literalPos -= sizeof(uint64_t); literalPos -= sizeof(uint64_t);
const uint64_t randomx_reciprocal = (q << shift) + ((r << shift) / divisor);
memcpy(code + literalPos, &randomx_reciprocal, sizeof(randomx_reciprocal)); const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
if (literal_id < 12) if (literal_id < 12)
{ {

View File

@ -776,7 +776,7 @@ namespace randomx {
} }
static void v1_IMUL_RCP(HANDLER_ARGS) { static void v1_IMUL_RCP(HANDLER_ARGS) {
uint64_t divisor = isn.getImm32(); const uint32_t divisor = isn.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
state.registerUsage[isn.dst] = i; state.registerUsage[isn.dst] = i;
if (state.rcpCount < 4) { if (state.rcpCount < 4) {

View File

@ -618,7 +618,7 @@ namespace randomx {
} }
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32(); const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
emit(MOV_RAX_I); emit(MOV_RAX_I);

View File

@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ret ret
*/ */
uint64_t randomx_reciprocal(uint64_t divisor) { uint64_t randomx_reciprocal(uint32_t divisor) {
assert(divisor != 0); assert(divisor != 0);
const uint64_t p2exp63 = 1ULL << 63; const uint64_t p2exp63 = 1ULL << 63;
const uint64_t q = p2exp63 / divisor;
const uint64_t r = p2exp63 % divisor;
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; #ifdef __GNUC__
const uint32_t shift = 64 - __builtin_clzll(divisor);
#else
uint32_t shift = 32;
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
unsigned bsr = 0; //highest set bit in divisor return (q << shift) + ((r << shift) / divisor);
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
bsr++;
for (unsigned shift = 0; shift < bsr; shift++) {
if (remainder >= divisor - remainder) {
quotient = quotient * 2 + 1;
remainder = remainder * 2 - divisor;
}
else {
quotient = quotient * 2;
remainder = remainder * 2;
}
}
return quotient;
} }
#if !RANDOMX_HAVE_FAST_RECIPROCAL #if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint64_t divisor) { uint64_t randomx_reciprocal_fast(uint32_t divisor) {
return randomx_reciprocal(divisor); return randomx_reciprocal(divisor);
} }

View File

@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" { extern "C" {
#endif #endif
uint64_t randomx_reciprocal(uint64_t); uint64_t randomx_reciprocal(uint32_t);
uint64_t randomx_reciprocal_fast(uint64_t); uint64_t randomx_reciprocal_fast(uint32_t);
#if defined(__cplusplus) #if defined(__cplusplus)
} }

View File

@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
} }
if (opcode < randomx::ceil_IMUL_RCP) { if (opcode < randomx::ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32(); const uint32_t divisor = instr.getImm32();
if (!randomx::isZeroOrPowerOf2(divisor)) { if (!randomx::isZeroOrPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT; instr.opcode |= DST_INT;