mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2025-01-10 17:18:53 +00:00
787 lines
27 KiB
Python
787 lines
27 KiB
Python
|
import random
|
||
|
import sys
|
||
|
import os
|
||
|
|
||
|
# Number of instructions emitted into the generated C program. It has to stay
# a power of two: jump targets are wrapped with `& (PROGRAM_SIZE - 1)`.
PROGRAM_SIZE = 1 << 10

# Instruction budget of the generated program; equals the value the generated
# C code loads into its `ic` counter (65536).
INSTRUCTION_COUNT = 1 << 16
|
||
|
|
||
|
def genBytes(count):
    """Return `count` random byte values (0-255) as a ', '-separated string.

    The result is meant to be pasted into a C array initializer.
    """
    values = []
    for _ in range(count):
        values.append(str(random.getrandbits(8)))
    return ', '.join(values)
|
||
|
|
||
|
class OperandType:
    """Integer tags (0-5) identifying how an operand is typed in the C output.

    The tag values are the keys of the lookup tables in the converter helpers
    of this file (declareType, immediateTo, registerTo, ...).
    """
    INT32, UINT32, INT64, UINT64, FLOAT, SHIFT = range(6)
|
||
|
|
||
|
def declareType(type):
    """Return the C type name used to declare a local of operand type `type`.

    Tags 0-5 map to int32_t, uint32_t, int64_t, uint64_t, double and int32_t
    (shift counts are declared as int32_t). Unknown tags yield None.
    """
    c_types = ("int32_t", "uint32_t", "int64_t", "uint64_t", "double", "int32_t")
    return dict(enumerate(c_types)).get(type)
|
||
|
|
||
|
def toSigned32(x):
    """Reinterpret `x` as signed 32-bit: subtract 2**32 when bit 31 is set."""
    if x & 0x80000000:
        return x - 0x100000000
    return x
|
||
|
|
||
|
def toSigned64(x):
    """Reinterpret `x` as signed 64-bit: subtract 2**64 when bit 63 is set."""
    if x & 0x8000000000000000:
        return x - 0x10000000000000000
    return x
|
||
|
|
||
|
def immediateTo(val, type):
    """Return the C literal text for immediate `val` used as operand `type`.

    Conversions per operand tag:
      0, 2 (INT32, INT64)   -> value sign-extended from 32 bits
      1, 3 (UINT32, UINT64) -> value unchanged
      4    (FLOAT)          -> sign-extended value shifted left by 32, as float
      5    (SHIFT)          -> low 6 bits of the value

    Only the conversion that is actually requested is computed.

    Raises:
        ValueError: if `type` is not one of the six operand tags. The text
            'None' used to be returned in that case and would have ended up
            in the generated C source unnoticed.
    """
    if type in (0, 2):
        literal = toSigned32(val)
    elif type in (1, 3):
        literal = val
    elif type == 4:
        literal = float(toSigned32(val) << 32)
    elif type == 5:
        literal = val & 63
    else:
        raise ValueError("unknown operand type: {0!r}".format(type))
    return repr(literal)
|
||
|
|
||
|
def registerTo(expr, type):
    """Wrap register expression `expr` so it can be read as operand `type`.

    Signed tags (0, 2) get an `(int64_t)` cast, the shift tag (5) is masked
    with `& 63`, everything else is passed through unchanged.
    """
    templates = dict.fromkeys((1, 3, 4), "{0}")
    templates.update({0: "(int64_t){0}", 2: "(int64_t){0}", 5: "({0} & 63)"})
    return templates.get(type).format(expr)
|
||
|
|
||
|
def registerFrom(num, type):
    """Return a C expression giving register `num` as a 64-bit integer value.

    Integer tags read `r<num>` directly; the FLOAT tag (4) reads `f<num>`
    through a cast to convertible_t and takes its `.u64` member.
    """
    templates = dict.fromkeys((0, 1, 2, 3, 5), "r{0}")
    templates[4] = "((convertible_t)f{0}).u64"
    return templates.get(type).format(num)
|
||
|
|
||
|
def convertibleTo(expr, type):
    """Return C code that reads convertible_t expression `expr` as `type`.

    Integer tags select the matching union member, FLOAT (4) converts the
    `.i64` member to double, SHIFT (5) masks `.u64` with 63.
    """
    readers = (
        "{0}.i32",
        "{0}.u32",
        "{0}.i64",
        "{0}.u64",
        "(double){0}.i64",
        "({0}.u64 & 63)",
    )
    return dict(enumerate(readers)).get(type).format(expr)
|
||
|
|
||
|
def convertibleFrom(expr, type):
    """Return the C member access of convertible_t `expr` for operand `type`.

    Unlike convertibleTo, the FLOAT tag (4) selects the `.f64` member.
    """
    members = (
        "{0}.i32",
        "{0}.u32",
        "{0}.i64",
        "{0}.u64",
        "{0}.f64",
        "({0}.u64 & 63)",
    )
    return dict(enumerate(members)).get(type).format(expr)
|
||
|
|
||
|
def getRegister(num, type):
    """Return the C register variable name for `num`: `f<num>` for FLOAT (4),
    `r<num>` for every other operand tag."""
    names = dict.fromkeys(range(6), "r{0}")
    names[4] = "f{0}"
    return names.get(type).format(num)
|
||
|
|
||
|
def writeInitialValues(file):
    """Write the C statements that set up the generated program's state.

    Emits, in order: the start-clock capture, random values for r0-r7 and
    f0-f7, the random 128-bit G register, the initial mmu.m0 address, the
    scratchpad fill via aesInitialize, zeroed mmu.mx / mmu.sp, the instruction
    counter and the MXCSR setup.

    The instruction counter is taken from INSTRUCTION_COUNT instead of a
    duplicated literal; the emitted text is the same while the constant keeps
    its value of 65536.

    Args:
        file: writable text stream receiving the C source.
    """
    file.write("\tclock_t clockStart = clock(), clockEnd;\n")
    for i in range(8):
        file.write("\tr{0} = {1}ULL;\n".format(i, random.getrandbits(64)))
    for i in range(8):
        file.write("\tf{0} = {1};\n".format(i, toSigned64(random.getrandbits(64))))
    file.write("\tG = _mm_set_epi64x({0}ULL, {1}ULL);\n".format(random.getrandbits(64), random.getrandbits(64)))
    # The low 8 bits of the initial address are cleared by the mask.
    file.write("\tmmu.m0 = {0};\n".format(random.getrandbits(32) & 0xFFFFFF00))
    file.write("\taesInitialize((__m128i*)aesKey, (__m128i*)aesSeed, (__m128i*)scratchpad, SCRATCHPAD_SIZE);\n")
    file.write("\tmmu.mx = 0;\n")
    file.write("\tmmu.sp = 0;\n")
    file.write("\tic = {0};\n".format(INSTRUCTION_COUNT))
    file.write("\tmxcsr = (_mm_getcsr() | _MM_FLUSH_ZERO_ON) & ~_MM_ROUND_MASK; //flush denormals to zero, round to nearest\n")
    file.write("\t_mm_setcsr(mxcsr);\n")
|
||
|
|
||
|
def writeEpilog(file):
    """Write the tail of the generated main(): the `end:` label, a dump of all
    registers, the scratchpad checksum, the runtime and the closing brace.

    Args:
        file: writable text stream receiving the C source.
    """
    chunks = ["\tend:\n", "\t\tclockEnd = clock();\n"]
    for reg in range(8):
        chunks.append('\t\tprintf("r{0} = %-36llu f{0} = %g\\n", r{0}, f{0});\n'.format(reg))
    summary = "".join((
        "\t\tuint64_t spadsum = 0;\n",
        "\t\tfor(int i = 0; i < SCRATCHPAD_LENGTH; ++i) {\n",
        "\t\t spadsum += scratchpad[i].u64;\n",
        "\t\t}\n",
        '\t\tprintf("scratchpad sum = %llu\\n", spadsum);\n',
        '\t\tprintf("runtime: %f\\n", (clockEnd - clockStart) / (double)CLOCKS_PER_SEC);\n',
    ))
    chunks.append(summary)
    chunks.append("\t\treturn 0;")
    chunks.append("}")
    # One write per chunk, in the same order as the output must appear.
    for chunk in chunks:
        file.write(chunk)
|
||
|
|
||
|
def writeCommon(file, i, symbol, type, name):
    """Write the preamble shared by every generated instruction.

    The emitted C opens the `i_<i>` block (with `name` as a comment), leaves
    via `goto end` when the `ic` counter is exhausted, XORs register `ra` with
    the low 32 bits of G, takes `addr` from it, rotates it by 32 bits and
    shuffles G. When `symbol.gen` is 0, G is additionally passed through one
    AES round keyed by the `xb` register and register `ra`.

    Args:
        file: writable text stream receiving the C source.
        i: index of the instruction, used for its label.
        symbol: decoded instruction (reads ra, gen, xb).
        type: operand tag used to pick how the `xb` register is read.
        name: mnemonic written as a trailing comment.
    """
    ra = symbol.ra
    lines = [
        "\ti_{0}: {{ //{1}\n".format(i, name),
        "\t\tif(0 == ic--) goto end;\n",
        "\t\tr{0} ^= (uint32_t)_mm_cvtsi128_si32(G);\n".format(ra),
        "\t\taddr_t addr = r{0};\n".format(ra),
        "\t\tr{0} = __rolq(r{0}, 32);\n".format(ra),
        "\t\tG = _mm_shuffle_epi32(G, _MM_SHUFFLE(1, 2, 3, 0));\n",
    ]
    if symbol.gen == 0:
        lines.append("\t\t__m128i K = _mm_set_epi64x({0}, r{1});\n".format(registerFrom(symbol.xb, type), ra))
        lines.append("\t\tG = _mm_aesenc_si128(G, K);\n")
    for line in lines:
        file.write(line)
|
||
|
|
||
|
def readA(symbol, type):
    """Return the C expression that loads operand A for `symbol`.

    `symbol.loca` picks the source: 0-3 read through readDram, 4 reads the
    256K scratchpad window, 5-7 the 16K window. The value is then converted
    to `type` with convertibleTo.
    """
    sources = dict.fromkeys(range(4), "readDram(&mmu, addr)")
    sources[4] = "SCRATCHPAD_256K(addr)"
    sources.update(dict.fromkeys((5, 6, 7), "SCRATCHPAD_16K(addr)"))
    return convertibleTo(sources.get(symbol.loca), type)
|
||
|
|
||
|
def writeC(symbol, type):
    """Return the C lvalue that receives the result C of `symbol`.

    The destination is selected by `symbol.loca`: 0 is the 256K scratchpad
    window, 1/4/5 the 16K window, and 2/3/6/7 the register numbered
    `symbol.xb`.
    """
    targets = dict.fromkeys((2, 3, 6, 7), "")
    targets[0] = "SCRATCHPAD_256K(addr)"
    targets.update(dict.fromkeys((1, 4, 5), "SCRATCHPAD_16K(addr)"))
    target = targets.get(symbol.loca)
    if target != "":
        return convertibleFrom(target, type)
    # An empty entry means "store into a register".
    return getRegister(symbol.xb, type)
|
||
|
|
||
|
def readB(symbol, type):
    """Return the C expression for operand B of `symbol`.

    B is register `symbol.xb` when `symbol.locb` is below 6, otherwise the
    immediate `symbol.imm1`; both are converted for operand tag `type`.
    """
    fromRegister = symbol.locb < 6
    if not fromRegister:
        return immediateTo(symbol.imm1, type)
    return registerTo(getRegister(symbol.xb, type), type)
|
||
|
|
||
|
class CodeSymbol:
    """Fields decoded from one instruction word `qi`.

    Bit layout: opcode 0-7, loca 8-10, ra 11-13, gen 14-15, locb 16-18,
    xb 19-21, imm0 24-31, imm1 everything from bit 32 upwards (unmasked).
    """

    def __init__(self, qi):
        layout = (
            ("opcode", 0, 0xFF),
            ("loca", 8, 0x7),
            ("ra", 11, 0x7),
            ("gen", 14, 0x3),
            ("locb", 16, 0x7),
            ("xb", 19, 0x7),
            ("imm0", 24, 0xFF),
        )
        for field, shift, mask in layout:
            setattr(self, field, (qi >> shift) & mask)
        self.imm1 = qi >> 32
|
||
|
|
||
|
def writeOperation(file, i, symbol, type, name, op):
    """Write a complete binary instruction of the form `C = A <op> B`.

    Args:
        file: writable text stream receiving the C source.
        i: index of the instruction.
        symbol: decoded instruction.
        type: operand tag shared by A, B and the destination.
        name: mnemonic written as a comment.
        op: C binary operator placed between A and B.
    """
    writeCommon(file, i, symbol, type, name)
    ctype = declareType(type)
    for operand, reader in (("A", readA), ("B", readB)):
        file.write("\t\t{0} {1} = {2};\n".format(ctype, operand, reader(symbol, type)))
    file.write("\t\t{0} = A {1} B; }}\n".format(writeC(symbol, type), op))
|
||
|
|
||
|
def write_ADD_64(file, i, symbol):
    """Emit an ADD_64 instruction: unsigned 64-bit `A + B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='ADD_64', op='+')
|
||
|
|
||
|
def write_ADD_32(file, i, symbol):
    """Emit an ADD_32 instruction: unsigned 32-bit `A + B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT32, name='ADD_32', op='+')
|
||
|
|
||
|
def write_SUB_64(file, i, symbol):
    """Emit a SUB_64 instruction: unsigned 64-bit `A - B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='SUB_64', op='-')
|
||
|
|
||
|
def write_SUB_32(file, i, symbol):
    """Emit a SUB_32 instruction: unsigned 32-bit `A - B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT32, name='SUB_32', op='-')
|
||
|
|
||
|
def write_MUL_64(file, i, symbol):
    """Emit a MUL_64 instruction: unsigned 64-bit `A * B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='MUL_64', op='*')
|
||
|
|
||
|
def write_MULH_64(file, i, symbol):
    """Emit a MULH_64 instruction: the upper 64 bits of the unsigned 128-bit
    product of A and B."""
    operandType = OperandType.UINT64
    ctype = declareType(operandType)
    writeCommon(file, i, symbol, operandType, 'MULH_64')
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operandType)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operandType)))
    destination = writeC(symbol, operandType)
    file.write("\t\t{0} = ((uint128_t)A * B) >> 64; }}\n".format(destination))
|
||
|
|
||
|
def write_MUL_32(file, i, symbol):
    """Emit a MUL_32 instruction: unsigned 32-bit operands multiplied into a
    64-bit unsigned result."""
    operandType = OperandType.UINT32
    ctype = declareType(operandType)
    writeCommon(file, i, symbol, operandType, 'MUL_32')
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operandType)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operandType)))
    # The destination is addressed as a 64-bit value to hold the full product.
    destination = writeC(symbol, OperandType.UINT64)
    file.write("\t\t{0} = (uint64_t)A * B; }}\n".format(destination))
|
||
|
|
||
|
def write_IMUL_32(file, i, symbol):
    """Emit an IMUL_32 instruction: signed 32-bit operands multiplied into a
    64-bit signed result."""
    operandType = OperandType.INT32
    ctype = declareType(operandType)
    writeCommon(file, i, symbol, operandType, 'IMUL_32')
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operandType)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operandType)))
    # The destination is addressed as a 64-bit value to hold the full product.
    destination = writeC(symbol, OperandType.INT64)
    file.write("\t\t{0} = (int64_t)A * B; }}\n".format(destination))
|
||
|
|
||
|
def write_IMULH_64(file, i, symbol):
    """Emit an IMULH_64 instruction: the upper 64 bits of the signed 128-bit
    product of A and B."""
    operandType = OperandType.INT64
    ctype = declareType(operandType)
    writeCommon(file, i, symbol, operandType, 'IMULH_64')
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operandType)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operandType)))
    destination = writeC(symbol, operandType)
    file.write("\t\t{0} = ((int128_t)A * B) >> 64; }}\n".format(destination))
|
||
|
|
||
|
def write_DIV_64(file, i, symbol):
    """Emit a DIV_64 instruction: unsigned 64-bit A divided by 32-bit B.

    The generated code replaces a zero divisor with 1 before dividing.

    Args:
        file: writable text stream receiving the C source.
        i: index of the instruction.
        symbol: decoded instruction.
    """
    type = OperandType.UINT64
    writeCommon(file, i, symbol, type, 'DIV_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(type), readA(symbol, type)))
    # The divisor is deliberately declared and read as a 32-bit unsigned value.
    file.write("\t\t{0} B = {1};\n".format(declareType(OperandType.UINT32), readB(symbol, OperandType.UINT32)))
    # Constant line: the former .format() call had no placeholders to fill and
    # only evaluated readB() for nothing.
    file.write("\t\tif(B == 0) B = 1;\n")
    file.write("\t\t{0} = A / B; }}\n".format(writeC(symbol, type)))
|
||
|
|
||
|
def write_IDIV_64(file, i, symbol):
    """Emit an IDIV_64 instruction: signed 64-bit A divided by 32-bit B.

    The generated code replaces a zero divisor with 1. It also guards the one
    quotient that does not fit in int64_t, INT64_MIN / -1, which is undefined
    behaviour in C (and traps on x86); in that case the result is A itself,
    i.e. the two's-complement wrapped quotient.

    Args:
        file: writable text stream receiving the C source.
        i: index of the instruction.
        symbol: decoded instruction.
    """
    type = OperandType.INT64
    writeCommon(file, i, symbol, type, 'IDIV_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(type), readA(symbol, type)))
    # The divisor is deliberately declared and read as a 32-bit signed value.
    file.write("\t\t{0} B = {1};\n".format(declareType(OperandType.INT32), readB(symbol, OperandType.INT32)))
    # Constant line: the former .format() call had no placeholders to fill and
    # only evaluated readB() for nothing.
    file.write("\t\tif(B == 0) B = 1;\n")
    # INT64_MIN comes from <stdint.h>, which the generated prolog includes.
    file.write("\t\t{0} = (A == INT64_MIN && B == -1) ? A : A / B; }}\n".format(writeC(symbol, type)))
|
||
|
|
||
|
def write_AND_64(file, i, symbol):
    """Emit an AND_64 instruction: unsigned 64-bit `A & B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='AND_64', op='&')
|
||
|
|
||
|
def write_AND_32(file, i, symbol):
    """Emit an AND_32 instruction: unsigned 32-bit `A & B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT32, name='AND_32', op='&')
|
||
|
|
||
|
def write_OR_64(file, i, symbol):
    """Emit an OR_64 instruction: unsigned 64-bit `A | B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='OR_64', op='|')
|
||
|
|
||
|
def write_OR_32(file, i, symbol):
    """Emit an OR_32 instruction: unsigned 32-bit `A | B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT32, name='OR_32', op='|')
|
||
|
|
||
|
def write_XOR_64(file, i, symbol):
    """Emit a XOR_64 instruction: unsigned 64-bit `A ^ B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT64, name='XOR_64', op='^')
|
||
|
|
||
|
def write_XOR_32(file, i, symbol):
    """Emit a XOR_32 instruction: unsigned 32-bit `A ^ B`."""
    writeOperation(file, i, symbol, type=OperandType.UINT32, name='XOR_32', op='^')
|
||
|
|
||
|
def write_SHL_64(file, i, symbol):
    """Emit a SHL_64 instruction: unsigned 64-bit A shifted left by B, where B
    is read as a shift count (masked to 0-63)."""
    valueType = OperandType.UINT64
    countType = OperandType.SHIFT
    writeCommon(file, i, symbol, valueType, 'SHL_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(valueType), readA(symbol, valueType)))
    file.write("\t\t{0} B = {1};\n".format(declareType(countType), readB(symbol, countType)))
    file.write("\t\t{0} = A << B; }}\n".format(writeC(symbol, valueType)))
|
||
|
|
||
|
def write_SHR_64(file, i, symbol):
    """Emit a SHR_64 instruction: unsigned 64-bit A shifted right by B, where
    B is read as a shift count (masked to 0-63)."""
    valueType = OperandType.UINT64
    countType = OperandType.SHIFT
    writeCommon(file, i, symbol, valueType, 'SHR_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(valueType), readA(symbol, valueType)))
    file.write("\t\t{0} B = {1};\n".format(declareType(countType), readB(symbol, countType)))
    file.write("\t\t{0} = A >> B; }}\n".format(writeC(symbol, valueType)))
|
||
|
|
||
|
def write_SAR_64(file, i, symbol):
    """Emit a SAR_64 instruction: signed 64-bit A shifted right by B, where B
    is read as a shift count (masked to 0-63)."""
    valueType = OperandType.INT64
    countType = OperandType.SHIFT
    writeCommon(file, i, symbol, valueType, 'SAR_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(valueType), readA(symbol, valueType)))
    file.write("\t\t{0} B = {1};\n".format(declareType(countType), readB(symbol, countType)))
    file.write("\t\t{0} = A >> B; }}\n".format(writeC(symbol, valueType)))
|
||
|
|
||
|
def write_ROL_64(file, i, symbol):
    """Emit a ROL_64 instruction: unsigned 64-bit A rotated left by B through
    `__rolq`, where B is read as a shift count (masked to 0-63)."""
    valueType = OperandType.UINT64
    countType = OperandType.SHIFT
    writeCommon(file, i, symbol, valueType, 'ROL_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(valueType), readA(symbol, valueType)))
    file.write("\t\t{0} B = {1};\n".format(declareType(countType), readB(symbol, countType)))
    file.write("\t\t{0} = __rolq(A, B); }}\n".format(writeC(symbol, valueType)))
|
||
|
|
||
|
def write_ROR_64(file, i, symbol):
    """Emit a ROR_64 instruction: unsigned 64-bit A rotated right by B through
    `__rorq`, where B is read as a shift count (masked to 0-63)."""
    valueType = OperandType.UINT64
    countType = OperandType.SHIFT
    writeCommon(file, i, symbol, valueType, 'ROR_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(valueType), readA(symbol, valueType)))
    file.write("\t\t{0} B = {1};\n".format(declareType(countType), readB(symbol, countType)))
    file.write("\t\t{0} = __rorq(A, B); }}\n".format(writeC(symbol, valueType)))
|
||
|
|
||
|
def write_FADD(file, i, symbol):
    """Emit an FADD instruction: double-precision `A + B`."""
    writeOperation(file, i, symbol, type=OperandType.FLOAT, name='FADD', op='+')
|
||
|
|
||
|
def write_FSUB(file, i, symbol):
    """Emit an FSUB instruction: double-precision `A - B`."""
    writeOperation(file, i, symbol, type=OperandType.FLOAT, name='FSUB', op='-')
|
||
|
|
||
|
def write_FMUL(file, i, symbol):
    """Emit an FMUL instruction: double-precision `A * B`."""
    writeOperation(file, i, symbol, type=OperandType.FLOAT, name='FMUL', op='*')
|
||
|
|
||
|
def write_FDIV(file, i, symbol):
    """Emit an FDIV instruction: double-precision `A / B`."""
    writeOperation(file, i, symbol, type=OperandType.FLOAT, name='FDIV', op='/')
|
||
|
|
||
|
def write_FSQRT(file, i, symbol):
    """Emit an FSQRT instruction: square root of the absolute value of A.

    The operand is loaded into the SSE register with `_mm_load_sd`, which
    reads exactly one double. The previous `_mm_load_pd(&A)` read two doubles
    (16 bytes) and required 16-byte alignment, while A is a single 8-byte
    local variable.

    Args:
        file: writable text stream receiving the C source.
        i: index of the instruction.
        symbol: decoded instruction.
    """
    type = OperandType.FLOAT
    writeCommon(file, i, symbol, type, 'FSQRT')
    file.write("\t\t{0} A = fabs({1});\n".format(declareType(type), readA(symbol, type)))
    # _mm_sqrt_sd takes the square root of the low element of its second
    # argument; _mm_cvtsd_f64 extracts that low element again.
    file.write("\t\t{0} = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&A))); }}\n".format(writeC(symbol, type)))
|
||
|
|
||
|
def write_FROUND(file, i, symbol):
    """Emit an FROUND instruction.

    The generated code reads A as an unsigned 64-bit integer, stores it into
    the floating-point destination and ORs `(A << 13) & _MM_ROUND_MASK` into
    the saved MXCSR value before writing it back with `_mm_setcsr`.
    """
    resultType = OperandType.FLOAT
    sourceType = OperandType.UINT64
    writeCommon(file, i, symbol, resultType, 'FROUND')
    file.write("\t\t{0} A = {1};\n".format(declareType(sourceType), readA(symbol, sourceType)))
    file.write("\t\t{0} = A;\n".format(writeC(symbol, resultType)))
    # Written verbatim (no .format), hence the single closing brace.
    file.write("\t\t_mm_setcsr(mxcsr | ((uint32_t)(A << 13) & _MM_ROUND_MASK)); }\n")
|
||
|
|
||
|
def write_CALL(file, i, symbol):
    """Emit a CALL instruction.

    The generated code pushes A and the address of the next instruction, then
    jumps `imm0` instructions past it (indices wrap at PROGRAM_SIZE). When
    `symbol.locb` is below 6 the call is guarded by
    `(uint32_t)<register xb> <= imm1`, and the fall-through path stores A
    into the destination; otherwise the call is unconditional.
    """
    operandType = OperandType.UINT64
    conditional = symbol.locb < 6
    wrapMask = PROGRAM_SIZE - 1
    writeCommon(file, i, symbol, operandType, 'CALL')
    file.write("\t\t{0} A = {1};\n".format(declareType(operandType), readA(symbol, operandType)))
    if conditional:
        register = getRegister(symbol.xb, operandType)
        limit = immediateTo(symbol.imm1, operandType)
        file.write("\t\tif((uint32_t){0} <= {1}) {{\n".format(register, limit))
    file.write("\t\t\tPUSH_VALUE(A);\n")
    file.write("\t\t\tPUSH_ADDRESS(&&i_{0});\n".format((i + 1) & wrapMask))
    file.write("\t\t\tgoto i_{0};\n".format((i + 1 + symbol.imm0) & wrapMask))
    if conditional:
        file.write("\t\t}}\n\t\t{0} = A;".format(writeC(symbol, operandType)))
    file.write(" }\n")
|
||
|
|
||
|
def write_RET(file, i, symbol):
    """Emit a RET instruction.

    When the stack is not empty (and, for `symbol.locb` below 6, when
    `(uint32_t)<register xb> <= imm1` holds), the generated code pops a return
    address and a value C, stores `A ^ C` into the destination and jumps to
    the popped address. Otherwise it stores A into the destination.
    """
    operandType = OperandType.UINT64
    writeCommon(file, i, symbol, operandType, 'RET')
    pieces = [
        "\t\t{0} A = {1};\n".format(declareType(operandType), readA(symbol, operandType)),
        "\t\tif(!STACK_IS_EMPTY()",
    ]
    if symbol.locb < 6:
        register = getRegister(symbol.xb, operandType)
        limit = immediateTo(symbol.imm1, operandType)
        pieces.append(" && (uint32_t){0} <= {1}".format(register, limit))
    pieces.extend((
        ") {\n",
        "\t\t\tvoid* target = POP_ADDRESS();\n",
        "\t\t\tuint64_t C = POP_VALUE();\n",
        "\t\t\t{0} = A ^ C;\n".format(writeC(symbol, operandType)),
        "\t\t\tgoto *target;\n",
        "\t\t}}\n\t\t{0} = A; }}\n".format(writeC(symbol, operandType)),
    ))
    for piece in pieces:
        file.write(piece)
|
||
|
|
||
|
# Maps every opcode byte (0-255) to the function that emits its C code.
# The table is stored run-length encoded: each writer owns the next `count`
# consecutive opcodes, starting at 0. The counts add up to 256.
opcodeMap = dict(enumerate(
    writer
    for writer, count in (
        (write_ADD_64, 14),    # 0-13
        (write_ADD_32, 7),     # 14-20
        (write_SUB_64, 14),    # 21-34
        (write_SUB_32, 7),     # 35-41
        (write_MUL_64, 4),     # 42-45
        (write_MULH_64, 4),    # 46-49
        (write_MUL_32, 4),     # 50-53
        (write_IMUL_32, 4),    # 54-57
        (write_IMULH_64, 4),   # 58-61
        (write_DIV_64, 1),     # 62
        (write_IDIV_64, 1),    # 63
        (write_AND_64, 13),    # 64-76
        (write_AND_32, 6),     # 77-82
        (write_OR_64, 13),     # 83-95
        (write_OR_32, 6),      # 96-101
        (write_XOR_64, 14),    # 102-115
        (write_XOR_32, 6),     # 116-121
        (write_SHL_64, 7),     # 122-128
        (write_SHR_64, 4),     # 129-132
        (write_SAR_64, 3),     # 133-135
        (write_ROL_64, 11),    # 136-146
        (write_ROR_64, 11),    # 147-157
        (write_FADD, 18),      # 158-175
        (write_FSUB, 18),      # 176-193
        (write_FMUL, 18),      # 194-211
        (write_FDIV, 3),       # 212-214
        (write_FSQRT, 7),      # 215-221
        (write_FROUND, 2),     # 222-223
        (write_CALL, 17),      # 224-240
        (write_RET, 15),       # 241-255
    )
    for _ in range(count)
))
|
||
|
|
||
|
def writeCode(file, i, symbol):
    """Write the C code of instruction `i` by dispatching on `symbol.opcode`
    through opcodeMap."""
    emitter = opcodeMap.get(symbol.opcode)
    emitter(file, i, symbol)
|
||
|
|
||
|
def writeMain(file):
    """Write the opening of the generated main(): its header and the local
    declarations for the registers, scratchpad, stack, mmu state and MXCSR.

    Args:
        file: writable text stream receiving the C source.
    """
    declarations = (
        "int main() {\n",
        " register uint64_t r0, r1, r2, r3, r4, r5, r6, r7;\n",
        " register double f0, f1, f2, f3, f4, f5, f6, f7;\n",
        " register __m128i G; //g0-g3\n",
        " register uint64_t ic;\n",
        " convertible_t scratchpad[SCRATCHPAD_LENGTH];\n",
        " stack_t stack[STACK_LENGTH];\n",
        " mmu_t mmu;\n",
        " uint32_t mxcsr;\n",
    )
    file.write("".join(declarations))
|
||
|
|
||
|
def writeProlog(file):
    """Write the fixed head of the generated C file in a single write.

    The prolog consists of the #include lines, the addr_t / 128-bit integer /
    convertible_t / stack_t / mmu_t typedefs, the scratchpad and stack macros,
    readDram() and the AES helpers (sl_xor, AES_GENKEY_SUB, aes_genkey,
    aes_round, aesInitialize).

    Args:
        file: writable text stream receiving the C source.
    """
    lines = [
        # headers
        "#include <stdint.h>\n",
        "#include <time.h>\n",
        "#include <stdio.h>\n",
        "#include <x86intrin.h>\n",
        "#include <emmintrin.h>\n",
        "#include <wmmintrin.h>\n",
        "#include <math.h>\n",
        # types
        "typedef uint32_t addr_t;\n",
        "typedef unsigned __int128 uint128_t;\n",
        "typedef __int128 int128_t;\n",
        "typedef union {\n",
        " double f64;\n",
        " int64_t i64;\n",
        " uint64_t u64;\n",
        " int32_t i32;\n",
        " uint32_t u32;\n",
        "} convertible_t;\n",
        "typedef union {\n",
        " uint64_t value;\n",
        " void* address;\n",
        "} stack_t;\n",
        "typedef struct {\n",
        " addr_t m0;\n",
        " addr_t m1;\n",
        " addr_t mx;\n",
        " uint32_t sp;\n",
        "} mmu_t;\n",
        # macros
        "#define SCRATCHPAD_SIZE (256 * 1024)\n",
        "#define SCRATCHPAD_LENGTH (SCRATCHPAD_SIZE / sizeof(convertible_t))\n",
        "#define SCRATCHPAD_MASK14 (16 * 1024 / sizeof(convertible_t) - 1)\n",
        "#define SCRATCHPAD_MASK18 (SCRATCHPAD_LENGTH - 1)\n",
        "#define SCRATCHPAD_16K(x) scratchpad[(x >> 3) & SCRATCHPAD_MASK14]\n",
        "#define SCRATCHPAD_256K(x) scratchpad[(x >> 3) & SCRATCHPAD_MASK18]\n",
        "#define STACK_LENGTH (32 * 1024)\n",
        "#define DRAM(x) __rolq(6364136223846793005*(x)+1442695040888963407,32)\n",
        "//#define PREFETCH(x) _mm_prefetch(x, _MM_HINT_T0)\n",
        "#define PREFETCH(x)\n",
        "#define PUSH_VALUE(x) stack[mmu.sp++].value = x\n",
        "#define PUSH_ADDRESS(x) stack[mmu.sp++].address = x\n",
        "#define STACK_IS_EMPTY() (mmu.sp == 0)\n",
        "#define POP_VALUE() stack[--mmu.sp].value\n",
        "#define POP_ADDRESS() stack[--mmu.sp].address\n",
        # readDram
        "static convertible_t readDram(mmu_t* mmu, addr_t addr) {\n",
        " convertible_t data;\n",
        " data.u64 = DRAM(mmu->m0); //TODO\n",
        " mmu->m0 += 8;\n",
        " mmu->mx ^= addr;\n",
        " if((mmu->m0 & 255) == 192) {\n",
        " mmu->m1 = mmu->mx & 0xFFFFFF00;\n",
        " PREFETCH(mmu->m1); //TODO\n",
        " }\n",
        " if((mmu->m0 & 255) == 0)\n",
        " mmu->m0 = mmu->m1;\n",
        " return data;\n",
        "}\n",
        # sl_xor
        "static inline __m128i sl_xor(__m128i tmp1) {\n",
        " __m128i tmp4;\n",
        " tmp4 = _mm_slli_si128(tmp1, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " tmp4 = _mm_slli_si128(tmp4, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " tmp4 = _mm_slli_si128(tmp4, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " return tmp1;\n",
        "}\n",
        # AES_GENKEY_SUB (each line ends in a C line continuation)
        "#define AES_GENKEY_SUB(rcon) do { \\\n",
        " __m128i xout1 = _mm_aeskeygenassist_si128(xout2, rcon); \\\n",
        " xout1 = _mm_shuffle_epi32(xout1, 0xFF); \\\n",
        " xout0 = sl_xor(xout0); \\\n",
        " xout0 = _mm_xor_si128(xout0, xout1); \\\n",
        " xout1 = _mm_aeskeygenassist_si128(xout0, 0x00); \\\n",
        " xout1 = _mm_shuffle_epi32(xout1, 0xAA); \\\n",
        " xout2 = sl_xor(xout2); \\\n",
        " xout2 = _mm_xor_si128(xout2, xout1); } while(0)\n",
        # aes_genkey
        "static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) {\n",
        " __m128i xout0, xout2;\n",
        " xout0 = _mm_load_si128(memory);\n",
        " xout2 = _mm_load_si128(memory+1);\n",
        " *k0 = xout0;\n",
        " *k1 = xout2;\n",
        " AES_GENKEY_SUB(0x01);\n",
        " *k2 = xout0;\n",
        " *k3 = xout2;\n",
        " AES_GENKEY_SUB(0x02);\n",
        " *k4 = xout0;\n",
        " *k5 = xout2;\n",
        " AES_GENKEY_SUB(0x04);\n",
        " *k6 = xout0;\n",
        " *k7 = xout2;\n",
        " AES_GENKEY_SUB(0x08);\n",
        " *k8 = xout0;\n",
        " *k9 = xout2;\n",
        "}\n",
        # aes_round
        "static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) {\n",
    ]
    lines.extend(" *x{0} = _mm_aesenc_si128(*x{0}, key);\n".format(lane) for lane in range(8))
    lines.extend((
        "}\n",
        # aesInitialize
        "static void aesInitialize(__m128i* key, __m128i* seed, __m128i* output, size_t count) {\n",
        " \n",
        " __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;\n",
        " __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;\n",
        " \n",
        " aes_genkey(key, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);\n",
        " \n",
    ))
    lines.extend(" xin{0} = _mm_load_si128(seed + {0});\n".format(lane) for lane in range(8))
    lines.extend((
        " \n",
        " for (size_t i = 0; i < count / sizeof(__m128i); i += 8)\n",
        " {\n",
    ))
    lines.extend(
        " aes_round(k{0}, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n".format(key)
        for key in range(10)
    )
    lines.append(" \n")
    lines.extend(" _mm_store_si128(output + i + {0}, xin{0});\n".format(lane) for lane in range(8))
    lines.extend((
        " }\n",
        "}\n",
    ))
    file.write("".join(lines))
|
||
|
|
||
|
def _writeProgram(file):
    """Write one complete random C test program to `file`.

    Order of the output: prolog, random AES key and seed arrays, start of
    main(), initial register values, PROGRAM_SIZE random instructions, a jump
    back to the first instruction, and the epilog.
    """
    writeProlog(file)
    for arrayName, length in (("aesKey", 32), ("aesSeed", 128)):
        file.write("const unsigned char {0}[{1}] = {{ {2} }};\n".format(arrayName, length, genBytes(length)))
    writeMain(file)
    writeInitialValues(file)
    for i in range(PROGRAM_SIZE):
        writeCode(file, i, CodeSymbol(random.getrandbits(64)))
    # Falling off the last instruction loops back to the first one.
    file.write("\t\tgoto i_0;\n")
    writeEpilog(file)


# The program is written to standard output; leaving the with-block closes it.
with sys.stdout as file:
    _writeProgram(file)
|