# VMProtect

import sys
from struct import pack
from io import BytesIO

from ctypes import *
from winappdbg import Debug, EventHandler
import pefile

from capstone import *
from capstone.x86 import *

from unicorn import *
from unicorn.x86_const import *

from keystone import *

# Size in bytes of one PE section header (the 40-byte record built by section_header()).
SECTION_HEADER_SIZE = 0x28
# Size in bytes of one import descriptor (five DWORDs, see image_import_descriptor()).
IMAGE_DESCRIPTOR_HEADER_SIZE = 0x14

# Shared 64-bit x86 disassembler; detail mode is enabled because the analysis
# code below inspects instruction operands.
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True

# NOTE: a `global` statement at module level has no effect; both names are
# actually created when debugger() assigns them.
global target_pe
global mem_pages

# ----------------------------------------------------------------------------------------------------------------
def align_data(data, blocksize):
    """Return ``data`` zero-padded up to a multiple of ``blocksize`` bytes.

    A new object is returned whenever padding is required and the argument is
    never modified, so passing a ``bytearray`` does not extend the caller's
    buffer in place.

    :param data: bytes-like buffer (``bytes`` or ``bytearray``).
    :param blocksize: alignment in bytes; must be non-zero.
    :return: ``data`` followed by the minimum number of zero bytes needed to
        make its length a multiple of ``blocksize``.
    """
    # -len % blocksize is the distance to the next multiple (0 if aligned).
    padding = -len(data) % blocksize
    return data + b'\x00' * padding

# ----------------------------------------------------------------------------------------------------------------
def align_int(integer, blocksize):
    """Round ``integer`` up to the nearest multiple of ``blocksize``.

    A value that is already a multiple of ``blocksize`` is returned unchanged.
    """
    # -integer % blocksize is the gap to the next multiple (0 when aligned).
    return integer + (-integer % blocksize)

# ----------------------------------------------------------------------------------------------------------------
def image_import_descriptor(OriginalFirstThunk, Name, FirstThunk):
    """Serialise one import descriptor as five little-endian DWORDs.

    +00     DWORD   OriginalFirstThunk
    +04     DWORD   TimeDateStamp      (always written as 0)
    +08     DWORD   ForwarderChain     (always written as 0)
    +12     DWORD   Name
    +16     DWORD   FirstThunk
    """
    fields = (
        OriginalFirstThunk,
        0,  # TimeDateStamp
        0,  # ForwarderChain
        Name,
        FirstThunk,
    )
    return pack('<5L', *fields)

# ----------------------------------------------------------------------------------------------------------------
def image_import_by_name(hint, name):
    """Serialise one hint/name entry.

    +00     WORD    Hint   (little-endian)
    +02     BYTE[]  Name   (encoded string followed by a single NUL byte)
    """
    hint_field = pack('<H', hint)
    name_field = name.encode()
    return b''.join((hint_field, name_field, b'\0'))


# ----------------------------------------------------------------------------------------------------------------
def image_thunk_data(imageimportbyname_rva, x64, lastthunk=True):
    """Serialise one thunk entry, optionally followed by a zero terminator.

    Entries are 8 bytes wide when ``x64`` is true and 4 bytes otherwise.
    When ``lastthunk`` is true an all-zero entry of the same width is appended.
    """
    entry_format = '<Q' if x64 else '<L'
    values = [imageimportbyname_rva]
    if lastthunk:
        values.append(0)
    return b''.join(pack(entry_format, value) for value in values)

# ----------------------------------------------------------------------------------------------------------------
def build_image_import_by_name(names, base=0):
    """Concatenate hint/name entries for ``names``.

    Each entry's hint is its index in ``names``.

    :param names: iterable of function names.
    :param base: accepted but not used by this function.
    :return: ``(offsets, blob)`` where ``offsets[i]`` is the byte offset of
        entry ``i`` inside ``blob``.
    """
    offsets = []
    entries = []
    cursor = 0
    for hint, name in enumerate(names):
        entry = image_import_by_name(hint, name)
        offsets.append(cursor)
        entries.append(entry)
        cursor += len(entry)
    return offsets, b''.join(entries)

# ----------------------------------------------------------------------------------------------------------------
def build_image_thunk_data(offsets, x64, base=0):
    """Build a thunk array with one entry per offset.

    Every entry holds ``base + offset``; only the final entry is followed by
    the zero terminator. An empty ``offsets`` yields an empty byte string.
    """
    last_index = len(offsets) - 1
    return b''.join(
        image_thunk_data(base + offset, x64, index == last_index)
        for index, offset in enumerate(offsets)
    )

# ----------------------------------------------------------------------------------------------------------------
def section_header(roff, rsize, voff, vsize, name=b'.h4x'):
    """Serialise a 40-byte PE section header.

    +00     BYTE    name[8]            (NUL padded on the right)
    +08     DWORD   virtualsize
    +12     DWORD   virtualaddress
    +16     DWORD   rawsize
    +20     DWORD   rawaddress
    +24     DWORD   relocaddress       (0)
    +28     DWORD   linenumbers        (0)
    +32     WORD    nrofrelocs         (0)
    +34     WORD    nroflinenumbers    (0)
    +36     DWORD   characteristics    (0xC0000040)
    """
    # initialized data | readable | writable
    characteristics = 0xC0000040
    padded_name = name.ljust(8, b'\x00')
    fields = pack('<6L2HL', vsize, voff, rsize, roff, 0, 0, 0, 0, characteristics)
    return padded_name + fields

# ----------------------------------------------------------------------------------------------------------------
def rebuild_import_table(file_data, impt):
    """Append a new section containing a freshly built import table.

    For every DLL in ``impt`` the new section receives the DLL name, the
    hint/name entries of its functions and one thunk array; that single array
    is referenced by both OriginalFirstThunk and FirstThunk. The import
    descriptors (plus an all-zero terminator) are placed after the last thunk
    array, and the import data directory is pointed at them.

    All RVAs inside the new section are computed relative to the original
    ``SizeOfImage``.
    NOTE(review): the section header itself is placed at the aligned end of
    the last section; this presumably equals ``SizeOfImage`` - confirm.

    :param file_data: raw bytes of the PE image to extend.
    :param impt: mapping ``dll name -> list of function names``.
    :return: ``(new_image, rvas)`` where ``new_image`` is the result of
        ``pefile``'s ``write()`` and ``rvas[dll][func]`` is the RVA of the
        thunk slot belonging to that function.
    """
    # collect some pe info from the file for later
    pe = pefile.PE(data=file_data, fast_load=True)
    is64bits = bool(pe.OPTIONAL_HEADER.Magic == 0x20b)
    nrofsections = pe.FILE_HEADER.NumberOfSections
    secalignment = pe.OPTIONAL_HEADER.SectionAlignment
    filealignment = pe.OPTIONAL_HEADER.FileAlignment
    sizeofheaders = pe.OPTIONAL_HEADER.SizeOfHeaders
    lastsecoffset = pe.sections[nrofsections-1].__file_offset__
    lastsection = pe.sections[nrofsections-1]
    lastviraddr = lastsection.VirtualAddress + lastsection.Misc_VirtualSize
    sizeofimage = pe.OPTIONAL_HEADER.SizeOfImage
    pe.close()

    # width of one thunk entry as emitted by image_thunk_data()
    thunk_size = 8 if is64bits else 4

    imp_tbl = b''
    imp_disc = b''
    rvas = dict()
    for dllname in impt:
        importbyname_offsets, importbyname_data = build_image_import_by_name(impt[dllname])
        # the hint/name entries start right after the NUL-terminated dll name
        thunk_data = build_image_thunk_data(importbyname_offsets, is64bits, sizeofimage + len(imp_tbl) + len(dllname) + 1)

        name_rva = len(imp_tbl)

        imp_tbl += dllname.encode() + b'\x00'
        imp_tbl += importbyname_data
        imp_tbl = align_data(imp_tbl, 8)
        firstthunk_rva = sizeofimage + len(imp_tbl)
        imp_tbl += thunk_data

        # slot i of the thunk array belongs to function i; the stride must
        # match the entry width (4 bytes for 32-bit images, 8 for 64-bit)
        rvas[dllname] = dict()
        for i, funcname in enumerate(impt[dllname]):
            rvas[dllname][funcname] = firstthunk_rva + (i * thunk_size)

        imp_disc += image_import_descriptor( firstthunk_rva, sizeofimage + name_rva, firstthunk_rva )

    # all-zero descriptor terminates the descriptor list
    imp_disc += image_import_descriptor( 0, 0, 0 )
    imp_tbl = align_data(imp_tbl, 4)

    import_dir_rva = sizeofimage + len(imp_tbl)
    imp_tbl += imp_disc
    newsec_data = align_data(imp_tbl, filealignment)

    newsec_rawsize = len(newsec_data)
    # get the aligned virtual offset and size
    newsec_viraddr = align_int(lastviraddr, secalignment)
    newsec_virsize = align_int(newsec_rawsize, secalignment)
    newsec_rawaddr = len(file_data)

    # create a section header
    newsec_header = section_header(newsec_rawaddr, newsec_rawsize, newsec_viraddr, newsec_virsize)

    # construct the new pe file: headers up to and including the last section
    # header, the new section header, zero padding up to SizeOfHeaders, the
    # original section data and finally the new section data
    new_pe  = file_data[:lastsecoffset + SECTION_HEADER_SIZE]
    new_pe += newsec_header
    new_pe += (sizeofheaders - len(new_pe)) * b'\x00'
    new_pe += file_data[sizeofheaders:]
    new_pe += newsec_data

    # parse the pe of the rebuilt data
    pe = pefile.PE(data=new_pe, fast_load=True)
    # increase nr of sections
    pe.FILE_HEADER.NumberOfSections += 1
    # update the imagesize
    pe.OPTIONAL_HEADER.SizeOfImage = newsec_viraddr + newsec_virsize
    # update the data_dir[imports] rva and size
    import_dir = pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']
    pe.OPTIONAL_HEADER.DATA_DIRECTORY[ import_dir ].VirtualAddress = import_dir_rva
    # NOTE(review): the size is derived from the whole raw section rather than
    # from the descriptor list - confirm this is intended.
    pe.OPTIONAL_HEADER.DATA_DIRECTORY[ import_dir ].Size = newsec_rawsize + IMAGE_DESCRIPTOR_HEADER_SIZE
    # the header area after the section table was overwritten above, so a
    # bound import directory (if set) is reset
    bound_imports_dir = pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT']
    if pe.OPTIONAL_HEADER.DATA_DIRECTORY[ bound_imports_dir ].VirtualAddress != 0:
        pe.OPTIONAL_HEADER.DATA_DIRECTORY[ bound_imports_dir ].VirtualAddress = 0
        pe.OPTIONAL_HEADER.DATA_DIRECTORY[ bound_imports_dir ].Size = 0

    # hand back the serialised image together with the thunk slot rvas
    return pe.write(), rvas

# ----------------------------------------------------------------------------------------------------------------
def hook_code64(uc, address, size, user_data):
    """Unicorn code hook that prints the instruction about to be executed.

    :param uc: the emulator instance the hook is attached to.
    :param address: address of the instruction.
    :param size: number of bytes to read at ``address``.
    :param user_data: unused.
    """
    code = bytes(uc.mem_read(address, size))
    # Disassemble at the real address so each trace line shows where the
    # instruction lives; disassembling at 0 labels every line with address 0.
    insn = next(md.disasm(code, address), None)
    if insn is None:
        # nothing decodable at this address; keep tracing instead of failing
        print("0x%016x: <undecodable>" % address)
        return
    print_insn(insn)

# ----------------------------------------------------------------------------------------------------------------
def emu(pe, offsets, imports, tracecode=False):
    """Emulate VMProtect API stubs to find out which import each one reaches.

    The image is mapped at its ImageBase together with a 2 MiB stack. Only
    those two regions exist in the emulator, so emulation is expected to stop
    with a ``UcError``; RIP at that moment is looked up in ``imports``.

    :param pe: parsed PE whose raw data is loaded into the emulator.
    :param offsets: iterable of ``(offset, vm_eip)`` pairs; emulation starts
        at ``ImageBase + vm_eip + 0x1000``.
    :param imports: mapping ``address -> import info``.
    :param tracecode: when true, every emulated instruction is printed.
    :return: dict mapping ``offset`` to ``imports[rip]`` for every stub whose
        faulting RIP was found in ``imports``.
    """
    resolved = dict()
    imgbase = pe.OPTIONAL_HEADER.ImageBase
    imgsize = pe.OPTIONAL_HEADER.SizeOfImage

    stack_base = 0x1000000
    stack_size = 2 * 1024 * 1024

    mu = Uc(UC_ARCH_X86, UC_MODE_64)

    mu.mem_map(imgbase, imgsize)
    mu.mem_map(stack_base, stack_size)

    mu.mem_write(imgbase, bytes(pe.__data__))
    mu.mem_write(stack_base, stack_size * b'\0')

    if tracecode:
        mu.hook_add(UC_HOOK_CODE, hook_code64)

    for offset, vm_eip in offsets:
        # The emulator runs in 64-bit mode: set the whole RSP so that no upper
        # bits left over from a previous stub survive (writing ESP only
        # replaces the low 32 bits). The stack pointer starts mid-stack.
        mu.reg_write(UC_X86_REG_RSP, stack_base + stack_size // 2)
        try:
            mu.emu_start(imgbase + (vm_eip + 0x1000), imgbase + imgsize)
        except UcError:
            rip = mu.reg_read(UC_X86_REG_RIP)
            if rip in imports:
                print('0x%016x => %s' % (offset, imports[rip]))
                resolved[offset] = imports[rip]
            else:
                print(f'0x{rip:016x} not found')
    return resolved

# ----------------------------------------------------------------------------------------------------------------
def print_insn(insn):
    """Print one instruction as ``0x<address>: <mnemonic> <operands>``.

    The address is zero-padded to 16 hex digits and the mnemonic is padded
    with spaces to at least five characters.
    """
    address = insn.address
    mnemonic = insn.mnemonic.ljust(5, ' ')
    print(f"0x{address:016x}: {mnemonic} {insn.op_str}")

# ----------------------------------------------------------------------------------------------------------------
def disassemble(code, ep, maxinslen=12):
    """Decode the single instruction located at offset ``ep`` of ``code``.

    At most ``maxinslen`` bytes are handed to the disassembler, and ``ep`` is
    also used as the instruction's address.

    :return: the decoded instruction, or ``None`` if nothing could be decoded.
    """
    window = code[ep:ep + maxinslen]
    return next(md.disasm(window, ep), None)

# ----------------------------------------------------------------------------------------------------------------
def call_ins_in_range(code, minaddress, maxaddress):
    """Find 5-byte immediate ``call`` instructions targeting a given range.

    Decoding is attempted at every byte offset of ``code``. A hit is recorded
    when the call target, truncated to 32 bits, lies within
    ``[minaddress, maxaddress]`` (both inclusive).

    :return: list of ``(offset, target)`` tuples in ascending offset order.
    """
    hits = []
    for position in range(len(code)):
        insn = disassemble(code, position)
        if not insn:
            continue
        if insn.size != 5 or insn.id != X86_INS_CALL:
            continue
        if insn.operands[0].type != X86_OP_IMM:
            continue
        target = insn.operands[0].imm & 0xffffffff
        if minaddress <= target <= maxaddress:
            hits.append((position, target))
    return hits

# ----------------------------------------------------------------------------------------------------------------
def dump_fix(img, oep=None):
    """Turn a memory dump of a PE image into a file with matching offsets.

    Every section's raw offset is set to its virtual address, and both its raw
    and virtual size are set to the section-aligned virtual size, so that file
    offsets equal RVAs in the result.

    :param img: bytes of the image as read from process memory.
    :param oep: new AddressOfEntryPoint (RVA); left untouched when falsy.
    :return: the serialised image as produced by ``pefile``'s ``write()``.
    """
    pe = pefile.PE(data=img, fast_load=True)
    section_alignment = pe.OPTIONAL_HEADER.SectionAlignment
    for section in pe.sections:
        aligned_size = align_int(section.Misc_VirtualSize, section_alignment)
        section.PointerToRawData = section.VirtualAddress
        section.SizeOfRawData = aligned_size
        section.Misc_VirtualSize = aligned_size

    if oep:
        pe.OPTIONAL_HEADER.AddressOfEntryPoint = oep

    # disable aslr for now, a reloc correction would be needed otherwise.
    # Clear the DYNAMIC_BASE bit (0x40) rather than toggling it: an XOR would
    # switch ASLR *on* for an image that did not have it set.
    pe.OPTIONAL_HEADER.DllCharacteristics &= ~0x40

    return pe.write()

# ----------------------------------------------------------------------------------------------------------------
def analyse_vmp_api_stub(code, address):
    """Follow a stub from ``address`` and collect its relevant instructions.

    Recorded instructions are: every ``push``/``pop``; ``lea``/``mov`` with a
    memory operand in either position; ``xchg`` whose two operands differ in
    type; and the terminating ``ret``. ``jmp`` instructions are followed to
    their immediate target, everything else is stepped over.

    :return: list of recorded instructions, ending with the ``ret``.
    """
    recorded = []
    cursor = address
    while True:
        insn = disassemble(code, cursor)
        opcode = insn.id

        # the ret closes the stub
        if opcode == X86_INS_RET:
            recorded.append(insn)
            return recorded

        if opcode in (X86_INS_PUSH, X86_INS_POP):
            keep = True
        elif opcode in (X86_INS_LEA, X86_INS_MOV):
            keep = insn.operands[0].type == X86_OP_MEM or insn.operands[1].type == X86_OP_MEM
        elif opcode == X86_INS_XCHG:
            keep = insn.operands[0].type != insn.operands[1].type
        else:
            keep = False

        if keep:
            recorded.append(insn)

        # advance: follow jumps, otherwise fall through to the next instruction
        if opcode == X86_INS_JMP:
            cursor = insn.operands[0].imm
        else:
            cursor += insn.size

# ----------------------------------------------------------------------------------------------------------------
def _build_import_lookup(process, pe):
    """Map exported addresses of the DLLs imported by ``pe`` to ``(dll, name)``."""
    lookup = dict()
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        dllname = entry.dll.decode()
        mod = process.get_module_by_name(dllname)
        if mod is None:
            # nothing to resolve against if the dll is not loaded in the target
            print(f'--> {dllname} (not loaded, skipped)')
            continue

        # NOTE(review): assumes the dll lives in c:\windows\system32
        pedll = pefile.PE('c:\\windows\\system32\\' + dllname, fast_load=True)
        try:
            pedll.parse_data_directories(directories=[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT']])

            print(f'--> {dllname}')
            for exp in pedll.DIRECTORY_ENTRY_EXPORT.symbols:
                # exports without a name cannot be resolved by name
                if exp.name is None:
                    continue
                addr = mod.resolve( exp.name )
                # first export seen for an address wins
                if addr not in lookup:
                    lookup[addr] = (dllname, exp.name.decode())
        finally:
            pedll.close()
    return lookup


def _group_imports_by_dll(call2imp):
    """Group resolved ``(dll, func)`` pairs into ``dll -> [unique funcs]``."""
    imptbl = dict()
    for dll, func in call2imp.values():
        funcs = imptbl.setdefault(dll, list())
        if func not in funcs:
            funcs.append( func )
    return imptbl


def _classify_vmp_stub(vmpapi, image, cfrom):
    """Return ``(mnemonic, rewind)`` for a recognised stub shape, else ``None``.

    ``mnemonic`` is the instruction to assemble (``call`` or ``jmp``) and
    ``rewind`` the number of bytes before the call at which the patch starts.
    """
    first, last = vmpapi[0], vmpapi[-1]

    # 0x00000000000fd9eb: push  rax
    # 0x00000000000fd9f7: mov   rax, qword ptr [rsp + 8]
    # 0x0000000000242ad2: lea   rax, [rax + 1]
    # 0x00000000001b7986: mov   qword ptr [rsp + 8], rax
    # 0x00000000001b7992: lea   rax, [rip - 0x1af489]
    # 0x0000000000173257: mov   rax, qword ptr [rax + 0xd5f69]
    # 0x000000000017325e: lea   rax, [rax + 0x7fde623c]
    # 0x000000000023dff5: xchg  qword ptr [rsp], rax
    # 0x0000000000256c24: ret
    if len(vmpapi) > 2:
        third = vmpapi[2]
        if third.id == X86_INS_LEA and third.operands[1].type == X86_OP_MEM and third.operands[1].mem.disp == 1:
            return 'call', 0

    # 0x000000000020c562: pop   rbx
    # 0x000000000020bcda: xchg  qword ptr [rsp], rbx
    # 0x00000000000fcc27: push  rbx
    # 0x00000000000fcc28: lea   rbx, [rip - 0xf30ea]
    # 0x0000000000162a3c: mov   rbx, qword ptr [rbx + 0x261566]
    # 0x00000000000e2e6f: lea   rbx, [rbx + 0x19bd2ee5]
    # 0x00000000000e2e76: xchg  qword ptr [rsp], rbx
    # 0x0000000000259b32: ret
    if first.id == X86_INS_POP and last.size == 1:
        return 'call', 1

    # 0x0000000000199b18: pop   rbp
    # 0x0000000000287e2e: xchg  qword ptr [rsp], rbp
    # 0x000000000016eeff: push  rbp
    # 0x000000000016ef04: lea   rbp, [rip - 0x167a8b]
    # 0x00000000002735e8: mov   rbp, qword ptr [rbp + 0x10440d]
    # 0x00000000000fcc15: lea   rbp, [rbp + 0x1c97310c]
    # 0x00000000001885a6: xchg  qword ptr [rsp], rbp
    # 0x0000000000150cb8: ret   8
    if first.id == X86_INS_POP and last.size == 3:
        return 'jmp', 2

    # 0x0000000000149386: push  rsi
    # 0x000000000014938a: lea   rsi, [rip - 0x144344]
    # 0x00000000001bfe00: mov   rsi, qword ptr [rsi + 0x12b866]
    # 0x0000000000224a7c: lea   rsi, [rsi + 0x95a4b96]
    # 0x00000000001f6ad4: xchg  qword ptr [rsp], rsi
    # 0x00000000001f6ad8: ret   8
    if first.id == X86_INS_PUSH and last.size == 3:
        # start one byte earlier when the byte before the call is 0x48
        rewind = 1 if image[(cfrom - 1) + 0x1000] == 0x48 else 0
        return 'jmp', rewind

    return None


def action_callback_page_access( event ):
    """Page-breakpoint callback: dump the process once the OEP is reached.

    When RIP lies inside the first section of the target, the image is read
    from memory, fixed up with dump_fix(), its VMProtect API stubs are
    resolved by emulation, a new import table is appended, the stub calls are
    patched into indirect ``call``/``jmp`` instructions through the new thunk
    slots, and the result is written to ``dump.exe``. The script then exits.
    """
    global target_pe

    process = event.get_process()
    thread  = event.get_thread()
    context = thread.get_context()

    img_base = process.get_image_base()

    rip = context['Rip']

    # we need this rip range filter, because we keep hitting a vmp section first...?!
    text_start = img_base + target_pe.sections[0].VirtualAddress
    text_end = text_start + target_pe.sections[0].Misc_VirtualSize
    if not (text_start <= rip <= text_end):
        return

    print(f'page_access: OEP => 0x{rip:016x} - 0x{rip-img_base:016x}')
    event.debug.erase_all_breakpoints()

    img_pe = dump_fix(process.read( img_base, target_pe.OPTIONAL_HEADER.SizeOfImage ), rip-img_base)

    pe = pefile.PE(data=img_pe, fast_load=True)
    pe.parse_data_directories(directories=[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])

    # grab the imported dll names from the import table
    implookup = _build_import_lookup(process, pe)

    # locate call's into the vmp section
    # NOTE(review): assumes section 0 is the code section and section 4 the
    # vmp section - confirm for other targets.
    code_section_data = pe.sections[0].get_data()
    vm_section = pe.sections[4]

    calltos = call_ins_in_range(
        code_section_data,
        vm_section.VirtualAddress,
        vm_section.VirtualAddress + vm_section.Misc_VirtualSize
        )

    print('got %d vmp_api call(s)' % len(calltos))

    # emulate the vmp api stubs
    call2imp = emu(pe, calltos, implookup)

    pe.close()

    # filter imports for iat rebuilding
    imptbl = _group_imports_by_dll(call2imp)

    # rebuild import table
    img_pe, iat_rvas = rebuild_import_table(img_pe, imptbl)

    # patch vmp call instructions
    fio = BytesIO(img_pe)
    ks = Ks(KS_ARCH_X86, KS_MODE_64)
    for cfrom, cto in calltos:
        # calls the emulator could not resolve have no import to point at
        if cfrom not in call2imp:
            print(f'unresolved!!!! 0x{img_base+0x1000+cfrom:016x}')
            continue

        vmpapi = analyse_vmp_api_stub(img_pe, cto + 0x1000)
        shape = _classify_vmp_stub(vmpapi, img_pe, cfrom)
        if shape is None:
            print(f'unknown!!!! 0x{img_base+0x1000+cfrom:016x}')
            continue

        mnemonic, rewind = shape
        dllname, funcname = call2imp[cfrom]
        patch_at = cfrom - rewind
        # the operand is the distance from the patch location to the thunk slot
        encoding, count = ks.asm('%s qword ptr[0x%016x]' % (mnemonic, iat_rvas[dllname][funcname] - 0x1000 - patch_at))
        fio.seek( patch_at + 0x1000 )
        fio.write( bytes(encoding) )

    with open('dump.exe', 'wb') as fout:
        fout.write( fio.getvalue() )

    print('PE image dumped')
    print('Done')
    # sys.exit is used because the bare exit() helper is only installed by the
    # site module. NOTE(review): status 1 is kept as-is - confirm it is wanted
    # on success.
    sys.exit(1)

# ----------------------------------------------------------------------------------------------------------------
def action_callback_NtProtectVirtualMemory( event ):
    """Breakpoint callback for NtProtectVirtualMemory.

    Reads the address and size through the pointers in RDX and R8 and the
    protection value from R9. Addresses inside the target image are logged and
    remembered in ``mem_pages``. When an already known address is seen again,
    is the most recently added one, and more than one address is known, all
    breakpoints are erased and a page breakpoint covering the first section is
    installed with action_callback_page_access as its action.
    """
    global target_pe
    global mem_pages

    process = event.get_process()
    context = event.get_thread().get_context()

    img_base = process.get_image_base()
    img_end = img_base + target_pe.OPTIONAL_HEADER.SizeOfImage

    address = process.read_qword(context['Rdx'])
    size = process.read_qword(context['R8'])
    mode = context['R9']

    # ignore protection changes outside of the target image
    if not (img_base <= address <= img_end):
        return

    print(f'NtProtectVirtualMemory: address=0x{address:016x} size=0x{size:016x} prot={mode:x}')

    if address not in mem_pages:
        mem_pages.append( address )
        return

    if len(mem_pages) <= 1 or address != mem_pages[-1]:
        return

    # memory bp on the .text(0) section
    debug = event.debug
    debug.erase_all_breakpoints()
    pid = process.get_pid()
    pages = (align_int(target_pe.sections[0].Misc_VirtualSize, 4096) // 4096)
    print(f'page_breakpoint: address=0x{img_base + target_pe.sections[0].VirtualAddress:016x} pages={pages} size=0x{pages*4096:x}')
    text_address = img_base + target_pe.sections[0].VirtualAddress
    debug.define_page_breakpoint(
        pid,
        text_address,
        pages=pages,
        action=action_callback_page_access)
    debug.enable_page_breakpoint(pid, img_base + target_pe.sections[0].VirtualAddress)

# ----------------------------------------------------------------------------------------------------------------
class MyEventHandler( EventHandler ):
    """Event handler that hooks NtProtectVirtualMemory when ntdll.dll loads."""

    def load_dll( self, event ):
        """Install an execution hardware breakpoint on ntdll's NtProtectVirtualMemory.

        Modules other than ntdll.dll are ignored. The breakpoint is defined
        and enabled for the thread that raised the event.
        """
        module = event.get_module()
        if not module.match_name('ntdll.dll'):
            return

        # set HWBP on ntdll.NtProtectVirtualMemory
        hook_address = module.resolve( 'NtProtectVirtualMemory' )
        thread_id = event.get_thread().get_tid()
        event.debug.define_hardware_breakpoint(
            thread_id,
            hook_address,
            triggerFlag=Debug.BP_BREAK_ON_EXECUTION,
            sizeFlag=Debug.BP_WATCH_BYTE,
            action=action_callback_NtProtectVirtualMemory)
        event.debug.enable_hardware_breakpoint(thread_id, hook_address)

# ----------------------------------------------------------------------------------------------------------------
def debugger( argv ):
    """Launch ``argv`` under the debugger and run the event loop.

    ``argv[0]`` is parsed as the target PE and stored in the module-level
    ``target_pe``; ``mem_pages`` is reset to an empty list. The debuggee is
    killed when the debugger exits.
    """
    global target_pe
    global mem_pages

    target_pe = pefile.PE(argv[0], fast_load=True)
    mem_pages = []

    handler = MyEventHandler()
    with Debug( handler, bKillOnExit = True ) as session:
        session.execv( argv )
        session.loop()

# ----------------------------------------------------------------------------------------------------------------
# Script entry point: everything after the script name is handed to
# debugger(), which parses the first item as the target executable and
# launches the whole list under the debugger.
if __name__ == "__main__":
    debugger( sys.argv[1:] )