65.9K
CodeProject 正在变化。 阅读更多。
Home

x64 API Hook + 反汇编器

starIconstarIconstarIconstarIconstarIcon

5.00/5(3 票)

2018年11月8日

CPOL

3分钟阅读

viewsIcon

17593

downloadIcon

326

本文向您展示如何在 x64 上实现 trampoline(跳板),介绍同步和异步钩子的概念,以及动态添加的运行时过滤器和日志记录器。

引言

让我们先看看这个工具能做什么。考虑以下程序

#include <Windows.h>
#include <stdio.h>

#pragma comment(lib, "TestDll.lib")

__declspec(dllimport) void export_function1();

__declspec(dllimport) void export_function2();

// Demo target program used to show the tool: it performs the same three
// calls twice, one into each module named in the trailing comments.
int main(int argc, char* argv[])
{
    export_function1();    // import from TestDll.dll
    Sleep(1);              // import from kernel32.dll
    getchar();             // import from msvcr120d.dll

    export_function2();    // import from TestDll.dll
    Sleep(1);              // import from kernel32.dll
    getchar();             // import from msvcr120d.dll

    return 0;
}

让我们看看如何在 sync(同步) 模式下逐步执行调用

以及在 async(异步) 模式下

日志文件

所以工作流程是

  1. 指定目标进程使用的库,以便对其导出进行修补(我们将函数的前两个字节替换为无限循环,以便进程将阻塞,这确保我们不会错过任何 API 调用)
  2. 启动目标进程(或执行一些将触发目标进程执行的操作,它如何开始执行并不重要)
  3. 现在我们可以输入命令(以添加其他钩子,执行 sync/async 等待等)

请注意,我们可以通过加载过滤器来 filter(过滤) API 调用。过滤器的作用是检查线程上下文、进程内存等,并返回 true(如果我们对此调用感兴趣)或 false(否则)。此外,在同步模式下,我们可以在某些调用上停止,附加调试器,做一些事情,分离调试器,然后像什么都没发生一样继续。另一种可能性是添加自定义日志记录器,将寄存器值和内存内容转储到文件中。

使用代码

现在让我们把注意力转向代码。我将在这里检查关键时刻,您可以随时下载示例以查看完整代码。让我们从结构定义开始

// Register values captured at the moment a hooked function is entered.
// The field order is relied upon by the Log assembly routine, which stores
// rip, rsp, rcx, rdx, r8 and r9 at offsets 0, 8, 16, 24, 32 and 40, and the
// structure is sent as raw bytes over the socket by LogImp.
struct FUNCTION_CONTEXT
{
    DWORD64 Rip;    // address of the hooked function
    DWORD64 Rsp;    // stack pointer of the hooked function at entry
    DWORD64 Rcx;    // rcx at entry
    DWORD64 Rdx;    // rdx at entry
    DWORD64 R8;     // r8 at entry
    DWORD64 R9;     // r9 at entry
};

// Forward declarations: FUNCTION_ITEM and LIBRARY_ITEM point at each other.
struct LIBRARY_ITEM;
struct FUNCTION_ITEM;
// Callback signature shared by filters and loggers (FUNCTION_ITEM::ProcFilter
// and FUNCTION_ITEM::ProcLogger). It receives the hooked function, the target
// process handle and the captured register context.
typedef BOOL(*TAux)(FUNCTION_ITEM *func, HANDLE hProcess, FUNCTION_CONTEXT *context);

// Describes one hooked function.
struct FUNCTION_ITEM
{
    LIBRARY_ITEM *lib;              // library this function belongs to
    char *Name;                     // function name; NULL marks an unused slot
    UCHAR Bytes[BYTES_SIZE];        // original first BYTES_SIZE bytes of the function
    UCHAR *SyncTrampoline;          // address of the sync trampoline in the target process; NULL until InitSync
    UCHAR SyncHook[BYTES_SIZE];     // bytes written over the function start in sync mode
    UCHAR *AsyncTrampoline;         // address of the async trampoline in the target process; NULL until InitAsync
    UCHAR AsyncHook[BYTES_SIZE];    // bytes written over the function start in async mode
    DWORD Offset;                   // offset of the function in the library file (0 for items added at run time)
    DWORD RVA;                      // RVA of the function inside its module (0 for items added at run time)
    DWORD64 Rip;                    // address of the function in the target process
    HMODULE LibFilter;              // module associated with ProcFilter, or NULL
    TAux ProcFilter;                // filter callback; when NULL every call is accepted
    HMODULE LibLogger;              // module associated with ProcLogger, or NULL
    TAux ProcLogger;                // logger callback; when NULL the async wait routine calls LogItem
    DWORD Mode;                     // current hook mode (one of the MODE_* values)
};

// Describes a module whose functions are hooked.
struct LIBRARY_ITEM
{
    char *LibName;          // module name passed to GetModuleHandleRemote
    char *FileName;         // path of the library file patched on disk; NULL for libraries added at run time
    UCHAR *Base;            // module base address in the target process; NULL while not loaded
    DWORD Count;            // number of entries in item
    FUNCTION_ITEM *item;    // array of Count function descriptors
};

FUNCTION_CONTEXT 结构保存钩子触发时刻的寄存器值。 LIBRARY_ITEM 结构保存包含待挂钩函数的模块的信息。 FUNCTION_ITEM 结构保存有关已挂钩函数的信息。 如您所见,我们将使用 trampoline 来实现 API 挂钩,此技术是线程安全的,并且还允许我们挂钩“free”(自由)函数(不仅仅是通过 IAT 中的指针调用的“API”)。

现在让我们看看修补库的代码,它在我们输入目标进程 ID 之前执行

// Patches every named export of the library file FileName with the sync hook
// and rewrites the file in place.
//
// Parameters:
//   FileName - path of the PE file to patch (read, modified, written back).
//   LibName  - module name later used to locate the library in the target.
//
// Returns a newly allocated LIBRARY_ITEM holding one FUNCTION_ITEM per export
// name (slots that are not hooked are zero-filled, i.e. Name == NULL), or
// NULL when the file cannot be read, has no export directory, or memory
// cannot be allocated.
//
// Each export name is resolved through the name-ordinal table, as the PE
// format requires; name i does not necessarily belong to function i.
// Forwarder exports (whose "function" RVA points back into the export
// directory at a forwarder string) are left untouched.
LIBRARY_ITEM* AddLibrary(char *FileName, char *LibName)
{
    FILE *file;
    long FileSize;
    DWORD size;
    UCHAR *Image;
    IMAGE_NT_HEADERS *Headers;
    IMAGE_SECTION_HEADER *Sections;
    DWORD ExportsRVA;
    DWORD ExportsSize;
    DWORD ExportsOffset;
    IMAGE_EXPORT_DIRECTORY *Exports;
    DWORD *AddressOfFunctions;
    DWORD *AddressOfNames;
    WORD *AddressOfNameOrdinals;
    DWORD FunctionIndex;
    DWORD FunctionRVA;
    DWORD FunctionOffset;
    DWORD NameOffset;
    UCHAR *Function;
    char *Name;
    FUNCTION_ITEM *item;
    LIBRARY_ITEM *lib;
    DWORD Count;
    DWORD RVA;
    BOOL Hookable;
    UCHAR SyncHook[SYNC_HOOK_SIZE];

    GenerateSyncHook(NULL, SyncHook);

    // Load the whole file into memory.
    file = fopen(FileName, "rb");
    if (!file) return NULL;

    FileSize = -1;
    if (!fseek(file, 0, SEEK_END)) FileSize = ftell(file);

    if ((FileSize <= 0) || fseek(file, 0, SEEK_SET))
    {
        fclose(file);
        return NULL;
    }

    size = (DWORD)FileSize;
    Image = (UCHAR*)malloc(size);

    if (!Image || (fread(Image, size, 1, file) != 1))
    {
        free(Image);
        fclose(file);
        return NULL;
    }

    fclose(file);

    Headers = (IMAGE_NT_HEADERS64*)(Image + ((IMAGE_DOS_HEADER*)Image)->e_lfanew);
    Sections = (IMAGE_SECTION_HEADER*)((UCHAR*)Headers + 
     (offsetof(IMAGE_NT_HEADERS, OptionalHeader) + Headers->FileHeader.SizeOfOptionalHeader));
    
    ExportsRVA = Headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
    ExportsSize = Headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].Size;

    // A module without an export directory has nothing to hook.
    if (!ExportsRVA)
    {
        free(Image);
        return NULL;
    }

    ExportsOffset = TranslateRVAToOffset(Sections, ExportsRVA);
    Exports = (IMAGE_EXPORT_DIRECTORY*)(Image + ExportsOffset);

    if (Exports->NumberOfNames != Exports->NumberOfFunctions) DbgRaiseAssertionFailure();

    AddressOfFunctions = (DWORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfFunctions));
    AddressOfNames = (DWORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfNames));
    AddressOfNameOrdinals = (WORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfNameOrdinals));

    Count = Exports->NumberOfNames;

    // Allocate at least one element so that a NULL result always means failure.
    item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM) * (Count ? Count : 1));
    lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));

    if (!item || !lib)
    {
        free(item);
        free(lib);
        free(Image);
        return NULL;
    }

    lib->Count = Count;
    lib->Base = NULL;
    lib->LibName = StringCopy(LibName);
    lib->item = item;
    lib->FileName = StringCopy(FileName);

    for (DWORD i = 0; i < Count; ++i)
    {
        Hookable = FALSE;
        FunctionOffset = 0;
        Function = NULL;
        RVA = 0;

        NameOffset = TranslateRVAToOffset(Sections, AddressOfNames[i]);
        Name = (char*)(Image + NameOffset);

        // Name i maps to the function whose index is stored in the ordinal table.
        FunctionIndex = AddressOfNameOrdinals[i];

        if (FunctionIndex < Exports->NumberOfFunctions)
        {
            FunctionRVA = AddressOfFunctions[FunctionIndex];

            // Skip empty slots and forwarders: a forwarder's RVA lies inside
            // the export directory and addresses a string, not code.
            if (FunctionRVA &&
                !((FunctionRVA >= ExportsRVA) && (FunctionRVA < ExportsRVA + ExportsSize)))
            {
                FunctionOffset = TranslateRVAToOffset(Sections, FunctionRVA);
                Function = Image + FunctionOffset;

                RVA = GetTargetRVAFromStub(Function, FunctionRVA);

                if (RVA != FunctionRVA)
                {
                    FunctionOffset = TranslateRVAToOffset(Sections, RVA);
                    Function = Image + FunctionOffset;
                }

                // Never read or patch past the end of the file image.
                Hookable = ((DWORD64)FunctionOffset + BYTES_SIZE <= size);
            }
        }

        // A function that already starts with the sync hook was patched
        // earlier (for example, reached through another export name).
        if (Hookable && memcmp(Function, SyncHook, SYNC_HOOK_SIZE))
        {
            item[i].Name = StringCopy(Name);
            item[i].RVA = RVA;
            item[i].Rip = 0;
            item[i].Offset = FunctionOffset;
            item[i].LibFilter = NULL;
            item[i].ProcFilter = NULL;
            item[i].LibLogger = NULL;
            item[i].ProcLogger = NULL;
            item[i].SyncTrampoline = NULL;
            item[i].AsyncTrampoline = NULL;
            item[i].Mode = MODE_SYNC_HOOK;
            item[i].lib = lib;

            memcpy(item[i].Bytes, Function, BYTES_SIZE);
            memcpy(Function, SyncHook, SYNC_HOOK_SIZE);
        }
        else
        {
            memset(&item[i], 0, sizeof(item[i]));
        }
    }

    // Write the patched image back. If this fails the file on disk does not
    // match the returned description, so report it the way the rest of this
    // code reports broken invariants.
    file = fopen(FileName, "wb");

    if (!file || (fwrite(Image, size, 1, file) != 1)) DbgRaiseAssertionFailure();

    if (file) fclose(file);

    free(Image);

    return lib;
}

如您所见,我们修补库文件,构建 LIBRARY_ITEM 结构,该结构为每个修补的函数保存 FUNCTION_ITEM 结构。

现在让我们考虑一下挂钩的过程。

Sync(同步) 钩子

  1. 进程阻塞(函数的前几个字节被无限循环替换)
  2. 我们枚举目标进程的所有线程,找到相应的 FUNCTION_ITEM 结构
  3. 现在我们可以使用调试器调试调用的函数,或者直接跳过它(更改线程指令指针,使其指向我们先前生成的同步 trampoline)

Async(异步) 钩子

  1. 函数的前几个字节被一条指令替换,该指令获取其自身的指令指针值并跳转到由注入到目标进程中的特殊 DLL 导出的函数
  2. 此导出的函数收集寄存器值并将此信息传递给我们(使用套接字)
  3. 我们找到相应的 FUNCTION_ITEM 结构,记录函数调用,并将异步 trampoline 的地址发送回目标进程(使用套接字)
  4. 导出的函数接收异步 trampoline 的地址并跳转到此地址

让我们看看生成 trampoline 的函数

// Builds the trampoline for a hooked function: a relocated copy of the
// instructions that the hook overwrites, followed by a jump back into the
// original function.
//
// Parameters:
//   hProcess   - target process; read to resolve "jmp/call [rip+disp]" targets.
//   Rip        - address of the hooked function in the target process.
//   Mode       - MODE_SYNC_HOOK or MODE_ASYNC_HOOK; selects the hook length.
//   Bytes      - the function's original first BYTES_SIZE bytes.
//   TrampBytes - output buffer receiving the trampoline code.
//   TrampLen   - receives the number of bytes written to TrampBytes
//                (0 when Mode is invalid).
//
// Position-dependent instructions are rewritten:
//   - jmp rel8 / jmp rel32 / call rel32 become direct jumps/calls,
//   - REX-prefixed "jmp/call [rip+disp32]" become direct jumps/calls to the
//     pointer read from the target,
//   - other RIP-relative instructions are re-encoded to address memory
//     through a scratch register that is loaded before and restored after.
// NOTE(review): the RIP-relative rewrite assumes the layout
// [REX] opcode modrm disp32 (one-byte opcode, no immediate); anything else
// raises an assertion instead of silently emitting wrong code.
void GenerateTrampoline(HANDLE hProcess, DWORD64 Rip, 
     DWORD Mode, UCHAR *Bytes, UCHAR *TrampBytes, DWORD *TrampLen)
{
    DWORD i = 0;            // read position in Bytes
    DWORD j = 0;            // write position in TrampBytes
    DWORD Start;
    DWORD64 Address;
    Buffer code_buf;
    Instruction inst;
    INT32 Offset;
    UCHAR Rex;
    UCHAR Opcode;
    UCHAR Modrm;
    DWORD HookLen;
    DWORD AddrReg;
    DWORD Reg;

    if (Mode == MODE_SYNC_HOOK) HookLen = SYNC_HOOK_SIZE;
    else if (Mode == MODE_ASYNC_HOOK) HookLen = ASYNC_HOOK_SIZE;
    else
    {
        // Do not continue with an undefined hook length.
        DbgRaiseAssertionFailure();
        *TrampLen = 0;
        return;
    }

    while (TRUE)
    {
        if (Bytes[i] == 0xC3)            // ret
        {
            TrampBytes[j] = Bytes[i];
            ++j;
            break;
        }
        
        // Everything the hook overwrites has been copied: jump back.
        if (i >= HookLen)
        {
            Address = Rip + i;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }

        if (Bytes[i] == 0xEB)            // jmp rel8
        {
            ++i;
            // rel8 is a signed displacement; sign-extend it so that
            // backward jumps resolve to the right address.
            Offset = (signed char)Bytes[i];
            ++i;
            Address = Rip + i + Offset;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }

        if (Bytes[i] == 0xE9)            // jmp rel32
        {
            ++i;
            memcpy(&Offset, &Bytes[i], sizeof(Offset));
            i += sizeof(Offset);
            Address = Rip + i + Offset;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }
        
        if (Bytes[i] == 0xE8)            // call rel32
        {
            ++i;
            memcpy(&Offset, &Bytes[i], sizeof(Offset));
            i += sizeof(Offset);
            Address = Rip + i + Offset;
            GenerateDirectCall(Address, &TrampBytes[j]);
            j += DIRECT_CALL_SIZE;
            continue;
        }

        if ((Bytes[i] >= 0x40) && (Bytes[i] <= 0x4F))    // REX prefix
        {
            ++i;

            if ((Bytes[i] == 0xFF) && (Bytes[i + 1] == 0x25))     // jmp m64 (rip relative)
            {
                i += 2;
                memcpy(&Offset, &Bytes[i], sizeof(Offset));
                i += sizeof(Offset);
                ReadProcessMemoryEx(hProcess, Rip + i + Offset, (UCHAR*)&Address, sizeof(Address));
                GenerateDirectJump(Address, &TrampBytes[j]);
                j += DIRECT_JUMP_SIZE;
                break;
            }

            if ((Bytes[i] == 0xFF) && (Bytes[i + 1] == 0x15))     // call m64 (rip relative)
            {
                i += 2;
                memcpy(&Offset, &Bytes[i], sizeof(Offset));
                i += sizeof(Offset);
                ReadProcessMemoryEx(hProcess, Rip + i + Offset, (UCHAR*)&Address, sizeof(Address));
                GenerateDirectCall(Address, &TrampBytes[j]);
                j += DIRECT_CALL_SIZE;
                continue;
            }
            
            --i;    // not a special case: step back onto the prefix
        }

        // Decode the instruction at the current position. The buffer must
        // start at Bytes + i; its size is what remains of Bytes from there.
        c_MakeBuffer(&Bytes[i], BYTES_SIZE - i, (Encoding)0, &code_buf);

        inst_set_params(&inst, MODE_64, C_TRUE, &code_buf, NULL, 
                           SHOW_ADDRESS | SHOW_LOWER | SHOW_PSEUDO);

        if (!decode(&inst)) DbgRaiseAssertionFailure();

        if (inst.rip)
        {
            Start = i;

            if ((Bytes[i] >= 0x40) && (Bytes[i] <= 0x4F))
            {
                Rex = Bytes[i];
                ++i;
            }
            else Rex = 0;

            Opcode = Bytes[i];
            ++i;

            Modrm = Bytes[i];
            ++i;

            memcpy(&Offset, &Bytes[i], sizeof(Offset));
            i += sizeof(Offset);

            // The hand-parsed layout must cover exactly the decoded
            // instruction; otherwise the rewrite below would be wrong.
            if ((DWORD)code_buf.i != (i - Start)) DbgRaiseAssertionFailure();

            Address = Rip + i + Offset;

            // Pick a scratch register number (0 or 1) that differs from the
            // instruction's reg field.
            Reg = (Modrm & 0x38) >> 3;
            AddrReg = (Reg) ? (0) : (1);

            GenerateRegisterOverride(AddrReg, Address, &TrampBytes[j]);
            j += REGISTER_OVERRIDE_SIZE;

            if (Rex)
            {
                TrampBytes[j] = Rex;
                ++j;
            }

            TrampBytes[j] = Opcode;
            ++j;

            // mod = 00, rm = scratch register: address memory through it.
            TrampBytes[j] = AddrReg | (Reg << 3);
            ++j;

            GenerateRegisterRestore(AddrReg, &TrampBytes[j]);
            j += REGISTER_RESTORE_SIZE;
        }
        else
        {
            // Position-independent instruction: copy it verbatim.
            memcpy(&TrampBytes[j], &Bytes[i], code_buf.i);

            i += code_buf.i;
            j += code_buf.i;
        }
    }

    *TrampLen = j;
}

我使用自己的反汇编器,它包含在示例中。 但是,您可以将其替换为另一个,我只是需要一种快速的方法来确定指令是否使用 rip 相对寻址。

现在是生成钩子的函数

// Produces the sync hook: a relative 8-bit jump with displacement
// -SYNC_HOOK_SIZE (the infinite loop that blocks the calling thread).
// When Bytes is given, the rest of HookBytes up to BYTES_SIZE is filled
// with the function's original bytes so the whole buffer can be written
// over the function start.
void GenerateSyncHook(UCHAR *Bytes, UCHAR *HookBytes)
{
    GenerateRelative8Jump(-(SYNC_HOOK_SIZE), HookBytes);

    if (!Bytes)
    {
        return;
    }

    memcpy(&HookBytes[SYNC_HOOK_SIZE], &Bytes[SYNC_HOOK_SIZE], BYTES_SIZE - SYNC_HOOK_SIZE);
}

// Produces the async hook for the function at Rip: a relative call with a
// zero displacement followed by an indirect jump through the slot located
// right after the module's DOS header (Base + sizeof(IMAGE_DOS_HEADER)).
// When Bytes is given, the rest of HookBytes up to BYTES_SIZE is filled
// with the function's original bytes.
void GenerateAsyncHook(UCHAR *Base, DWORD64 Rip, UCHAR *Bytes, UCHAR *HookBytes)
{
    DWORD64 SlotAddress = (DWORD64)(Base + sizeof(IMAGE_DOS_HEADER));
    DWORD64 HookEnd = Rip + ASYNC_HOOK_SIZE;
    INT32 Displacement = (INT32)(SlotAddress - HookEnd);    // relative to the end of the hook

    GenerateRelativeCall(0, HookBytes);
    GenerateIndirectJump(Displacement, HookBytes + RELATIVE_CALL_SIZE);

    if (!Bytes)
    {
        return;
    }

    memcpy(&HookBytes[ASYNC_HOOK_SIZE], &Bytes[ASYNC_HOOK_SIZE], BYTES_SIZE - ASYNC_HOOK_SIZE);
}

Init(初始化) 函数

// Prepares a function for sync hooking, once: generates its sync
// trampoline, copies it into memory allocated in the target process and
// builds the sync hook bytes. Does nothing if the trampoline already exists.
void InitSync(FUNCTION_ITEM *func, HANDLE hProcess)
{
    UCHAR Code[MAX_TRAMPOLINE_SIZE];
    DWORD CodeLen;

    if (func->SyncTrampoline)
    {
        return;
    }

    GenerateTrampoline(hProcess, func->Rip, MODE_SYNC_HOOK, func->Bytes, Code, &CodeLen);

    func->SyncTrampoline =
        (UCHAR*)VirtualAllocEx(hProcess, NULL, CodeLen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    WriteProcessMemoryEx(hProcess, (DWORD64)func->SyncTrampoline, Code, CodeLen);

    GenerateSyncHook(func->Bytes, func->SyncHook);
}

// Runs InitSync on every used function slot of the library whose current
// mode equals FilterMode.
void InitSync(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess)
{
    for (DWORD n = 0; n < lib->Count; ++n)
    {
        FUNCTION_ITEM *func = &lib->item[n];

        if (!func->Name) continue;              // unused slot
        if (func->Mode != FilterMode) continue;

        InitSync(func, hProcess);
    }
}

// Prepares a function for async hooking, once: generates its async
// trampoline, copies it into memory allocated in the target process and
// builds the async hook bytes. Does nothing if the trampoline already exists.
void InitAsync(FUNCTION_ITEM *func, HANDLE hProcess)
{
    UCHAR Code[MAX_TRAMPOLINE_SIZE];
    DWORD CodeLen;

    if (func->AsyncTrampoline)
    {
        return;
    }

    GenerateTrampoline(hProcess, func->Rip, MODE_ASYNC_HOOK, func->Bytes, Code, &CodeLen);

    func->AsyncTrampoline =
        (UCHAR*)VirtualAllocEx(hProcess, NULL, CodeLen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    WriteProcessMemoryEx(hProcess, (DWORD64)func->AsyncTrampoline, Code, CodeLen);

    GenerateAsyncHook(func->lib->Base, func->Rip, func->Bytes, func->AsyncHook);
}

// Stores the pointer pLog in the target process right after the module's
// DOS header (the slot the async hook jumps through), then runs InitAsync
// on every used function slot whose current mode equals FilterMode.
void InitAsync(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess, UCHAR *pLog)
{
    DWORD64 SlotAddress = (DWORD64)(lib->Base + sizeof(IMAGE_DOS_HEADER));

    WriteProcessMemoryEx(hProcess, SlotAddress, (UCHAR*)&pLog, sizeof(pLog));

    for (DWORD n = 0; n < lib->Count; ++n)
    {
        FUNCTION_ITEM *func = &lib->item[n];

        if (!func->Name) continue;              // unused slot
        if (func->Mode != FilterMode) continue;

        InitAsync(func, hProcess);
    }
}

添加导出函数、“free”(自由)函数和对象的函数

// Hooks a single exported function of a module already loaded in the
// target process and puts it in sync mode.
// Returns a new one-item LIBRARY_ITEM, or NULL when the module or the
// export cannot be found in the target.
LIBRARY_ITEM* AddExport(char *LibName, char *ProcName, HANDLE hProcess)
{
    HMODULE RemoteModule = GetModuleHandleRemote(hProcess, LibName);

    if (!RemoteModule)
    {
        return NULL;
    }

    UCHAR *Target = GetProcAddressRemote(hProcess, RemoteModule, ProcName);

    if (!Target)
    {
        return NULL;
    }

    LIBRARY_ITEM *lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));

    // Library descriptor.
    lib->Base = (UCHAR*)RemoteModule;
    lib->Count = 1;
    lib->FileName = NULL;
    lib->LibName = StringCopy(LibName);
    lib->item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM));

    // Function descriptor.
    FUNCTION_ITEM *func = lib->item;

    func->Name = StringCopy(ProcName);
    func->lib = lib;
    func->Offset = 0;
    func->RVA = 0;
    func->Rip = (DWORD64)Target;
    func->LibFilter = NULL;
    func->ProcFilter = NULL;
    func->LibLogger = NULL;
    func->ProcLogger = NULL;
    func->SyncTrampoline = NULL;
    func->AsyncTrampoline = NULL;

    // Save the original bytes before the hook overwrites them.
    ReadProcessMemoryEx(hProcess, (DWORD64)Target, func->Bytes, BYTES_SIZE);

    InitSync(func, hProcess);
    SetHook(func, hProcess, MODE_SYNC_HOOK);

    return lib;
}

// Hooks a non-exported ("free") function located at module base + RVA in
// the target process and puts it in sync mode.
// Returns a new one-item LIBRARY_ITEM, or NULL when the module is not
// loaded in the target.
LIBRARY_ITEM* AddFunction(char *LibName, char *FunctionName, DWORD RVA, HANDLE hProcess)
{
    HMODULE RemoteModule = GetModuleHandleRemote(hProcess, LibName);

    if (!RemoteModule)
    {
        return NULL;
    }

    LIBRARY_ITEM *lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));

    // Library descriptor.
    lib->Base = (UCHAR*)RemoteModule;
    lib->Count = 1;
    lib->FileName = NULL;
    lib->LibName = StringCopy(LibName);
    lib->item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM));

    // Function descriptor.
    FUNCTION_ITEM *func = lib->item;

    func->Rip = (DWORD64)RemoteModule + RVA;

    // Save the original bytes before the hook overwrites them.
    ReadProcessMemoryEx(hProcess, func->Rip, func->Bytes, BYTES_SIZE);

    func->Name = StringCopy(FunctionName);
    func->lib = lib;
    func->Offset = 0;
    func->RVA = 0;
    func->LibFilter = NULL;
    func->ProcFilter = NULL;
    func->LibLogger = NULL;
    func->ProcLogger = NULL;
    func->SyncTrampoline = NULL;
    func->AsyncTrampoline = NULL;

    InitSync(func, hProcess);
    SetHook(func, hProcess, MODE_SYNC_HOOK);

    return lib;
}

// Hooks the first Count entries of a table of function pointers (a vtable)
// located at module base + RVA in the target process. Each entry becomes a
// FUNCTION_ITEM named "<ObjectName>::<index>" and is put in sync mode.
//
// Returns a new LIBRARY_ITEM with Count items, or NULL when the module is
// not loaded in the target or memory cannot be allocated. An item whose
// name cannot be allocated is left zero-filled (Name == NULL, not hooked).
LIBRARY_ITEM* AddObject(char *LibName, char *ObjectName, DWORD RVA, DWORD Count, HANDLE hProcess)
{
    LIBRARY_ITEM *lib;
    FUNCTION_ITEM *item;
    HMODULE Module;
    UCHAR **Vtbl;
    char Number[12];    // room for any 32-bit value printed by itoa: sign, 10 digits, NUL

    lib = NULL;
    Module = GetModuleHandleRemote(hProcess, LibName);

    if (Module)
    {
        // Allocate at least one element so that NULL always means failure.
        Vtbl = (UCHAR**)malloc(sizeof(UCHAR*) * (Count ? Count : 1));
        item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM) * (Count ? Count : 1));
        lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));

        if (!Vtbl || !item || !lib)
        {
            free(Vtbl);
            free(item);
            free(lib);
            return NULL;
        }

        lib->Base = (UCHAR*)Module;
        lib->LibName = StringCopy(LibName);
        lib->FileName = NULL;
        lib->Count = Count;
        lib->item = item;

        ReadProcessMemoryEx(hProcess, (DWORD64)((UCHAR*)Module + RVA), 
                           (UCHAR*)Vtbl, sizeof(UCHAR*) * Count);

        for (DWORD i = 0; i < Count; ++i)
        {
            itoa(i, Number, 10);

            // Size the name exactly: object name + "::" + index + NUL.
            // (A fixed "+ 5" overflows as soon as the index has 3 digits.)
            lib->item[i].Name = (char*)malloc(strlen(ObjectName) + 2 + strlen(Number) + 1);

            if (!lib->item[i].Name)
            {
                memset(&lib->item[i], 0, sizeof(lib->item[i]));
                continue;
            }

            strcpy(lib->item[i].Name, ObjectName);
            strcat(lib->item[i].Name, "::");
            strcat(lib->item[i].Name, Number);

            lib->item[i].Rip = (DWORD64)Vtbl[i];
            ReadProcessMemoryEx(hProcess, (DWORD64)Vtbl[i], lib->item[i].Bytes, BYTES_SIZE);
            lib->item[i].LibFilter = NULL;
            lib->item[i].ProcFilter = NULL;
            lib->item[i].LibLogger = NULL;
            lib->item[i].ProcLogger = NULL;
            lib->item[i].SyncTrampoline = NULL;
            lib->item[i].AsyncTrampoline = NULL;
            lib->item[i].Offset = 0;
            lib->item[i].RVA = 0;       // as in AddExport/AddFunction; was left uninitialized
            lib->item[i].lib = lib;
            InitSync(&lib->item[i], hProcess);
            SetHook(&lib->item[i], hProcess, MODE_SYNC_HOOK);
        }

        free(Vtbl);
    }

    return lib;
}

设置钩子的函数

// Switches one function to the given mode by writing BYTES_SIZE bytes over
// its start in the target process: the original bytes (MODE_NO_HOOK), the
// sync hook (MODE_SYNC_HOOK) or the async hook (MODE_ASYNC_HOOK). Any other
// mode raises an assertion and writes nothing. The mode is recorded in
// func->Mode in every case.
void SetHook(FUNCTION_ITEM *func, HANDLE hProcess, DWORD Mode)
{
    UCHAR *Patch = NULL;

    if (Mode == MODE_NO_HOOK) Patch = func->Bytes;
    else if (Mode == MODE_SYNC_HOOK) Patch = func->SyncHook;
    else if (Mode == MODE_ASYNC_HOOK) Patch = func->AsyncHook;

    if (Patch)
    {
        WriteProcessMemoryEx(hProcess, func->Rip, Patch, BYTES_SIZE);
    }
    else
    {
        DbgRaiseAssertionFailure();
    }

    func->Mode = Mode;
}

// Switches to Mode every used function slot of the library whose current
// mode equals FilterMode.
void SetHook(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess, DWORD Mode)
{
    for (DWORD n = 0; n < lib->Count; ++n)
    {
        FUNCTION_ITEM *func = &lib->item[n];

        if (!func->Name) continue;              // unused slot
        if (func->Mode != FilterMode) continue;

        SetHook(func, hProcess, Mode);
    }
}

检查目标进程是否已加载库的函数

// Reports whether the library is loaded in the target process.
// On the first successful lookup the module base is recorded and the
// address of every used function slot is computed as base + RVA. On later
// lookups the base is expected to be unchanged (an assertion is raised
// otherwise). When the module is not found, Base is reset to NULL.
BOOL IsLibraryLoaded(LIBRARY_ITEM *lib, HANDLE hProcess)
{
    UCHAR *RemoteBase = (UCHAR*)GetModuleHandleRemote(hProcess, lib->LibName);

    if (!RemoteBase)
    {
        lib->Base = NULL;
        return FALSE;
    }

    if (lib->Base)
    {
        if (lib->Base != RemoteBase) DbgRaiseAssertionFailure();
        return TRUE;
    }

    // First time the module is seen: resolve the function addresses.
    lib->Base = RemoteBase;

    for (DWORD n = 0; n < lib->Count; ++n)
    {
        FUNCTION_ITEM *func = &lib->item[n];

        if (func->Name)
        {
            func->Rip = (DWORD64)(lib->Base + func->RVA);
        }
    }

    return TRUE;
}

枚举目标进程的线程并找出被阻塞(block)线程的函数

// Looks for a thread of the target process that is currently executing a
// sync hook (its instruction pointer addresses the sync hook bytes).
//
// Parameters:
//   hProcess - target process.
//   context  - receives rip, rsp, rcx, rdx, r8 and r9 of the found thread.
//   phThread - receives an open handle to the found thread; the caller
//              owns it and must close it.
//
// Returns TRUE when such a thread is found, FALSE otherwise (the outputs
// are then left untouched).
//
// Threads that cannot be opened or queried (for example because they exited
// after the snapshot was taken) are skipped. Each thread is suspended while
// its context is read, as GetThreadContext requires, and resumed right after.
BOOL GetSyncRip(HANDLE hProcess, FUNCTION_CONTEXT *context, HANDLE *phThread)
{
    DWORD TID;
    DWORD PID;
    HANDLE h;
    BOOL b;
    BOOL GotContext;
    HANDLE hThread;
    THREADENTRY32 te;
    CONTEXT lcContext;
    UCHAR Buffer[SYNC_HOOK_SIZE];
    UCHAR SyncHook[SYNC_HOOK_SIZE];

    GenerateSyncHook(NULL, SyncHook);

    b = FALSE;

    TID = 0;
    PID = GetProcessId(hProcess);

    h = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);

    if (h != INVALID_HANDLE_VALUE)
    {
        te.dwSize = sizeof(te);

        if (Thread32First(h, &te))
        {
            do
            {
                if ((te.dwSize >= (FIELD_OFFSET(THREADENTRY32, th32OwnerProcessID) + 
                                   sizeof(te.th32OwnerProcessID))) &&
                    (PID == te.th32OwnerProcessID))
                {
                    TID = te.th32ThreadID;

                    // Thread handles take THREAD_* access rights.
                    hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, TID);

                    if (hThread)
                    {
                        GotContext = FALSE;

                        if (SuspendThread(hThread) != (DWORD)-1)
                        {
                            lcContext.ContextFlags = CONTEXT_ALL;
                            GotContext = GetThreadContext(hThread, &lcContext);
                            ResumeThread(hThread);
                        }

                        if (GotContext)
                        {
                            // Start from a known state in case the read fails.
                            memset(Buffer, 0, sizeof(Buffer));
                            ReadProcessMemoryEx(hProcess, lcContext.Rip, Buffer, sizeof(Buffer));

                            if (!memcmp(Buffer, SyncHook, SYNC_HOOK_SIZE))
                            {
                                context->Rip = lcContext.Rip;
                                context->Rsp = lcContext.Rsp;
                                context->Rcx = lcContext.Rcx;
                                context->Rdx = lcContext.Rdx;
                                context->R8 = lcContext.R8;
                                context->R9 = lcContext.R9;

                                *phThread = hThread;    // ownership passes to the caller
                                b = TRUE;
                                break;
                            }
                        }

                        CloseHandle(hThread);
                    }
                }

                te.dwSize = sizeof(te);
            }
            while (Thread32Next(h, &te));
        }

        CloseHandle(h);
    }

    return b;
}

实现 sync/async 等待的代码在单独的线程中运行。

用于 sync wait(等待) 的线程函数

// Thread routine of the sync wait. Polls the target process every 100 ms
// for a thread blocked in a sync hook and identifies the hooked function.
//
// - If the function has no filter, or its filter accepts the call, the
//   context, thread handle and function are stored in args, "Found" is
//   printed and the routine returns (the thread handle stays open for the
//   consumer of args).
// - If the filter rejects the call, the thread is sent on through the sync
//   trampoline (SkipItem) and polling continues.
// - When args->Exit is set and no blocked thread is pending, the routine
//   returns. (It used to spin forever in that case, without sleeping.)
//
// Always returns 0.
DWORD WaitSyncThreadRoutine(SYNC_WAIT_THREAD_ARGS *args)
{
    BOOL b;
    HANDLE hThread;
    FUNCTION_ITEM *func;
    FUNCTION_CONTEXT context;

    std::list<LIBRARY_ITEM*>::iterator lib_i;

    while (TRUE)
    {
        b = FALSE;

        while (!args->Exit)
        {
            Sleep(100);

            b = GetSyncRip(args->hProcess, &context, &hThread);

            if (b) break;
        }

        // Nothing found and the poll loop ended: exit was requested.
        if (!b) break;

        func = NULL;

        // Libraries patched on disk first (they may not be loaded yet) ...
        for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
        {
            if (IsLibraryLoaded((*lib_i), args->hProcess))
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        // ... then the items added at run time.
        if (!func)
        {
            for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        if (!func)
        {
            CloseHandle(hThread);
            DbgRaiseAssertionFailure();
            continue;
        }

        if ((!func->ProcFilter) || (func->ProcFilter(func, args->hProcess, &context)))
        {
            memcpy(&args->context, &context, sizeof(context));
            args->hThread = hThread;
            args->func = func;
            printf("Found\n");
            break;
        }

        // Filtered out: let the thread continue through the trampoline.
        InitSync(func, args->hProcess);
        SkipItem(func, hThread);
        CloseHandle(hThread);
    }

    return 0;
}

用于 async wait(等待) 的线程函数

// Thread routine of the async wait. Switches all sync-hooked functions to
// async hooks, then serves the injected DLL over args->Socket: for every
// FUNCTION_CONTEXT received it finds the hooked function, logs the call
// (custom logger or LogItem) when the filter accepts it, and replies with
// the address of the function's async trampoline.
//
// When args->Exit is set, the hooks are switched back to sync mode once and
// the routine returns after the socket has been idle for one select timeout
// (5 seconds). It also returns when the socket fails or is closed by the
// peer. Always returns 0.
DWORD WaitAsyncThreadRoutine(ASYNC_WAIT_THREAD_ARGS *args)
{
    fd_set set;
    BOOL restored;
    timeval timeout;
    FUNCTION_ITEM *func;
    FUNCTION_CONTEXT context;
    int Ready;
    int Received;
    int Chunk;
    std::list<LIBRARY_ITEM*>::iterator lib_i;

    restored = FALSE;

    for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
    {
        SetHook((*lib_i), MODE_SYNC_HOOK, args->hProcess, MODE_ASYNC_HOOK);
    }

    for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
    {
        if (IsLibraryLoaded((*lib_i), args->hProcess))
        {
            SetHook((*lib_i), MODE_SYNC_HOOK, args->hProcess, MODE_ASYNC_HOOK);
        }
    }

    while (TRUE)
    {
        // Wait for the next request, watching for the exit flag.
        while (TRUE)
        {
            if (args->Exit)
            {
                if (!restored)
                {
                    for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
                    {
                        SetHook((*lib_i), MODE_ASYNC_HOOK, args->hProcess, MODE_SYNC_HOOK);
                    }

                    for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
                    {
                        if (IsLibraryLoaded((*lib_i), args->hProcess))
                        {
                            SetHook((*lib_i), MODE_ASYNC_HOOK, args->hProcess, MODE_SYNC_HOOK);
                        }
                    }

                    restored = TRUE;
                }
            }

            set.fd_count = 1;
            set.fd_array[0] = args->Socket;

            timeout.tv_sec = 5;
            timeout.tv_usec = 0;

            Ready = select(0, &set, NULL, NULL, &timeout);

            if (Ready == SOCKET_ERROR) goto end;    // the socket is unusable
            if (Ready) break;                       // data is available

            // Timed out: calls that were already in flight have drained.
            if (args->Exit) goto end;
        }

        // Read one complete context. recv may deliver fewer bytes than
        // requested, and returns 0 when the peer has closed the connection.
        Received = 0;

        while (Received < (int)sizeof(context))
        {
            Chunk = recv(args->Socket, (char*)&context + Received, 
                         (int)sizeof(context) - Received, 0);

            if (Chunk <= 0) goto end;

            Received += Chunk;
        }

        func = NULL;

        for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
        {
            if (IsLibraryLoaded((*lib_i), args->hProcess))
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        if (!func)
        {
            for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        if (func)
        {
            if ((!func->ProcFilter) || (func->ProcFilter(func, args->hProcess, &context)))
            {
                if (!func->ProcLogger) LogItem(func, args->hProcess, &context);
                else func->ProcLogger(func, args->hProcess, &context);
            }

            // The target thread is blocked until it gets its trampoline.
            send(args->Socket, (char*)&func->AsyncTrampoline, sizeof(func->AsyncTrampoline), 0);
        }
        else DbgRaiseAssertionFailure();
    }

end:

    return 0;
}

将代码注入目标进程的函数

// Loads the library pLibName into the target process by copying
// LoadLibraryThreadRoutine and its arguments into the target and running
// the routine there on a remote thread.
//
// Returns the module value reported back by the remote routine, or NULL
// when the name does not fit the argument block, memory cannot be allocated
// in the target, or the remote thread cannot be created.
//
// The address of LoadLibraryA in this process is passed to the target as is.
HMODULE LoadLibraryRemote(HANDLE hProcess, char *pLibName)
{
    HANDLE hThread;
    void *Proc, *Data;
    LOAD_LIBRARY_THREAD_ARGS thread_args;

    // The name is copied into a fixed-size field of the argument block.
    if (strlen(pLibName) >= sizeof(thread_args.LibName)) return NULL;

    thread_args.pLoadLibraryA = LoadLibraryA;
    thread_args.Module = NULL;
    strcpy(thread_args.LibName, pLibName);

    Proc = VirtualAllocEx(hProcess, NULL, LOAD_LIBRARY_THREAD_ROUTINE_SIZE, 
                          MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    Data = VirtualAllocEx(hProcess, NULL, sizeof(thread_args), MEM_COMMIT, PAGE_READWRITE);

    if (Proc && Data)
    {
        // disable incremental linking to make it work
        WriteProcessMemoryEx(hProcess, (DWORD64)Proc, 
                 (UCHAR*)LoadLibraryThreadRoutine, LOAD_LIBRARY_THREAD_ROUTINE_SIZE);

        WriteProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));

        hThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Data, 0, NULL);

        if (hThread)
        {
            WaitForSingleObject(hThread, INFINITE);

            CloseHandle(hThread);

            // Fetch the result the remote routine stored in the argument block.
            ReadProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));
        }
    }

    if (Proc) VirtualFreeEx(hProcess, Proc, 0, MEM_RELEASE);

    if (Data) VirtualFreeEx(hProcess, Data, 0, MEM_RELEASE);

    return thread_args.Module;
}

// Queries the target process for the handle of its module pLibName by
// copying GetModuleHandleThreadRoutine and its arguments into the target
// and running the routine there on a remote thread.
//
// Returns the module value reported back by the remote routine, or NULL
// when the name does not fit the argument block, memory cannot be allocated
// in the target, or the remote thread cannot be created.
//
// The address of GetModuleHandleA in this process is passed to the target
// as is.
HMODULE GetModuleHandleRemote(HANDLE hProcess, char *pLibName)
{
    HANDLE hThread;
    void *Proc, *Data;
    GET_MODULE_HANDLE_THREAD_ARGS thread_args;

    // The name is copied into a fixed-size field of the argument block.
    if (strlen(pLibName) >= sizeof(thread_args.LibName)) return NULL;

    thread_args.pGetModuleHandleA = GetModuleHandleA;
    thread_args.Module = NULL;
    strcpy(thread_args.LibName, pLibName);

    Proc = VirtualAllocEx(hProcess, NULL, GET_MODULE_HANDLE_THREAD_ROUTINE_SIZE, 
                          MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    Data = VirtualAllocEx(hProcess, NULL, sizeof(thread_args), MEM_COMMIT, PAGE_READWRITE);

    if (Proc && Data)
    {
        // disable incremental linking to make it work
        WriteProcessMemoryEx(hProcess, (DWORD64)Proc, 
        (UCHAR*)GetModuleHandleThreadRoutine, GET_MODULE_HANDLE_THREAD_ROUTINE_SIZE);

        WriteProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));

        hThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Data, 0, NULL);

        if (hThread)
        {
            WaitForSingleObject(hThread, INFINITE);

            CloseHandle(hThread);

            // Fetch the result the remote routine stored in the argument block.
            ReadProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));
        }
    }

    if (Proc) VirtualFreeEx(hProcess, Proc, 0, MEM_RELEASE);

    if (Data) VirtualFreeEx(hProcess, Data, 0, MEM_RELEASE);

    return thread_args.Module;
}

// Resolves the export pProcName of module hModule inside the target process
// by copying GetProcAddressThreadRoutine and its arguments into the target
// and running the routine there on a remote thread. The resulting address
// is then passed through GetTargetAddressFromStub together with the first
// BYTES_SIZE bytes found at that address.
//
// Returns the resolved address in the target process, or NULL when the name
// does not fit the argument block, memory cannot be allocated in the target,
// the remote thread cannot be created, or the export is not found.
//
// The address of GetProcAddress in this process is passed to the target as is.
UCHAR* GetProcAddressRemote(HANDLE hProcess, HMODULE hModule, char *pProcName)
{
    DWORD64 Rip;
    UCHAR Bytes[BYTES_SIZE];
    void *Proc, *Data;
    HANDLE hThread;
    GET_PROC_ADDRESS_THREAD_ARGS thread_args;

    // The name is copied into a fixed-size field of the argument block.
    if (strlen(pProcName) >= sizeof(thread_args.ProcName)) return NULL;

    thread_args.pGetProcAddress = GetProcAddress;
    thread_args.Module = hModule;
    thread_args.Proc = NULL;
    strcpy(thread_args.ProcName, pProcName);

    Proc = VirtualAllocEx(hProcess, NULL, 
           GET_PROCESS_ADDRESS_THREAD_ROUTINE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    Data = VirtualAllocEx(hProcess, NULL, sizeof(thread_args), MEM_COMMIT, PAGE_READWRITE);

    if (Proc && Data)
    {
        // disable incremental linking to make it work
        WriteProcessMemoryEx(hProcess, (DWORD64)Proc, 
               (UCHAR*)GetProcAddressThreadRoutine, GET_PROCESS_ADDRESS_THREAD_ROUTINE_SIZE);

        WriteProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));

        hThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Data, 0, NULL);

        if (hThread)
        {
            WaitForSingleObject(hThread, INFINITE);

            CloseHandle(hThread);

            // Fetch the result the remote routine stored in the argument block.
            ReadProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));
        }
    }

    if (Proc) VirtualFreeEx(hProcess, Proc, 0, MEM_RELEASE);

    if (Data) VirtualFreeEx(hProcess, Data, 0, MEM_RELEASE);

    // Nothing to follow when the lookup failed; do not read target memory at 0.
    if (!thread_args.Proc) return NULL;

    Rip = (DWORD64)thread_args.Proc;

    ReadProcessMemoryEx(hProcess, Rip, Bytes, BYTES_SIZE);

    Rip = GetTargetAddressFromStub(hProcess, Bytes, Rip);

    return (UCHAR*)Rip;
}

// Unloads module hModule from the target process by copying
// FreeLibraryThreadRoutine and its arguments into the target and running
// the routine there on a remote thread. Does nothing beyond cleanup when
// memory cannot be allocated in the target or the remote thread cannot be
// created.
//
// The address of FreeLibrary in this process is passed to the target as is.
void FreeLibraryRemote(HANDLE hProcess, HMODULE hModule)
{
    HANDLE hThread;
    void *Proc, *Data;
    FREE_LIBRARY_THREAD_ARGS thread_args;

    thread_args.pFreeLibrary = FreeLibrary;
    thread_args.Module = hModule;

    Proc = VirtualAllocEx(hProcess, NULL, FREE_LIBRARY_THREAD_ROUTINE_SIZE, 
                          MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    Data = VirtualAllocEx(hProcess, NULL, sizeof(thread_args), MEM_COMMIT, PAGE_READWRITE);

    if (Proc && Data)
    {
        // disable incremental linking to make it work
        WriteProcessMemoryEx(hProcess, (DWORD64)Proc, (UCHAR*)FreeLibraryThreadRoutine, 
                             FREE_LIBRARY_THREAD_ROUTINE_SIZE);

        WriteProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));

        hThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Data, 0, NULL);

        if (hThread)
        {
            WaitForSingleObject(hThread, INFINITE);

            CloseHandle(hThread);

            ReadProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));
        }
    }

    if (Proc) VirtualFreeEx(hProcess, Proc, 0, MEM_RELEASE);

    if (Data) VirtualFreeEx(hProcess, Data, 0, MEM_RELEASE);
}

现在让我们把注意力转向注入到目标进程中以实现 async 钩子的特殊 DLL。 导出的函数

PUBLIC Log
EXTERN LogImp : PROC

.code

; Entry point reached from the async hook. On entry the top of the stack
; holds the return address pushed by the hook's call instruction (hooked
; function rip + 5); below it the stack is exactly as the hooked function
; received it.
;
; Builds a 48-byte FUNCTION_CONTEXT (rip, rsp, rcx, rdx, r8, r9) on the
; stack, passes it to LogImp, restores the argument registers and jumps to
; the trampoline address returned by LogImp.
;
; Stack layout during the call to LogImp (rsp is 16-byte aligned there, as
; the x64 calling convention requires; "sub rsp, 32" left it 8 bytes off):
;   [rsp +   0 ..  31]  shadow space for LogImp
;   [rsp +  32 ..  95]  saved xmm0-xmm3 (volatile argument registers that
;                       the callee is free to overwrite)
;   [rsp +  96 .. 103]  alignment padding
;   [rsp + 104 .. 151]  FUNCTION_CONTEXT
Log PROC
    pop rax                                ; get function rip + 5
    sub rax, 5                            ; get function rip
    sub rsp, 48                            ; make space for context structure
    mov qword ptr [rsp], rax            ; store rip
    lea rax, [rsp + 48]                    ; get function rsp
    mov qword ptr [rsp + 8], rax        ; store rsp
    mov qword ptr [rsp + 16], rcx        ; store rcx
    mov qword ptr [rsp + 24], rdx        ; store rdx
    mov qword ptr [rsp + 32], r8        ; store r8
    mov qword ptr [rsp + 40], r9        ; store r9
    mov rcx, rsp                        ; one parameter
    sub rsp, 104                        ; shadow space + xmm save area + alignment
    movups xmmword ptr [rsp + 32], xmm0    ; save xmm0
    movups xmmword ptr [rsp + 48], xmm1    ; save xmm1
    movups xmmword ptr [rsp + 64], xmm2    ; save xmm2
    movups xmmword ptr [rsp + 80], xmm3    ; save xmm3
    call LogImp                            ; returns address of trampoline
    movups xmm3, xmmword ptr [rsp + 80]    ; restore xmm3
    movups xmm2, xmmword ptr [rsp + 64]    ; restore xmm2
    movups xmm1, xmmword ptr [rsp + 48]    ; restore xmm1
    movups xmm0, xmmword ptr [rsp + 32]    ; restore xmm0
    add rsp, 104                        ; drop shadow space + xmm save area + alignment
    mov r9, qword ptr [rsp + 40]        ; restore r9
    mov r8, qword ptr [rsp + 32]        ; restore r8
    mov rdx, qword ptr [rsp + 24]        ; restore rdx
    mov rcx, qword ptr [rsp + 16]        ; restore rcx
    add rsp, 48                            ; restore rsp
    jmp rax                                ; jump to trampoline
Log ENDP

END

async 钩子看起来像这样

call $+5                                   ; push the address of the next instruction (function rip + 5) and continue there
sub rsp, 8                                 ; reserve a stack slot for the 64-bit address of Log
mov dword ptr [rsp], Log                           ; low dword of Log address
mov dword ptr [rsp + 4], Log >> 32         ; high dword of Log address
ret                                        ; pop the address just built and jump to Log

因此,Log 函数的前两条指令将挂钩函数的地址放在 rax 寄存器中。

现在是 LogImp 函数

// Called by the Log assembly routine inside the target process. Sends the
// captured context over g_Socket and waits for the reply, which is the
// address of the trampoline to continue at. The exchange is serialized
// with g_Section so that requests and replies of different threads do not
// interleave.
extern "C" DWORD64 LogImp(FUNCTION_CONTEXT *context)
{
    DWORD64 TrampolineAddress;
    const char *Request = (const char*)context;
    char *Reply = (char*)&TrampolineAddress;

    EnterCriticalSection(&g_Section);
    send(g_Socket, Request, sizeof(*context), 0);
    recv(g_Socket, Reply, sizeof(TrampolineAddress), 0);
    LeaveCriticalSection(&g_Section);

    return TrampolineAddress;
}

基本上,就是这样! 感谢您的阅读。

© . All rights reserved.