动态生成 x86 函数调用
CNativeInvoke 类在运行时生成 x86 函数调用代码并执行它。
引言
几年前,我曾考虑创建一门自己的字节码解释型语言。该语言的目标之一是能够尽可能直接地从字节码调用本机代码。我的设想是:字节码编译器生成通用的调用指令,解释器在执行时判断目标地址是否位于字节码内存块内;如果不在,就使用 CNativeInvoke 类发起本机调用。现在我把这个类分享给大家——也许有人可以以此为基础,构建出我未能完成的字节码解释型语言。
CNativeInvoke
CNativeInvoke 提供了一组方法,用于指定生成的调用采用 __cdecl 还是 __stdcall 约定、设置 C++ 类/接口调用的“this”指针、添加参数,以及执行调用(带返回值或不带返回值)。CNativeInvoke 使用 Windows API VirtualAlloc() 为动态代码块分配内存,并使用 VirtualProtect() 将该内存标记为可执行。
代码中的注释显示了写入内存的汇编指令。我完全不是汇编专家,所以花了一些时间才弄清楚使用哪些操作码等等。Visual Studio 从 C++ 到 ASM 的映射在这项研究中很有帮助。
NativeInvoke.h
#pragma once
// Size, in bytes, of the buffer that holds the generated call stub.
#define SIZE_NATIVE_INVOKE_PAGE 1024
// Maximum number of parameters that can be queued for one generated call.
#define MAX_NATIVE_PARAMS 63
// Generates a small x86 (32-bit) call stub at run time and executes it.
//
// Typical use: Initialize(), queue the arguments right-to-left with
// AddParam8/16/32(), then Call() the target address. The queued arguments
// stay in the buffer after Call(), so the same argument list can be replayed
// against another target; Reset() discards them.
class CNativeInvoke
{
private:
    LPBYTE m_pbPage;          // Buffer holding the generated code; NULL until Initialize() succeeds
    INT m_nWritePtr;          // Offset in m_pbPage where the next instruction byte is written
    INT m_cParams;            // Number of parameters queued since the last Reset()
    BOOL m_fStackCleanup;     // TRUE: the stub pops the parameters after the call (caller cleanup)
    DWORD_PTR m_dwThisPtr;    // If non-zero, loaded into ecx before the call ("this" pointer)

    // Not copyable: a copy would share m_pbPage with the original and both
    // destructors would try to release the same page. Declared but
    // intentionally never defined.
    CNativeInvoke (const CNativeInvoke&);
    CNativeInvoke& operator= (const CNativeInvoke&);

public:
    CNativeInvoke (BOOL fStackCleanup = TRUE /* Defaulted for __cdecl */, DWORD_PTR dwThisPtr = 0);
    ~CNativeInvoke ();

    // Allocates the code buffer. Must succeed before any other method is used.
    HRESULT Initialize (VOID);

    // Selects caller (TRUE) or callee (FALSE) stack cleanup for later calls.
    VOID SetStackCleanup (BOOL fStackCleanup);
    // Sets the value loaded into ecx before the call; 0 disables it.
    VOID SetThisPtr (DWORD_PTR dwThisPtr);
    // Discards all queued parameters and rewrites the stub prologue.
    VOID Reset (VOID);

    // Queue one parameter. Each is pushed as a full 32-bit value; the 8- and
    // 16-bit variants are zero-extended. Queue in right-to-left order.
    HRESULT AddParam8 (BYTE bParam);
    HRESULT AddParam16 (WORD wParam);
    HRESULT AddParam32 (DWORD dwParam);

    // Call the function at dwPtr with the queued parameters, optionally
    // storing eax (32-bit) or edx:eax (64-bit) to the supplied location.
    HRESULT Call (DWORD_PTR dwPtr);
    HRESULT Call (DWORD_PTR dwPtr, DWORD* pdwReturn);
    HRESULT Call (DWORD_PTR dwPtr, DWORDLONG* pdwlReturn);

protected:
    // Low-level emitters that append instruction bytes at m_nWritePtr.
    VOID EmitCall (DWORD_PTR dwPtr);
    VOID EmitOpCode (BYTE bOpCode, DWORD dwValue);
    VOID EmitOpCode (BYTE bOpCode, BYTE bOperand, DWORD dwValue);

    // Makes the buffer executable, runs the stub, then restores the protection.
    HRESULT Execute (VOID);
};
NativeInvoke.cpp
#include <windows.h>
#include "Assert.h" // Change this to include your own Assert(x) macro
#include "NativeInvoke.h"
// Converts a pointer (or pointer-sized integer) to a 32-bit DWORD so it can be
// embedded as an immediate/address in the generated x86 code. The expansion is
// fully parenthesized so it behaves as one expression wherever it is used.
#define DWORDPTR(p) ((DWORD)(DWORD_PTR)(p))
// Constructs an invoker that has no code buffer yet.
//   fStackCleanup - initial stack cleanup mode (see SetStackCleanup()).
//   dwThisPtr     - initial "this" value (see SetThisPtr()); 0 means none.
CNativeInvoke::CNativeInvoke (BOOL fStackCleanup, DWORD_PTR dwThisPtr)
    : m_pbPage(NULL),
      m_nWritePtr(0),
      m_cParams(0),
      m_fStackCleanup(fStackCleanup),
      m_dwThisPtr(dwThisPtr)
{
}
// Releases the code buffer allocated by Initialize(), if any.
CNativeInvoke::~CNativeInvoke ()
{
    if(m_pbPage)
    {
        // With MEM_RELEASE the size argument must be 0; passing the
        // allocation size makes VirtualFree() fail and the page is leaked.
        VirtualFree(m_pbPage, 0, MEM_RELEASE);
    }
}
// Allocates the buffer that will hold the generated code and writes the stub
// prologue into it.
// Returns S_OK on success, or the VirtualAlloc() failure converted with
// HRESULT_FROM_WIN32(). Asserts that no buffer has been allocated yet.
HRESULT CNativeInvoke::Initialize (VOID)
{
    Assert(NULL == m_pbPage);

    m_pbPage = (LPBYTE)VirtualAlloc(NULL,
                                    SIZE_NATIVE_INVOKE_PAGE,
                                    MEM_COMMIT | MEM_RESERVE,
                                    PAGE_EXECUTE_READWRITE);
    if(NULL == m_pbPage)
        return HRESULT_FROM_WIN32(GetLastError());

    Reset();
    return S_OK;
}
// Chooses who removes the pushed parameters from the stack after the call.
//
//   TRUE  - caller cleanup: the generated code adds to esp after the call to
//           discard the parameters. Required for __cdecl, and for __thiscall
//           with a variable parameter list.
//   FALSE - callee cleanup: the generated code leaves esp alone and assumes
//           the called function removed its own parameters. Required for
//           __stdcall, and for __thiscall with a fixed parameter list.
//
// When no parameters are queued, no cleanup code is generated either way.
VOID CNativeInvoke::SetStackCleanup (BOOL fStackCleanup)
{
    this->m_fStackCleanup = fStackCleanup;
}
// Sets the object pointer for member-function calls. A non-zero value is
// loaded into ecx immediately before the call; 0 means no "this" pointer.
VOID CNativeInvoke::SetThisPtr (DWORD_PTR dwThisPtr)
{
    this->m_dwThisPtr = dwThisPtr;
}
// Discards any queued parameters and rewrites the stub prologue at the start
// of the buffer, leaving the write position just past it.
VOID CNativeInvoke::Reset (VOID)
{
    static const BYTE c_rgbPrologue[] =
    {
        0x55,           // push ebp
        0x8B, 0xEC      // mov ebp, esp
    };

    for(INT ib = 0; ib < (INT)sizeof(c_rgbPrologue); ib++)
        m_pbPage[ib] = c_rgbPrologue[ib];

    m_nWritePtr = (INT)sizeof(c_rgbPrologue);
    m_cParams = 0;
}
// Queues an 8-bit parameter. The value is zero-extended and pushed as a full
// 32-bit stack slot, so this is exactly a 32-bit push of the widened value.
// Returns S_OK, or E_FAIL when the buffer or the parameter limit is exhausted.
HRESULT CNativeInvoke::AddParam8 (BYTE bParam)
{
    const DWORD dwWidened = bParam;     // upper 24 bits are zero
    return AddParam32(dwWidened);
}
// Queues a 16-bit parameter. The value is zero-extended and pushed as a full
// 32-bit stack slot, so this is exactly a 32-bit push of the widened value.
// Returns S_OK, or E_FAIL when the buffer or the parameter limit is exhausted.
HRESULT CNativeInvoke::AddParam16 (WORD wParam)
{
    const DWORD dwWidened = wParam;     // upper 16 bits are zero
    return AddParam32(dwWidened);
}
// Queues a 32-bit parameter by appending "push imm32" to the generated code.
// Parameters must be queued in right-to-left order.
// Returns S_OK on success. Returns E_FAIL when the code buffer has not been
// allocated (Initialize() not called or failed), when fewer than the required
// bytes remain in the buffer, or when MAX_NATIVE_PARAMS are already queued.
HRESULT CNativeInvoke::AddParam32 (DWORD dwParam)
{
    // Without a buffer there is nowhere to write; fail instead of
    // dereferencing a NULL pointer.
    if(!m_pbPage)
        return E_FAIL;

    if(m_nWritePtr + 5 >= SIZE_NATIVE_INVOKE_PAGE || m_cParams >= MAX_NATIVE_PARAMS)
        return E_FAIL;

    LPBYTE pbOut = m_pbPage + m_nWritePtr;
    pbOut[0] = 0x68;                        // push dword <n>
    pbOut[1] = (BYTE)(dwParam & 0xFF);      // immediate, little-endian
    pbOut[2] = (BYTE)(dwParam >> 8);
    pbOut[3] = (BYTE)(dwParam >> 16);
    pbOut[4] = (BYTE)(dwParam >> 24);

    m_nWritePtr += 5;
    m_cParams++;
    return S_OK;
}
// Calls the function at dwPtr with the queued parameters and ignores its
// return value.
// Returns the result of Execute(), or E_FAIL when the code buffer has not
// been allocated or does not have room for the call sequence.
// The write position is restored afterwards, so the queued parameters remain
// available for another Call().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr)
{
    // Fail cleanly instead of writing through a NULL buffer pointer.
    if(!m_pbPage)
        return E_FAIL;

    if(m_nWritePtr + 17 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    const INT nSavedWritePtr = m_nWritePtr;

    EmitCall(dwPtr);
    m_pbPage[m_nWritePtr++] = 0x5D;     // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;     // ret

    const HRESULT hr = Execute();

    // Rewind so the call/epilogue bytes are overwritten by the next Call().
    m_nWritePtr = nSavedWritePtr;
    return hr;
}
// Calls the function at dwPtr with the queued parameters and stores the
// 32-bit value left in eax to *pdwReturn.
// Returns the result of Execute(), or E_FAIL when pdwReturn is NULL, when the
// code buffer has not been allocated, or when the buffer does not have room
// for the call sequence.
// The write position is restored afterwards, so the queued parameters remain
// available for another Call().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr, DWORD* pdwReturn)
{
    // The generated code stores straight to pdwReturn, so a NULL pointer
    // would fault inside the stub; reject it (and a missing buffer) up front.
    if(!m_pbPage || !pdwReturn)
        return E_FAIL;

    if(m_nWritePtr + 22 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    const INT nSavedWritePtr = m_nWritePtr;

    EmitCall(dwPtr);
    EmitOpCode(0xA3, DWORDPTR(pdwReturn));  // mov [pdwReturn], eax
    m_pbPage[m_nWritePtr++] = 0x5D;         // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;         // ret

    const HRESULT hr = Execute();

    // Rewind so the call/epilogue bytes are overwritten by the next Call().
    m_nWritePtr = nSavedWritePtr;
    return hr;
}
// Calls the function at dwPtr with the queued parameters and stores the
// 64-bit value left in edx:eax to *pdwlReturn (eax = low half, edx = high).
// Returns the result of Execute(), or E_FAIL when pdwlReturn is NULL, when
// the code buffer has not been allocated, or when the buffer does not have
// room for the call sequence.
// The write position is restored afterwards, so the queued parameters remain
// available for another Call().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr, DWORDLONG* pdwlReturn)
{
    // The generated code stores straight to pdwlReturn, so a NULL pointer
    // would fault inside the stub; reject it (and a missing buffer) up front.
    if(!m_pbPage || !pdwlReturn)
        return E_FAIL;

    if(m_nWritePtr + 28 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    // View the 64-bit slot as two consecutive DWORDs: [0] low, [1] high.
    DWORD* pdwHalves = (DWORD*)pdwlReturn;
    const INT nSavedWritePtr = m_nWritePtr;

    EmitCall(dwPtr);
    EmitOpCode(0xA3, DWORDPTR(pdwHalves));              // mov [low half], eax
    EmitOpCode(0x89, 0x15, DWORDPTR(pdwHalves + 1));    // mov [high half], edx
    m_pbPage[m_nWritePtr++] = 0x5D;                     // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;                     // ret

    const HRESULT hr = Execute();

    // Rewind so the call/epilogue bytes are overwritten by the next Call().
    m_nWritePtr = nSavedWritePtr;
    return hr;
}
// Appends the call sequence to the generated code:
//   [mov ecx, this]   only when a "this" pointer is set
//   mov eax, dwPtr
//   call eax
//   [add esp, n]      only when parameters are queued and caller cleanup is on
//
// The short "add esp, imm8" encoding (83 C4 ib) sign-extends its immediate,
// so it can only express adjustments up to 127 bytes (31 parameters). For
// larger parameter lists the imm32 encoding (81 C4 id) is emitted instead;
// using the short form there would move esp in the wrong direction.
//
// Size: 7 bytes, +5 with a "this" pointer, +3 or +6 with caller cleanup. The
// 6-byte cleanup is only needed with 32 or more queued parameters, and with
// MAX_NATIVE_PARAMS (63) five-byte pushes the code stays far below
// SIZE_NATIVE_INVOKE_PAGE (1024), so the extra 3 bytes cannot overflow the
// buffer. Revisit this if either constant changes.
VOID CNativeInvoke::EmitCall (DWORD_PTR dwPtr)
{
    const BOOL fHasThis = (0 != m_dwThisPtr);
    const BOOL fCleanup = (0 < m_cParams && m_fStackCleanup);
    const DWORD cbParams = (DWORD)(m_cParams * sizeof(DWORD));
    const BOOL fShortAdd = (cbParams <= 0x7F);   // fits a sign-extended imm8

    INT cbEmit = 7;                              // mov eax, imm32 + call eax
    if(fHasThis)
        cbEmit += 5;                             // mov ecx, imm32
    if(fCleanup)
        cbEmit += fShortAdd ? 3 : 6;             // add esp, imm8 / imm32
    Assert(m_nWritePtr + cbEmit < SIZE_NATIVE_INVOKE_PAGE);

    if(fHasThis)
        EmitOpCode(0xB9, (DWORD)m_dwThisPtr);    // mov ecx, m_dwThisPtr

    EmitOpCode(0xB8, (DWORD)dwPtr);              // mov eax, <address>
    m_pbPage[m_nWritePtr++] = 0xFF;              // call eax
    m_pbPage[m_nWritePtr++] = 0xD0;

    if(fCleanup)
    {
        if(fShortAdd)
        {
            m_pbPage[m_nWritePtr++] = 0x83;      // add esp, byte +<n>
            m_pbPage[m_nWritePtr++] = 0xC4;
            m_pbPage[m_nWritePtr++] = (BYTE)cbParams;
        }
        else
            EmitOpCode(0x81, 0xC4, cbParams);    // add esp, dword <n>
    }
}
// Appends a one-byte opcode followed by a 32-bit value in little-endian
// order (5 bytes in total) and advances the write position.
VOID CNativeInvoke::EmitOpCode (BYTE bOpCode, DWORD dwValue)
{
    Assert(m_nWritePtr + 5 < SIZE_NATIVE_INVOKE_PAGE);

    LPBYTE pbOut = m_pbPage + m_nWritePtr;
    *pbOut++ = bOpCode;
    for(INT nShift = 0; nShift < 32; nShift += 8)
        *pbOut++ = (BYTE)(dwValue >> nShift);   // least significant byte first

    m_nWritePtr += 5;
}
// Appends a one-byte opcode, a one-byte operand and a 32-bit value in
// little-endian order (6 bytes in total) and advances the write position.
VOID CNativeInvoke::EmitOpCode (BYTE bOpCode, BYTE bOperand, DWORD dwValue)
{
    Assert(m_nWritePtr + 6 < SIZE_NATIVE_INVOKE_PAGE);

    // Write the leading opcode, then let the five-byte overload write the
    // operand byte and the 32-bit value that follow it.
    m_pbPage[m_nWritePtr++] = bOpCode;
    EmitOpCode(bOperand, dwValue);
}
// Runs the code generated so far: switches the buffer to PAGE_EXECUTE,
// flushes the instruction cache, calls the start of the buffer as a function,
// and finally restores the previous page protection.
// Returns S_OK once the generated code has been called, otherwise the
// VirtualProtect()/FlushInstructionCache() failure as an HRESULT.
HRESULT CNativeInvoke::Execute (VOID)
{
    DWORD dwOldProtect;
    if(!VirtualProtect(m_pbPage, m_nWritePtr, PAGE_EXECUTE, &dwOldProtect))
        return HRESULT_FROM_WIN32(GetLastError());

    typedef VOID (WINAPI* PFNSTUB)(VOID);
    PFNSTUB pfnStub = (PFNSTUB)m_pbPage;

    HRESULT hrResult = S_OK;
    if(FlushInstructionCache(GetCurrentProcess(), pfnStub, m_nWritePtr))
        pfnStub();
    else
        hrResult = HRESULT_FROM_WIN32(GetLastError());

    // Put the original protection back so the buffer can be written again.
    VirtualProtect(m_pbPage, m_nWritePtr, dwOldProtect, &dwOldProtect);
    return hrResult;
}
使用代码
在尝试编译此代码之前,请确保为 NativeInvoke.cpp 中使用的 Assert(x) 宏提供定义。
请务必将此代码构建为 x86。它可以在 64 位 Windows 上使用 WoW64 运行,但它本身必须构建为 x86(32 位)。
编译成功之后,使用起来就很简单了。以下是一些示例:
// Sample target with a 64-bit return value: prints both arguments and
// returns their product computed in 64 bits.
DWORDLONG MyTestFunction (int a, int b)
{
    printf("MyTestFunction(): a = %d, b = %d\n",a,b);

    const DWORDLONG dwlLeft = (DWORDLONG)a;
    const DWORDLONG dwlRight = (DWORDLONG)b;
    return dwlLeft * dwlRight;
}
// Sample target with a 32-bit return value: prints both arguments and
// returns a - b converted to DWORD.
DWORD MyOtherTestFunction (int a, int b)
{
    printf("MyOtherTestFunction(): a = %d, b = %d\n",a,b);

    const int nDifference = a - b;
    return (DWORD)nDifference;
}
// Sample interface used to demonstrate calling a virtual member function
// through CNativeInvoke; CTest provides the implementation.
interface ITest
{
// Takes two ints and returns nothing.
virtual VOID Whatever (int a, int b) = 0;
};
class CTest : public ITest
{
protected:
INT m_n;
public:
CTest () { m_n = 10; }
~CTest () {}
VOID Whatever (int a, int b)
{
printf("CTest::Whatever(): m_n = %d\n",m_n);
printf("a = %d, b = %d\n",a,b);
}
};
// Demonstrates CNativeInvoke with three calls that reuse one queued argument
// list: a function returning 64 bits, a function returning 32 bits, and a
// virtual member function that returns nothing.
// Every step is checked; a result is printed only when its call succeeded.
// Returns 0 when all steps succeed and 1 otherwise.
INT main (INT cArgs, __in_ecount(cArgs) PSTR* ppszArgs)
{
    CNativeInvoke Native;
    CTest Test;
    ITest* lpTest = &Test;

    // Reads the pointer-to-member as a plain 32-bit code address.
    // NOTE(review): this assumes the compiler represents this pointer-to-member
    // as a single 32-bit address (x86 build) - confirm for your toolchain.
    union
    {
        VOID (ITest::*pWhatever)(int,int);
        DWORD dwWhatever;
    };
    pWhatever = &ITest::Whatever;

    HRESULT hr = Native.Initialize();

    // Push the parameters in right-to-left order. These pushed
    // parameters will be reused for all three example calls.
    if(SUCCEEDED(hr))
        hr = Native.AddParam32(150000); // b
    if(SUCCEEDED(hr))
        hr = Native.AddParam32(200000); // a

    // Call MyTestFunction() and print the 64-bit return value.
    if(SUCCEEDED(hr))
    {
        DWORDLONG dwResult64 = 0;
        hr = Native.Call((DWORD_PTR)MyTestFunction, &dwResult64);
        if(SUCCEEDED(hr))
            printf("Result: %I64u\r\n", dwResult64);
    }

    // Call MyOtherTestFunction() and print the 32-bit return value.
    if(SUCCEEDED(hr))
    {
        DWORD dwResult32 = 0;
        hr = Native.Call((DWORD_PTR)MyOtherTestFunction, &dwResult32);
        if(SUCCEEDED(hr))
            printf("Result: %u\r\n", dwResult32);
    }

    // Call an object's method that doesn't return a value.
    if(SUCCEEDED(hr))
    {
        Native.SetThisPtr((DWORD_PTR)lpTest);
        Native.SetStackCleanup(FALSE); // Fixed parameter list on a __thiscall
        hr = Native.Call(dwWhatever);
    }

    return SUCCEEDED(hr) ? 0 : 1;
}
如果一切都能正确编译并运行,您将看到如下输出:
MyTestFunction(): a = 200000, b = 150000
Result: 30000000000
MyOtherTestFunction(): a = 200000, b = 150000
Result: 50000
CTest::Whatever(): m_n = 10
a = 200000, b = 150000
下一步?
我很乐意看到有人将此代码整合到字节码解释器或其他类似的项目中。如果有人可以重新实现 CNativeInvoke 的方法以生成 x64 或 ARM 指令,那就太好了!如果还能利用编译器预定义的预处理器宏按平台选择相应的实现,那就更好了!