從彙編角度看程序,只能看到mov、sub、div、cmpxchg等指令;函數、結構體、指針、數組等中級編程語言中的語法糖均不存在,至於C++、Java等高級語言中的對象、類、虛函數等更是不復存在。那麼我們在高級語言中建立的語法糖在哪呢?它們是怎麼被轉化到彙編的呢?......
1,彙編之於函數調用
先看以下函數調用代碼
/* Minimal call example: main passes its two int locals a and b to func and stores the returned sum in c. */
int func(int x, int y){return x + y;}int main(){int a = 10, b = 100;int c = func(a, b);}
; Listing generated by Microsoft (R) Optimizing Compiler Version 18.00.40629.0
TITLE E:\tmp\x.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC _func
PUBLIC _main
; Function compile flags: /Odtp
_TEXT SEGMENT
; Frame layout of main: the three int locals c, a and b live below ebp.
_c$ = -12 ; size = 4
_a$ = -8 ; size = 4
_b$ = -4 ; size = 4
_main PROC
; File e:\tmp\x.c
; Line 6
; Prologue: save the caller's ebp, establish this frame, reserve 12 bytes for the locals.
push ebp
mov ebp, esp
sub esp, 12 ; 0000000cH
; Line 7
; a = 10, b = 100
mov DWORD PTR _a$[ebp], 10 ; 0000000aH
mov DWORD PTR _b$[ebp], 100 ; 00000064H
; Line 8
; Arguments are pushed right to left: b first, then a.
mov eax, DWORD PTR _b$[ebp]
push eax
mov ecx, DWORD PTR _a$[ebp]
push ecx
call _func
; The caller removes the two 4-byte arguments; the result in eax is stored into c.
add esp, 8
mov DWORD PTR _c$[ebp], eax
; Line 9
; Return value 0, then tear down the frame.
xor eax, eax
mov esp, ebp
pop ebp
ret 0
_main ENDP
_TEXT ENDS
; Function compile flags: /Odtp
_TEXT SEGMENT
; Both parameters are addressed at positive offsets from ebp, i.e. above
; the saved ebp (ebp+0) and the return address (ebp+4).
_x$ = 8 ; size = 4
_y$ = 12 ; size = 4
_func PROC
; File e:\tmp\x.c
; Line 2
push ebp
mov ebp, esp
; Line 3
; eax = x + y; the sum is left in eax for the caller.
mov eax, DWORD PTR _x$[ebp]
add eax, DWORD PTR _y$[ebp]
; Line 4
; ret 0 pops no arguments here; the caller does it with "add esp, 8".
pop ebp
ret 0
_func ENDP
_TEXT ENDS
END
/* Returns the sum of its two int arguments. */
int func(int x, int y){
    int sum = x + y;
    return sum;
}
/* Calls func by hand with MSVC inline assembly: push the arguments, call
   through a function pointer, read the result from eax, pop the arguments. */
int main(){
int a = 10, b = 100, c;
int (*fptr)(int,int) =func;
__asm{
push b // push b first (arguments go right to left)
push a // then push a
call fptr // call func through the function pointer
mov c, eax // the return value is in eax; store it in c
add esp, 8 // the caller pops the two 4-byte arguments to rebalance the stack
}
printf("%d\n",c);
}
/* Aggregate of 101 ints: a 100-element array followed by a single int x. */
typedef struct Record Record;
struct Record{
    int ary[100];
    int x;
};
/* Returns a Record by value. Only x is assigned (100); ary is left uninitialized. */
Record func(){
Record rc;
rc.x = 100;
return rc;
}
/* Receives the Record that func returns by value and prints its x member. */
int main(){
Record r = func();
printf("%d", r.x);
}
; Listing generated by Microsoft (R) Optimizing Compiler Version 18.00.40629.0
TITLE E:\tmp\x.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
_DATA SEGMENT
$SG1335 DB '%d', 00H
_DATA ENDS
PUBLIC _func
PUBLIC _main
EXTRN _printf:PROC
EXTRN @__security_check_cookie@4:PROC
EXTRN ___security_cookie:DWORD
; Function compile flags: /Odtp
_TEXT SEGMENT
; Frame layout of main: $T1 is a 404-byte compiler temporary, _r$ is the
; 404-byte local r, and __$ArrayPad$ is the 4-byte slot just below the saved ebp.
$T1 = -812 ; size = 404
_r$ = -408 ; size = 404
__$ArrayPad$ = -4 ; size = 4
_main PROC
; File e:\tmp\x.c
; Line 14
push ebp
mov ebp, esp
sub esp, 812 ; 0000032cH
; Store (___security_cookie XOR ebp) in the __$ArrayPad$ slot.
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; esi/edi are used by the block copy below; they are restored before returning.
push esi
push edi
; Line 15
; The address of the temporary $T1 is the only value pushed for the call to func.
lea eax, DWORD PTR $T1[ebp]
push eax
call _func
add esp, 4
; eax (returned by func) is the copy source: 101 dwords are copied from it into r.
mov ecx, 101 ; 00000065H
mov esi, eax
lea edi, DWORD PTR _r$[ebp]
rep movsd
; Line 16
; r.x is read at offset 400 inside r, right after the 100-int array.
mov ecx, DWORD PTR _r$[ebp+400]
push ecx
push OFFSET $SG1335
call _printf
add esp, 8
; Line 17
xor eax, eax
pop edi
pop esi
; Reload the stored value, XOR it with ebp again and pass it in ecx to the cookie check.
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call @__security_check_cookie@4
mov esp, ebp
pop ebp
ret 0
_main ENDP
_TEXT ENDS
; Function compile flags: /Odtp
_TEXT SEGMENT
; Frame layout of func: _rc$ is the 404-byte local rc. $T1 at ebp+8 is a
; 4-byte argument: the address the caller pushed before the call.
_rc$ = -408 ; size = 404
__$ArrayPad$ = -4 ; size = 4
$T1 = 8 ; size = 4
_func PROC
; File e:\tmp\x.c
; Line 8
push ebp
mov ebp, esp
sub esp, 408 ; 00000198H
; Store (___security_cookie XOR ebp) in the __$ArrayPad$ slot.
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
push esi
push edi
; Line 10
; rc.x = 100 (x sits at offset 400 inside rc).
mov DWORD PTR _rc$[ebp+400], 100 ; 00000064H
; Line 11
; "return rc": copy 101 dwords from rc to the address passed in $T1 ...
mov ecx, 101 ; 00000065H
lea esi, DWORD PTR _rc$[ebp]
mov edi, DWORD PTR $T1[ebp]
rep movsd
; ... and return that same address in eax.
mov eax, DWORD PTR $T1[ebp]
; Line 12
pop edi
pop esi
; Reload the stored value, XOR it with ebp again and pass it in ecx to the cookie check.
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call @__security_check_cookie@4
mov esp, ebp
pop ebp
ret 0
_func ENDP
_TEXT ENDS
END
/* Aggregate of 101 ints: a 100-element array followed by a single int x. */
typedef struct Record Record;
struct Record{
    int ary[100];
    int x;
};
/* Returns a Record by value. Only x is assigned (100); ary is left uninitialized. */
Record func(){
Record rc;
rc.x = 100;
return rc;
}
/* Picks up func's struct return value by hand: after the call, eax is read
   with MSVC inline assembly and used as a Record*. */
int main(){
Record* r;
func(); //call the function
__asm{
mov r, eax //the return value is the start address of the object returned by func; fetch it here
}
/* NOTE(review): this depends on eax still holding func's result when the
   __asm block runs, and on the pointed-to storage still being valid - confirm
   for the compiler settings in use. */
printf("%d", r->x);
}
2,面向對象程序設計中成員函數調用過程與原理
面向對象程序設計中,我們經常使用類,而事實上,面向對象的三大特性就是封裝、繼承、多態,虛函數的使用極大方便程序的編寫,麻煩了程序的調試(面向對象程序比C語言程序難調)。在這一節,將主要討論類的成員函數調用過程,上一節中主要討論了普通函數的調用(從語義上講,類的靜態函數就是外部普通函數),與普通函數比,類的成員函數多了一個this指針(有的語言稱爲self),先以以下代碼爲例子:
// Minimal class used to show how a member function reaches its object:
// one private int, set to 10 on construction and increased by 10 per Show().
class CK{
public:
    CK() : m_iVal(10) {}

    void Show(){
        m_iVal = m_iVal + 10;
    }

private:
    int m_iVal;
};
// Constructs a CK as a local object and calls its Show() member once.
int main(){
CK obj;
obj.Show();
}
; Listing generated by Microsoft (R) Optimizing Compiler Version 18.00.40629.0
TITLE E:\tmp\x.cpp
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC ??0CK@@QAE@XZ ; CK::CK
PUBLIC ?Show@CK@@QAEXXZ ; CK::Show
PUBLIC _main
; Function compile flags: /Odtp
_TEXT SEGMENT
; obj occupies the 4-byte slot at ebp-4.
_obj$ = -4 ; size = 4
_main PROC
; File e:\tmp\x.cpp
; Line 16
push ebp
mov ebp, esp
; This push creates the 4-byte slot at ebp-4 that _obj$ refers to.
push ecx
; Line 17
; The address of obj is loaded into ecx before the constructor call; nothing is pushed.
lea ecx, DWORD PTR _obj$[ebp]
call ??0CK@@QAE@XZ ; CK::CK
; Line 18
; Same pattern for Show: the address of obj travels in ecx.
lea ecx, DWORD PTR _obj$[ebp]
call ?Show@CK@@QAEXXZ ; CK::Show
; Line 19
xor eax, eax
mov esp, ebp
pop ebp
ret 0
_main ENDP
_TEXT ENDS
; Function compile flags: /Odtp
; COMDAT ?Show@CK@@QAEXXZ
_TEXT SEGMENT
; CK::Show keeps its this pointer in a local slot at ebp-4.
_this$ = -4 ; size = 4
?Show@CK@@QAEXXZ PROC ; CK::Show, COMDAT
; _this$ = ecx
; File e:\tmp\x.cpp
; Line 8
push ebp
mov ebp, esp
; Create the ebp-4 slot, then save the incoming ecx (this) into it.
push ecx
mov DWORD PTR _this$[ebp], ecx
; Line 9
; m_iVal += 10: the member is at offset 0 of the object, so [this] is read,
; 10 is added, and the result is written back to [this].
mov eax, DWORD PTR _this$[ebp]
mov ecx, DWORD PTR [eax]
add ecx, 10 ; 0000000aH
mov edx, DWORD PTR _this$[ebp]
mov DWORD PTR [edx], ecx
; Line 10
mov esp, ebp
pop ebp
ret 0
?Show@CK@@QAEXXZ ENDP ; CK::Show
_TEXT ENDS
; Function compile flags: /Odtp
; COMDAT ??0CK@@QAE@XZ
_TEXT SEGMENT
; The CK constructor keeps its this pointer in a local slot at ebp-4.
_this$ = -4 ; size = 4
??0CK@@QAE@XZ PROC ; CK::CK, COMDAT
; _this$ = ecx
; File e:\tmp\x.cpp
; Line 4
push ebp
mov ebp, esp
; Create the ebp-4 slot, then save the incoming ecx (this) into it.
push ecx
mov DWORD PTR _this$[ebp], ecx
; Line 5
; m_iVal = 10: store 10 at offset 0 of the object.
mov eax, DWORD PTR _this$[ebp]
mov DWORD PTR [eax], 10 ; 0000000aH
; Line 6
; The constructor leaves the this pointer in eax when it returns.
mov eax, DWORD PTR _this$[ebp]
mov esp, ebp
pop ebp
ret 0
??0CK@@QAE@XZ ENDP ; CK::CK
_TEXT ENDS
END
#include <stdio.h>
// Abstract interface with one pure virtual function, used to demonstrate
// calling a virtual member function by hand.
class Base{
public:
    virtual void Show(int x, int y) = 0;

    // A polymorphic base needs a virtual destructor: without it, deleting a
    // derived object through a Base* is undefined behaviour. It is declared
    // after Show so that Show remains the first virtual function of the class.
    virtual ~Base() {}
};
// Concrete implementation of Base: Show prints x + y plus the stored value,
// which the constructor sets to 10.
class Derived: public Base{
public:
    Derived() : m_iValue(10) {}

    virtual void Show(int x, int y){
        const int total = x + y + m_iValue;
        printf("%d", total);
    }

private:
    int m_iValue;
};
// Calls the virtual member function Derived::Show by hand with MSVC inline
// assembly: the object pointer goes in ecx, the arguments go on the stack.
int main(){
// NOTE(review): obj is never deleted in this snippet.
Derived * obj = new Derived;
auto pMemFunc =&Derived::Show;
__asm{
mov ecx, obj //pass this
push 10 //y
push 100 //x
call pMemFunc; //call the member function
// add esp, 8 //rebalancing the stack is not needed: member functions use __thiscall, so the callee cleans up the stack space taken by its arguments
}
}
3,構造自己的線程類
線程是操作系統中重要的概念,是操作系統中可以異步執行的執行體,在Win32中,如果要創建一個線程,我們需要使用CreateThread或者_beginthreadex等API,這些API有個特點,即需要傳遞一個非類成員函數(靜態函數或普通C函數),在公司的AngelicaES引擎中,線程的創建使用的就是全局靜態函數,然而,在Java中,創建線程只需要一個Thread對象,或一個Thread對象加一個實現了Runnable接口的對象,在Java中,使用線程,往往構造一個Thread對象,然後調用Start函數,線程就起來了,並沒有我們看到的全局函數或者靜態成員函數,那麼Java等高級語言是怎麼實現的呢?在前兩節中,已經講清楚了普通函數調用和類的成員函數調用的過程與原理,事實上,只需要使用這個原理就可以實現一個類似Java的線程函數(不使用任何全局函數或static函數),如果想將一個類的成員函數作爲線程函數來執行,在語法層次我們無法逃脫this指針的束縛。但是,事實上,我們可以編寫一段奇怪的二進制指令(可直接執行的機器碼),在這段機器碼裏面設置好this指針等信息並跳轉到類的成員函數中去,最後將這段機器碼作爲線程函數去執行(強制轉換成CreateThread需要的線程函數類型),這樣,我們並沒有使用全局函數或static函數,先看以下一段機器碼:
// Machine-code template for a thread entry thunk (32-bit x86).
// A thread start routine is a __stdcall function taking ONE 4-byte argument,
// so on entry [esp] is the return address and [esp+4] is lpParameter, and the
// routine must return with "ret 4". The thunk forwards lpParameter to a
// __thiscall member function. The two zeroed immediates are placeholders
// that the user of the template patches at run time:
//   bytes  6..9  - object pointer loaded into ecx (this)
//   bytes 11..14 - address of the member function to call
const static unsigned char g_thread_proc[]= {
    //------------parameter-----------------
    0x8B,0x44,0x24,0x04,        // mov eax,dword ptr [esp+4]   (lpParameter)
    0x50,                       // push eax                    (argument for the member function)
    //-----------this pointer-------------
    0xB9,0x00,0x00,0x00,0x00,   // mov ecx,imm32               (patched: this)
    //-----------call back function-------------
    0xB8,0x00,0x00,0x00,0x00,   // mov eax,imm32               (patched: target address)
    0xFF,0xD0,                  // call eax                    (__thiscall callee pops its own argument)
    //return
    0x33,0xC0,                  // xor eax,eax                 (thread exit code 0 instead of a leftover value)
    0xC2,0x04,0x00              // ret 4                       (pop the single LPVOID argument)
};
//core.h
#ifndef __ZX_CORE_H__
#define __ZX_CORE_H__
#include <windows.h>
#ifndef interface
#define interface struct
#endif
#ifndef implement
#define implement :public
#endif
// Machine-code template for a thread entry thunk (32-bit x86).
// A thread start routine is a __stdcall function taking ONE 4-byte argument,
// so on entry [esp] is the return address and [esp+4] is lpParameter, and the
// routine must return with "ret 4". The thunk forwards lpParameter to a
// __thiscall member function. The two zeroed immediates are placeholders
// that the user of the template patches at run time:
//   bytes  6..9  - object pointer loaded into ecx (this)
//   bytes 11..14 - address of the member function to call
const static unsigned char g_thread_proc[]={
    //------------parameter-----------------
    0x8B,0x44,0x24,0x04,        // mov eax,dword ptr [esp+4]   (lpParameter)
    0x50,                       // push eax                    (argument for the member function)
    //-----------this pointer-------------
    0xB9,0x00,0x00,0x00,0x00,   // mov ecx,imm32               (patched: this)
    //-----------call back function-------------
    0xB8,0x00,0x00,0x00,0x00,   // mov eax,imm32               (patched: target address)
    0xFF,0xD0,                  // call eax                    (__thiscall callee pops its own argument)
    //return
    0x33,0xC0,                  // xor eax,eax                 (thread exit code 0 instead of a leftover value)
    0xC2,0x04,0x00              // ret 4                       (pop the single LPVOID argument)
};
#endif
//runnable.h
#ifndef __ZX_RUNNABLE_H__
#define __ZX_RUNNABLE_H__
#include "core.h"
// Callback interface: run() is the work to execute; lpParameter is the
// pointer-sized argument handed to it by the caller.
interface ZXRunnable{
    virtual void run(void* lpParameter)= 0;

    // ZXThread::~ZXThread deletes its runnable through a ZXRunnable*, so the
    // destructor must be virtual; otherwise deleting a derived runnable is
    // undefined behaviour. Declared after run() so run() stays the first
    // virtual function of the interface.
    virtual ~ZXRunnable() {}
};
#endif
//thread.h
#ifndef __ZX_THREAD_H__
#define __ZX_THREAD_H__
#include "core.h"
#include "runnable.h"
// Runs ZXRunnable::run on a Win32 thread without a global or static entry
// function: a small machine-code thunk stored inside the object serves as
// the thread start routine.
//
// Ownership: the destructor deletes the attached runnable.
class ZXThread{
public:
    ZXThread();
    ZXThread(ZXRunnable* runnable);
    virtual ~ZXThread();

    // Not copyable: a copy would delete the same runnable twice and share the
    // thread handle, and the thunk is stored per object.
    ZXThread(const ZXThread&) = delete;
    ZXThread& operator=(const ZXThread&) = delete;

public:
    void Start();   // patch the thunk and create the thread
    void Wait();    // block until the thread has finished
    void SetRunnable(ZXRunnable* runnable);
    ZXRunnable* GetRunnable();

private:
    ZXRunnable* m_pRunnable;    // work to run; deleted by the destructor
    HANDLE m_hThread;           // handle returned by CreateThread, NULL before Start()
    unsigned char m_thread_proc[sizeof(g_thread_proc)];  // per-object copy of the thunk template
};
#endif
//thread.cpp
#include "thread.h"
// Default constructor: no runnable attached and no thread created yet.
ZXThread::ZXThread()
    : m_pRunnable(NULL)
    , m_hThread(NULL)
{
}

// Attaches `runnable` (later deleted by the destructor); no thread is created yet.
ZXThread::ZXThread(ZXRunnable* runnable)
    : m_pRunnable(runnable)
    , m_hThread(NULL)
{
}
// Joins the thread (if one was started), releases its handle, then deletes
// the runnable.
ZXThread::~ZXThread(){
    if (m_hThread != NULL){
        // The thread executes the thunk stored in m_thread_proc and calls into
        // the runnable, so neither may go away while it is still running.
        ::WaitForSingleObject(m_hThread, INFINITE);
        // The handle returned by CreateThread must be closed, or it leaks.
        ::CloseHandle(m_hThread);
        m_hThread = NULL;
    }
    delete m_pRunnable;
    m_pRunnable = NULL;
}
// Replaces the attached runnable with `runnable`. The previously attached
// runnable is not deleted here.
void ZXThread::SetRunnable(ZXRunnable* runnable){
    this->m_pRunnable = runnable;
}
// Returns the currently attached runnable (may be NULL).
ZXRunnable* ZXThread::GetRunnable(){
    ZXRunnable* const current = m_pRunnable;
    return current;
}
void ZXThread::Start(){
CopyMemory(m_thread_proc, g_thread_proc, sizeof(g_thread_proc));
*(int*)(&m_thread_proc[6])= (int)m_pRunnable;
void (ZXRunnable::*func)(void* lpParameter)= &ZXRunnable::run;
int addr;
__asm{
mov eax, func
mov addr, eax
}
*(int*)(&m_thread_proc[11])= addr;
m_hThread= ::CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)(void*)m_thread_proc,
NULL, 0, NULL);
}
void ZXThread::Wait(){
::WaitForSingleObject(m_hThread, INFINITE);
}
測試代碼:
#include <iostream>
#include "thread.h"
using namespace std;
// Sample runnable: run() writes a greeting to standard output and ignores
// its parameter.
class ZXRun implement ZXRunnable{
public:
    virtual void run(void* lpParameter){
        std::cout << "Hello,World!" << std::endl;
    }
};
int main(){
ZXThread boss(new ZXRun);
boss.Start();
boss.Wait();
}
4,總結
上面實際給出了解決一類難題的思路:如果某個地方需要一個全局函數(或類的static函數),而我們想要完全面向對象(即不想使用全局函數或類的static函數),解決方法就是使用機器碼,在機器碼內完成跳轉,這就是Thunk技術(名字跟Knuth有點像;以前看過國內一位大牛僅使用4個字節就實現封裝Windows窗口消息函數的代碼,而只要百度那4個字節,就可以搜索出那位牛人,貌似是金山的一位大牛,佩服)。