看開源代碼如何解析ELF文件

工具ROPgadget

在ROPgadget中有識別並分析多種文件結構,這次主要用這個功能來分析ELF文件格式。

分析的文件爲libc.so

上代碼

class Binary:
    """Load a binary file and select the format-specific parser for it.

    The parser is chosen from the raw-mode options when both are given,
    otherwise from the magic bytes at the start of the file.
    """

    def __init__(self, options):
        """Read ``options.binary`` and instantiate the matching parser.

        Args:
            options: object exposing ``binary`` (path of the file to load)
                plus ``rawArch`` and ``rawMode``; when both of the latter are
                truthy the data is handed to ``Raw`` without inspecting the
                magic bytes.

        No exception is raised on failure: an ``[Error]`` line is printed and
        the instance is left without a parser.
        """
        self.__fileName  = options.binary
        self.__rawBinary = None
        self.__binary    = None

        try:
            # The context manager closes the handle even if read() fails.
            with open(self.__fileName, "rb") as fd:
                self.__rawBinary = fd.read()
        except (IOError, OSError, TypeError, ValueError):
            # Unusable or unreadable path. KeyboardInterrupt and SystemExit
            # are deliberately no longer swallowed here.
            print("[Error] Can't open the binary or binary not found")
            return

        magic = self.__rawBinary[:4]

        if options.rawArch and options.rawMode:
            self.__binary = Raw(self.__rawBinary, options.rawArch, options.rawMode)
        elif magic == unhexlify(b"7f454c46"):
            self.__binary = ELF(self.__rawBinary)
        elif magic[:2] == unhexlify(b"4d5a"):
            self.__binary = PE(self.__rawBinary)
        elif magic == unhexlify(b"cafebabe"):
            self.__binary = UNIVERSAL(self.__rawBinary)
        elif magic in (unhexlify(b"cefaedfe"), unhexlify(b"cffaedfe")):
            self.__binary = MACHO(self.__rawBinary)
        else:
            print("[Error] Binary format not supported")
            return
在binary.py中進行文件類型判定,ELF文件最開始四個字節‘7f454c46’

class ELFFlags:
    """Numeric constants used while decoding an ELF header."""

    # Positions of the class and byte-order flags inside the identification
    # bytes at the start of the file.
    EI_CLASS = 0x04
    EI_DATA  = 0x05

    # Values found at EI_CLASS: 32-bit or 64-bit file.
    ELFCLASS32 = 0x01
    ELFCLASS64 = 0x02

    # Values found at EI_DATA: little-endian or big-endian encoding.
    ELFDATA2LSB = 0x01
    ELFDATA2MSB = 0x02

    # Values of the header's e_machine field, in ascending order.
    EM_386      = 0x03
    EM_MIPS     = 0x08
    EM_SPARCv8p = 0x12
    EM_PowerPC  = 0x14
    EM_ARM      = 0x28
    EM_X86_64   = 0x3e
    EM_ARM64    = 0xb7

class ELF:
    def __init__(self, binary):
        """Keep a mutable copy of ``binary`` and parse its ELF structures.

        Args:
            binary: raw file contents; anything ``bytearray()`` accepts.
        """
        self.__binary = bytearray(binary)
        self.__ElfHeader = None
        self.__shdr_l, self.__phdr_l = [], []

        # Order matters: the header is parsed first because both header
        # tables are located through its fields.
        for parse_step in (self.__setHeaderElf, self.__setShdr, self.__setPhdr):
            parse_step()
ELF文件類初始化

    def __setHeaderElf(self):
        """Decode the ELF file header into ``self.__ElfHeader``.

        The class flag (32/64-bit) and the byte-order flag are read from the
        identification bytes and select which header structure is copied out
        of the file data.  If either flag holds an unknown value, an
        ``[Error]`` line is printed and the header is left unset.

        Returns:
            None in every case.
        """
        # Leading identification bytes; only the two flag positions are used.
        ident = self.__binary[:15]
        ei_class, ei_data = ident[ELFFlags.EI_CLASS], ident[ELFFlags.EI_DATA]

        if ei_class not in (ELFFlags.ELFCLASS32, ELFFlags.ELFCLASS64):
            print("[Error] ELF.__setHeaderElf() - Bad Arch size")
            return None

        if ei_data not in (ELFFlags.ELFDATA2LSB, ELFFlags.ELFDATA2MSB):
            print("[Error] ELF.__setHeaderElf() - Bad architecture endian")
            return None

        # Both flags are validated above, so each one is a two-way choice.
        little_endian = ei_data == ELFFlags.ELFDATA2LSB
        if ei_class == ELFFlags.ELFCLASS32:
            header_type = Elf32_Ehdr_LSB if little_endian else Elf32_Ehdr_MSB
        else:
            header_type = Elf64_Ehdr_LSB if little_endian else Elf64_Ehdr_MSB
        self.__ElfHeader = header_type.from_buffer_copy(self.__binary)

        # Called for its side effect: reports an unsupported architecture.
        self.getArch()
設置ELF文件頭

ELF前16個字節稱爲魔數

其中前四字節之前已經說過了,第一個字符是ASCII字符中的DEL控制符(0x7f),後三個是"ELF"三個字母的ASCII碼

第5個字節爲Class位,0爲無效文件,1爲32位文件,2爲64位文件

第6個字節指定字節序(Data)有以下取值

0 無效格式

1 小端格式

2 大端格式

根據5和6字節信息選擇相應的拷貝方式(如32位小端等)

    def getArch(self):
        """Translate the header's ``e_machine`` value into a ``CS_ARCH_*`` constant.

        Returns:
            The matching ``CS_ARCH_*`` value, or None (after printing an
            ``[Error]`` line) when the machine type is not one of the
            supported ones.
        """
        machine = self.__ElfHeader.e_machine

        # Both x86 flavours share one architecture constant.
        if machine in (ELFFlags.EM_386, ELFFlags.EM_X86_64):
            return CS_ARCH_X86
        if machine == ELFFlags.EM_ARM:
            return CS_ARCH_ARM
        if machine == ELFFlags.EM_ARM64:
            return CS_ARCH_ARM64
        if machine == ELFFlags.EM_MIPS:
            return CS_ARCH_MIPS
        if machine == ELFFlags.EM_PowerPC:
            return CS_ARCH_PPC
        if machine == ELFFlags.EM_SPARCv8p:
            return CS_ARCH_SPARC

        print("[Error] ELF.getArch() - Architecture not supported")
        return None
e_machine是一個雙字節(19,20字節)的表示CPU平臺屬性的成員

之後執行函數

    def __setShdr(self):
        """Parse the section header table into ``self.__shdr_l`` and name each entry.

        Reads ``e_shnum`` entries of ``e_shentsize`` bytes starting at file
        offset ``e_shoff``, using the structure layout that matches the
        file's class (``getArchMode()``) and byte-order flag.  When
        ``e_shstrndx`` is non-zero and designates a parsed entry, every entry
        also receives a ``str_name`` attribute holding its NUL-terminated
        name read from that section's data.

        Returns:
            None.  If no layout matches the class/byte-order combination, an
            ``[Error]`` line is printed and nothing is parsed.
        """
        header = self.__ElfHeader
        data = self.__binary

        # The layout is identical for every entry, so resolve it once.
        layouts = {
            (CS_MODE_32, ELFFlags.ELFDATA2LSB): Elf32_Shdr_LSB,
            (CS_MODE_32, ELFFlags.ELFDATA2MSB): Elf32_Shdr_MSB,
            (CS_MODE_64, ELFFlags.ELFDATA2LSB): Elf64_Shdr_LSB,
            (CS_MODE_64, ELFFlags.ELFDATA2MSB): Elf64_Shdr_MSB,
        }
        shdr_type = layouts.get((self.getArchMode(), data[ELFFlags.EI_DATA]))
        if shdr_type is None:
            print("[Error] ELF.__setShdr() - Bad Arch size or architecture endian")
            return None

        # Copy each entry straight from its offset instead of re-slicing
        # (and re-copying) the rest of the file for every entry.
        for index in range(header.e_shnum):
            offset = header.e_shoff + index * header.e_shentsize
            self.__shdr_l.append(shdr_type.from_buffer_copy(data, offset))

        # Set up names from the section-name string table.  An index outside
        # the parsed table is ignored rather than raising IndexError.
        strndx = header.e_shstrndx
        if 0 < strndx < len(self.__shdr_l):
            table_start = self.__shdr_l[strndx].sh_offset
            for shdr in self.__shdr_l:
                name_start = table_start + shdr.sh_name
                name_end = data.find(b"\0", name_start)
                if name_end == -1:
                    name_end = len(data)
                # Decode the raw bytes: str() on a bytearray gives its repr
                # text on Python 3, not the contents.
                shdr.str_name = data[name_start:name_end].decode("ascii", "replace")
該函數處理段頭部

第一行self.__ElfHeader.e_shnum表示段數量,e_shnum也是一個雙字節成員(32位版本中爲第49,50字節),在本機的實際運行中可以看到libc.so的段數量爲32(好TM多)

第二行self.__ElfHeader.e_shoff代表段表在文件中的偏移,32位版本中爲4字節(33,34,35,36字節),實際運行值爲0x4b88f

之後同樣按照32位小端格式從段表中拷貝添加到self.__shdr_l

self.__ElfHeader.e_shentsize指段表描述符大小,雙字節(47,48字節),實際運行值40

self.__ElfHeader.e_shstrndx指段表字符串表所在段在段表中的下標,雙字節(51,52字節),實際運行值爲31,也就是說段表中最後一個段是段表字符串表所在段(好拗口)

根據這個值找到字符串所在段,然後依次分給各段


下一個函數

def __setPhdr(self):
        """Parse the program header table into ``self.__phdr_l``.

        Reads ``e_phnum`` entries of ``e_phentsize`` bytes starting at file
        offset ``e_phoff``, using the structure layout that matches the
        file's class (``getArchMode()``) and byte-order flag.

        Returns:
            None.  If no layout matches the class/byte-order combination, an
            ``[Error]`` line is printed and nothing is parsed.
        """
        header = self.__ElfHeader
        data = self.__binary

        # The layout is identical for every entry, so resolve it once.
        layouts = {
            (CS_MODE_32, ELFFlags.ELFDATA2LSB): Elf32_Phdr_LSB,
            (CS_MODE_32, ELFFlags.ELFDATA2MSB): Elf32_Phdr_MSB,
            (CS_MODE_64, ELFFlags.ELFDATA2LSB): Elf64_Phdr_LSB,
            (CS_MODE_64, ELFFlags.ELFDATA2MSB): Elf64_Phdr_MSB,
        }
        phdr_type = layouts.get((self.getArchMode(), data[ELFFlags.EI_DATA]))
        if phdr_type is None:
            print("[Error] ELF.__setPhdr() - Bad Arch size or architecture endian")
            return None

        # Copy each entry straight from its offset instead of re-slicing
        # (and re-copying) the rest of the file for every entry.
        for index in range(header.e_phnum):
            offset = header.e_phoff + index * header.e_phentsize
            self.__phdr_l.append(phdr_type.from_buffer_copy(data, offset))
self.__ElfHeader.e_phnum是ELF執行視圖中Segment的個數,雙字節(45,46),實際結果9

self.__ElfHeader.e_phoff是程序頭表(Segment表)在文件中的偏移,32位版本中爲4字節(29,30,31,32字節);每個表項的大小由雙字節的e_phentsize(43,44字節)給出

之後和段表一樣,逐項解析後放入self.__phdr_l










發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章