diff options
author | Henrik Nord <[email protected]> | 2014-03-21 16:37:11 +0100 |
---|---|---|
committer | Henrik Nord <[email protected]> | 2014-03-21 16:37:26 +0100 |
commit | 9d46875b53ffb21bc55aec4a2c76472133ea5d1c (patch) | |
tree | 2e0fa6e8829213470acc239b8885c3a79ced63f0 /lib/hipe/llvm/elf_format.erl | |
parent | bbe92ec2c359373205149dabc61ef0d50784760f (diff) | |
parent | b326df0d935d221574abf58d5e2a3efddd020278 (diff) | |
download | otp-9d46875b53ffb21bc55aec4a2c76472133ea5d1c.tar.gz otp-9d46875b53ffb21bc55aec4a2c76472133ea5d1c.tar.bz2 otp-9d46875b53ffb21bc55aec4a2c76472133ea5d1c.zip |
Merge branch 'yiannist/hipe-llvm-backend'
* yiannist/hipe-llvm-backend:
Support the LLVM backend in HiPE
Implement the LLVM backend
Extend RTL API to support the LLVM backend
Add support for llvm unique symbols in hipe_gensym
Add a BIF that only returns the atom ok
Move some common code in hipe_pack_constants
Add better specs in hipe_pack_constants and cleanup
OTP-11801
Diffstat (limited to 'lib/hipe/llvm/elf_format.erl')
-rw-r--r-- | lib/hipe/llvm/elf_format.erl | 790 |
1 files changed, 790 insertions, 0 deletions
diff --git a/lib/hipe/llvm/elf_format.erl b/lib/hipe/llvm/elf_format.erl new file mode 100644 index 0000000000..260da9b5e6 --- /dev/null +++ b/lib/hipe/llvm/elf_format.erl @@ -0,0 +1,790 @@ +%% -*- erlang-indent-level: 2 -*- + +%%% @copyright 2011-2014 Yiannis Tsiouris <[email protected]>, +%%% Chris Stavrakakis <[email protected]>, +%%% Kostis Sagonas <[email protected]> +%%% @author Yiannis Tsiouris <[email protected]> +%%% [http://www.softlab.ntua.gr/~gtsiour/] + +%%% @doc This module contains functions for extracting various pieces of +%%% information from an ELF formated Object file. To fully understand +%%% the ELF format and the use of these functions please read +%%% "[http://www.linuxjournal.com/article/1060?page=0,0]" carefully. + +-module(elf_format). + +-export([get_tab_entries/1, + %% Relocations + get_rodata_relocs/1, + get_text_relocs/1, + extract_rela/2, + get_rela_addends/1, + %% Note + extract_note/2, + %% Executable code + extract_text/1, + %% GCC Exception Table + get_exn_handlers/1, + %% Misc. + set_architecture_flag/1, + is64bit/0 + ]). + +-include("elf_format.hrl"). + +%%------------------------------------------------------------------------------ +%% Types +%%------------------------------------------------------------------------------ + +-type elf() :: binary(). + +-type lp() :: non_neg_integer(). % landing pad +-type num() :: non_neg_integer(). +-type index() :: non_neg_integer(). +-type offset() :: non_neg_integer(). +-type size() :: non_neg_integer(). +-type start() :: non_neg_integer(). + +-type info() :: index(). +-type nameoff() :: offset(). +-type valueoff() :: offset(). + +-type name() :: string(). +-type name_size() :: {name(), size()}. +-type name_sizes() :: [name_size()]. + +%%------------------------------------------------------------------------------ +%% Abstract Data Types and Accessors for ELF Structures. +%%------------------------------------------------------------------------------ + +%% File header +-record(elf_ehdr, {ident, % ELF identification + type, % Object file type + machine, % Machine Type + version, % Object file version + entry, % Entry point address + phoff, % Program header offset + shoff :: offset(), % Section header offset + flags, % Processor-specific flags + ehsize :: size(), % ELF header size + phentsize :: size(), % Size of program header entry + phnum :: num(), % Number of program header entries + shentsize :: size(), % Size of section header entry + shnum :: num(), % Number of section header entries + shstrndx :: index() % Section name string table index + }). +-type elf_ehdr() :: #elf_ehdr{}. + +-record(elf_ehdr_ident, {class, % File class + data, % Data encoding + version, % File version + osabi, % OS/ABI identification + abiversion, % ABI version + pad, % Start of padding bytes + nident % Size of e_ident[] + }). +%% -type elf_ehdr_ident() :: #elf_ehdr_ident{}. + +%% Section header entries +-record(elf_shdr, {name, % Section name + type, % Section type + flags, % Section attributes + addr, % Virtual address in memory + offset :: offset(), % Offset in file + size :: size(), % Size of section + link, % Link to other section + info, % Miscellaneous information + addralign, % Address align boundary + entsize % Size of entries, if section has table + }). +%% -type elf_shdr() :: #elf_shdr{}. + +%% Symbol table entries +-record(elf_sym, {name :: nameoff(), % Symbol name + info, % Type and Binding attributes + other, % Reserved + shndx, % Section table index + value :: valueoff(), % Symbol value + size :: size() % Size of object + }). +-type elf_sym() :: #elf_sym{}. + +%% Relocations +-record(elf_rel, {r_offset :: offset(), % Address of reference + r_info :: info() % Symbol index and type of relocation + }). +-type elf_rel() :: #elf_rel{}. + +-record(elf_rela, {r_offset :: offset(), % Address of reference + r_info :: info(), % Symbol index and type of relocation + r_addend :: offset() % Constant part of expression + }). +-type elf_rela() :: #elf_rela{}. + +%% %% Program header table +%% -record(elf_phdr, {type, % Type of segment +%% flags, % Segment attributes +%% offset, % Offset in file +%% vaddr, % Virtual address in memory +%% paddr, % Reserved +%% filesz, % Size of segment in file +%% memsz, % Size of segment in memory +%% align % Alignment of segment +%% }). + +%% %% GCC exception table +%% -record(elf_gccexntab, {lpbenc, % Landing pad base encoding +%% lpbase, % Landing pad base +%% ttenc, % Type table encoding +%% ttoff, % Type table offset +%% csenc, % Call-site table encoding +%% cstabsize, % Call-site table size +%% cstab :: cstab() % Call-site table +%% }). +%% -type elf_gccexntab() :: #elf_gccexntab{}. + +-record(elf_gccexntab_callsite, {start :: start(), % Call-site start + size :: size(), % Call-site size + lp :: lp(), % Call-site landing pad + % (exception handler) + onaction % On action (e.g. cleanup) + }). +%% -type elf_gccexntab_callsite() :: #elf_gccexntab_callsite{}. + +%%------------------------------------------------------------------------------ +%% Accessor Functions +%%------------------------------------------------------------------------------ + +%% File header +%% -spec mk_ehdr(...) -> elf_ehrd(). +mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags, Ehsize, + Phentsize, Phnum, Shentsize, Shnum, Shstrndx) -> + #elf_ehdr{ident = Ident, type = Type, machine = Machine, version = Version, + entry = Entry, phoff = Phoff, shoff = Shoff, flags = Flags, + ehsize = Ehsize, phentsize = Phentsize, phnum = Phnum, + shentsize = Shentsize, shnum = Shnum, shstrndx = Shstrndx}. + +%% -spec ehdr_shoff(elf_ehdr()) -> offset(). +%% ehdr_shoff(#elf_ehdr{shoff = Offset}) -> Offset. +%% +%% -spec ehdr_shentsize(elf_ehdr()) -> size(). +%% ehdr_shentsize(#elf_ehdr{shentsize = Size}) -> Size. +%% +%% -spec ehdr_shnum(elf_ehdr()) -> num(). +%% ehdr_shnum(#elf_ehdr{shnum = Num}) -> Num. +%% +%% -spec ehdr_shstrndx(elf_ehdr()) -> index(). +%% ehdr_shstrndx(#elf_ehdr{shstrndx = Index}) -> Index. + + +%%-spec mk_ehdr_ident(...) -> elf_ehdr_ident(). +mk_ehdr_ident(Class, Data, Version, OsABI, AbiVersion, Pad, Nident) -> + #elf_ehdr_ident{class = Class, data = Data, version = Version, osabi = OsABI, + abiversion = AbiVersion, pad = Pad, nident = Nident}. + +%%%------------------------- +%%% Section header entries +%%%------------------------- +mk_shdr(Name, Type, Flags, Addr, Offset, Size, Link, Info, AddrAlign, EntSize) -> + #elf_shdr{name = Name, type = Type, flags = Flags, addr = Addr, + offset = Offset, size = Size, link = Link, info = Info, + addralign = AddrAlign, entsize = EntSize}. + +%% -spec shdr_offset(elf_shdr()) -> offset(). +%% shdr_offset(#elf_shdr{offset = Offset}) -> Offset. +%% +%% -spec shdr_size(elf_shdr()) -> size(). +%% shdr_size(#elf_shdr{size = Size}) -> Size. + +%%%------------------------- +%%% Symbol Table Entries +%%%------------------------- +mk_sym(Name, Info, Other, Shndx, Value, Size) -> + #elf_sym{name = Name, info = Info, other = Other, + shndx = Shndx, value = Value, size = Size}. + +-spec sym_name(elf_sym()) -> nameoff(). +sym_name(#elf_sym{name = Name}) -> Name. + +%% -spec sym_value(elf_sym()) -> valueoff(). +%% sym_value(#elf_sym{value = Value}) -> Value. +%% +%% -spec sym_size(elf_sym()) -> size(). +%% sym_size(#elf_sym{size = Size}) -> Size. + +%%%------------------------- +%%% Relocations +%%%------------------------- +-spec mk_rel(offset(), info()) -> elf_rel(). +mk_rel(Offset, Info) -> + #elf_rel{r_offset = Offset, r_info = Info}. + +%% The following two functions capitalize on the fact that the two kinds of +%% relocation records (for 32- and 64-bit architectures have similar structure. + +-spec r_offset(elf_rel() | elf_rela()) -> offset(). +r_offset(#elf_rel{r_offset = Offset}) -> Offset; +r_offset(#elf_rela{r_offset = Offset}) -> Offset. + +-spec r_info(elf_rel() | elf_rela()) -> info(). +r_info(#elf_rel{r_info = Info}) -> Info; +r_info(#elf_rela{r_info = Info}) -> Info. + +-spec mk_rela(offset(), info(), offset()) -> elf_rela(). +mk_rela(Offset, Info, Addend) -> + #elf_rela{r_offset = Offset, r_info = Info, r_addend = Addend}. + +-spec rela_addend(elf_rela()) -> offset(). +rela_addend(#elf_rela{r_addend = Addend}) -> Addend. + +%% %%%------------------------- +%% %%% GCC exception table +%% %%%------------------------- +%% -type cstab() :: [elf_gccexntab_callsite()]. +%% +%% mk_gccexntab(LPbenc, LPbase, TTenc, TToff, CSenc, CStabsize, CStab) -> +%% #elf_gccexntab{lpbenc = LPbenc, lpbase = LPbase, ttenc = TTenc, +%% ttoff = TToff, csenc = CSenc, cstabsize = CStabsize, +%% cstab = CStab}. +%% +%% -spec gccexntab_cstab(elf_gccexntab()) -> cstab(). +%% gccexntab_cstab(#elf_gccexntab{cstab = CSTab}) -> CSTab. + +mk_gccexntab_callsite(Start, Size, LP, Action) -> + #elf_gccexntab_callsite{start = Start, size=Size, lp=LP, onaction=Action}. + +%% -spec gccexntab_callsite_start(elf_gccexntab_callsite()) -> start(). +%% gccexntab_callsite_start(#elf_gccexntab_callsite{start = Start}) -> Start. +%% +%% -spec gccexntab_callsite_size(elf_gccexntab_callsite()) -> size(). +%% gccexntab_callsite_size(#elf_gccexntab_callsite{size = Size}) -> Size. +%% +%% -spec gccexntab_callsite_lp(elf_gccexntab_callsite()) -> lp(). +%% gccexntab_callsite_lp(#elf_gccexntab_callsite{lp = LP}) -> LP. + +%%------------------------------------------------------------------------------ +%% Functions to manipulate the ELF File Header +%%------------------------------------------------------------------------------ + +%% @doc Extracts the File Header from an ELF formatted object file. Also sets +%% the ELF class variable in the process dictionary (used by many functions +%% in this and hipe_llvm_main modules). +-spec extract_header(elf()) -> elf_ehdr(). +extract_header(Elf) -> + Ehdr_bin = get_binary_segment(Elf, 0, ?ELF_EHDR_SIZE), + << %% Structural pattern matching on fields. + Ident_bin:?E_IDENT_SIZE/binary, + Type:?bits(?E_TYPE_SIZE)/integer-little, + Machine:?bits(?E_MACHINE_SIZE)/integer-little, + Version:?bits(?E_VERSION_SIZE)/integer-little, + Entry:?bits(?E_ENTRY_SIZE)/integer-little, + Phoff:?bits(?E_PHOFF_SIZE)/integer-little, + Shoff:?bits(?E_SHOFF_SIZE)/integer-little, + Flags:?bits(?E_FLAGS_SIZE)/integer-little, + Ehsize:?bits(?E_EHSIZE_SIZE)/integer-little, + Phentsize:?bits(?E_PHENTSIZE_SIZE)/integer-little, + Phnum:?bits(?E_PHNUM_SIZE)/integer-little, + Shentsize:?bits(?E_SHENTSIZE_SIZE)/integer-little, + Shnum:?bits(?E_SHENTSIZE_SIZE)/integer-little, + Shstrndx:?bits(?E_SHSTRNDX_SIZE)/integer-little + >> = Ehdr_bin, + <<16#7f, $E, $L, $F, Class, Data, Version, Osabi, Abiversion, + Pad:6/binary, Nident + >> = Ident_bin, + Ident = mk_ehdr_ident(Class, Data, Version, Osabi, + Abiversion, Pad, Nident), + mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags, + Ehsize, Phentsize, Phnum, Shentsize, Shnum, Shstrndx). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Section Header Entries +%%------------------------------------------------------------------------------ + +%% @doc Extracts the Section Header Table from an ELF formated Object File. +extract_shdrtab(Elf) -> + %% Extract File Header to get info about Section Header Offset (in bytes), + %% Entry Size (in bytes) and Number of entries + #elf_ehdr{shoff = ShOff, shentsize = ShEntsize, shnum = ShNum} = + extract_header(Elf), + %% Get actual Section header table (binary) + ShdrBin = get_binary_segment(Elf, ShOff, ShNum * ShEntsize), + get_shdrtab_entries(ShdrBin, []). + +get_shdrtab_entries(<<>>, Acc) -> + lists:reverse(Acc); +get_shdrtab_entries(ShdrBin, Acc) -> + <<%% Structural pattern matching on fields. + Name:?bits(?SH_NAME_SIZE)/integer-little, + Type:?bits(?SH_TYPE_SIZE)/integer-little, + Flags:?bits(?SH_FLAGS_SIZE)/integer-little, + Addr:?bits(?SH_ADDR_SIZE)/integer-little, + Offset:?bits(?SH_OFFSET_SIZE)/integer-little, + Size:?bits(?SH_SIZE_SIZE)/integer-little, + Link:?bits(?SH_LINK_SIZE)/integer-little, + Info:?bits(?SH_INFO_SIZE)/integer-little, + Addralign:?bits(?SH_ADDRALIGN_SIZE)/integer-little, + Entsize:?bits(?SH_ENTSIZE_SIZE)/integer-little, + MoreShdrE/binary + >> = ShdrBin, + ShdrE = mk_shdr(Name, Type, Flags, Addr, Offset, + Size, Link, Info, Addralign, Entsize), + get_shdrtab_entries(MoreShdrE, [ShdrE | Acc]). + +%% @doc Extracts a specific Entry of a Section Header Table. This function +%% takes as argument the Section Header Table (`SHdrTab') and the entry's +%% serial number (`EntryNum') and returns the entry (`shdr'). +get_shdrtab_entry(SHdrTab, EntryNum) -> + lists:nth(EntryNum + 1, SHdrTab). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Section Header String Table +%%------------------------------------------------------------------------------ + +%% @doc Extracts the Section Header String Table. This section is not a known +%% ELF Object File section. It is just a "hidden" table storing the +%% names of all sections that exist in current object file. +-spec extract_shstrtab(elf()) -> [name()]. +extract_shstrtab(Elf) -> + %% Extract Section Name String Table Index + #elf_ehdr{shstrndx = ShStrNdx} = extract_header(Elf), + ShHdrTab = extract_shdrtab(Elf), + %% Extract Section header entry and get actual Section-header String Table + #elf_shdr{offset = ShStrOffset, size = ShStrSize} = + get_shdrtab_entry(ShHdrTab, ShStrNdx), + case get_binary_segment(Elf, ShStrOffset, ShStrSize) of + <<>> -> %% Segment empty + []; + ShStrTab -> %% Convert to string table + [Name || {Name, _Size} <- get_names(ShStrTab)] + end. + +%%------------------------------------------------------------------------------ + +-spec get_tab_entries(elf()) -> [{name(), valueoff(), size()}]. +get_tab_entries(Elf) -> + SymTab = extract_symtab(Elf), + Ts = [{Name, Value, Size div ?ELF_XWORD_SIZE} + || #elf_sym{name = Name, value = Value, size = Size} <- SymTab, + Name =/= 0], + {NameIndices, ValueOffs, Sizes} = lists:unzip3(Ts), + %% Find the names of the symbols. + %% Get string table entries ([{Name, Offset in strtab section}]). Keep only + %% relevant entries: + StrTab = extract_strtab(Elf), + Relevant = [get_strtab_entry(StrTab, Off) || Off <- NameIndices], + %% Zip back to {Name, ValueOff, Size} + lists:zip3(Relevant, ValueOffs, Sizes). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Symbol Table +%%------------------------------------------------------------------------------ + +%% @doc Function that extracts Symbol Table from an ELF Object file. +extract_symtab(Elf) -> + Symtab_bin = extract_segment_by_name(Elf, ?SYMTAB), + get_symtab_entries(Symtab_bin, []). + +get_symtab_entries(<<>>, Acc) -> + lists:reverse(Acc); +get_symtab_entries(Symtab_bin, Acc) -> + <<SymE_bin:?ELF_SYM_SIZE/binary, MoreSymE/binary>> = Symtab_bin, + case is64bit() of + true -> + <<%% Structural pattern matching on fields. + Name:?bits(?ST_NAME_SIZE)/integer-little, + Info:?bits(?ST_INFO_SIZE)/integer-little, + Other:?bits(?ST_OTHER_SIZE)/integer-little, + Shndx:?bits(?ST_SHNDX_SIZE)/integer-little, + Value:?bits(?ST_VALUE_SIZE)/integer-little, + Size:?bits(?ST_SIZE_SIZE)/integer-little + >> = SymE_bin; + false -> + << %% Same fields in different order: + Name:?bits(?ST_NAME_SIZE)/integer-little, + Value:?bits(?ST_VALUE_SIZE)/integer-little, + Size:?bits(?ST_SIZE_SIZE)/integer-little, + Info:?bits(?ST_INFO_SIZE)/integer-little, + Other:?bits(?ST_OTHER_SIZE)/integer-little, + Shndx:?bits(?ST_SHNDX_SIZE)/integer-little + >> = SymE_bin + end, + SymE = mk_sym(Name, Info, Other, Shndx, Value, Size), + get_symtab_entries(MoreSymE, [SymE | Acc]). + +%% @doc Extracts a specific entry from the Symbol Table (as binary). +%% This function takes as arguments the Symbol Table (`SymTab') +%% and the entry's serial number and returns that entry (`sym'). +get_symtab_entry(SymTab, EntryNum) -> + lists:nth(EntryNum + 1, SymTab). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate String Table +%%------------------------------------------------------------------------------ + +%% @doc Extracts String Table from an ELF formated Object File. +-spec extract_strtab(elf()) -> [{string(), offset()}]. +extract_strtab(Elf) -> + Strtab_bin = extract_segment_by_name(Elf, ?STRTAB), + NamesSizes = get_names(Strtab_bin), + make_offsets(NamesSizes). + +%% @doc Returns the name of the symbol at the given offset. The string table +%% contains entries of the form {Name, Offset}. If no such offset exists +%% returns the empty string (`""'). +%% XXX: There might be a bug here because of the "compact" saving the ELF +%% format uses: e.g. only stores ".rela.text" for ".rela.text" and ".text". +get_strtab_entry(Strtab, Offset) -> + case lists:keyfind(Offset, 2, Strtab) of + {Name, Offset} -> Name; + false -> "" + end. + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Relocations +%%------------------------------------------------------------------------------ + +%% @doc This function gets as argument an ELF binary file and returns a list +%% with all .rela.rodata labels (i.e. constants and literals in code) +%% or an empty list if no ".rela.rodata" section exists in code. +-spec get_rodata_relocs(elf()) -> [offset()]. +get_rodata_relocs(Elf) -> + case is64bit() of + true -> + %% Only care about the addends (== offsets): + get_rela_addends(extract_rela(Elf, ?RODATA)); + false -> + %% Find offsets hardcoded in ".rodata" entry + %%XXX: Treat all 0s as padding and skip them! + [SkipPadding || SkipPadding <- extract_rodata(Elf), SkipPadding =/= 0] + end. + +-spec get_rela_addends([elf_rela()]) -> [offset()]. +get_rela_addends(RelaEntries) -> + [rela_addend(E) || E <- RelaEntries]. + +%% @doc Extract a list of the form `[{SymbolName, Offset}]' with all relocatable +%% symbols and their offsets in the code from the ".text" section. +-spec get_text_relocs(elf()) -> [{name(), offset()}]. +get_text_relocs(Elf) -> + %% Only care about the symbol table index and the offset: + NameOffsetTemp = [{?ELF_R_SYM(r_info(E)), r_offset(E)} + || E <- extract_rela(Elf, ?TEXT)], + {NameIndices, ActualOffsets} = lists:unzip(NameOffsetTemp), + %% Find the names of the symbols: + %% + %% Get those symbol table entries that are related to Text relocs: + Symtab = extract_symtab(Elf), + SymtabEs = [get_symtab_entry(Symtab, Index) || Index <- NameIndices], + %XXX: not zero-indexed! + %% Symbol table entries contain the offset of the name of the symbol in + %% String Table: + SymtabEs2 = [sym_name(E) || E <- SymtabEs], %XXX: Do we need to sort SymtabE? + %% Get string table entries ([{Name, Offset in strtab section}]). Keep only + %% relevant entries: + Strtab = extract_strtab(Elf), + Relevant = [get_strtab_entry(Strtab, Off) || Off <- SymtabEs2], + %% Zip back with actual offsets: + lists:zip(Relevant, ActualOffsets). + +%% @doc Extract the Relocations segment for section `Name' (that is passed +%% as second argument) from an ELF formated Object file binary. +-spec extract_rela(elf(), name()) -> [elf_rel() | elf_rela()]. +extract_rela(Elf, Name) -> + SegName = + case is64bit() of + true -> ?RELA(Name); % ELF-64 uses ".rela" + false -> ?REL(Name) % ...while ELF-32 uses ".rel" + end, + Rela_bin = extract_segment_by_name(Elf, SegName), + get_rela_entries(Rela_bin, []). + +get_rela_entries(<<>>, Acc) -> + lists:reverse(Acc); +get_rela_entries(Bin, Acc) -> + E = case is64bit() of + true -> + <<%% Structural pattern matching on fields of a Rela Entry. + Offset:?bits(?R_OFFSET_SIZE)/integer-little, + Info:?bits(?R_INFO_SIZE)/integer-little, + Addend:?bits(?R_ADDEND_SIZE)/integer-little, + Rest/binary + >> = Bin, + mk_rela(Offset, Info, Addend); + false -> + <<%% Structural pattern matching on fields of a Rel Entry. + Offset:?bits(?R_OFFSET_SIZE)/integer-little, + Info:?bits(?R_INFO_SIZE)/integer-little, + Rest/binary + >> = Bin, + mk_rel(Offset, Info) + end, + get_rela_entries(Rest, [E | Acc]). + +%% %% @doc Extract the `EntryNum' (serial number) Relocation Entry. +%% get_rela_entry(Rela, EntryNum) -> +%% lists:nth(EntryNum + 1, Rela). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Executable Code segment +%%------------------------------------------------------------------------------ + +%% @doc This function gets as arguments an ELF formated binary file and +%% returns the Executable Code (".text" segment) or an empty binary if it +%% is not found. +-spec extract_text(elf()) -> binary(). +extract_text(Elf) -> + extract_segment_by_name(Elf, ?TEXT). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Note Section +%%------------------------------------------------------------------------------ + +%% @doc Extract specific Note Section from an ELF Object file. The function +%% takes as first argument the object file (`Elf') and the `Name' of the +%% wanted Note Section (<b>without</b> the ".note" prefix!). It returns +%% the specified binary segment or an empty binary if no such section +%% exists. +-spec extract_note(elf(), string()) -> binary(). +extract_note(Elf, Name) -> + extract_segment_by_name(Elf, ?NOTE(Name)). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate GCC Exception Table segment +%%------------------------------------------------------------------------------ + +%% A description for the C++ exception table formats can be found at Exception +%% Handling Tables (http://www.codesourcery.com/cxx-abi/exceptions.pdf). + +%% A list with `{Start, End, HandlerOffset}' for all call sites in the code +-spec get_exn_handlers(elf()) -> [{start(), start(), lp()}]. +get_exn_handlers(Elf) -> + CallSites = extract_gccexntab_callsites(Elf), + [{Start, Start + Size, LP} + || #elf_gccexntab_callsite{start = Start, size = Size, lp = LP} <- CallSites]. + +%% @doc This function gets as argument an ELF binary file and returns +%% the table (list) of call sites which is stored in GCC +%% Exception Table (".gcc_except_table") section. +%% It returns an empty list if the Exception Table is not found. +%% XXX: Assumes there is *no* Action Record Table. +extract_gccexntab_callsites(Elf) -> + case extract_segment_by_name(Elf, ?GCC_EXN_TAB) of + <<>> -> + []; + ExnTab -> + %% First byte of LSDA is Landing Pad base encoding. + <<LBenc:8, More/binary>> = ExnTab, + %% Second byte is the Landing Pad base (if its encoding is not + %% DW_EH_PE_omit) (optional). + {_LPBase, LSDACont} = + case LBenc =:= ?DW_EH_PE_omit of + true -> % No landing pad base byte. (-1 denotes that) + {-1, More}; + false -> % Landing pad base. + <<Base:8, More2/binary>> = More, + {Base, More2} + end, + %% Next byte of LSDA is the encoding of the Type Table. + <<TTenc:8, More3/binary>> = LSDACont, + %% Next byte is the Types Table offset encoded in U-LEB128 (optional). + {_TTOff, LSDACont2} = + case TTenc =:= ?DW_EH_PE_omit of + true -> % There is no Types Table pointer. (-1 denotes that) + {-1, More3}; + false -> % The byte offset from this field to the start of the Types + % Table used for exception matching. + leb128_decode(More3) + end, + %% Next byte of LSDA is the encoding of the fields in the Call-site Table. + <<_CSenc:8, More4/binary>> = LSDACont2, + %% Sixth byte is the size (in bytes) of the Call-site Table encoded in + %% U-LEB128. + {_CSTabSize, CSTab} = leb128_decode(More4), + %% Extract all call site information + get_gccexntab_callsites(CSTab, []) + end. + +get_gccexntab_callsites(<<>>, Acc) -> + lists:reverse(Acc); +get_gccexntab_callsites(CSTab, Acc) -> + %% We are only interested in the Landing Pad of every entry. + <<Start:32/integer-little, Size:32/integer-little, + LP:32/integer-little, OnAction:8, More/binary + >> = CSTab, + GccCS = mk_gccexntab_callsite(Start, Size, LP, OnAction), + get_gccexntab_callsites(More, [GccCS | Acc]). + +%%------------------------------------------------------------------------------ +%% Functions to manipulate Read-only Data (.rodata) +%%------------------------------------------------------------------------------ +extract_rodata(Elf) -> + Rodata_bin = extract_segment_by_name(Elf, ?RODATA), + get_rodata_entries(Rodata_bin, []). + +get_rodata_entries(<<>>, Acc) -> + lists:reverse(Acc); +get_rodata_entries(Rodata_bin, Acc) -> + <<Num:?bits(?ELF_ADDR_SIZE)/integer-little, More/binary>> = Rodata_bin, + get_rodata_entries(More, [Num | Acc]). + +%%------------------------------------------------------------------------------ +%% Helper functions +%%------------------------------------------------------------------------------ + +%% @doc Returns the binary segment starting at `Offset' with length `Size' +%% (bytes) from a binary file. If `Offset' is bigger than the byte size of +%% the binary, an empty binary (`<<>>') is returned. +-spec get_binary_segment(binary(), offset(), size()) -> binary(). +get_binary_segment(Bin, Offset, _Size) when Offset > byte_size(Bin) -> + <<>>; +get_binary_segment(Bin, Offset, Size) -> + <<_Hdr:Offset/binary, BinSeg:Size/binary, _More/binary>> = Bin, + BinSeg. + +%% @doc This function gets as arguments an ELF formated binary object and +%% a string with the segments' name and returns the specified segment or +%% an empty binary (`<<>>') if there exists no segment with that name. +%% There are handy macros defined in elf_format.hrl for all Standard +%% Section Names. +-spec extract_segment_by_name(elf(), string()) -> binary(). +extract_segment_by_name(Elf, SectionName) -> + %% Extract Section Header Table and Section Header String Table from binary + SHdrTable = extract_shdrtab(Elf), + Names = extract_shstrtab(Elf), + %% Zip to a list of (Name,ShdrE) + [_Zero | ShdrEs] = lists:keysort(2, SHdrTable), % Skip first entry (zeros). + L = lists:zip(Names, ShdrEs), + %% Find Section Header Table entry by name + case lists:keyfind(SectionName, 1, L) of + {SectionName, ShdrE} -> %% Note: Same name. + #elf_shdr{offset = Offset, size = Size} = ShdrE, + get_binary_segment(Elf, Offset, Size); + false -> %% Not found. + <<>> + end. + +%% @doc Extracts a list of strings with (zero-separated) names from a binary. +%% Returns tuples of `{Name, Size}'. +%% XXX: Skip trailing 0. +-spec get_names(<<_:8,_:_*8>>) -> name_sizes(). +get_names(<<0, Bin/binary>>) -> + NamesSizes = get_names(Bin, []), + fix_names(NamesSizes, []). + +get_names(<<>>, Acc) -> + lists:reverse(Acc); +get_names(Bin, Acc) -> + {Name, MoreNames} = bin_get_string(Bin), + get_names(MoreNames, [{Name, length(Name)} | Acc]). + +%% @doc Fix names: +%% e.g. If ".rela.text" exists, ".text" does not. Same goes for +%% ".rel.text". In that way, the Section Header String Table is more +%% compact. Add ".text" just *before* the corresponding rela-field, +%% etc. +-spec fix_names(name_sizes(), name_sizes()) -> name_sizes(). +fix_names([], Acc) -> + lists:reverse(Acc); +fix_names([{Name, Size}=T | Names], Acc) -> + case is64bit() of + true -> + case string:str(Name, ".rela") =:= 1 of + true -> %% Name starts with ".rela": + Section = string:substr(Name, 6), + fix_names(Names, [{Section, Size - 5} + | [T | Acc]]); % XXX: Is order ok? (".text" + % always before ".rela.text") + false -> %% Name does not start with ".rela": + fix_names(Names, [T | Acc]) + end; + false -> + case string:str(Name, ".rel") =:= 1 of + true -> %% Name starts with ".rel": + Section = string:substr(Name, 5), + fix_names(Names, [{Section, Size - 4} + | [T | Acc]]); % XXX: Is order ok? (".text" + % always before ".rela.text") + false -> %% Name does not start with ".rel": + fix_names(Names, [T | Acc]) + end + end. + + +%% @doc A function that byte-reverses a binary. This might be needed because of +%% little (fucking!) endianess. +-spec bin_reverse(binary()) -> binary(). +bin_reverse(Bin) when is_binary(Bin) -> + bin_reverse(Bin, <<>>). + +-spec bin_reverse(binary(), binary()) -> binary(). +bin_reverse(<<>>, Acc) -> + Acc; +bin_reverse(<<Head, More/binary>>, Acc) -> + bin_reverse(More, <<Head, Acc/binary>>). + +%% @doc A function that extracts a null-terminated string from a binary. It +%% returns the found string along with the rest of the binary. +-spec bin_get_string(binary()) -> {string(), binary()}. +bin_get_string(Bin) -> + bin_get_string(Bin, <<>>). + +bin_get_string(<<>>, BinAcc) -> + Bin = bin_reverse(BinAcc), % little endian! + {binary_to_list(Bin), <<>>}; +bin_get_string(<<0, MoreBin/binary>>, BinAcc) -> + Bin = bin_reverse(BinAcc), % little endian! + {binary_to_list(Bin), MoreBin}; +bin_get_string(<<Letter, Tail/binary>>, BinAcc) -> + bin_get_string(Tail, <<Letter, BinAcc/binary>>). + +%% @doc +make_offsets(NamesSizes) -> + {Names, Sizes} = lists:unzip(NamesSizes), + Offsets = make_offsets_from_sizes(Sizes, 1, []), + lists:zip(Names, Offsets). + +make_offsets_from_sizes([], _, Acc) -> + lists:reverse(Acc); +make_offsets_from_sizes([Size | Sizes], Cur, Acc) -> + make_offsets_from_sizes(Sizes, Size+Cur+1, [Cur | Acc]). % For the "."! + +%% @doc Little-Endian Base 128 (LEB128) Decoder +%% This function extracts the <b>first</b> LEB128-encoded integer in a +%% binary and returns that integer along with the remaining binary. This is +%% done because a LEB128 number has variable bit-size and that is a way of +%% extracting only one number in a binary and continuing parsing the binary +%% for other kind of data (e.g. different encoding). +%% FIXME: Only decodes unsigned data! +-spec leb128_decode(binary()) -> {integer(), binary()}. +leb128_decode(LebNum) -> + leb128_decode(LebNum, 0, <<>>). + +-spec leb128_decode(binary(), integer(), binary()) -> {integer(), binary()}. +leb128_decode(LebNum, NoOfBits, Acc) -> + <<Sentinel:1/bits, NextBundle:7/bits, MoreLebNums/bits>> = LebNum, + case Sentinel of + <<1:1>> -> % more bytes to follow + leb128_decode(MoreLebNums, NoOfBits+7, <<NextBundle:7/bits, Acc/bits>>); + <<0:1>> -> % byte bundle stop + Size = NoOfBits+7, + <<Num:Size/integer>> = <<NextBundle:7/bits, Acc/bits>>, + {Num, MoreLebNums} + end. + +%% @doc Extract ELF Class from ELF header and export symbol to process +%% dictionary. +-spec set_architecture_flag(elf()) -> 'ok'. +set_architecture_flag(Elf) -> + %% Extract information about ELF Class from ELF Header + <<16#7f, $E, $L, $F, EI_Class, _MoreHeader/binary>> + = get_binary_segment(Elf, 0, ?ELF_EHDR_SIZE), + put(elf_class, EI_Class), + ok. + +%% @doc Read from object file header if the file class is ELF32 or ELF64. +-spec is64bit() -> boolean(). +is64bit() -> + case get(elf_class) of + ?ELFCLASS64 -> true; + ?ELFCLASS32 -> false + end. |