1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
|
%% -*- erlang-indent-level: 2 -*-
%%% @copyright 2011-2014 Yiannis Tsiouris <[email protected]>,
%%% Chris Stavrakakis <[email protected]>,
%%% Kostis Sagonas <[email protected]>
%%% @author Yiannis Tsiouris <[email protected]>
%%% [http://www.softlab.ntua.gr/~gtsiour/]
%%% @doc This module contains functions for extracting various pieces of
%%% information from an ELF formated Object file. To fully understand
%%% the ELF format and the use of these functions please read
%%% "[http://www.linuxjournal.com/article/1060?page=0,0]" carefully.
-module(elf_format).
-export([%% Relocations
extract_rela/2,
%% Note
extract_note/2,
%% Executable code
extract_text/1,
%% GCC Exception Table
get_exn_handlers/1,
%% Symbols
elf_symbols/1,
%% Main interface
read/1
]).
-include("elf_format.hrl").
%%------------------------------------------------------------------------------
%% Types
%%------------------------------------------------------------------------------
-export_type([elf/0]).
-type lp() :: non_neg_integer(). % landing pad
-type num() :: non_neg_integer().
-type index() :: non_neg_integer().
-type start() :: non_neg_integer().
-type tuple(X) :: {} | {X} | {X, X} | tuple().
%%------------------------------------------------------------------------------
%% Abstract Data Types and Accessors for ELF Structures.
%%------------------------------------------------------------------------------
-record(elf, {file :: binary()
,sec_idx :: tuple(elf_shdr())
,sec_nam :: #{string() => elf_shdr()}
,sym_idx :: undefined | tuple(elf_sym())
}).
-opaque elf() :: #elf{}.
%% File header
-record(elf_ehdr, {ident, % ELF identification
type, % Object file type
machine, % Machine Type
version, % Object file version
entry, % Entry point address
phoff, % Program header offset
shoff :: offset(), % Section header offset
flags, % Processor-specific flags
ehsize :: size(), % ELF header size
phentsize :: size(), % Size of program header entry
phnum :: num(), % Number of program header entries
shentsize :: size(), % Size of section header entry
shnum :: num(), % Number of section header entries
shstrndx :: index() % Section name string table index
}).
-type elf_ehdr() :: #elf_ehdr{}.
-record(elf_ehdr_ident, {class, % File class
data, % Data encoding
version, % File version
osabi, % OS/ABI identification
abiversion, % ABI version
pad, % Start of padding bytes
nident % Size of e_ident[]
}).
%% -type elf_ehdr_ident() :: #elf_ehdr_ident{}.
%% %% Program header table
%% -record(elf_phdr, {type, % Type of segment
%% flags, % Segment attributes
%% offset, % Offset in file
%% vaddr, % Virtual address in memory
%% paddr, % Reserved
%% filesz, % Size of segment in file
%% memsz, % Size of segment in memory
%% align % Alignment of segment
%% }).
%% %% GCC exception table
%% -record(elf_gccexntab, {lpbenc, % Landing pad base encoding
%% lpbase, % Landing pad base
%% ttenc, % Type table encoding
%% ttoff, % Type table offset
%% csenc, % Call-site table encoding
%% cstabsize, % Call-site table size
%% cstab :: cstab() % Call-site table
%% }).
%% -type elf_gccexntab() :: #elf_gccexntab{}.
-record(elf_gccexntab_callsite, {start :: start(), % Call-site start
size :: size(), % Call-site size
lp :: lp(), % Call-site landing pad
% (exception handler)
onaction % On action (e.g. cleanup)
}).
%% -type elf_gccexntab_callsite() :: #elf_gccexntab_callsite{}.
%%------------------------------------------------------------------------------
%% Accessor Functions
%%------------------------------------------------------------------------------
%% File header
%% -spec mk_ehdr(...) -> elf_ehrd().
mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags, Ehsize,
Phentsize, Phnum, Shentsize, Shnum, Shstrndx) ->
#elf_ehdr{ident = Ident, type = Type, machine = Machine, version = Version,
entry = Entry, phoff = Phoff, shoff = Shoff, flags = Flags,
ehsize = Ehsize, phentsize = Phentsize, phnum = Phnum,
shentsize = Shentsize, shnum = Shnum, shstrndx = Shstrndx}.
%% -spec ehdr_shoff(elf_ehdr()) -> offset().
%% ehdr_shoff(#elf_ehdr{shoff = Offset}) -> Offset.
%%
%% -spec ehdr_shentsize(elf_ehdr()) -> size().
%% ehdr_shentsize(#elf_ehdr{shentsize = Size}) -> Size.
%%
%% -spec ehdr_shnum(elf_ehdr()) -> num().
%% ehdr_shnum(#elf_ehdr{shnum = Num}) -> Num.
%%
%% -spec ehdr_shstrndx(elf_ehdr()) -> index().
%% ehdr_shstrndx(#elf_ehdr{shstrndx = Index}) -> Index.
%%-spec mk_ehdr_ident(...) -> elf_ehdr_ident().
mk_ehdr_ident(Class, Data, Version, OsABI, AbiVersion, Pad, Nident) ->
#elf_ehdr_ident{class = Class, data = Data, version = Version, osabi = OsABI,
abiversion = AbiVersion, pad = Pad, nident = Nident}.
%%%-------------------------
%%% Section header entries
%%%-------------------------
mk_shdr(Name, Type, Flags, Addr, Offset, Size, Link, Info, AddrAlign, EntSize) ->
#elf_shdr{name = Name, type = Type, flags = Flags, addr = Addr,
offset = Offset, size = Size, link = Link, info = Info,
addralign = AddrAlign, entsize = EntSize}.
%% -spec shdr_offset(elf_shdr()) -> offset().
%% shdr_offset(#elf_shdr{offset = Offset}) -> Offset.
%%
%% -spec shdr_size(elf_shdr()) -> size().
%% shdr_size(#elf_shdr{size = Size}) -> Size.
%%%-------------------------
%%% Symbol Table Entries
%%%-------------------------
mk_sym(Name, Bind, Type, Section, Value, Size) ->
#elf_sym{name = Name, bind = Bind, type = Type,
section = Section, value = Value, size = Size}.
%% -spec sym_name(elf_sym()) -> string().
%% sym_name(#elf_sym{name = Name}) -> Name.
%%
%% -spec sym_value(elf_sym()) -> valueoff().
%% sym_value(#elf_sym{value = Value}) -> Value.
%%
%% -spec sym_size(elf_sym()) -> size().
%% sym_size(#elf_sym{size = Size}) -> Size.
%% %%%-------------------------
%% %%% GCC exception table
%% %%%-------------------------
%% -type cstab() :: [elf_gccexntab_callsite()].
%%
%% mk_gccexntab(LPbenc, LPbase, TTenc, TToff, CSenc, CStabsize, CStab) ->
%% #elf_gccexntab{lpbenc = LPbenc, lpbase = LPbase, ttenc = TTenc,
%% ttoff = TToff, csenc = CSenc, cstabsize = CStabsize,
%% cstab = CStab}.
%%
%% -spec gccexntab_cstab(elf_gccexntab()) -> cstab().
%% gccexntab_cstab(#elf_gccexntab{cstab = CSTab}) -> CSTab.
mk_gccexntab_callsite(Start, Size, LP, Action) ->
#elf_gccexntab_callsite{start = Start, size=Size, lp=LP, onaction=Action}.
%% -spec gccexntab_callsite_start(elf_gccexntab_callsite()) -> start().
%% gccexntab_callsite_start(#elf_gccexntab_callsite{start = Start}) -> Start.
%%
%% -spec gccexntab_callsite_size(elf_gccexntab_callsite()) -> size().
%% gccexntab_callsite_size(#elf_gccexntab_callsite{size = Size}) -> Size.
%%
%% -spec gccexntab_callsite_lp(elf_gccexntab_callsite()) -> lp().
%% gccexntab_callsite_lp(#elf_gccexntab_callsite{lp = LP}) -> LP.
%%------------------------------------------------------------------------------
%% Main interface function
%%------------------------------------------------------------------------------
%% @doc Parses an ELF file.
-spec read(binary()) -> elf().
read(ElfBin) ->
Header = extract_header(ElfBin),
[_UndefinedSec|Sections] = extract_shdrtab(ElfBin, Header),
SecNam = maps:from_list(
[{Name, Sec} || Sec = #elf_shdr{name=Name} <- Sections]),
Elf0 = #elf{file=ElfBin, sec_idx=list_to_tuple(Sections), sec_nam=SecNam},
[_UndefinedSym|Symbols] = extract_symtab(Elf0, extract_strtab(Elf0)),
Elf0#elf{sym_idx=list_to_tuple(Symbols)}.
%%------------------------------------------------------------------------------
%% Functions to manipulate the ELF File Header
%%------------------------------------------------------------------------------
%% @doc Extracts the File Header from an ELF formatted object file. Also sets
%% the ELF class variable in the process dictionary (used by many functions
%% in this and hipe_llvm_main modules).
-spec extract_header(binary()) -> elf_ehdr().
extract_header(ElfBin) ->
Ehdr_bin = get_binary_segment(ElfBin, 0, ?ELF_EHDR_SIZE),
<< %% Structural pattern matching on fields.
Ident_bin:?E_IDENT_SIZE/binary,
Type:?bits(?E_TYPE_SIZE)/integer-little,
Machine:?bits(?E_MACHINE_SIZE)/integer-little,
Version:?bits(?E_VERSION_SIZE)/integer-little,
Entry:?bits(?E_ENTRY_SIZE)/integer-little,
Phoff:?bits(?E_PHOFF_SIZE)/integer-little,
Shoff:?bits(?E_SHOFF_SIZE)/integer-little,
Flags:?bits(?E_FLAGS_SIZE)/integer-little,
Ehsize:?bits(?E_EHSIZE_SIZE)/integer-little,
Phentsize:?bits(?E_PHENTSIZE_SIZE)/integer-little,
Phnum:?bits(?E_PHNUM_SIZE)/integer-little,
Shentsize:?bits(?E_SHENTSIZE_SIZE)/integer-little,
Shnum:?bits(?E_SHENTSIZE_SIZE)/integer-little,
Shstrndx:?bits(?E_SHSTRNDX_SIZE)/integer-little
>> = Ehdr_bin,
<<16#7f, $E, $L, $F, Class, Data, Version, Osabi, Abiversion,
Pad:6/binary, Nident
>> = Ident_bin,
Ident = mk_ehdr_ident(Class, Data, Version, Osabi,
Abiversion, Pad, Nident),
mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags,
Ehsize, Phentsize, Phnum, Shentsize, Shnum, Shstrndx).
%%------------------------------------------------------------------------------
%% Functions to manipulate Section Header Entries
%%------------------------------------------------------------------------------
-type shdrtab() :: [elf_shdr()].
%% @doc Extracts the Section Header Table from an ELF formated Object File.
-spec extract_shdrtab(binary(), elf_ehdr()) -> shdrtab().
extract_shdrtab(ElfBin, #elf_ehdr{shoff=ShOff, shentsize=?ELF_SHDRENTRY_SIZE,
shnum=ShNum, shstrndx=ShStrNdx}) ->
%% Get actual Section header table (binary)
ShdrBin = get_binary_segment(ElfBin, ShOff, ShNum * ?ELF_SHDRENTRY_SIZE),
%% We need to lookup the offset and size of the section header string table
%% before we can fully parse the section table. We compute its offset and
%% extract the fields we need here.
ShStrEntryOffset = ShStrNdx * ?ELF_SHDRENTRY_SIZE,
<<_:ShStrEntryOffset/binary, _:?SH_NAME_SIZE/binary,
_:?SH_TYPE_SIZE/binary, _:?SH_FLAGS_SIZE/binary, _:?SH_ADDR_SIZE/binary,
ShStrOffset:?bits(?SH_OFFSET_SIZE)/little,
ShStrSize:?bits(?SH_SIZE_SIZE)/little,
_/binary>> = ShdrBin,
ShStrTab = parse_strtab(get_binary_segment(ElfBin, ShStrOffset, ShStrSize)),
get_shdrtab_entries(ShdrBin, ShStrTab).
get_shdrtab_entries(<<>>, _ShStrTab) -> [];
get_shdrtab_entries(ShdrTab, ShStrTab) ->
<<%% Structural pattern matching on fields.
Name:?bits(?SH_NAME_SIZE)/integer-little,
Type:?bits(?SH_TYPE_SIZE)/integer-little,
Flags:?bits(?SH_FLAGS_SIZE)/integer-little,
Addr:?bits(?SH_ADDR_SIZE)/integer-little,
Offset:?bits(?SH_OFFSET_SIZE)/integer-little,
Size:?bits(?SH_SIZE_SIZE)/integer-little,
Link:?bits(?SH_LINK_SIZE)/integer-little,
Info:?bits(?SH_INFO_SIZE)/integer-little,
Addralign:?bits(?SH_ADDRALIGN_SIZE)/integer-little,
Entsize:?bits(?SH_ENTSIZE_SIZE)/integer-little,
Rest/binary
>> = ShdrTab,
Entry = mk_shdr(get_strtab_entry(Name, ShStrTab), decode_shdr_type(Type),
Flags, Addr, Offset, Size, Link, Info, Addralign, Entsize),
[Entry | get_shdrtab_entries(Rest, ShStrTab)].
decode_shdr_type(?SHT_NULL) -> 'null';
decode_shdr_type(?SHT_PROGBITS) -> 'progbits';
decode_shdr_type(?SHT_SYMTAB) -> 'symtab';
decode_shdr_type(?SHT_STRTAB) -> 'strtab';
decode_shdr_type(?SHT_RELA) -> 'rela';
decode_shdr_type(?SHT_HASH) -> 'hash'; %unused
decode_shdr_type(?SHT_DYNAMIC) -> 'dynamic'; %unused
decode_shdr_type(?SHT_NOTE) -> 'note'; %unused
decode_shdr_type(?SHT_NOBITS) -> 'nobits';
decode_shdr_type(?SHT_REL) -> 'rel';
decode_shdr_type(?SHT_SHLIB) -> 'shlib'; %unused
decode_shdr_type(?SHT_DYNSYM) -> 'dynsym'; %unused
decode_shdr_type(OS) when ?SHT_LOOS =< OS, OS =< ?SHT_HIOS -> {os, OS};
decode_shdr_type(Proc) when ?SHT_LOPROC =< Proc, Proc =< ?SHT_HIPROC ->
{proc, Proc}.
-spec elf_section(non_neg_integer(), elf()) -> undefined | abs | elf_shdr().
elf_section(0, #elf{}) -> undefined;
elf_section(?SHN_ABS, #elf{}) -> abs;
elf_section(Index, #elf{sec_idx=SecIdx}) when Index =< tuple_size(SecIdx) ->
element(Index, SecIdx).
%%------------------------------------------------------------------------------
%% Functions to manipulate Symbol Table
%%------------------------------------------------------------------------------
%% @doc Function that extracts Symbol Table from an ELF Object file.
extract_symtab(Elf, StrTab) ->
Symtab = extract_segment_by_name(Elf, ?SYMTAB),
[parse_sym(Sym, Elf, StrTab) || <<Sym:?ELF_SYM_SIZE/binary>> <= Symtab].
-ifdef(BIT32).
parse_sym(<<%% Structural pattern matching on fields.
Name:?bits(?ST_NAME_SIZE)/integer-little,
Value:?bits(?ST_VALUE_SIZE)/integer-little,
Size:?bits(?ST_SIZE_SIZE)/integer-little,
Info:?bits(?ST_INFO_SIZE)/integer-little,
_Other:?bits(?ST_OTHER_SIZE)/integer-little,
Shndx:?bits(?ST_SHNDX_SIZE)/integer-little>>,
Elf, StrTab) ->
mk_sym(get_strtab_entry(Name, StrTab), decode_symbol_bind(?ELF_ST_BIND(Info)),
decode_symbol_type(?ELF_ST_TYPE(Info)), elf_section(Shndx, Elf), Value,
Size).
-else.
parse_sym(<<%% Same fields in different order:
Name:?bits(?ST_NAME_SIZE)/integer-little,
Info:?bits(?ST_INFO_SIZE)/integer-little,
_Other:?bits(?ST_OTHER_SIZE)/integer-little,
Shndx:?bits(?ST_SHNDX_SIZE)/integer-little,
Value:?bits(?ST_VALUE_SIZE)/integer-little,
Size:?bits(?ST_SIZE_SIZE)/integer-little>>,
Elf, StrTab) ->
mk_sym(get_strtab_entry(Name, StrTab), decode_symbol_bind(?ELF_ST_BIND(Info)),
decode_symbol_type(?ELF_ST_TYPE(Info)), elf_section(Shndx, Elf), Value,
Size).
-endif.
decode_symbol_bind(?STB_LOCAL) -> 'local';
decode_symbol_bind(?STB_GLOBAL) -> 'global';
decode_symbol_bind(?STB_WEAK) -> 'weak'; %unused
decode_symbol_bind(OS) when ?STB_LOOS =< OS, OS =< ?STB_HIOS -> {os, OS};
decode_symbol_bind(Proc) when ?STB_LOPROC =< Proc, Proc =< ?STB_HIPROC ->
{proc, Proc}.
decode_symbol_type(?STT_NOTYPE) -> 'notype';
decode_symbol_type(?STT_OBJECT) -> 'object';
decode_symbol_type(?STT_FUNC) -> 'func';
decode_symbol_type(?STT_SECTION) -> 'section';
decode_symbol_type(?STT_FILE) -> 'file';
decode_symbol_type(OS) when ?STT_LOOS =< OS, OS =< ?STT_HIOS -> {os, OS};
decode_symbol_type(Proc) when ?STT_LOPROC =< Proc, Proc =< ?STT_HIPROC ->
{proc, Proc}.
%% @doc Extracts a specific entry from the Symbol Table.
-spec elf_symbol(0, elf()) -> undefined;
(pos_integer(), elf()) -> elf_sym().
elf_symbol(0, #elf{}) -> undefined;
elf_symbol(Index, #elf{sym_idx=SymIdx}) -> element(Index, SymIdx).
-spec elf_symbols(elf()) -> [elf_sym()].
elf_symbols(#elf{sym_idx=SymIdx}) -> tuple_to_list(SymIdx).
%%------------------------------------------------------------------------------
%% Functions to manipulate String Table
%%------------------------------------------------------------------------------
%% ADT: get_strtab_entry/1 must be used to consume this type.
-type strtab() :: binary().
%% @doc Extracts String Table from an ELF formated Object File.
-spec extract_strtab(elf()) -> strtab().
extract_strtab(Elf) ->
parse_strtab(extract_segment_by_name(Elf, ?STRTAB)).
-spec parse_strtab(binary()) -> strtab().
parse_strtab(StrTabSectionBin) -> StrTabSectionBin.
%% @doc Returns the name of the symbol at the given offset.
-spec get_strtab_entry(non_neg_integer(), strtab()) -> string().
get_strtab_entry(Offset, StrTab) ->
<<_:Offset/binary, StrBin/binary>> = StrTab,
bin_get_string(StrBin).
%% @doc Extracts a null-terminated string from a binary.
-spec bin_get_string(binary()) -> string().
%% FIXME: No regard for encoding (just happens to work for ASCII and Latin-1)
bin_get_string(<<0, _/binary>>) -> [];
bin_get_string(<<Char, Rest/binary>>) -> [Char|bin_get_string(Rest)].
%%------------------------------------------------------------------------------
%% Functions to manipulate Relocations
%%------------------------------------------------------------------------------
%% @doc Extract the Relocations segment for section `Name' (that is passed
%% as second argument) from an ELF formated Object file binary.
-spec extract_rela(elf(), name()) -> [elf_rel()].
-ifdef(BIT32).
extract_rela(Elf, Name) ->
SecData = extract_segment_by_name(Elf, Name),
[#elf_rel{offset=Offset, symbol=elf_symbol(?ELF_R_SYM(Info), Elf),
type=decode_reloc_type(?ELF_R_TYPE(Info)),
addend=read_implicit_addend(Offset, SecData)}
|| <<Offset:?bits(?R_OFFSET_SIZE)/little,
Info:?bits(?R_INFO_SIZE)/little % 386 uses ".rel"
>> <= extract_segment_by_name(Elf, ?REL(Name))].
%% The only types HiPE knows how to patch
decode_reloc_type(1) -> '32';
decode_reloc_type(2) -> 'pc32'.
read_implicit_addend(Offset, Section) ->
%% All x86 relocation types uses 'word32' relocation fields; i.e. 32-bit LE.
<<_:Offset/binary, Addend:32/signed-little, _/binary>> = Section,
Addend.
-else. %% BIT32
extract_rela(Elf, Name) ->
[#elf_rel{offset=Offset, symbol=elf_symbol(?ELF_R_SYM(Info), Elf),
type=decode_reloc_type(?ELF_R_TYPE(Info)), addend=Addend}
|| <<Offset:?bits(?R_OFFSET_SIZE)/little,
Info:?bits(?R_INFO_SIZE)/little,
Addend:?bits(?R_ADDEND_SIZE)/signed-little % X86_64 uses ".rela"
>> <= extract_segment_by_name(Elf, ?RELA(Name))].
decode_reloc_type(1) -> '64';
decode_reloc_type(2) -> 'pc32';
decode_reloc_type(10) -> '32'.
-endif. %% BIT32
%%------------------------------------------------------------------------------
%% Functions to manipulate Executable Code segment
%%------------------------------------------------------------------------------
%% @doc This function gets as arguments an ELF formated binary file and
%% returns the Executable Code (".text" segment) or an empty binary if it
%% is not found.
-spec extract_text(elf()) -> binary().
extract_text(Elf) ->
extract_segment_by_name(Elf, ?TEXT).
%%------------------------------------------------------------------------------
%% Functions to manipulate Note Section
%%------------------------------------------------------------------------------
%% @doc Extract specific Note Section from an ELF Object file. The function
%% takes as first argument the object file (`Elf') and the `Name' of the
%% wanted Note Section (<b>without</b> the ".note" prefix!). It returns
%% the specified binary segment or an empty binary if no such section
%% exists.
-spec extract_note(elf(), string()) -> binary().
extract_note(Elf, Name) ->
extract_segment_by_name(Elf, ?NOTE(Name)).
%%------------------------------------------------------------------------------
%% Functions to manipulate GCC Exception Table segment
%%------------------------------------------------------------------------------
%% A description for the C++ exception table formats can be found at Exception
%% Handling Tables (http://www.codesourcery.com/cxx-abi/exceptions.pdf).
%% A list with `{Start, End, HandlerOffset}' for all call sites in the code
-spec get_exn_handlers(elf()) -> [{start(), start(), lp()}].
get_exn_handlers(Elf) ->
CallSites = extract_gccexntab_callsites(Elf),
[{Start, Start + Size, LP}
|| #elf_gccexntab_callsite{start = Start, size = Size, lp = LP} <- CallSites].
%% @doc This function gets as argument an ELF binary file and returns
%% the table (list) of call sites which is stored in GCC
%% Exception Table (".gcc_except_table") section.
%% It returns an empty list if the Exception Table is not found.
%% XXX: Assumes there is *no* Action Record Table.
extract_gccexntab_callsites(Elf) ->
case extract_segment_by_name(Elf, ?GCC_EXN_TAB) of
<<>> ->
[];
ExnTab ->
%% First byte of LSDA is Landing Pad base encoding.
<<LBenc:8, More/binary>> = ExnTab,
%% Second byte is the Landing Pad base (if its encoding is not
%% DW_EH_PE_omit) (optional).
{_LPBase, LSDACont} =
case LBenc =:= ?DW_EH_PE_omit of
true -> % No landing pad base byte. (-1 denotes that)
{-1, More};
false -> % Landing pad base.
<<Base:8, More2/binary>> = More,
{Base, More2}
end,
%% Next byte of LSDA is the encoding of the Type Table.
<<TTenc:8, More3/binary>> = LSDACont,
%% Next byte is the Types Table offset encoded in U-LEB128 (optional).
{_TTOff, LSDACont2} =
case TTenc =:= ?DW_EH_PE_omit of
true -> % There is no Types Table pointer. (-1 denotes that)
{-1, More3};
false -> % The byte offset from this field to the start of the Types
% Table used for exception matching.
leb128_decode(More3)
end,
%% Next byte of LSDA is the encoding of the fields in the Call-site Table.
<<_CSenc:8, More4/binary>> = LSDACont2,
%% Sixth byte is the size (in bytes) of the Call-site Table encoded in
%% U-LEB128.
{_CSTabSize, CSTab} = leb128_decode(More4),
%% Extract all call site information
get_gccexntab_callsites(CSTab, [])
end.
get_gccexntab_callsites(<<>>, Acc) ->
lists:reverse(Acc);
get_gccexntab_callsites(CSTab, Acc) ->
%% We are only interested in the Landing Pad of every entry.
<<Start:32/integer-little, Size:32/integer-little,
LP:32/integer-little, OnAction:8, More/binary
>> = CSTab,
GccCS = mk_gccexntab_callsite(Start, Size, LP, OnAction),
get_gccexntab_callsites(More, [GccCS | Acc]).
%%------------------------------------------------------------------------------
%% Helper functions
%%------------------------------------------------------------------------------
%% @doc Returns the binary segment starting at `Offset' with length `Size'
%% (bytes) from a binary file. If `Offset' is bigger than the byte size of
%% the binary, an empty binary (`<<>>') is returned.
-spec get_binary_segment(binary(), offset(), size()) -> binary().
get_binary_segment(Bin, Offset, _Size) when Offset > byte_size(Bin) ->
<<>>;
get_binary_segment(Bin, Offset, Size) ->
<<_Hdr:Offset/binary, BinSeg:Size/binary, _More/binary>> = Bin,
BinSeg.
%% @doc This function gets as arguments an ELF formated binary object and
%% a string with the segments' name and returns the specified segment or
%% an empty binary (`<<>>') if there exists no segment with that name.
%% There are handy macros defined in elf_format.hrl for all Standard
%% Section Names.
-spec extract_segment_by_name(elf(), string()) -> binary().
extract_segment_by_name(#elf{file=ElfBin, sec_nam=SecNam}, SectionName) ->
%% Find Section Header Table entry by name
case SecNam of
#{SectionName := #elf_shdr{offset=Offset, size=Size}} ->
get_binary_segment(ElfBin, Offset, Size);
#{} -> %% Not found.
<<>>
end.
%% @doc Little-Endian Base 128 (LEB128) Decoder
%% This function extracts the <b>first</b> LEB128-encoded integer in a
%% binary and returns that integer along with the remaining binary. This is
%% done because a LEB128 number has variable bit-size and that is a way of
%% extracting only one number in a binary and continuing parsing the binary
%% for other kind of data (e.g. different encoding).
%% FIXME: Only decodes unsigned data!
-spec leb128_decode(binary()) -> {integer(), binary()}.
leb128_decode(LebNum) ->
leb128_decode(LebNum, 0, <<>>).
-spec leb128_decode(binary(), integer(), binary()) -> {integer(), binary()}.
leb128_decode(LebNum, NoOfBits, Acc) ->
<<Sentinel:1/bits, NextBundle:7/bits, MoreLebNums/bits>> = LebNum,
case Sentinel of
<<1:1>> -> % more bytes to follow
leb128_decode(MoreLebNums, NoOfBits+7, <<NextBundle:7/bits, Acc/bits>>);
<<0:1>> -> % byte bundle stop
Size = NoOfBits+7,
<<Num:Size/integer>> = <<NextBundle:7/bits, Acc/bits>>,
{Num, MoreLebNums}
end.
|