1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
|
%% -*- erlang-indent-level: 2 -*-
%%% @copyright 2011-2014 Yiannis Tsiouris <[email protected]>,
%%% Chris Stavrakakis <[email protected]>,
%%% Kostis Sagonas <[email protected]>
%%% @author Yiannis Tsiouris <[email protected]>
%%% [http://www.softlab.ntua.gr/~gtsiour/]
%%% @doc This module contains functions for extracting various pieces of
%%% information from an ELF formatted object file. To fully understand
%%% the ELF format and the use of these functions please read
%%% "[http://www.linuxjournal.com/article/1060?page=0,0]" carefully.
-module(elf_format).
-export([get_tab_entries/1,
%% Relocations
get_rodata_relocs/1,
get_text_relocs/1,
extract_rela/2,
get_rela_addends/1,
%% Note
extract_note/2,
%% Executable code
extract_text/1,
%% GCC Exception Table
get_exn_handlers/1
]).
-include("elf_format.hrl").
%%------------------------------------------------------------------------------
%% Types
%%------------------------------------------------------------------------------
%% The numeric aliases below are all plain non-negative integers; the distinct
%% names only document what a value stands for in specs and record fields.
%% `elf()' is the binary handed to the extraction functions of this module.
-type elf() :: binary().
-type lp() :: non_neg_integer(). % landing pad
-type num() :: non_neg_integer().
-type index() :: non_neg_integer().
-type offset() :: non_neg_integer().
-type size() :: non_neg_integer().
-type start() :: non_neg_integer().
-type info() :: index().
-type nameoff() :: offset().
-type valueoff() :: offset().
-type name() :: string().
%% A name paired with its length in characters (see get_names/2).
-type name_size() :: {name(), size()}.
-type name_sizes() :: [name_size()].
%%------------------------------------------------------------------------------
%% Abstract Data Types and Accessors for ELF Structures.
%%------------------------------------------------------------------------------
%% File header
%% Decoded ELF file header, as built by mk_ehdr/14 from the fields that
%% extract_header/1 reads. `ident' holds an `elf_ehdr_ident' record.
-record(elf_ehdr, {ident, % ELF identification
type, % Object file type
machine, % Machine Type
version, % Object file version
entry, % Entry point address
phoff, % Program header offset
shoff :: offset(), % Section header offset
flags, % Processor-specific flags
ehsize :: size(), % ELF header size
phentsize :: size(), % Size of program header entry
phnum :: num(), % Number of program header entries
shentsize :: size(), % Size of section header entry
shnum :: num(), % Number of section header entries
shstrndx :: index() % Section name string table index
}).
-type elf_ehdr() :: #elf_ehdr{}.
%% Decoded identification bytes of the file header (the bytes that follow the
%% 4-byte ELF magic number); built by mk_ehdr_ident/7.
-record(elf_ehdr_ident, {class, % File class
data, % Data encoding
version, % File version
osabi, % OS/ABI identification
abiversion, % ABI version
pad, % Start of padding bytes
nident % Size of e_ident[]
}).
%% -type elf_ehdr_ident() :: #elf_ehdr_ident{}.
%% Section header entries
%% One decoded entry of the Section Header Table; built by mk_shdr/10.
%% `name' is kept as the raw integer read from the file; this module sorts
%% the headers on that field when pairing them with section names (see
%% extract_segment_by_name/2).
-record(elf_shdr, {name, % Section name
type, % Section type
flags, % Section attributes
addr, % Virtual address in memory
offset :: offset(), % Offset in file
size :: size(), % Size of section
link, % Link to other section
info, % Miscellaneous information
addralign, % Address align boundary
entsize % Size of entries, if section has table
}).
%% -type elf_shdr() :: #elf_shdr{}.
%% Symbol table entries
%% One decoded symbol table entry; built by mk_sym/6 from parse_sym/1.
%% `name' is an offset into the string table, not the name itself.
-record(elf_sym, {name :: nameoff(), % Symbol name
info, % Type and Binding attributes
other, % Reserved
shndx, % Section table index
value :: valueoff(), % Symbol value
size :: size() % Size of object
}).
-type elf_sym() :: #elf_sym{}.
%% Relocations
%% The relocation record is selected at compile time: when BIT32 is defined
%% the two-field `elf_rel' record is used, otherwise the three-field
%% `elf_rela' record (which additionally carries an addend). Either way the
%% record type is exposed to the rest of the module as `reloc()'.
-ifdef(BIT32).
-record(elf_rel, {r_offset :: offset(), % Address of reference
r_info :: info() % Symbol index and type of relocation
}).
-type reloc() :: #elf_rel{}.
-else.
-record(elf_rela, {r_offset :: offset(), % Address of reference
r_info :: info(), % Symbol index and type of relocation
r_addend :: offset() % Constant part of expression
}).
-type reloc() :: #elf_rela{}.
-endif.
%% %% Program header table
%% -record(elf_phdr, {type, % Type of segment
%% flags, % Segment attributes
%% offset, % Offset in file
%% vaddr, % Virtual address in memory
%% paddr, % Reserved
%% filesz, % Size of segment in file
%% memsz, % Size of segment in memory
%% align % Alignment of segment
%% }).
%% %% GCC exception table
%% -record(elf_gccexntab, {lpbenc, % Landing pad base encoding
%% lpbase, % Landing pad base
%% ttenc, % Type table encoding
%% ttoff, % Type table offset
%% csenc, % Call-site table encoding
%% cstabsize, % Call-site table size
%% cstab :: cstab() % Call-site table
%% }).
%% -type elf_gccexntab() :: #elf_gccexntab{}.
%% One decoded entry of the call-site table found in the GCC exception table;
%% built by mk_gccexntab_callsite/4.
-record(elf_gccexntab_callsite, {start :: start(), % Call-site start
size :: size(), % Call-site size
lp :: lp(), % Call-site landing pad
% (exception handler)
onaction % On action (e.g. cleanup)
}).
%% -type elf_gccexntab_callsite() :: #elf_gccexntab_callsite{}.
%%------------------------------------------------------------------------------
%% Accessor Functions
%%------------------------------------------------------------------------------
%% File header
%% Builds an `elf_ehdr' record from already decoded header fields. The
%% arguments are taken in the same order in which extract_header/1 reads the
%% fields from the file header.
%% -spec mk_ehdr(...) -> elf_ehdr().
mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags, Ehsize,
Phentsize, Phnum, Shentsize, Shnum, Shstrndx) ->
#elf_ehdr{ident = Ident, type = Type, machine = Machine, version = Version,
entry = Entry, phoff = Phoff, shoff = Shoff, flags = Flags,
ehsize = Ehsize, phentsize = Phentsize, phnum = Phnum,
shentsize = Shentsize, shnum = Shnum, shstrndx = Shstrndx}.
%% -spec ehdr_shoff(elf_ehdr()) -> offset().
%% ehdr_shoff(#elf_ehdr{shoff = Offset}) -> Offset.
%%
%% -spec ehdr_shentsize(elf_ehdr()) -> size().
%% ehdr_shentsize(#elf_ehdr{shentsize = Size}) -> Size.
%%
%% -spec ehdr_shnum(elf_ehdr()) -> num().
%% ehdr_shnum(#elf_ehdr{shnum = Num}) -> Num.
%%
%% -spec ehdr_shstrndx(elf_ehdr()) -> index().
%% ehdr_shstrndx(#elf_ehdr{shstrndx = Index}) -> Index.
%% Builds an `elf_ehdr_ident' record from the decoded identification bytes of
%% the file header.
%%-spec mk_ehdr_ident(...) -> elf_ehdr_ident().
mk_ehdr_ident(Class, Data, Version, OsABI, AbiVersion, Pad, Nident) ->
#elf_ehdr_ident{class = Class, data = Data, version = Version, osabi = OsABI,
abiversion = AbiVersion, pad = Pad, nident = Nident}.
%%%-------------------------
%%% Section header entries
%%%-------------------------
%% Builds an `elf_shdr' record from the decoded fields of one Section Header
%% Table entry. The arguments are taken in the order in which
%% get_shdrtab_entries/2 reads them.
mk_shdr(Name, Type, Flags, Addr, Offset, Size, Link, Info, AddrAlign, EntSize) ->
#elf_shdr{name = Name, type = Type, flags = Flags, addr = Addr,
offset = Offset, size = Size, link = Link, info = Info,
addralign = AddrAlign, entsize = EntSize}.
%% -spec shdr_offset(elf_shdr()) -> offset().
%% shdr_offset(#elf_shdr{offset = Offset}) -> Offset.
%%
%% -spec shdr_size(elf_shdr()) -> size().
%% shdr_size(#elf_shdr{size = Size}) -> Size.
%%%-------------------------
%%% Symbol Table Entries
%%%-------------------------
%% Builds an `elf_sym' record from the decoded fields of one symbol table
%% entry. The argument order is fixed and independent of the on-disk field
%% order, which differs between the two parse_sym/1 variants.
mk_sym(Name, Info, Other, Shndx, Value, Size) ->
#elf_sym{name = Name, info = Info, other = Other,
shndx = Shndx, value = Value, size = Size}.
%% Returns the `name' field of a symbol table entry, i.e. the offset of the
%% symbol's name in the string table (not the name itself).
-spec sym_name(elf_sym()) -> nameoff().
sym_name(#elf_sym{name = Name}) -> Name.
%% -spec sym_value(elf_sym()) -> valueoff().
%% sym_value(#elf_sym{value = Value}) -> Value.
%%
%% -spec sym_size(elf_sym()) -> size().
%% sym_size(#elf_sym{size = Size}) -> Size.
%%%-------------------------
%%% Relocations
%%%-------------------------
%% The following two functions capitalize on the fact that the two kinds of
%% relocation records (for 32- and 64-bit architectures) have similar
%% structure: both carry an `r_offset' and an `r_info' field, so r_offset/1
%% and r_info/1 are available whichever record `reloc()' stands for.
%% The constructor differs (mk_rel/2 when BIT32 is defined, mk_rela/3
%% otherwise), and rela_addend/1 only exists in the non-BIT32 build because
%% only `elf_rela' has an addend.
-spec r_offset(reloc()) -> offset().
-spec r_info(reloc()) -> info().
-ifdef(BIT32).
-spec mk_rel(offset(), info()) -> reloc().
mk_rel(Offset, Info) ->
#elf_rel{r_offset = Offset, r_info = Info}.
r_offset(#elf_rel{r_offset = Offset}) -> Offset.
r_info(#elf_rel{r_info = Info}) -> Info.
-else.%%BIT32
-spec mk_rela(offset(), info(), offset()) -> reloc().
mk_rela(Offset, Info, Addend) ->
#elf_rela{r_offset = Offset, r_info = Info, r_addend = Addend}.
r_offset(#elf_rela{r_offset = Offset}) -> Offset.
r_info(#elf_rela{r_info = Info}) -> Info.
-spec rela_addend(reloc()) -> offset().
rela_addend(#elf_rela{r_addend = Addend}) -> Addend.
-endif.%%BIT32
%% %%%-------------------------
%% %%% GCC exception table
%% %%%-------------------------
%% -type cstab() :: [elf_gccexntab_callsite()].
%%
%% mk_gccexntab(LPbenc, LPbase, TTenc, TToff, CSenc, CStabsize, CStab) ->
%% #elf_gccexntab{lpbenc = LPbenc, lpbase = LPbase, ttenc = TTenc,
%% ttoff = TToff, csenc = CSenc, cstabsize = CStabsize,
%% cstab = CStab}.
%%
%% -spec gccexntab_cstab(elf_gccexntab()) -> cstab().
%% gccexntab_cstab(#elf_gccexntab{cstab = CSTab}) -> CSTab.
%% Builds an `elf_gccexntab_callsite' record from the four fields of one
%% call-site table entry (start, size, landing pad and action).
mk_gccexntab_callsite(Start, Size, LP, Action) ->
#elf_gccexntab_callsite{start = Start, size=Size, lp=LP, onaction=Action}.
%% -spec gccexntab_callsite_start(elf_gccexntab_callsite()) -> start().
%% gccexntab_callsite_start(#elf_gccexntab_callsite{start = Start}) -> Start.
%%
%% -spec gccexntab_callsite_size(elf_gccexntab_callsite()) -> size().
%% gccexntab_callsite_size(#elf_gccexntab_callsite{size = Size}) -> Size.
%%
%% -spec gccexntab_callsite_lp(elf_gccexntab_callsite()) -> lp().
%% gccexntab_callsite_lp(#elf_gccexntab_callsite{lp = LP}) -> LP.
%%------------------------------------------------------------------------------
%% Functions to manipulate the ELF File Header
%%------------------------------------------------------------------------------
%% @doc Extracts the File Header from an ELF formatted object file and returns
%%      it as an `elf_ehdr' record whose `ident' field holds the decoded
%%      identification bytes (an `elf_ehdr_ident' record).
%%
%%      All multi-byte fields are read as little-endian integers. The function
%%      fails with a `badmatch' error if `Elf' is too short to contain a
%%      complete header or does not start with the ELF magic number
%%      (`16#7f, "ELF"').
-spec extract_header(elf()) -> elf_ehdr().
extract_header(Elf) ->
  Ehdr_bin = get_binary_segment(Elf, 0, ?ELF_EHDR_SIZE),
  << %% Structural pattern matching on fields.
     Ident_bin:?E_IDENT_SIZE/binary,
     Type:?bits(?E_TYPE_SIZE)/integer-little,
     Machine:?bits(?E_MACHINE_SIZE)/integer-little,
     Version:?bits(?E_VERSION_SIZE)/integer-little,
     Entry:?bits(?E_ENTRY_SIZE)/integer-little,
     Phoff:?bits(?E_PHOFF_SIZE)/integer-little,
     Shoff:?bits(?E_SHOFF_SIZE)/integer-little,
     Flags:?bits(?E_FLAGS_SIZE)/integer-little,
     Ehsize:?bits(?E_EHSIZE_SIZE)/integer-little,
     Phentsize:?bits(?E_PHENTSIZE_SIZE)/integer-little,
     Phnum:?bits(?E_PHNUM_SIZE)/integer-little,
     Shentsize:?bits(?E_SHENTSIZE_SIZE)/integer-little,
     %% The number of section headers has its own size macro; do not borrow
     %% the one of the preceding field.
     Shnum:?bits(?E_SHNUM_SIZE)/integer-little,
     Shstrndx:?bits(?E_SHSTRNDX_SIZE)/integer-little
  >> = Ehdr_bin,
  %% `Version' is already bound at this point, so the match below also
  %% asserts that the version byte of the identification equals the
  %% object file version read above.
  <<16#7f, $E, $L, $F, Class, Data, Version, Osabi, Abiversion,
    Pad:6/binary, Nident
  >> = Ident_bin,
  Ident = mk_ehdr_ident(Class, Data, Version, Osabi,
                        Abiversion, Pad, Nident),
  mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags,
          Ehsize, Phentsize, Phnum, Shentsize, Shnum, Shstrndx).
%%------------------------------------------------------------------------------
%% Functions to manipulate Section Header Entries
%%------------------------------------------------------------------------------
%% @doc Parses the Section Header Table of an ELF formatted object file and
%%      returns its entries as a list of `elf_shdr' records, in file order.
extract_shdrtab(Elf) ->
  %% The file header tells where the table starts (byte offset), how large
  %% one entry is (bytes) and how many entries there are.
  Header = extract_header(Elf),
  TableOffset = Header#elf_ehdr.shoff,
  TableSize = Header#elf_ehdr.shnum * Header#elf_ehdr.shentsize,
  %% Slice out the raw table and decode it entry by entry.
  RawTable = get_binary_segment(Elf, TableOffset, TableSize),
  get_shdrtab_entries(RawTable, []).
%% Decodes the raw Section Header Table `Table' one entry at a time, pushing
%% each decoded `elf_shdr' record on `Parsed'. Once the table is exhausted the
%% entries are returned in file order. A table whose length is not a multiple
%% of the entry size makes the field match fail with `badmatch'.
get_shdrtab_entries(Table, Parsed) ->
  case Table of
    <<>> ->
      lists:reverse(Parsed);
    _ ->
      <<%% Fields of one entry, all little-endian integers.
        ShName:?bits(?SH_NAME_SIZE)/integer-little,
        ShType:?bits(?SH_TYPE_SIZE)/integer-little,
        ShFlags:?bits(?SH_FLAGS_SIZE)/integer-little,
        ShAddr:?bits(?SH_ADDR_SIZE)/integer-little,
        ShOffset:?bits(?SH_OFFSET_SIZE)/integer-little,
        ShSize:?bits(?SH_SIZE_SIZE)/integer-little,
        ShLink:?bits(?SH_LINK_SIZE)/integer-little,
        ShInfo:?bits(?SH_INFO_SIZE)/integer-little,
        ShAddralign:?bits(?SH_ADDRALIGN_SIZE)/integer-little,
        ShEntsize:?bits(?SH_ENTSIZE_SIZE)/integer-little,
        Rest/binary
      >> = Table,
      Entry = mk_shdr(ShName, ShType, ShFlags, ShAddr, ShOffset,
                      ShSize, ShLink, ShInfo, ShAddralign, ShEntsize),
      get_shdrtab_entries(Rest, [Entry | Parsed])
  end.
%% @doc Extracts a specific Entry of a Section Header Table. This function
%% takes as argument the Section Header Table (`SHdrTab', a list) and the
%% entry's serial number (`EntryNum') and returns the entry (`shdr').
%% `EntryNum' is zero-based: it is incremented by one before being used
%% as a (one-based) list position.
get_shdrtab_entry(SHdrTab, EntryNum) ->
lists:nth(EntryNum + 1, SHdrTab).
%%------------------------------------------------------------------------------
%% Functions to manipulate Section Header String Table
%%------------------------------------------------------------------------------
%% @doc Returns the section names stored in the Section Header String Table,
%%      i.e. the section whose index the file header gives as `shstrndx'.
%%      The names are those produced by get_names/1, so they include the
%%      extra entries that fix_names/2 adds. An empty string table section
%%      yields `[]'.
-spec extract_shstrtab(elf()) -> [name()].
extract_shstrtab(Elf) ->
  %% The file header holds the index of the string table's section header.
  Header = extract_header(Elf),
  Sections = extract_shdrtab(Elf),
  StrSection = get_shdrtab_entry(Sections, Header#elf_ehdr.shstrndx),
  #elf_shdr{offset = StrOffset, size = StrSize} = StrSection,
  StrTabBin = get_binary_segment(Elf, StrOffset, StrSize),
  if
    StrTabBin =:= <<>> -> %% Nothing to decode.
      [];
    true -> %% Decode and keep the names only (drop their sizes).
      [Name || {Name, _Size} <- get_names(StrTabBin)]
  end.
%%------------------------------------------------------------------------------
%% @doc Returns one `{Name, Value, Size}' tuple for every symbol table entry
%%      whose name offset is not zero. `Name' is looked up in the string
%%      table (`""' if no name starts at that offset), `Value' is the
%%      symbol's value and `Size' is the symbol's size divided by
%%      `?ELF_XWORD_SIZE'.
-spec get_tab_entries(elf()) -> [{name(), valueoff(), size()}].
get_tab_entries(Elf) ->
  Symbols = extract_symtab(Elf),
  %% String table as [{Name, Offset in strtab section}].
  StrTab = extract_strtab(Elf),
  [{get_strtab_entry(StrTab, NameOff), Value, Size div ?ELF_XWORD_SIZE}
   || #elf_sym{name = NameOff, value = Value, size = Size} <- Symbols,
      NameOff =/= 0].
%%------------------------------------------------------------------------------
%% Functions to manipulate Symbol Table
%%------------------------------------------------------------------------------
%% @doc Parses the Symbol Table section of an ELF object file into a list of
%%      `elf_sym' records, in file order. Returns `[]' when
%%      extract_segment_by_name/2 yields an empty binary for the section.
extract_symtab(Elf) ->
  get_symtab_entries(extract_segment_by_name(Elf, ?SYMTAB), []).

%% Cuts `Table' into chunks of `?ELF_SYM_SIZE' bytes and decodes each chunk
%% with parse_sym/1. Leftover bytes that do not fill a whole entry make the
%% slicing match fail with `badmatch'.
get_symtab_entries(Table, Parsed) ->
  case Table of
    <<>> ->
      lists:reverse(Parsed);
    _ ->
      <<RawEntry:?ELF_SYM_SIZE/binary, Rest/binary>> = Table,
      get_symtab_entries(Rest, [parse_sym(RawEntry) | Parsed])
  end.
%% Decodes one raw symbol table entry into an `elf_sym' record. The binary
%% must consist of exactly the six fields below (the pattern has no tail);
%% get_symtab_entries/2 passes chunks of `?ELF_SYM_SIZE' bytes.
%% Both variants read the same six fields as little-endian integers, but the
%% on-disk order differs, so the variant is chosen at compile time via BIT32.
-ifdef(BIT32).
parse_sym(<<%% Structural pattern matching on fields.
Name:?bits(?ST_NAME_SIZE)/integer-little,
Value:?bits(?ST_VALUE_SIZE)/integer-little,
Size:?bits(?ST_SIZE_SIZE)/integer-little,
Info:?bits(?ST_INFO_SIZE)/integer-little,
Other:?bits(?ST_OTHER_SIZE)/integer-little,
Shndx:?bits(?ST_SHNDX_SIZE)/integer-little>>) ->
mk_sym(Name, Info, Other, Shndx, Value, Size).
-else.
parse_sym(<<%% Same fields in different order:
Name:?bits(?ST_NAME_SIZE)/integer-little,
Info:?bits(?ST_INFO_SIZE)/integer-little,
Other:?bits(?ST_OTHER_SIZE)/integer-little,
Shndx:?bits(?ST_SHNDX_SIZE)/integer-little,
Value:?bits(?ST_VALUE_SIZE)/integer-little,
Size:?bits(?ST_SIZE_SIZE)/integer-little>>) ->
mk_sym(Name, Info, Other, Shndx, Value, Size).
-endif.
%% @doc Extracts a specific entry from the Symbol Table.
%% This function takes as arguments the Symbol Table (`SymTab', a list as
%% returned by extract_symtab/1) and the entry's serial number and
%% returns that entry (`sym'). `EntryNum' is zero-based: it is
%% incremented by one before being used as a (one-based) list position.
get_symtab_entry(SymTab, EntryNum) ->
lists:nth(EntryNum + 1, SymTab).
%%------------------------------------------------------------------------------
%% Functions to manipulate String Table
%%------------------------------------------------------------------------------
%% @doc Extracts the String Table of an ELF formatted object file as a list
%%      of `{Name, Offset}' pairs, where `Offset' is the position assigned to
%%      the name by make_offsets/1.
%%      Note: get_names/1 only accepts a binary that starts with a zero byte,
%%      so this fails with `function_clause' when the section lookup returns
%%      an empty binary.
-spec extract_strtab(elf()) -> [{string(), offset()}].
extract_strtab(Elf) ->
  RawStrTab = extract_segment_by_name(Elf, ?STRTAB),
  make_offsets(get_names(RawStrTab)).
%% @doc Looks up the name stored at `Offset' in a string table given as a
%%      list of `{Name, Offset}' pairs. Returns the empty string (`""') when
%%      no entry has that offset.
%% XXX: There might be a bug here because of the "compact" saving the ELF
%%      format uses: e.g. only stores ".rela.text" for ".rela.text" and ".text".
get_strtab_entry(Strtab, Offset) ->
  Hit = lists:keyfind(Offset, 2, Strtab),
  case Hit of
    false -> "";
    {Name, Offset} -> Name
  end.
%%------------------------------------------------------------------------------
%% Functions to manipulate Relocations
%%------------------------------------------------------------------------------
%% @doc This function gets as argument an ELF binary file and returns a list
%% with all .rela.rodata labels (i.e. constants and literals in code)
%% or an empty list if no ".rela.rodata" section exists in code.
%%
%% The implementation is selected at compile time:
%% - With BIT32 defined, the read-only data section itself is decoded by
%%   extract_rodata/1 and every non-zero value is returned (zeros are
%%   treated as padding). get_rela_addends/1 is not supported in this
%%   build and always raises `error(notsup)'.
%% - Otherwise, the addends of the relocation entries that extract_rela/2
%%   returns for `?RODATA' are returned.
-spec get_rodata_relocs(elf()) -> [offset()].
-spec get_rela_addends([reloc()]) -> [offset()].
-ifdef(BIT32).
get_rodata_relocs(Elf) ->
[SkipPadding || SkipPadding <- extract_rodata(Elf), SkipPadding =/= 0].
get_rela_addends(_RelaEntries) -> error(notsup).
-else.
get_rodata_relocs(Elf) -> get_rela_addends(extract_rela(Elf, ?RODATA)).
get_rela_addends(RelaEntries) -> [rela_addend(E) || E <- RelaEntries].
-endif.
%% @doc Returns `[{SymbolName, Offset}]' with one pair per relocation entry
%%      of the ".text" section: the name of the symbol the relocation refers
%%      to and the offset at which the relocation applies.
-spec get_text_relocs(elf()) -> [{name(), offset()}].
get_text_relocs(Elf) ->
  Relocs = extract_rela(Elf, ?TEXT),
  %% Each relocation names its symbol by symbol table index; the symbol in
  %% turn holds the offset of its name in the String Table.
  %% XXX: not zero-indexed!
  %% XXX: Do we need to sort the symbol table entries?
  Symtab = extract_symtab(Elf),
  NameOffs = [sym_name(get_symtab_entry(Symtab, ?ELF_R_SYM(r_info(Reloc))))
              || Reloc <- Relocs],
  %% String table entries are [{Name, Offset in strtab section}]; resolve
  %% each name offset and pair the name with the relocation's own offset.
  Strtab = extract_strtab(Elf),
  [{get_strtab_entry(Strtab, NameOff), r_offset(Reloc)}
   || {NameOff, Reloc} <- lists:zip(NameOffs, Relocs)].
%% @doc Extract the Relocations segment for section `Name' (that is passed
%% as second argument) from an ELF formated Object file binary.
%%
%% Returns one relocation record per entry, in file order, or `[]' when
%% the section lookup yields an empty binary. All fields are decoded as
%% little-endian unsigned integers. The entries are read with a binary
%% comprehension, so trailing bytes that do not form a complete entry
%% are ignored rather than causing an error.
%% The section looked up is `?REL(Name)' when BIT32 is defined and
%% `?RELA(Name)' otherwise.
-spec extract_rela(elf(), name()) -> [reloc()].
-ifdef(BIT32).
extract_rela(Elf, Name) ->
%% Structural pattern matching on fields of a Rel Entry.
[mk_rel(Offset, Info)
|| <<Offset:?bits(?R_OFFSET_SIZE)/little,
Info:?bits(?R_INFO_SIZE)/little % ELF-32 uses ".rel"
>> <= extract_segment_by_name(Elf, ?REL(Name))].
-else.
extract_rela(Elf, Name) ->
[mk_rela(Offset, Info, Addend)
|| <<Offset:?bits(?R_OFFSET_SIZE)/little,
Info:?bits(?R_INFO_SIZE)/little,
Addend:?bits(?R_ADDEND_SIZE)/little % ...while ELF-64 uses ".rela"
>> <= extract_segment_by_name(Elf, ?RELA(Name))].
-endif.
%% %% @doc Extract the `EntryNum' (serial number) Relocation Entry.
%% get_rela_entry(Rela, EntryNum) ->
%% lists:nth(EntryNum + 1, Rela).
%%------------------------------------------------------------------------------
%% Functions to manipulate Executable Code segment
%%------------------------------------------------------------------------------
%% @doc This function gets as arguments an ELF formated binary file and
%% returns the Executable Code (".text" segment) or an empty binary if it
%% is not found. It is a direct lookup of the `?TEXT' section through
%% extract_segment_by_name/2.
-spec extract_text(elf()) -> binary().
extract_text(Elf) ->
extract_segment_by_name(Elf, ?TEXT).
%%------------------------------------------------------------------------------
%% Functions to manipulate Note Section
%%------------------------------------------------------------------------------
%% @doc Extract specific Note Section from an ELF Object file. The function
%% takes as first argument the object file (`Elf') and the `Name' of the
%% wanted Note Section (<b>without</b> the ".note" prefix!). It returns
%% the specified binary segment or an empty binary if no such section
%% exists. The full section name is built with the `?NOTE' macro and
%% looked up through extract_segment_by_name/2.
-spec extract_note(elf(), string()) -> binary().
extract_note(Elf, Name) ->
extract_segment_by_name(Elf, ?NOTE(Name)).
%%------------------------------------------------------------------------------
%% Functions to manipulate GCC Exception Table segment
%%------------------------------------------------------------------------------
%% A description of the C++ exception table formats can be found in
%% "Exception Handling Tables"
%% (http://www.codesourcery.com/cxx-abi/exceptions.pdf).
%% @doc Returns a list with `{Start, End, HandlerOffset}' for all call sites
%%      found in the GCC exception table of the object file, where `End' is
%%      the call site's start plus its size and `HandlerOffset' is its
%%      landing pad.
-spec get_exn_handlers(elf()) -> [{start(), start(), lp()}].
get_exn_handlers(Elf) ->
  [{CallSite#elf_gccexntab_callsite.start,
    CallSite#elf_gccexntab_callsite.start
      + CallSite#elf_gccexntab_callsite.size,
    CallSite#elf_gccexntab_callsite.lp}
   || CallSite <- extract_gccexntab_callsites(Elf),
      is_record(CallSite, elf_gccexntab_callsite)].
%% @doc Returns the list of call sites stored in the GCC Exception Table
%%      (`?GCC_EXN_TAB') section of an ELF binary, or `[]' if that section
%%      is not found.
%%
%%      The header of the table is walked field by field only to locate the
%%      call-site table; none of the header values is used. In particular
%%      the decoded call-site table size is ignored: everything after it, up
%%      to the end of the section, is parsed as call-site entries.
%% XXX: Assumes there is *no* Action Record Table.
extract_gccexntab_callsites(Elf) ->
  case extract_segment_by_name(Elf, ?GCC_EXN_TAB) of
    <<>> ->
      [];
    ExnTab ->
      %% 1. Landing pad base encoding (one byte), optionally followed by
      %%    the landing pad base itself.
      <<LPBaseEnc:8, AfterLPBaseEnc/binary>> = ExnTab,
      AfterLPBase = skip_lsda_lpbase(LPBaseEnc, AfterLPBaseEnc),
      %% 2. Types Table encoding (one byte), optionally followed by the
      %%    Types Table offset.
      <<TTEnc:8, AfterTTEnc/binary>> = AfterLPBase,
      AfterTTOff = skip_lsda_ttoff(TTEnc, AfterTTEnc),
      %% 3. Call-site table encoding (one byte), then the call-site table
      %%    size in U-LEB128.
      <<_CSEnc:8, AfterCSEnc/binary>> = AfterTTOff,
      {_CSTabSize, CSTab} = leb128_decode(AfterCSEnc),
      %% 4. The call-site entries themselves.
      get_gccexntab_callsites(CSTab, [])
  end.

%% Skips the landing pad base, which is present (as a single byte) unless its
%% encoding is `?DW_EH_PE_omit'.
skip_lsda_lpbase(Enc, Bin) when Enc =:= ?DW_EH_PE_omit ->
  Bin;
skip_lsda_lpbase(_Enc, Bin) ->
  <<_Base:8, Rest/binary>> = Bin,
  Rest.

%% Skips the Types Table offset, which is present (as a U-LEB128 number)
%% unless the Types Table encoding is `?DW_EH_PE_omit'.
skip_lsda_ttoff(Enc, Bin) when Enc =:= ?DW_EH_PE_omit ->
  Bin;
skip_lsda_ttoff(_Enc, Bin) ->
  {_TTOff, Rest} = leb128_decode(Bin),
  Rest.
%% Decodes the call-site table `Table' into `elf_gccexntab_callsite' records,
%% returned in table order. Every entry is read as three 32-bit little-endian
%% integers (start, size, landing pad) followed by a one-byte action; a
%% leftover shorter than that makes the match fail with `badmatch'.
get_gccexntab_callsites(Table, Parsed) ->
  case Table of
    <<>> ->
      lists:reverse(Parsed);
    _ ->
      <<CSStart:32/integer-little, CSSize:32/integer-little,
        LandingPad:32/integer-little, Action:8, Rest/binary
      >> = Table,
      CallSite = mk_gccexntab_callsite(CSStart, CSSize, LandingPad, Action),
      get_gccexntab_callsites(Rest, [CallSite | Parsed])
  end.
%%------------------------------------------------------------------------------
%% Functions to manipulate Read-only Data (.rodata)
%%------------------------------------------------------------------------------
%% Only compiled when BIT32 is defined (used by the BIT32 variant of
%% get_rodata_relocs/1).
%% extract_rodata/1 looks up the `?RODATA' section and decodes it as a
%% sequence of address-sized (`?ELF_ADDR_SIZE') little-endian integers,
%% returned in section order. A section whose length is not a multiple of that
%% size makes the match in get_rodata_entries/2 fail with `badmatch'.
-ifdef(BIT32).
extract_rodata(Elf) ->
Rodata_bin = extract_segment_by_name(Elf, ?RODATA),
get_rodata_entries(Rodata_bin, []).
get_rodata_entries(<<>>, Acc) ->
lists:reverse(Acc);
get_rodata_entries(Rodata_bin, Acc) ->
<<Num:?bits(?ELF_ADDR_SIZE)/integer-little, More/binary>> = Rodata_bin,
get_rodata_entries(More, [Num | Acc]).
-endif.
%%------------------------------------------------------------------------------
%% Helper functions
%%------------------------------------------------------------------------------
%% @doc Returns the `Size' bytes of `Bin' that start at byte `Offset'.
%%      If `Offset' lies beyond the end of the binary, an empty binary
%%      (`<<>>') is returned instead. A segment that starts inside the
%%      binary but extends past its end makes the match fail with
%%      `badmatch'.
-spec get_binary_segment(binary(), offset(), size()) -> binary().
get_binary_segment(Bin, Offset, Size) ->
  if
    Offset > byte_size(Bin) ->
      <<>>;
    true ->
      <<_Skipped:Offset/binary, Segment:Size/binary, _Rest/binary>> = Bin,
      Segment
  end.
%% @doc Returns the contents of the section called `SectionName' from an ELF
%%      formatted binary object, or an empty binary (`<<>>') if there is no
%%      section with that name. There are handy macros defined in
%%      elf_format.hrl for all Standard Section Names.
%%
%%      Section headers are paired with names positionally: the headers are
%%      sorted on their `name' field, the first one (zeros) is dropped, and
%%      the rest are zipped with the names from extract_shstrtab/1.
%%      NOTE(review): `lists:zip/2' requires both lists to have the same
%%      length, so this relies on the name list lining up one-to-one with the
%%      remaining headers - confirm against fix_names/2.
-spec extract_segment_by_name(elf(), string()) -> binary().
extract_segment_by_name(Elf, SectionName) ->
  Sections = extract_shdrtab(Elf),
  Names = extract_shstrtab(Elf),
  %% Order headers by their `name' field and skip the first entry (zeros).
  [_Null | Sorted] = lists:keysort(#elf_shdr.name, Sections),
  Named = lists:zip(Names, Sorted),
  case lists:keyfind(SectionName, 1, Named) of
    false -> %% Not found.
      <<>>;
    {SectionName, #elf_shdr{offset = Offset, size = Size}} ->
      get_binary_segment(Elf, Offset, Size)
  end.
%% @doc Splits a binary of zero-separated names into `{Name, Size}' tuples,
%%      where `Size' is the length of `Name'. The binary must start with a
%%      zero byte, which is skipped. The resulting list is passed through
%%      fix_names/2 before being returned.
%% XXX: Skip trailing 0.
-spec get_names(<<_:8,_:_*8>>) -> name_sizes().
get_names(<<0, Packed/binary>>) ->
  fix_names(get_names(Packed, []), []).

%% Peels one null-terminated name at a time off `Packed'; the names come out
%% in the order in which they appear in the binary.
get_names(Packed, Acc) ->
  case Packed of
    <<>> ->
      lists:reverse(Acc);
    _ ->
      {Name, Rest} = bin_get_string(Packed),
      get_names(Rest, [{Name, length(Name)} | Acc])
  end.
%% @doc Fix names:
%%      e.g. If ".rela.text" exists, ".text" does not. Same goes for
%%      ".rel.text". In that way, the Section Header String Table is more
%%      compact. For every name that starts with the relocation prefix
%%      (".rela" when is64bit/0 returns `true', ".rel" otherwise) an extra
%%      entry for the name without the prefix is added; in the returned list
%%      it directly follows the entry it was derived from. All other names
%%      are passed through unchanged.
-spec fix_names(name_sizes(), name_sizes()) -> name_sizes().
fix_names([], Acc) ->
  lists:reverse(Acc);
fix_names([{Name, Size} = Entry | Rest], Acc) ->
  Prefix = case is64bit() of
             true -> ".rela";
             false -> ".rel"
           end,
  case lists:prefix(Prefix, Name) of
    true ->
      %% Derive the plain section name by cutting the prefix off.
      PrefixLen = length(Prefix),
      Section = lists:nthtail(PrefixLen, Name),
      %% XXX: Is order ok? (".text" always before ".rela.text")
      fix_names(Rest, [{Section, Size - PrefixLen}, Entry | Acc]);
    false ->
      fix_names(Rest, [Entry | Acc])
  end.
%% @doc Extracts a null-terminated string from the front of a binary. Returns
%%      the string (without its terminator) together with the rest of the
%%      binary that follows the terminator. If the binary contains no zero
%%      byte, the whole binary is returned as the string and the rest is
%%      `<<>>'.
%%
%%      The split is delegated to `binary:split/2', which cuts at the first
%%      zero byte only. This replaces a hand-written scan that built the
%%      string back to front and then byte-reversed it, copying the
%%      accumulator on every step.
-spec bin_get_string(binary()) -> {string(), binary()}.
bin_get_string(Bin) ->
  case binary:split(Bin, <<0>>) of
    [Str, Rest] -> {binary_to_list(Str), Rest};
    [Str] -> {binary_to_list(Str), <<>>} % no terminator found
  end.
%% @doc Turns a list of `{Name, Size}' tuples into a list of
%%      `{Name, Offset}' tuples. The first name gets offset 1; every
%%      following name starts `Size + 1' positions after the previous one,
%%      the extra position accounting for the separator between two names.
make_offsets(NamesSizes) ->
  {NamesOffsets, _NextFree} =
    lists:mapfoldl(fun({Name, Size}, Offset) ->
                       {{Name, Offset}, Offset + Size + 1}
                   end, 1, NamesSizes),
  NamesOffsets.
%% @doc Little-Endian Base 128 (LEB128) Decoder
%%      Decodes the <b>first</b> LEB128-encoded integer at the front of a
%%      binary and returns it together with the bytes that follow it, so
%%      that the caller can go on parsing the remainder (which may use a
%%      different encoding).
%%      Fails with `badmatch' if the input ends before the number does.
%% FIXME: Only decodes unsigned data!
-spec leb128_decode(binary()) -> {integer(), binary()}.
leb128_decode(LebNum) ->
  leb128_decode(LebNum, 0, 0).

%% `Shift' is the bit position of the next 7-bit group and `Acc' the value
%% assembled from the groups read so far (groups arrive least significant
%% first).
-spec leb128_decode(binary(), integer(), integer()) -> {integer(), binary()}.
leb128_decode(LebNum, Shift, Acc) ->
  <<Continue:1, Group:7, Rest/bits>> = LebNum,
  Num = Acc bor (Group bsl Shift),
  case Continue of
    1 -> % more bytes to follow
      leb128_decode(Rest, Shift + 7, Num);
    0 -> % last byte of the number
      {Num, Rest}
  end.
|