aboutsummaryrefslogtreecommitdiffstats
path: root/lib/hipe/llvm/elf_format.erl
blob: 13a00bfd38268c188c424962803520f787539866 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
%% -*- erlang-indent-level: 2 -*-

%%% @copyright 2011-2014 Yiannis Tsiouris <[email protected]>,
%%%                      Chris Stavrakakis <[email protected]>,
%%%                      Kostis Sagonas <[email protected]>
%%% @author Yiannis Tsiouris <[email protected]>
%%%    [http://www.softlab.ntua.gr/~gtsiour/]

%%% @doc This module contains functions for extracting various pieces of
%%%      information from an ELF formatted object file. To fully understand
%%%      the ELF format and the use of these functions please read
%%%      "[http://www.linuxjournal.com/article/1060?page=0,0]" carefully.

-module(elf_format).

-export([get_tab_entries/1,
         %% Relocations
         get_rodata_relocs/1,
         get_text_relocs/1,
         extract_rela/2,
         get_rela_addends/1,
         %% Note
         extract_note/2,
         %% Executable code
         extract_text/1,
         %% GCC Exception Table
	 get_exn_handlers/1,
	 %% Main interface
	 read/1
        ]).

-include("elf_format.hrl").

%%------------------------------------------------------------------------------
%% Types
%%------------------------------------------------------------------------------

%% Only the ELF handle type is visible to other modules.
-export_type([elf/0]).

%% Handle for an ELF object file. Opaque to callers; inside this module it is
%% simply the raw file contents (see read/1).
-opaque elf()  :: binary().

%% Role-naming aliases. All of them are plain non-negative integers; the
%% distinct names only document what a value is used for.
-type lp()     :: non_neg_integer().  % landing pad
-type num()    :: non_neg_integer().
-type index()  :: non_neg_integer().
-type offset() :: non_neg_integer().
-type size()   :: non_neg_integer().
-type start()  :: non_neg_integer().

%% Aliases used for the fields of relocation and symbol table records.
-type info()     :: index().
-type nameoff()  :: offset().
-type valueoff() :: offset().

%% A section or symbol name, as a list of character codes.
-type name()       :: string().

%%------------------------------------------------------------------------------
%% Abstract Data Types and Accessors for ELF Structures.
%%------------------------------------------------------------------------------

%% File header
%% One record per ELF file, built by mk_ehdr/14 from the fields decoded in
%% extract_header/1. Within this module only `shoff', `shentsize', `shnum'
%% and `shstrndx' are read back (by extract_shdrtab/1).
-record(elf_ehdr, {ident,                  % ELF identification
		   type,                    % Object file type
		   machine,                 % Machine Type
		   version,                 % Object file version
		   entry,                   % Entry point address
		   phoff,                   % Program header offset
		   shoff      :: offset(),  % Section header offset
		   flags,                   % Processor-specific flags
		   ehsize     :: size(),    % ELF header size
		   phentsize  :: size(),    % Size of program header entry
		   phnum      :: num(),     % Number of program header entries
		   shentsize  :: size(),    % Size of section header entry
		   shnum      :: num(),     % Number of section header entries
		   shstrndx   :: index()    % Section name string table index
		  }).
-type elf_ehdr() :: #elf_ehdr{}.

%% Decoded identification bytes of the file header (the part that follows the
%% four magic bytes). Built by mk_ehdr_ident/7 and stored in the `ident'
%% field of `#elf_ehdr{}'.
-record(elf_ehdr_ident, {class,                   % File class
			 data,                     % Data encoding
			 version,                  % File version
			 osabi,                    % OS/ABI identification
			 abiversion,               % ABI version
			 pad,                      % Start of padding bytes
			 nident                    % Size of e_ident[]
			}).
%% -type elf_ehdr_ident() :: #elf_ehdr_ident{}.

%% Section header entries
%% One record per entry of the section header table. Unlike the on-disk
%% entry, `name' already holds the resolved section name string (it is looked
%% up in the section name string table by get_shdrtab_entries/2), which is
%% what extract_segment_by_name/2 searches on.
-record(elf_shdr, {name       :: string(),  % Section name
		   type,                    % Section type
		   flags,                   % Section attributes
		   addr,                    % Virtual address in memory
		   offset     :: offset(),  % Offset in file
		   size       :: size(),    % Size of section
		   link,                    % Link to other section
		   info,                    % Miscellaneous information
		   addralign,               % Address align boundary
		   entsize                  % Size of entries, if section has table
		  }).
-type elf_shdr() :: #elf_shdr{}.

%% Symbol table entries
%% One record per symbol, produced by parse_sym/1. `name' is not a string but
%% an offset into the string table; resolve it with get_strtab_entry/2.
-record(elf_sym, {name   :: nameoff(),     % Symbol name
		  info,                     % Type and Binding attributes
		  other,                    % Reserved
		  shndx,                    % Section table index
		  value  :: valueoff(),     % Symbol value
		  size   :: size()          % Size of object
		 }).
-type elf_sym() :: #elf_sym{}.

%% Relocations
%% Exactly one of the two record definitions below is compiled, selected by
%% the BIT32 macro. Either way the resulting record is known to the rest of
%% the module as `reloc()'. Only the non-BIT32 variant carries an addend.
-ifdef(BIT32).
-record(elf_rel, {r_offset  :: offset(),  % Address of reference
		  r_info    :: info()      % Symbol index and type of relocation
		 }).
-type reloc() :: #elf_rel{}.
-else.

-record(elf_rela, {r_offset  :: offset(), % Address of reference
		   r_info    :: info(),    % Symbol index and type of relocation
		   r_addend  :: offset()   % Constant part of expression
		  }).
-type reloc() :: #elf_rela{}.
-endif.

%% %% Program header table
%% -record(elf_phdr, {type,   % Type of segment
%% 		   flags,  % Segment attributes
%% 		   offset, % Offset in file
%% 		   vaddr,  % Virtual address in memory
%% 		   paddr,  % Reserved
%% 		   filesz, % Size of segment in file
%% 		   memsz,  % Size of segment in memory
%% 		   align   % Alignment of segment
%% 		  }).

%% %% GCC exception table
%% -record(elf_gccexntab, {lpbenc,    % Landing pad base encoding
%% 			lpbase,    % Landing pad base
%% 			ttenc,     % Type table encoding
%% 			ttoff,     % Type table offset
%% 			csenc,     % Call-site table encoding
%% 			cstabsize, % Call-site table size
%% 			cstab     :: cstab() % Call-site table
%% 		       }).
%% -type elf_gccexntab() :: #elf_gccexntab{}.

%% One entry of the call-site table found in the GCC exception table section.
%% Built by mk_gccexntab_callsite/4; get_exn_handlers/1 reads `start', `size'
%% and `lp' from it.
-record(elf_gccexntab_callsite, {start :: start(), % Call-site start
				 size  :: size(),   % Call-site size
				 lp    :: lp(),     % Call-site landing pad
                                                   %  (exception handler)
				 onaction           % On action (e.g. cleanup)
				}).
%% -type elf_gccexntab_callsite() :: #elf_gccexntab_callsite{}.

%%------------------------------------------------------------------------------
%% Accessor Functions
%%------------------------------------------------------------------------------

%% File header
%% Constructor for `#elf_ehdr{}'. The arguments are taken in the order in
%% which the fields appear in the record (and in the file header itself).
%% -spec mk_ehdr(...) -> elf_ehdr().
mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags, Ehsize,
        Phentsize, Phnum, Shentsize, Shnum, Shstrndx) ->
  #elf_ehdr{ident     = Ident,
            type      = Type,
            machine   = Machine,
            version   = Version,
            entry     = Entry,
            phoff     = Phoff,
            shoff     = Shoff,
            flags     = Flags,
            ehsize    = Ehsize,
            phentsize = Phentsize,
            phnum     = Phnum,
            shentsize = Shentsize,
            shnum     = Shnum,
            shstrndx  = Shstrndx}.

%% -spec ehdr_shoff(elf_ehdr()) -> offset().
%% ehdr_shoff(#elf_ehdr{shoff = Offset}) -> Offset.
%% 
%% -spec ehdr_shentsize(elf_ehdr()) -> size().
%% ehdr_shentsize(#elf_ehdr{shentsize = Size}) -> Size.
%% 
%% -spec ehdr_shnum(elf_ehdr()) -> num().
%% ehdr_shnum(#elf_ehdr{shnum = Num}) -> Num.
%% 
%% -spec ehdr_shstrndx(elf_ehdr()) -> index().
%% ehdr_shstrndx(#elf_ehdr{shstrndx = Index}) -> Index.


%% Constructor for `#elf_ehdr_ident{}': packs the already decoded
%% identification bytes of the file header into a record.
%%-spec mk_ehdr_ident(...) -> elf_ehdr_ident().
mk_ehdr_ident(Class, Data, Version, OsABI, AbiVersion, Pad, Nident) ->
  #elf_ehdr_ident{class      = Class,
                  data       = Data,
                  version    = Version,
                  osabi      = OsABI,
                  abiversion = AbiVersion,
                  pad        = Pad,
                  nident     = Nident}.

%%%-------------------------
%%% Section header entries
%%%-------------------------
%% Constructor for `#elf_shdr{}'. `Name' is stored as given; the caller in
%% this module passes the section name string, not the raw name offset.
mk_shdr(Name, Type, Flags, Addr, Offset, Size, Link, Info, AddrAlign, EntSize) ->
  #elf_shdr{name      = Name,
            type      = Type,
            flags     = Flags,
            addr      = Addr,
            offset    = Offset,
            size      = Size,
            link      = Link,
            info      = Info,
            addralign = AddrAlign,
            entsize   = EntSize}.

%% -spec shdr_offset(elf_shdr()) -> offset().
%% shdr_offset(#elf_shdr{offset = Offset}) -> Offset.
%% 
%% -spec shdr_size(elf_shdr()) -> size().
%% shdr_size(#elf_shdr{size = Size}) -> Size.

%%%-------------------------
%%% Symbol Table Entries
%%%-------------------------
%% Constructor for `#elf_sym{}'. `Name' is the symbol's offset into the
%% string table, not the name itself.
mk_sym(Name, Info, Other, Shndx, Value, Size) ->
  #elf_sym{name  = Name,
           info  = Info,
           other = Other,
           shndx = Shndx,
           value = Value,
           size  = Size}.

%% Returns the string-table offset stored in the `name' field of a symbol.
-spec sym_name(elf_sym()) -> nameoff().
sym_name(#elf_sym{} = Sym) -> Sym#elf_sym.name.

%% -spec sym_value(elf_sym()) -> valueoff().
%% sym_value(#elf_sym{value = Value}) -> Value.
%% 
%% -spec sym_size(elf_sym()) -> size().
%% sym_size(#elf_sym{size = Size}) -> Size.

%%%-------------------------
%%% Relocations
%%%-------------------------


%% r_offset/1 and r_info/1 exist in both build variants: the two kinds of
%% relocation records (for 32- and 64-bit architectures) share these two
%% fields, so callers can use the accessors without knowing which record is
%% compiled in.
-spec r_offset(reloc()) -> offset().
-spec r_info(reloc()) -> info().

-ifdef(BIT32).

%% Constructor for the addend-less relocation record.
-spec mk_rel(offset(), info()) -> reloc().
mk_rel(Offset, Info) ->
  #elf_rel{r_offset = Offset,
           r_info   = Info}.

r_offset(#elf_rel{} = Rel) -> Rel#elf_rel.r_offset.
r_info(#elf_rel{} = Rel) -> Rel#elf_rel.r_info.

-else.%%BIT32

%% Constructor for the relocation record with an explicit addend.
-spec mk_rela(offset(), info(), offset()) -> reloc().
mk_rela(Offset, Info, Addend) ->
  #elf_rela{r_offset = Offset,
            r_info   = Info,
            r_addend = Addend}.

r_offset(#elf_rela{} = Rela) -> Rela#elf_rela.r_offset.
r_info(#elf_rela{} = Rela) -> Rela#elf_rela.r_info.

%% Returns the addend of a relocation; only available in this variant.
-spec rela_addend(reloc()) -> offset().
rela_addend(#elf_rela{} = Rela) -> Rela#elf_rela.r_addend.

-endif.%%BIT32

%% %%%-------------------------
%% %%% GCC exception table
%% %%%-------------------------
%% -type cstab()  :: [elf_gccexntab_callsite()].
%%
%% mk_gccexntab(LPbenc, LPbase, TTenc, TToff, CSenc, CStabsize, CStab) ->
%%   #elf_gccexntab{lpbenc = LPbenc, lpbase = LPbase, ttenc = TTenc,
%% 		 ttoff = TToff, csenc = CSenc, cstabsize = CStabsize,
%% 		 cstab = CStab}.
%%
%% -spec gccexntab_cstab(elf_gccexntab()) -> cstab().
%% gccexntab_cstab(#elf_gccexntab{cstab = CSTab}) -> CSTab.

%% Constructor for one call-site table entry of the GCC exception table.
mk_gccexntab_callsite(Start, Size, LP, Action) ->
   #elf_gccexntab_callsite{start    = Start,
                           size     = Size,
                           lp       = LP,
                           onaction = Action}.

%% -spec gccexntab_callsite_start(elf_gccexntab_callsite()) -> start().
%% gccexntab_callsite_start(#elf_gccexntab_callsite{start = Start}) -> Start.
%% 
%% -spec gccexntab_callsite_size(elf_gccexntab_callsite()) -> size().
%% gccexntab_callsite_size(#elf_gccexntab_callsite{size = Size}) -> Size.
%% 
%% -spec gccexntab_callsite_lp(elf_gccexntab_callsite()) -> lp().
%% gccexntab_callsite_lp(#elf_gccexntab_callsite{lp = LP}) -> LP.

%%------------------------------------------------------------------------------
%% Main interface function
%%------------------------------------------------------------------------------

%% @doc Turns the contents of an ELF object file into an `elf()' handle.
%%      No parsing is done here: the binary is returned unchanged, and the
%%      extraction functions of this module decode the parts they need each
%%      time they are called.
-spec read(binary()) -> elf().
read(ElfBin) -> ElfBin.

%%------------------------------------------------------------------------------
%% Functions to manipulate the ELF File Header
%%------------------------------------------------------------------------------

%% @doc Extracts the File Header from an ELF formatted object file.
%%      The first `?ELF_EHDR_SIZE' bytes of `Elf' are decoded field by field
%%      (multi-byte fields are read as little-endian integers) into an
%%      `#elf_ehdr{}'; its `ident' field holds the decoded identification
%%      bytes as an `#elf_ehdr_ident{}'. Fails with `badmatch' if the binary
%%      is shorter than a file header or does not start with the ELF magic
%%      number.
-spec extract_header(elf()) -> elf_ehdr().
extract_header(Elf) ->
  Ehdr_bin = get_binary_segment(Elf, 0, ?ELF_EHDR_SIZE),
  << %% Structural pattern matching on fields.
     Ident_bin:?E_IDENT_SIZE/binary,
     Type:?bits(?E_TYPE_SIZE)/integer-little,
     Machine:?bits(?E_MACHINE_SIZE)/integer-little,
     Version:?bits(?E_VERSION_SIZE)/integer-little,
     Entry:?bits(?E_ENTRY_SIZE)/integer-little,
     Phoff:?bits(?E_PHOFF_SIZE)/integer-little,
     Shoff:?bits(?E_SHOFF_SIZE)/integer-little,
     Flags:?bits(?E_FLAGS_SIZE)/integer-little,
     Ehsize:?bits(?E_EHSIZE_SIZE)/integer-little,
     Phentsize:?bits(?E_PHENTSIZE_SIZE)/integer-little,
     Phnum:?bits(?E_PHNUM_SIZE)/integer-little,
     Shentsize:?bits(?E_SHENTSIZE_SIZE)/integer-little,
     %% NOTE(review): the width used for Shnum is the macro of the previous
     %% field (section header entry size). This is only right if both fields
     %% have the same width -- confirm against elf_format.hrl whether a
     %% dedicated macro for the entry count exists and should be used.
     Shnum:?bits(?E_SHENTSIZE_SIZE)/integer-little,
     Shstrndx:?bits(?E_SHSTRNDX_SIZE)/integer-little
  >> = Ehdr_bin,
  %% The identification bytes carry a version byte of their own. It must be
  %% bound to a fresh variable: reusing `Version' here would turn the match
  %% into an equality test against the header's version field and make the
  %% whole function fail whenever the two happen to differ.
  <<16#7f, $E, $L, $F, Class, Data, IdentVersion, Osabi, Abiversion,
    Pad:6/binary, Nident
  >> = Ident_bin,
  Ident = mk_ehdr_ident(Class, Data, IdentVersion, Osabi,
			Abiversion, Pad, Nident),
  mk_ehdr(Ident, Type, Machine, Version, Entry, Phoff, Shoff, Flags,
	  Ehsize, Phentsize, Phnum, Shentsize, Shnum, Shstrndx).

%%------------------------------------------------------------------------------
%% Functions to manipulate Section Header Entries
%%------------------------------------------------------------------------------

-type shdrtab() :: [elf_shdr()].

%% @doc Extracts the Section Header Table from an ELF formatted object file
%%      and returns it as a list of `#elf_shdr{}' records, in table order,
%%      with section names already resolved to strings.
-spec extract_shdrtab(elf()) -> shdrtab().
extract_shdrtab(Elf) ->
  %% The file header tells where the table lives (byte offset), how large
  %% each entry is, how many entries there are, and which entry describes
  %% the section name string table.
  Header = extract_header(Elf),
  #elf_ehdr{shoff = TabOffset, shentsize = EntrySize, shnum = EntryCount,
            shstrndx = StrTabIndex} = Header,
  TabBin = get_binary_segment(Elf, TabOffset, EntryCount * EntrySize),
  %% Section names cannot be resolved without the section name string table,
  %% so first pick just its offset and size out of the raw table: skip all
  %% entries before it, then the name, type, flags and addr fields of its
  %% own entry.
  BytesBefore = StrTabIndex * ?ELF_SHDRENTRY_SIZE
    + ?SH_NAME_SIZE + ?SH_TYPE_SIZE + ?SH_FLAGS_SIZE + ?SH_ADDR_SIZE,
  <<_:BytesBefore/binary,
    StrTabOffset:?bits(?SH_OFFSET_SIZE)/little,
    StrTabSize:?bits(?SH_SIZE_SIZE)/little,
    _/binary>> = TabBin,
  StrTabBin = get_binary_segment(Elf, StrTabOffset, StrTabSize),
  get_shdrtab_entries(TabBin, parse_strtab(StrTabBin)).

%% Decodes a raw section header table into `#elf_shdr{}' records, in table
%% order. `ShStrTab' is the section name string table used to turn each
%% entry's name offset into a string.
get_shdrtab_entries(ShdrTab, ShStrTab) ->
  get_shdrtab_entries(ShdrTab, ShStrTab, []).

%% Worker: peels one entry off the front per iteration and collects the
%% records in reverse.
get_shdrtab_entries(<<>>, _ShStrTab, Acc) ->
  lists:reverse(Acc);
get_shdrtab_entries(Remaining, ShStrTab, Acc) ->
  <<NameOff:?bits(?SH_NAME_SIZE)/integer-little,
    Type:?bits(?SH_TYPE_SIZE)/integer-little,
    Flags:?bits(?SH_FLAGS_SIZE)/integer-little,
    Addr:?bits(?SH_ADDR_SIZE)/integer-little,
    Offset:?bits(?SH_OFFSET_SIZE)/integer-little,
    Size:?bits(?SH_SIZE_SIZE)/integer-little,
    Link:?bits(?SH_LINK_SIZE)/integer-little,
    Info:?bits(?SH_INFO_SIZE)/integer-little,
    Addralign:?bits(?SH_ADDRALIGN_SIZE)/integer-little,
    Entsize:?bits(?SH_ENTSIZE_SIZE)/integer-little,
    Tail/binary
  >> = Remaining,
  SectionName = get_strtab_entry(NameOff, ShStrTab),
  Shdr = mk_shdr(SectionName, Type, Flags, Addr, Offset,
                 Size, Link, Info, Addralign, Entsize),
  get_shdrtab_entries(Tail, ShStrTab, [Shdr | Acc]).

%%------------------------------------------------------------------------------

%% Returns `{Name, Value, Size}' for every symbol table entry that has a
%% name (i.e. a non-zero name offset). `Name' is looked up in the string
%% table; `Size' is the symbol's size divided by `?ELF_XWORD_SIZE'.
-spec get_tab_entries(elf()) -> [{name(), valueoff(), size()}].
get_tab_entries(Elf) ->
  Symbols = extract_symtab(Elf),
  Names = extract_strtab(Elf),
  Describe =
    fun(#elf_sym{name = NameOff, value = Value, size = Size})
          when NameOff =/= 0 ->
        {true, {get_strtab_entry(NameOff, Names),
                Value,
                Size div ?ELF_XWORD_SIZE}};
       (_Unnamed) ->
        false
    end,
  lists:filtermap(Describe, Symbols).

%%------------------------------------------------------------------------------
%% Functions to manipulate Symbol Table
%%------------------------------------------------------------------------------

%% @doc Extracts the Symbol Table from an ELF object file as a list of
%%      `#elf_sym{}' records, in table order. The list is empty when the
%%      section is missing.
extract_symtab(Elf) ->
  split_symtab(extract_segment_by_name(Elf, ?SYMTAB)).

%% Cuts the raw section into `?ELF_SYM_SIZE'-byte entries and parses each
%% one. Stops as soon as less than one full entry is left.
split_symtab(<<Entry:?ELF_SYM_SIZE/binary, Rest/binary>>) ->
  [parse_sym(Entry) | split_symtab(Rest)];
split_symtab(Leftover) when is_bitstring(Leftover) ->
  [].

%% Decodes one raw symbol table entry into an `#elf_sym{}'. The two build
%% variants read the same six fields; only their order in the entry differs.
-ifdef(BIT32).
parse_sym(<<NameOff:?bits(?ST_NAME_SIZE)/integer-little,
            SymValue:?bits(?ST_VALUE_SIZE)/integer-little,
            SymSize:?bits(?ST_SIZE_SIZE)/integer-little,
            SymInfo:?bits(?ST_INFO_SIZE)/integer-little,
            SymOther:?bits(?ST_OTHER_SIZE)/integer-little,
            SectionIdx:?bits(?ST_SHNDX_SIZE)/integer-little>>) ->
  mk_sym(NameOff, SymInfo, SymOther, SectionIdx, SymValue, SymSize).
-else.
%% Here value and size come last instead of right after the name.
parse_sym(<<NameOff:?bits(?ST_NAME_SIZE)/integer-little,
            SymInfo:?bits(?ST_INFO_SIZE)/integer-little,
            SymOther:?bits(?ST_OTHER_SIZE)/integer-little,
            SectionIdx:?bits(?ST_SHNDX_SIZE)/integer-little,
            SymValue:?bits(?ST_VALUE_SIZE)/integer-little,
            SymSize:?bits(?ST_SIZE_SIZE)/integer-little>>) ->
  mk_sym(NameOff, SymInfo, SymOther, SectionIdx, SymValue, SymSize).
-endif.

%% @doc Returns one entry of the Symbol Table.
%%      `SymTab' is the list of symbols (as returned by extract_symtab/1)
%%      and `EntryNum' is the zero-based position of the wanted entry; the
%%      `+ 1' converts it to the one-based index expected by lists:nth/2.
%%      Fails if `EntryNum' is not a valid position in the list.
get_symtab_entry(SymTab, EntryNum) ->
  lists:nth(EntryNum + 1, SymTab).

%%------------------------------------------------------------------------------
%% Functions to manipulate String Table
%%------------------------------------------------------------------------------

%% ADT: get_strtab_entry/2 must be used to consume this type.
-type strtab() :: binary().

%% @doc Extracts the String Table from an ELF formatted object file: looks up
%%      the section named by the `?STRTAB' macro and wraps its contents with
%%      parse_strtab/1.
-spec extract_strtab(elf()) -> strtab().
extract_strtab(Elf) ->
  parse_strtab(extract_segment_by_name(Elf, ?STRTAB)).

%% Turns the raw contents of a string table section into a `strtab()'.
%% The representation is the section binary itself, so nothing is converted;
%% individual strings are decoded on lookup.
-spec parse_strtab(binary()) -> strtab().
parse_strtab(StrTabSectionBin) -> StrTabSectionBin.

%% @doc Returns the null-terminated string that starts `Offset' bytes into
%%      the string table. Used for symbol names as well as section names.
%%      Fails with `badmatch' if `Offset' lies beyond the end of the table.
-spec get_strtab_entry(non_neg_integer(), strtab()) -> string().
get_strtab_entry(Offset, StrTab) ->
  <<_Skipped:Offset/binary, FromOffset/binary>> = StrTab,
  bin_get_string(FromOffset).

%%------------------------------------------------------------------------------
%% Functions to manipulate Relocations
%%------------------------------------------------------------------------------

%% @doc get_rodata_relocs/1 takes an ELF binary and returns the list of
%%      read-only data labels (i.e. constants and literals in code), or an
%%      empty list if the section they are read from does not exist.
%%      With BIT32 defined the labels are the non-zero words of the read-only
%%      data section itself; otherwise they are the addends of that section's
%%      relocation entries.
%%      get_rela_addends/1 maps relocation entries to their addends and is
%%      not supported (`error(notsup)') when BIT32 is defined.
-spec get_rodata_relocs(elf()) -> [offset()].
-spec get_rela_addends([reloc()]) -> [offset()].
-ifdef(BIT32).
get_rodata_relocs(Elf) ->
  %% Zero words are padding and are dropped.
  lists:filter(fun(Word) -> Word =/= 0 end, extract_rodata(Elf)).
get_rela_addends(_RelaEntries) -> error(notsup).
-else.
get_rodata_relocs(Elf) ->
  RodataRelocs = extract_rela(Elf, ?RODATA),
  get_rela_addends(RodataRelocs).
get_rela_addends(RelaEntries) ->
  lists:map(fun rela_addend/1, RelaEntries).
-endif.


%% @doc Returns `[{SymbolName, Offset}]' for every relocation entry that
%%      belongs to the ".text" section: the name of the symbol the entry
%%      refers to and the offset in the code where the reference is.
-spec get_text_relocs(elf()) -> [{name(), offset()}].
get_text_relocs(Elf) ->
  Symtab = extract_symtab(Elf),
  Strtab = extract_strtab(Elf),
  Resolve =
    fun(Reloc) ->
        %% The info field encodes the index of the symbol in the symbol
        %% table; the symbol in turn points at its name in the string table.
        SymIndex = ?ELF_R_SYM(r_info(Reloc)),
        Sym = get_symtab_entry(Symtab, SymIndex),
        SymName = get_strtab_entry(sym_name(Sym), Strtab),
        {SymName, r_offset(Reloc)}
    end,
  lists:map(Resolve, extract_rela(Elf, ?TEXT)).

%% @doc Returns the relocation entries for section `Name' of an ELF formatted
%%      object file, as a list of `reloc()' records in file order. The list
%%      is empty when the file has no relocation section for `Name'.
-spec extract_rela(elf(), name()) -> [reloc()].

-ifdef(BIT32).
%% In this variant entries come from the section named by `?REL(Name)' and
%% have no addend.
extract_rela(Elf, Name) ->
  RelBin = extract_segment_by_name(Elf, ?REL(Name)),
  [mk_rel(Where, SymAndType)
   || <<Where:?bits(?R_OFFSET_SIZE)/little,
        SymAndType:?bits(?R_INFO_SIZE)/little>> <= RelBin].
-else.
%% In this variant entries come from the section named by `?RELA(Name)' and
%% carry an explicit addend as their third field.
extract_rela(Elf, Name) ->
  RelaBin = extract_segment_by_name(Elf, ?RELA(Name)),
  [mk_rela(Where, SymAndType, Constant)
   || <<Where:?bits(?R_OFFSET_SIZE)/little,
        SymAndType:?bits(?R_INFO_SIZE)/little,
        Constant:?bits(?R_ADDEND_SIZE)/little>> <= RelaBin].
-endif.

%%------------------------------------------------------------------------------
%% Functions to manipulate Executable Code segment
%%------------------------------------------------------------------------------

%% @doc Returns the Executable Code of an ELF formatted binary, i.e. the
%%      contents of the section named by the `?TEXT' macro, or an empty
%%      binary if there is no such section.
-spec extract_text(elf()) -> binary().
extract_text(Elf) ->
  extract_segment_by_name(Elf, ?TEXT).

%%------------------------------------------------------------------------------
%% Functions to manipulate Note Section
%%------------------------------------------------------------------------------

%% @doc Extracts a specific Note Section from an ELF object file. Takes the
%%      object file (`Elf') and the `Name' of the wanted Note Section
%%      (<b>without</b> the ".note" prefix -- presumably the `?NOTE' macro
%%      adds it; see elf_format.hrl). Returns the contents of that section,
%%      or an empty binary if no such section exists.
-spec extract_note(elf(), string()) -> binary().
extract_note(Elf, Name) ->
  extract_segment_by_name(Elf, ?NOTE(Name)).

%%------------------------------------------------------------------------------
%% Functions to manipulate GCC Exception Table segment
%%------------------------------------------------------------------------------

%% A description for the C++ exception table formats can be found at Exception
%% Handling Tables (http://www.codesourcery.com/cxx-abi/exceptions.pdf).

%% Returns one `{Start, End, HandlerOffset}' triple per call site recorded in
%% the exception table, where `End' is the call site's start plus its size
%% and `HandlerOffset' is its landing pad.
-spec get_exn_handlers(elf()) -> [{start(), start(), lp()}].
get_exn_handlers(Elf) ->
  Sites = extract_gccexntab_callsites(Elf),
  [callsite_range(Site) || Site = #elf_gccexntab_callsite{} <- Sites].

%% Converts a call-site record from (start, size) to (start, end) form.
callsite_range(#elf_gccexntab_callsite{start = From, size = Len, lp = Pad}) ->
  {From, From + Len, Pad}.

%% @doc Returns the table (list) of call sites stored in the GCC Exception
%%      Table section (the one named by `?GCC_EXN_TAB') of an ELF binary, or
%%      an empty list if that section is not found.
%%      XXX: Assumes there is *no*  Action Record Table.
extract_gccexntab_callsites(Elf) ->
  case extract_segment_by_name(Elf, ?GCC_EXN_TAB) of
    <<>> ->
      [];
    ExnTab ->
      get_gccexntab_callsites(gccexntab_callsite_table(ExnTab), [])
  end.

%% Walks over the header of the exception table and returns what follows it:
%% the raw call-site table. The header is, in order:
%%   1. one byte: encoding of the landing pad base
%%   2. (optional) one byte: the landing pad base
%%   3. one byte: encoding of the Types Table
%%   4. (optional) U-LEB128: offset to the Types Table
%%   5. one byte: encoding of the call-site table fields
%%   6. U-LEB128: size (in bytes) of the call-site table
%% All decoded header values are discarded; everything after the size field
%% is treated as call-site entries.
gccexntab_callsite_table(ExnTab) ->
  <<LPBaseEnc:8, AfterLPBaseEnc/binary>> = ExnTab,
  AfterLPBase = skip_lp_base(LPBaseEnc, AfterLPBaseEnc),
  <<TTEnc:8, AfterTTEnc/binary>> = AfterLPBase,
  AfterTTOff = skip_tt_offset(TTEnc, AfterTTEnc),
  <<_CSEnc:8, AfterCSEnc/binary>> = AfterTTOff,
  {_CSTabSize, CSTab} = leb128_decode(AfterCSEnc),
  CSTab.

%% The landing pad base byte is present unless its encoding says "omitted".
skip_lp_base(Enc, Bin) when Enc =:= ?DW_EH_PE_omit ->
  Bin;
skip_lp_base(_Enc, Bin) ->
  <<_Base:8, Rest/binary>> = Bin,
  Rest.

%% The Types Table offset is present unless its encoding says "omitted".
skip_tt_offset(Enc, Bin) when Enc =:= ?DW_EH_PE_omit ->
  Bin;
skip_tt_offset(_Enc, Bin) ->
  {_TTOff, Rest} = leb128_decode(Bin),
  Rest.

%% Decodes a raw call-site table into `#elf_gccexntab_callsite{}' records, in
%% table order. Every entry is read as three 32-bit little-endian integers
%% (start, size, landing pad) followed by a one-byte action. `Sites' holds
%% the entries decoded so far, most recent first.
get_gccexntab_callsites(<<>>, Sites) ->
  lists:reverse(Sites);
get_gccexntab_callsites(Table, Sites) ->
  <<From:32/little, Len:32/little, Pad:32/little, Action:8,
    Tail/binary>> = Table,
  Site = mk_gccexntab_callsite(From, Len, Pad, Action),
  get_gccexntab_callsites(Tail, [Site | Sites]).

%%------------------------------------------------------------------------------
%% Functions to manipulate Read-only Data (.rodata)
%%------------------------------------------------------------------------------
-ifdef(BIT32).
%% Returns the contents of the read-only data section (the one named by
%% `?RODATA') as a list of little-endian integers, each `?ELF_ADDR_SIZE'
%% bytes wide, in section order.
extract_rodata(Elf) ->
  rodata_words(extract_segment_by_name(Elf, ?RODATA)).

%% Cuts the section into address-sized words. Fails with `badmatch' if the
%% section length is not a multiple of the word size.
rodata_words(<<>>) ->
  [];
rodata_words(Bin) ->
  <<Word:?bits(?ELF_ADDR_SIZE)/integer-little, Tail/binary>> = Bin,
  [Word | rodata_words(Tail)].
-endif.

%%------------------------------------------------------------------------------
%% Helper functions
%%------------------------------------------------------------------------------

%% @doc Returns the binary segment starting at `Offset' with length `Size'
%%      (bytes) from a binary file. If `Offset' is bigger than the byte size of
%%      the binary, an empty binary (`<<>>') is returned.
-spec get_binary_segment(binary(), offset(), size()) -> binary().
%% An offset past the end of the binary yields `<<>>'. Any other request
%% must fit inside the binary, otherwise the match below fails (`badmatch').
get_binary_segment(Bin, Offset, Size) ->
  if
    Offset > byte_size(Bin) ->
      <<>>;
    true ->
      <<_Before:Offset/binary, Segment:Size/binary, _After/binary>> = Bin,
      Segment
  end.

%% @doc Returns the contents of the section called `SectionName' from an ELF
%%      formatted binary object, or an empty binary (`<<>>') if there is no
%%      section with that name. When several sections share the name, the
%%      first one in the section header table is used.
%%      There are handy macros defined in elf_format.hrl for all Standard
%%      Section Names.
-spec extract_segment_by_name(elf(), string()) -> binary().
extract_segment_by_name(Elf, SectionName) ->
  %% The section header table is re-read from the binary on every call.
  Headers = extract_shdrtab(Elf),
  case lists:keyfind(SectionName, #elf_shdr.name, Headers) of
    false ->
      <<>>;
    #elf_shdr{name = SectionName} = Hdr ->
      get_binary_segment(Elf, Hdr#elf_shdr.offset, Hdr#elf_shdr.size)
  end.

%% @doc Returns the bytes at the start of a binary, up to but not including
%%      the first zero byte, as a list of integers. Fails with
%%      `function_clause' if the binary contains no zero byte.
%% FIXME: No regard for encoding (just happens to work for ASCII and Latin-1)
-spec bin_get_string(binary()) -> string().
bin_get_string(Bin) ->
  bin_get_string(Bin, []).

%% Worker: `RevChars' holds the bytes seen so far, most recent first.
bin_get_string(<<0, _/binary>>, RevChars) ->
  lists:reverse(RevChars);
bin_get_string(<<Char, Rest/binary>>, RevChars) ->
  bin_get_string(Rest, [Char | RevChars]).

%% @doc Little-Endian Base 128 (LEB128) Decoder
%%     Decodes the <b>first</b> LEB128-encoded integer at the start of a
%%     binary and returns `{Integer, Rest}', where `Rest' is whatever follows
%%     the encoded number. Returning the remainder lets the caller go on
%%     parsing data that uses a different encoding, which matters because a
%%     LEB128 number has no fixed width.
%% FIXME: Only decodes unsigned data!
-spec leb128_decode(binary()) -> {integer(), binary()}.
leb128_decode(LebNum) ->
  leb128_decode(LebNum, 0, <<>>).

%% Worker. Every encoded byte is a one-bit "more follows" flag plus seven
%% payload bits, least significant group first. `Acc' holds the `NoOfBits'
%% payload bits collected so far; each new group is put in front of them so
%% that the finished bit string reads as one big-endian integer.
-spec leb128_decode(binary(), integer(), binary()) -> {integer(), binary()}.
leb128_decode(LebNum, NoOfBits, Acc) ->
  <<MoreFollows:1, Group:7/bits, Tail/bits>> = LebNum,
  Payload = <<Group/bits, Acc/bits>>,
  Width = NoOfBits + 7,
  case MoreFollows of
    1 -> % more bytes to follow
      leb128_decode(Tail, Width, Payload);
    0 -> % last byte of this number
      <<Value:Width/integer>> = Payload,
      {Value, Tail}
  end.