diff options
Diffstat (limited to 'lib/parsetools/src')
| -rw-r--r-- | lib/parsetools/src/esyntax.yrl | 360 | ||||
| -rw-r--r-- | lib/parsetools/src/leex.erl | 99 | ||||
| -rw-r--r-- | lib/parsetools/src/yecc.erl | 120 | ||||
| -rw-r--r-- | lib/parsetools/src/yeccscan.erl | 6 | 
4 files changed, 146 insertions, 439 deletions
| diff --git a/lib/parsetools/src/esyntax.yrl b/lib/parsetools/src/esyntax.yrl deleted file mode 100644 index 1ecb54f0a7..0000000000 --- a/lib/parsetools/src/esyntax.yrl +++ /dev/null @@ -1,360 +0,0 @@ -%% -%% %CopyrightBegin% -%%  -%% Copyright Ericsson AB 1996-2009. All Rights Reserved. -%%  -%% The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved online at http://www.erlang.org/. -%%  -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and limitations -%% under the License. -%%  -%% %CopyrightEnd% -%% -Nonterminals -add_op attribute basic_type bif_test -case_expr clause_body -clause_guard clause_head comp_op cr_clause cr_clauses expr expr_tail -exprs farity farity_list form formal_parameter_list function -function_call function_clause guard guard_call guard_expr -guard_expr_list guard_exprs guard_expr_tail guard_expr_tuple -guard_parameter_list -guard_tests guard_test if_clause if_clauses if_expr list match_expr -mult_op parameter_list pattern patterns pattern_list pattern_tail pattern_tuple -prefix_op receive_expr send_expr tuple. - -Terminals -'!' '(' ')' '*' '+' ',' '-' '->' '/' '/=' ':' ';' '<' '=' '=/=' '=:=' -'=<' '==' '>' '>=' '[' ']' 'after' 'band' 'begin' 'bnot' -'bor' 'bsl' 'bsr' 'bxor' 'case' 'catch' 'div' 'end' 'if' 'of' -'receive' 'rem' 'when' '{' '|' '}' atom float integer string var. -% 'receive' 'rem' 'true' 'when' '{' '|' '}' atom float integer string var. - -Rootsymbol form. - -Endsymbol dot. - -Unary 0 'catch'. -Right 200 '='. -Right 200 '!'. -Left 300 add_op. -Left 400 mult_op. -Unary 500 prefix_op. - - -add_op -> '+' : '$1'. -add_op -> '-' : '$1'. -add_op -> 'bor' : '$1'. -add_op -> 'bxor' : '$1'. -add_op -> 'bsl' : '$1'. -add_op -> 'bsr' : '$1'. - -comp_op -> '==' : '$1'. -comp_op -> '/=' : '$1'. -comp_op -> '=<' : '$1'. -comp_op -> '<' : '$1'. -comp_op -> '>=' : '$1'. -comp_op -> '>' : '$1'. -comp_op -> '=:=' : '$1'. -comp_op -> '=/=' : '$1'. - -mult_op -> '*' : '$1'. -mult_op -> '/' : '$1'. -mult_op -> 'div' : '$1'. -mult_op -> 'rem' : '$1'. -mult_op -> 'band' : '$1'. - -prefix_op -> '+' : '$1'. -prefix_op -> '-' : '$1'. -prefix_op -> 'bnot' : '$1'. - - -basic_type -> atom : '$1'. -basic_type -> float : '$1'. -basic_type -> integer : '$1'. -basic_type -> string : '$1'. -basic_type -> var : '$1'. -% basic_type -> 'true' : {atom, element(2, '$1'), 'true'}. - - -pattern -> basic_type : '$1'. -pattern -> pattern_list : '$1'. -pattern -> pattern_tuple : '$1'. - -pattern_list -> '[' ']' : {nil, element(2, '$1')}. -pattern_list -> '[' pattern pattern_tail ']' : -   {cons, element(2, '$1'), '$2', '$3'}. - -pattern_tail -> '|' pattern : '$2'. -pattern_tail -> ',' pattern pattern_tail : -   {cons, element(2, '$2'), '$2', '$3'}. -pattern_tail -> '$empty' : {nil, 0}. - -pattern_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. -pattern_tuple -> '{' patterns '}' : {tuple, element(2, '$1'), '$2'}. - -patterns -> pattern : ['$1']. -patterns -> pattern ',' patterns : ['$1' | '$3']. - - -expr -> basic_type : '$1'. -expr -> list : '$1'. -expr -> tuple : '$1'. -expr -> function_call : '$1'. - -expr -> expr add_op expr : -   {Op, Pos} = '$2', -   {arith, Pos, Op, '$1', '$3'}. -expr -> expr mult_op expr : -   {Op, Pos} = '$2', -   {arith, Pos, Op, '$1', '$3'}. -expr -> prefix_op expr: -   case '$2' of -       {float, Pos, N} -> -	   case '$1' of -	       {'-', _} -> -		   {float, Pos, -N}; -	       {'+', _} -> -		   {float, Pos, N}; -	       {Op, Pos1} -> -		   {arith, Pos1, Op, {float, Pos, N}} -	   end; -       {integer, Pos, N} -> -	   case '$1' of -	       {'-', _} -> -		   {integer, Pos, -N}; -	       {'+', _} -> -		   {integer, Pos, N}; -	       {Op, Pos1} -> -		   {arith, Pos1, Op, {integer, Pos, N}} -	   end; -       _ -> -	   {Op, Pos} = '$1', -	   {arith, Pos, Op, '$2'} -   end. - -expr -> '(' expr ')' : '$2'. -expr -> 'begin' exprs 'end' : {block, element(2, '$1'), '$2'}. -expr -> 'catch' expr : {'catch', element(2, '$1'), '$2'}. - -expr -> case_expr : '$1'. -expr -> if_expr : '$1'. -expr -> receive_expr : '$1'. -expr -> match_expr : '$1'. -expr -> send_expr : '$1'. - - -list -> '[' ']' : {nil, element(2, '$1')}. -list -> '[' expr expr_tail ']' : {cons, element(2, '$1'), '$2', '$3'}. - -expr_tail -> '|' expr : '$2'. -expr_tail -> ',' expr expr_tail : {cons, element(2, '$2'), '$2', '$3'}. -expr_tail -> '$empty' : {nil, 0}. - -tuple -> '{' '}' : {tuple, element(2, '$1'), []}. -tuple -> '{' exprs '}' : {tuple, element(2, '$1'), '$2'}. - - -function_call -> atom '(' parameter_list ')' : -   case erl_parse:erlang_bif(element(3, '$1'), length('$3')) of -       true -> -	   {bif, element(2, '$1'), element(3, '$1'), '$3'}; -       false -> -	   {call, element(2, '$1'), [], element(3, '$1'), '$3'} -   end. -function_call -> atom ':' atom '(' parameter_list ')' : -   {call, element(2, '$1'), element(3, '$1'), element(3, '$3'), '$5'}. - -parameter_list -> exprs : '$1'. -parameter_list -> '$empty' : []. - - -case_expr -> 'case' expr 'of' cr_clauses 'end' : -   {'case', element(2, '$1'), '$2', '$4'}. - -cr_clause -> pattern clause_guard clause_body : -   {clause, element(2, '$1'), ['$1'], '$2', '$3'}. - -cr_clauses -> cr_clause : ['$1']. -cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3']. - -if_expr -> 'if' if_clauses 'end' : {'if', element(2, '$1'), '$2'}. - -if_clause -> guard clause_body : {clause, element(2, hd('$2')), '$1', '$2'}. - -if_clauses -> if_clause : ['$1']. -if_clauses -> if_clause ';' if_clauses : ['$1' | '$3']. - -receive_expr -> 'receive' 'after' expr clause_body 'end' : -   {'receive', element(2, '$1'), [], '$3', '$4'}. -receive_expr -> 'receive' cr_clauses 'end' : -   {'receive', element(2, '$1'), '$2'}. -receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' : -   {'receive', element(2, '$1'), '$2', '$4', '$5'}. - - -match_expr -> expr '=' expr : -   case erl_parse:is_term('$1') of -       true -> -	   {match, element(2, '$1'), '$1', '$3'}; -       false -> -	   throw({error, {element(2, '$1'), yecc, "illegal lhs in match **"}}) -   end. - -send_expr -> expr '!' expr : -   Pos = element(2, '$1'), -   {send, Pos, '$1', '$3'}. - - -exprs -> expr : ['$1']. -exprs -> expr ',' exprs : ['$1' | '$3']. - - -guard_expr -> basic_type : '$1'. -guard_expr -> guard_expr_list : '$1'. -guard_expr -> guard_expr_tuple : '$1'. -guard_expr -> guard_call : '$1'. -guard_expr -> '(' guard_expr ')' : '$2'. -guard_expr -> guard_expr add_op guard_expr : -   {Op, Pos} = '$2', -   {arith, Pos, Op, '$1', '$3'}. -guard_expr -> guard_expr mult_op guard_expr : -   {Op, Pos} = '$2', -   {arith, Pos, Op, '$1', '$3'}. -guard_expr -> prefix_op guard_expr: -   case '$2' of -       {float, Pos, N} -> -	   case '$1' of -	       {'-', _} -> -		   {float, Pos, -N}; -	       {'+', _} -> -		   {float, Pos, N}; -	       {Op, Pos1} -> -		   {arith, Pos1, Op, {float, Pos, N}} -	   end; -       {integer, Pos, N} -> -	   case '$1' of -	       {'-', _} -> -		   {integer, Pos, -N}; -	       {'+', _} -> -		   {integer, Pos, N}; -	       {Op, Pos1} -> -		   {arith, Pos1, Op, {integer, Pos, N}} -	   end; -       _ -> -	   {Op, Pos} = '$1', -	   {arith, Pos, Op, '$2'} -   end. - -guard_expr_list -> '[' ']' : {nil, element(2, '$1')}. -guard_expr_list -> '[' guard_expr guard_expr_tail ']' : -   {cons, element(2, '$1'), '$2', '$3'}. - -guard_expr_tail -> '|' guard_expr : '$2'. -guard_expr_tail -> ',' guard_expr guard_expr_tail : - {cons, element(2, '$2'), '$2', '$3'}. -guard_expr_tail -> '$empty' : {nil, 0}. - -guard_expr_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. -guard_expr_tuple -> '{' guard_exprs '}' : {tuple, element(2, '$1'), '$2'}. - -guard_exprs -> guard_expr : ['$1']. -guard_exprs -> guard_expr ',' guard_exprs : ['$1' | '$3']. - - -guard_call -> atom '(' guard_parameter_list ')' : -   case erl_parse:erlang_guard_bif(element(3, '$1'), length('$3')) of -       true -> -	   {bif, element(2, '$1'), element(3, '$1'), '$3'}; -       false -> -	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) -   end. - -guard_parameter_list -> guard_exprs : '$1'. -guard_parameter_list -> '$empty' : []. - - -bif_test -> atom '(' guard_parameter_list ')' : -   case erl_parse:erlang_guard_test(element(3, '$1'), length('$3')) of -       true -> -	   {test, element(2, '$1'), element(3, '$1'), '$3'}; -       false -> -	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) -   end. - - -guard_test -> bif_test : '$1'. -guard_test -> guard_expr comp_op guard_expr : -   {Op, Pos} = '$2', -   {comp, Pos, Op, '$1', '$3'}. - -guard_tests -> guard_test : ['$1']. -guard_tests -> guard_test ',' guard_tests : ['$1' | '$3']. - -% guard -> 'true' : []. -guard -> atom : -   case '$1' of -       {atom, _, true} -> -           []; -       _ -> -	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) -   end. -guard -> guard_tests : '$1'. - - -function_clause -> clause_head clause_guard clause_body : -   {Name, Line, Arity, Parameters} = '$1', -   {function, Line, Name, Arity, -    [{clause, element(2, hd('$3')), Parameters, '$2', '$3'}]}. - -clause_head -> atom '(' formal_parameter_list ')' : -   {element(3, '$1'), element(2, '$1'), length('$3'), '$3'}. - -formal_parameter_list -> patterns : '$1'. -formal_parameter_list -> '$empty' : []. - -clause_guard -> 'when' guard : '$2'. -clause_guard -> '$empty' : []. - -clause_body -> '->' exprs: '$2'. - - -function -> function_clause : '$1'. -function -> function_clause ';' function : -   case '$1' of -       {function, Pos1, Name1, Arity1, [Clause]} -> -	   case '$3' of -	       {function, _, Name1, Arity2, Clauses} -> -		   if -		       Arity1 /= Arity2 -> -			   throw({error, {Pos1, yecc, -				  io_lib:format('arity conflict in definition of ~w', -						[Name1])}}); -		       true -> -			   {function, Pos1, Name1, Arity1, [Clause | Clauses]} -		   end; -	       _ -> -		   throw({error, {Pos1, yecc, -			  io_lib:format('missing final dot in def of ~w/~w', -					[Name1, Arity1])}}) -	   end -   end. - - -attribute -> atom : element(3, '$1'). -attribute -> '[' farity_list ']' : '$2'. - -farity_list -> farity : ['$1']. -farity_list -> farity ',' farity_list : ['$1' | '$3']. - -farity -> atom '/' integer : {element(3, '$1'), element(3, '$3')}. - - -form -> '-' atom '(' attribute ')' : -   {attribute, element(2, '$2'), element(3, '$2'), '$4'}. -form -> function : '$1'. diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index cdf20461d9..bbef4053b4 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -58,6 +58,7 @@                 gfile=[],        % Graph file                 module,          % Module name                 opts=[],         % Options +               encoding=none,   % Encoding of Xrl file                 % posix=false,   % POSIX regular expressions                 errors=[],                 warnings=[] @@ -146,7 +147,9 @@ format_error({regexp,E})->           end,      ["bad regexp `",Es,"'"];  format_error(ignored_characters) -> -    "ignored characters". +    "ignored characters"; +format_error(cannot_parse) -> +    io_lib:fwrite("cannot parse; probably encoding mismatch", []).  %%%  %%% Local functions @@ -298,10 +301,10 @@ pack_warnings([]) ->  report_errors(St) ->      when_opt(fun () ->                        foreach(fun({File,{none,Mod,E}}) ->  -                                     io:fwrite("~s: ~s\n", +                                     io:fwrite("~s: ~ts\n",                                                 [File,Mod:format_error(E)]);                                  ({File,{Line,Mod,E}}) ->  -                                     io:fwrite("~s:~w: ~s\n", +                                     io:fwrite("~s:~w: ~ts\n",                                                 [File,Line,Mod:format_error(E)])                               end, sort(St#leex.errors))               end, report_errors, St#leex.opts). @@ -316,11 +319,11 @@ report_warnings(St) ->      ShouldReport = member(report_warnings, St#leex.opts) orelse ReportWerror,      when_bool(fun () ->  		      foreach(fun({File,{none,Mod,W}}) -> -				      io:fwrite("~s: ~s~s\n", +				      io:fwrite("~s: ~s~ts\n",  						[File,Prefix,  						 Mod:format_error(W)]);  				 ({File,{Line,Mod,W}}) -> -				      io:fwrite("~s:~w: ~s~s\n", +				      io:fwrite("~s:~w: ~s~ts\n",  						[File,Line,Prefix,  						 Mod:format_error(W)])  			      end, sort(St#leex.warnings)) @@ -396,17 +399,18 @@ verbose_print(St, Format, Args) ->  parse_file(St0) ->      case file:open(St0#leex.xfile, [read]) of          {ok,Xfile} -> +            St1 = St0#leex{encoding = epp:set_encoding(Xfile)},              try -                verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]), +                verbose_print(St1, "Parsing file ~s, ", [St1#leex.xfile]),                  %% We KNOW that errors throw so we can ignore them here. -                {ok,Line1,St1} = parse_head(Xfile, St0), -                {ok,Line2,Macs,St2} = parse_defs(Xfile, Line1, St1), -                {ok,Line3,REAs,Actions,St3} =  -                    parse_rules(Xfile, Line2, Macs, St2), -                {ok,Code,St4} = parse_code(Xfile, Line3, St3), -                verbose_print(St1, "contained ~w rules.~n", [length(REAs)]), -                {ok,REAs,Actions,Code,St4} -            after file:close(Xfile) +                {ok,Line1,St2} = parse_head(Xfile, St1), +                {ok,Line2,Macs,St3} = parse_defs(Xfile, Line1, St2), +                {ok,Line3,REAs,Actions,St4} = +                    parse_rules(Xfile, Line2, Macs, St3), +                {ok,Code,St5} = parse_code(Xfile, Line3, St4), +                verbose_print(St5, "contained ~w rules.~n", [length(REAs)]), +                {ok,REAs,Actions,Code,St5} +            after ok = file:close(Xfile)              end;          {error,Error} ->              add_error({none,leex,{file_error,Error}}, St0) @@ -415,7 +419,7 @@ parse_file(St0) ->  %% parse_head(File, State) -> {ok,NextLine,State}.  %%  Parse the head of the file. Skip all comments and blank lines. -parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}. +parse_head(Ifile, St) -> {ok,nextline(Ifile, 0, St),St}.  %% parse_defs(File, Line, State) -> {ok,NextLine,Macros,State}.  %%  Parse the macro definition section of a file. This must exist. @@ -423,7 +427,7 @@ parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}.  parse_defs(Ifile, {ok,?DEFS_HEAD ++ Rest,L}, St) ->      St1 = warn_ignored_chars(L, Rest, St), -    parse_defs(Ifile, nextline(Ifile, L), [], St1); +    parse_defs(Ifile, nextline(Ifile, L, St), [], St1);  parse_defs(_, {ok,_,L}, St) ->      add_error({L,leex,missing_defs}, St);  parse_defs(_, {eof,L}, St) -> @@ -435,7 +439,7 @@ parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) ->      case re:run(Chars, MS, [{capture,all_but_first,list}]) of          {match,[Name,Def]} ->              %%io:fwrite("~p = ~p\n", [Name,Def]), -            parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms], St); +            parse_defs(Ifile, nextline(Ifile, L, St), [{Name,Def}|Ms], St);          _ -> {ok,Line,Ms,St}                    % Anything else      end;  parse_defs(_, Line, Ms, St) -> @@ -446,7 +450,7 @@ parse_defs(_, Line, Ms, St) ->  parse_rules(Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms, St) ->      St1 = warn_ignored_chars(L, Rest, St), -    parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0, St1); +    parse_rules(Ifile, nextline(Ifile, L, St), Ms, [], [], 0, St1);  parse_rules(_, {ok,_,L}, _, St) ->      add_error({L,leex,missing_rules}, St);  parse_rules(_, {eof,L}, _, St) -> @@ -464,7 +468,7 @@ parse_rules(Ifile, NextLine, Ms, REAs, As, N, St) ->              case collect_rule(Ifile, Chars, L0) of                  {ok,Re,Atoks,L1} ->                      {ok,REA,A,St1} = parse_rule(Re, L0, Atoks, Ms, N, St), -                    parse_rules(Ifile, nextline(Ifile, L1), Ms, +                    parse_rules(Ifile, nextline(Ifile, L1, St), Ms,                                  [REA|REAs], [A|As], N+1, St1);                  {error,E} -> add_error(E, St)              end; @@ -497,8 +501,10 @@ collect_rule(Ifile, Chars, L0) ->          {error,E,_} -> {error,E}      end. +collect_action(_Ifile, {error, _}, L, _Cont0) -> +    {error, {L, leex, cannot_parse}, ignored_end_line};  collect_action(Ifile, Chars, L0, Cont0) -> -    case erl_scan:tokens(Cont0, Chars, L0) of +    case erl_scan:tokens(Cont0, Chars, L0, [unicode]) of          {done,{ok,Toks,_},_} -> {ok,Toks,L0};          {done,{eof,_},_} -> {eof,L0};          {done,{error,E,_},_} -> {error,E,L0}; @@ -560,29 +566,32 @@ parse_code(Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, St) ->      St1 = warn_ignored_chars(CodeL, Rest, St),      {ok, CodePos} = file:position(Ifile, cur),      %% Just count the lines; copy the code from file to file later. -    NCodeLines = count_lines(Ifile, 0), +    EndCodeLine = count_lines(Ifile, CodeL, St), +    NCodeLines = EndCodeLine - CodeL,      {ok,{CodeL,CodePos,NCodeLines},St1};  parse_code(_, {ok,_,L}, St) ->      add_error({L,leex,missing_code}, St);  parse_code(_, {eof,L}, St) ->      add_error({L,leex,missing_code}, St). -count_lines(File, N) -> +count_lines(File, N, St) ->      case io:get_line(File, leex) of          eof -> N; -        _Line -> count_lines(File, N+1) +        {error, _} -> add_error({N+1, leex, cannot_parse}, St); +        _Line -> count_lines(File, N+1, St)      end. -%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}. +%% nextline(InputFile, PrevLineNo, State) -> {ok,Chars,LineNo} | {eof,LineNo}.  %%  Get the next line skipping comment lines and blank lines. -nextline(Ifile, L) -> +nextline(Ifile, L, St) ->      case io:get_line(Ifile, leex) of          eof -> {eof,L}; +        {error, _} -> add_error({L+1, leex, cannot_parse}, St);          Chars ->              case substr(Chars, span(Chars, " \t\n")+1) of -                [$%|_Rest] -> nextline(Ifile, L+1); -                [] -> nextline(Ifile, L+1); +                [$%|_Rest] -> nextline(Ifile, L+1, St); +                [] -> nextline(Ifile, L+1, St);                  _Other -> {ok,Chars,L+1}              end      end. @@ -1289,19 +1298,21 @@ out_file(St0, DFA, DF, Actions, Code) ->              try                  case file:open(St0#leex.efile, [write]) of                      {ok,Ofile} -> +                        set_encoding(St0, Ofile),                          try  +                            output_encoding_comment(Ofile, St0),                              output_file_directive(Ofile, St0#leex.ifile, 0),                              out_file(Ifile, Ofile, St0, DFA, DF, Actions,                                       Code, 1),                              verbose_print(St0, "ok~n", []),                              St0 -                        after file:close(Ofile) +                        after ok = file:close(Ofile)                          end;                      {error,Error} ->                          verbose_print(St0, "error~n", []),                          add_error({none,leex,{file_error,Error}}, St0)                  end -            after file:close(Ifile) +            after ok = file:close(Ifile)              end;          {{error,Error},Ifile} ->              add_error(Ifile, {none,leex,{file_error,Error}}, St0) @@ -1310,7 +1321,9 @@ out_file(St0, DFA, DF, Actions, Code) ->  open_inc_file(State) ->      Ifile = State#leex.ifile,      case file:open(Ifile, [read]) of -        {ok,F} -> {ok,F}; +        {ok,F} -> +            _ = epp:set_encoding(F), +            {ok,F};          Error -> {Error,Ifile}      end. @@ -1328,6 +1341,7 @@ inc_file_name(Filename) ->  out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->      case io:get_line(Ifile, leex) of          eof -> output_file_directive(Ofile, St#leex.ifile, L); +        {error, _} -> add_error(St#leex.ifile, {L, leex, cannot_parse}, St);          Line ->              case substr(Line, 1, 5) of                  "##mod" -> out_module(Ofile, St); @@ -1347,14 +1361,23 @@ out_erlang_code(File, St, Code, L) ->      output_file_directive(File, St#leex.xfile, CodeL),      {ok,Xfile} = file:open(St#leex.xfile, [read]),      try +        set_encoding(St, Xfile),          {ok,_} = file:position(Xfile, CodePos), -        {ok,_} = file:copy(Xfile, File) +        ok = file_copy(Xfile, File)      after  -        file:close(Xfile) +        ok = file:close(Xfile)      end,      io:nl(File),      output_file_directive(File, St#leex.ifile, L). +file_copy(From, To) -> +    case io:get_line(From, leex) of +        eof -> ok; +        Line when is_list(Line) -> +            io:fwrite(To, "~ts", [Line]), +            file_copy(From, To) +    end. +  out_dfa(File, St, DFA, Code, DF, L) ->      {_CodeL,_CodePos,NCodeLines} = Code,      %% Three file attributes before this one... @@ -1569,7 +1592,7 @@ out_dfa_graph(St, DFA, DF) ->                  io:fwrite(Gfile, "}~n", []),                  verbose_print(St, "ok~n", []),                  St -            after file:close(Gfile) +            after ok = file:close(Gfile)              end;          {error,Error} ->              verbose_print(St, "error~n", []), @@ -1610,6 +1633,16 @@ dfa_edgelabel(Cranges) ->                     (C)     -> [quote(C)]                 end, Cranges) ++ "]". +set_encoding(#leex{encoding = none}, File) -> +    ok = io:setopts(File, [{encoding, epp:default_encoding()}]); +set_encoding(#leex{encoding = E}, File) -> +    ok = io:setopts(File, [{encoding, E}]). + +output_encoding_comment(_File, #leex{encoding = none}) -> +    ok; +output_encoding_comment(File, #leex{encoding = Encoding}) -> +    io:fwrite(File, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]). +  output_file_directive(File, Filename, Line) ->      io:fwrite(File, <<"-file(~s, ~w).\n">>,                 [format_filename(Filename), Line]). diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index b0792a6ed8..dbb7d025ae 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -1,7 +1,7 @@  %%  %% %CopyrightBegin%  %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved.  %%  %% The contents of this file are subject to the Erlang Public License,  %% Version 1.1, (the "License"); you may not use this file except in @@ -42,6 +42,7 @@            includefile,            includefile_version,            module, +          encoding = none,            options = [],            verbose = false,            file_attrs = true, @@ -224,7 +225,11 @@ format_error({unused_nonterminal, Nonterminal}) ->                    [format_symbol(Nonterminal)]);  format_error({unused_terminal, Terminal}) ->      io_lib:fwrite("terminal symbol ~s not used",  -                  [format_symbol(Terminal)]). +                  [format_symbol(Terminal)]); +format_error({bad_symbol, String}) -> +    io_lib:fwrite("bad symbol ~ts", [String]); +format_error(cannot_parse) -> +    io_lib:fwrite("cannot parse; possibly encoding mismatch", []).  file(File) ->      file(File, [report_errors, report_warnings]). @@ -257,7 +262,7 @@ yecc(Infile, Outfile, Verbose) ->      yecc(Infile, Outfile, Verbose, []).  yecc(Infilex, Outfilex, Verbose, Includefilex) -> -    statistics(runtime), +    _ = statistics(runtime),      case file(Infilex, [{parserfile, Outfilex},                           {verbose, Verbose},                           {report, true}, @@ -407,7 +412,9 @@ infile(Parent, Infilex, Options) ->      St = case file:open(St0#yecc.infile, [read, read_ahead]) of               {ok, Inport} ->                   try  -                     outfile(St0#yecc{inport = Inport}) +                     Encoding = epp:set_encoding(Inport), +                     St1 = St0#yecc{inport = Inport, encoding = Encoding}, +                     outfile(St1)                   after                       ok = file:close(Inport)                   end; @@ -428,6 +435,8 @@ outfile(St0) ->      case file:open(St0#yecc.outfile, [write, delayed_write]) of          {ok, Outport} ->              try  +                %% Set the same encoding as infile: +                set_encoding(St0, Outport),                  generate(St0#yecc{outport = Outport, line = 1})              catch                   throw: St1  -> @@ -466,13 +475,14 @@ timeit(Name, Fun, St0) ->  -define(PASS(P), {P, fun P/1}).  generate(St0) -> +    St1 = output_encoding_comment(St0),      Passes = [?PASS(parse_grammar), ?PASS(check_grammar),                ?PASS(states_and_goto_table), ?PASS(parse_actions),                ?PASS(action_conflicts), ?PASS(write_file)], -    F = case member(time, St0#yecc.options) of +    F = case member(time, St1#yecc.options) of              true ->                   io:fwrite(<<"Generating parser from grammar in ~s\n">>,  -                          [format_filename(St0#yecc.infile)]), +                          [format_filename(St1#yecc.infile)]),                  fun timeit/3;              false ->                  fun(_Name, Fn, St) -> Fn(St) end @@ -484,13 +494,13 @@ generate(St0) ->                        true -> throw(St2)                    end            end, -    foldl(Fun, St0, Passes). +    foldl(Fun, St1, Passes).  parse_grammar(St) ->      parse_grammar(St#yecc.inport, 1, St).  parse_grammar(Inport, Line, St) -> -    {NextLine, Grammar} = read_grammar(Inport, Line), +    {NextLine, Grammar} = read_grammar(Inport, St, Line),      parse_grammar(Grammar, Inport, NextLine, St).  parse_grammar(eof, _Inport, _NextLine, St) -> @@ -523,6 +533,8 @@ parse_grammar({rule, Rule, Tokens}, St0) ->      St#yecc{rules_list = [RuleDef | St#yecc.rules_list]};  parse_grammar({prec, Prec}, St) ->      St#yecc{prec = Prec ++ St#yecc.prec}; +parse_grammar({#symbol{}, [{string,Line,String}]}, St) -> +    add_error(Line, {bad_symbol, String}, St);  parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) ->      CF = fun(I) ->                   case element(I, St) of @@ -543,12 +555,17 @@ parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) ->          _ -> add_warning(Line, bad_declaration, St)      end. -read_grammar(Inport, Line) -> +read_grammar(Inport, St, Line) ->      case yeccscan:scan(Inport, '', Line) of          {eof, NextLine} ->              {NextLine, eof};          {error, {ErrorLine, Mod, What}, NextLine} ->              {NextLine, {error, ErrorLine, {error, Mod, What}}}; +        {error, terminated} -> +            throw(St); +        {error, _} -> +            File = St#yecc.infile, +            throw(add_error(File, none, cannot_parse, St));          {ok, Input, NextLine} ->              {NextLine, case yeccparser:parse(Input) of                             {error, {ErrorLine, Mod, Message}} -> @@ -738,9 +755,9 @@ states_and_goto_table(St0) ->      create_precedence_table(St).  parse_actions(St) -> -    erase(), % the pd is used when decoding lookahead sets +    _ = erase(), % the pd is used when decoding lookahead sets      ParseActions = compute_parse_actions(St#yecc.n_states, St, []), -    erase(), +    _ = erase(),      St#yecc{parse_actions = ParseActions, state_tab = []}.  action_conflicts(St0) -> @@ -841,10 +858,10 @@ report_errors(St) ->      case member(report_errors, St#yecc.options) of          true ->              foreach(fun({File,{none,Mod,E}}) ->  -                            io:fwrite(<<"~s: ~s\n">>,  +                            io:fwrite(<<"~s: ~ts\n">>,                                        [File,Mod:format_error(E)]);                         ({File,{Line,Mod,E}}) ->  -                            io:fwrite(<<"~s:~w: ~s\n">>,  +                            io:fwrite(<<"~s:~w: ~ts\n">>,                                        [File,Line,Mod:format_error(E)])                      end, sort(St#yecc.errors));          false ->  @@ -861,11 +878,11 @@ report_warnings(St) ->      case member(report_warnings, St#yecc.options) orelse ReportWerror of          true ->              foreach(fun({File,{none,Mod,W}}) ->  -                            io:fwrite(<<"~s: ~s~s\n">>, +                            io:fwrite(<<"~s: ~s~ts\n">>,                                        [File,Prefix,  				       Mod:format_error(W)]);                         ({File,{Line,Mod,W}}) ->  -                            io:fwrite(<<"~s:~w: ~s~s\n">>, +                            io:fwrite(<<"~s:~w: ~s~ts\n">>,                                        [File,Line,Prefix,  				       Mod:format_error(W)])                      end, sort(St#yecc.warnings)); @@ -1024,7 +1041,7 @@ compute_states(St0) ->                     rp_info = RulePointerInfo,                     goto = GotoTab}, -    erase(), +    _ = erase(),      EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab),      {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), @@ -1923,9 +1940,10 @@ output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->                  {St20, 0, no_erlang_code};              Next_line ->                  St_10 = output_file_directive(St20, Infile, Next_line-1), -                Nmbr_of_lines = include1([], Inport, Outport), -                {St_10, Nmbr_of_lines,  -                 {last_erlang_code_line, Next_line+Nmbr_of_lines}} +                Last_line = include1([], Inport, Outport, Infile, +                                     Next_line, St_10), +                Nmbr_of_lines = Last_line - Next_line, +                {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}}      end,      St30 = nl(St25),      IncludeFile =  @@ -1946,13 +1964,13 @@ output_prelude(Outport, Inport, St0) ->              {St30, N_lines_1, no_erlang_code};          Next_line ->              St = output_file_directive(St30, Infile, Next_line-1), -            Nmbr_of_lines = include1([], Inport, Outport), -            {St, Nmbr_of_lines + N_lines_1,  -             {last_erlang_code_line, Next_line+Nmbr_of_lines}} +            Last_line = include1([], Inport, Outport, Infile, Next_line, St), +            Nmbr_of_lines = Last_line - Next_line, +            {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}}      end.  output_header(St0) -> -    lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str])  +    lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str])                  end, St0, St0#yecc.header).  output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> @@ -2250,8 +2268,8 @@ output_inlined(St0, FunctionName, Reduce, Infile) ->                            [append(["[", tl(A), " | __Stack]"])])             end,      St = St40#yecc{line = St40#yecc.line + NLines}, -    fwrite(St, <<" [begin\n  ~s\n  end | ~s].\n\n">>,  -           [pp_tokens(Tokens, Line0), Stack]). +    fwrite(St, <<" [begin\n  ~ts\n  end | ~s].\n\n">>, +           [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]).  inlined_function_name(State, "Cat") ->      inlined_function_name(State, ""); @@ -2421,24 +2439,24 @@ include(St, File, Outport) ->          {error, Reason} ->              throw(add_error(File, none, {file_error, Reason}, St));          {ok, Inport} -> +            _ = epp:set_encoding(Inport),              Line = io:get_line(Inport, ''), -            N_lines = include1(Line, Inport, Outport), -            file:close(Inport), -            N_lines +            try include1(Line, Inport, Outport, File, 1, St) - 1 +            after ok = file:close(Inport) +            end      end. -include1(Line, Inport, Outport) -> -    include1(Line, Inport, Outport, 0). - -include1(eof, _, _, Nmbr_of_lines) -> -    Nmbr_of_lines; -include1(Line, Inport, Outport, Nmbr_of_lines) -> +include1(eof, _, _, _File, L, _St) -> +    L; +include1({error, _}=_Error, _Inport, _Outport, File, L, St) -> +    throw(add_error(File, L, cannot_parse, St)); +include1(Line, Inport, Outport, File, L, St) ->      Incr = case member($\n, Line) of                 true -> 1;                 false -> 0             end,      io:put_chars(Outport, Line), -    include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr). +    include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St).  includefile_version([]) ->      {1,4}; @@ -2465,18 +2483,22 @@ parse_file(Epp) ->      end.  %% Keeps the line breaks of the original code. -pp_tokens(Tokens, Line0) -> -    concat(pp_tokens1(Tokens, Line0, [])). +pp_tokens(Tokens, Line0, Enc) -> +    concat(pp_tokens1(Tokens, Line0, Enc, [])). -pp_tokens1([], _Line0, _T0) -> +pp_tokens1([], _Line0, _Enc, _T0) ->      []; -pp_tokens1([T | Ts], Line0, T0) -> +pp_tokens1([T | Ts], Line0, Enc, T0) ->      Line = element(2, T), -    [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)]. +    [pp_sep(Line, Line0, T0), pp_symbol(T, Enc)|pp_tokens1(Ts, Line, Enc, T)]. -pp_symbol({var,_,Var}) -> Var; -pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]); -pp_symbol({Symbol, _}) -> Symbol. +pp_symbol({var,_,Var}, _Enc) -> Var; +pp_symbol({string,_,String}, latin1) -> +    io_lib:write_unicode_string_as_latin1(String); +pp_symbol({string,_,String}, _Enc) -> io_lib:write_unicode_string(String); +pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]); +pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]); +pp_symbol({Symbol, _}, _Enc) -> Symbol.  pp_sep(Line, Line0, T0) when Line > Line0 ->       ["\n   " | pp_sep(Line - 1, Line0, T0)]; @@ -2485,6 +2507,16 @@ pp_sep(_Line, _Line0, {'.',_}) ->  pp_sep(_Line, _Line0, _T0) ->       " ". +set_encoding(#yecc{encoding = none}, Port) -> +    ok = io:setopts(Port, [{encoding, epp:default_encoding()}]); +set_encoding(#yecc{encoding = E}, Port) -> +    ok = io:setopts(Port, [{encoding, E}]). + +output_encoding_comment(#yecc{encoding = none}=St) -> +    St; +output_encoding_comment(#yecc{encoding = Encoding}=St) -> +    fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]). +  output_file_directive(St, Filename, Line) when St#yecc.file_attrs ->      fwrite(St, <<"-file(~s, ~w).\n">>,              [format_filename(Filename), Line]); @@ -2529,7 +2561,7 @@ format_assoc(nonassoc) ->  format_symbol(Symbol) ->      String = concat([Symbol]), -    case erl_scan:string(String) of +    case erl_scan:string(String, 1, [unicode]) of          {ok, [{atom, _, _}], _} ->              io_lib:fwrite(<<"~w">>, [Symbol]);          {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl index d7ec3ba8d3..9e0e85143a 100644 --- a/lib/parsetools/src/yeccscan.erl +++ b/lib/parsetools/src/yeccscan.erl @@ -1,7 +1,7 @@  %%  %% %CopyrightBegin%  %%  -%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved.  %%   %% The contents of this file are subject to the Erlang Public License,  %% Version 1.1, (the "License"); you may not use this file except in @@ -24,7 +24,7 @@ scan(Inport) ->      scan(Inport, '', 1).  scan(Inport, Prompt, Line1) -> -    case catch io:scan_erl_form(Inport, Prompt, Line1) of +    case catch io:scan_erl_form(Inport, Prompt, Line1, [unicode]) of  	{eof, Line2} ->  	    {eof, Line2};  	{ok, Tokens, Line2} -> @@ -34,6 +34,8 @@ scan(Inport, Prompt, Line1) ->  		_ ->  		    {ok, lex(Tokens), Line2}  	    end; +        {error, Reason} -> +            {error, Reason};  	{error, Descriptor, Line2} ->  	    {error, Descriptor, Line2};  	{'EXIT', Why} -> | 
