about | summary | refs | log | tree | commit | diff | stats
path: root/lib/parsetools
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parsetools')
-rw-r--r-- lib/parsetools/include/yeccpre.hrl 6
-rw-r--r-- lib/parsetools/src/esyntax.yrl 360
-rw-r--r-- lib/parsetools/src/leex.erl 99
-rw-r--r-- lib/parsetools/src/yecc.erl 120
-rw-r--r-- lib/parsetools/src/yeccscan.erl 6
-rw-r--r-- lib/parsetools/test/leex_SUITE.erl 115
-rw-r--r-- lib/parsetools/test/yecc_SUITE.erl 154
7 files changed, 412 insertions, 448 deletions
diff --git a/lib/parsetools/include/yeccpre.hrl b/lib/parsetools/include/yeccpre.hrl
index 3672394fc5..e4c3ba52be 100644
--- a/lib/parsetools/include/yeccpre.hrl
+++ b/lib/parsetools/include/yeccpre.hrl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2011. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -36,7 +36,7 @@ parse_and_scan({M, F, A}) ->
-spec format_error(any()) -> [char() | list()].
format_error(Message) ->
- case io_lib:deep_char_list(Message) of
+ case io_lib:deep_unicode_char_list(Message) of
true ->
Message;
_ ->
@@ -164,7 +164,7 @@ yecctoken_location(Token) ->
yecctoken2string({atom, _, A}) -> io_lib:write(A);
yecctoken2string({integer,_,N}) -> io_lib:write(N);
yecctoken2string({float,_,F}) -> io_lib:write(F);
-yecctoken2string({char,_,C}) -> io_lib:write_char(C);
+yecctoken2string({char,_,C}) -> io_lib:write_unicode_char(C);
yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]);
yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S);
yecctoken2string({reserved_symbol, _, A}) -> io_lib:write(A);
diff --git a/lib/parsetools/src/esyntax.yrl b/lib/parsetools/src/esyntax.yrl
deleted file mode 100644
index 1ecb54f0a7..0000000000
--- a/lib/parsetools/src/esyntax.yrl
+++ /dev/null
@@ -1,360 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
-%%
-%% The contents of this file are subject to the Erlang Public License,
-%% Version 1.1, (the "License"); you may not use this file except in
-%% compliance with the License. You should have received a copy of the
-%% Erlang Public License along with this software. If not, it can be
-%% retrieved online at http://www.erlang.org/.
-%%
-%% Software distributed under the License is distributed on an "AS IS"
-%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-%% the License for the specific language governing rights and limitations
-%% under the License.
-%%
-%% %CopyrightEnd%
-%%
-Nonterminals
-add_op attribute basic_type bif_test
-case_expr clause_body
-clause_guard clause_head comp_op cr_clause cr_clauses expr expr_tail
-exprs farity farity_list form formal_parameter_list function
-function_call function_clause guard guard_call guard_expr
-guard_expr_list guard_exprs guard_expr_tail guard_expr_tuple
-guard_parameter_list
-guard_tests guard_test if_clause if_clauses if_expr list match_expr
-mult_op parameter_list pattern patterns pattern_list pattern_tail pattern_tuple
-prefix_op receive_expr send_expr tuple.
-
-Terminals
-'!' '(' ')' '*' '+' ',' '-' '->' '/' '/=' ':' ';' '<' '=' '=/=' '=:='
-'=<' '==' '>' '>=' '[' ']' 'after' 'band' 'begin' 'bnot'
-'bor' 'bsl' 'bsr' 'bxor' 'case' 'catch' 'div' 'end' 'if' 'of'
-'receive' 'rem' 'when' '{' '|' '}' atom float integer string var.
-% 'receive' 'rem' 'true' 'when' '{' '|' '}' atom float integer string var.
-
-Rootsymbol form.
-
-Endsymbol dot.
-
-Unary 0 'catch'.
-Right 200 '='.
-Right 200 '!'.
-Left 300 add_op.
-Left 400 mult_op.
-Unary 500 prefix_op.
-
-
-add_op -> '+' : '$1'.
-add_op -> '-' : '$1'.
-add_op -> 'bor' : '$1'.
-add_op -> 'bxor' : '$1'.
-add_op -> 'bsl' : '$1'.
-add_op -> 'bsr' : '$1'.
-
-comp_op -> '==' : '$1'.
-comp_op -> '/=' : '$1'.
-comp_op -> '=<' : '$1'.
-comp_op -> '<' : '$1'.
-comp_op -> '>=' : '$1'.
-comp_op -> '>' : '$1'.
-comp_op -> '=:=' : '$1'.
-comp_op -> '=/=' : '$1'.
-
-mult_op -> '*' : '$1'.
-mult_op -> '/' : '$1'.
-mult_op -> 'div' : '$1'.
-mult_op -> 'rem' : '$1'.
-mult_op -> 'band' : '$1'.
-
-prefix_op -> '+' : '$1'.
-prefix_op -> '-' : '$1'.
-prefix_op -> 'bnot' : '$1'.
-
-
-basic_type -> atom : '$1'.
-basic_type -> float : '$1'.
-basic_type -> integer : '$1'.
-basic_type -> string : '$1'.
-basic_type -> var : '$1'.
-% basic_type -> 'true' : {atom, element(2, '$1'), 'true'}.
-
-
-pattern -> basic_type : '$1'.
-pattern -> pattern_list : '$1'.
-pattern -> pattern_tuple : '$1'.
-
-pattern_list -> '[' ']' : {nil, element(2, '$1')}.
-pattern_list -> '[' pattern pattern_tail ']' :
- {cons, element(2, '$1'), '$2', '$3'}.
-
-pattern_tail -> '|' pattern : '$2'.
-pattern_tail -> ',' pattern pattern_tail :
- {cons, element(2, '$2'), '$2', '$3'}.
-pattern_tail -> '$empty' : {nil, 0}.
-
-pattern_tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
-pattern_tuple -> '{' patterns '}' : {tuple, element(2, '$1'), '$2'}.
-
-patterns -> pattern : ['$1'].
-patterns -> pattern ',' patterns : ['$1' | '$3'].
-
-
-expr -> basic_type : '$1'.
-expr -> list : '$1'.
-expr -> tuple : '$1'.
-expr -> function_call : '$1'.
-
-expr -> expr add_op expr :
- {Op, Pos} = '$2',
- {arith, Pos, Op, '$1', '$3'}.
-expr -> expr mult_op expr :
- {Op, Pos} = '$2',
- {arith, Pos, Op, '$1', '$3'}.
-expr -> prefix_op expr:
- case '$2' of
- {float, Pos, N} ->
- case '$1' of
- {'-', _} ->
- {float, Pos, -N};
- {'+', _} ->
- {float, Pos, N};
- {Op, Pos1} ->
- {arith, Pos1, Op, {float, Pos, N}}
- end;
- {integer, Pos, N} ->
- case '$1' of
- {'-', _} ->
- {integer, Pos, -N};
- {'+', _} ->
- {integer, Pos, N};
- {Op, Pos1} ->
- {arith, Pos1, Op, {integer, Pos, N}}
- end;
- _ ->
- {Op, Pos} = '$1',
- {arith, Pos, Op, '$2'}
- end.
-
-expr -> '(' expr ')' : '$2'.
-expr -> 'begin' exprs 'end' : {block, element(2, '$1'), '$2'}.
-expr -> 'catch' expr : {'catch', element(2, '$1'), '$2'}.
-
-expr -> case_expr : '$1'.
-expr -> if_expr : '$1'.
-expr -> receive_expr : '$1'.
-expr -> match_expr : '$1'.
-expr -> send_expr : '$1'.
-
-
-list -> '[' ']' : {nil, element(2, '$1')}.
-list -> '[' expr expr_tail ']' : {cons, element(2, '$1'), '$2', '$3'}.
-
-expr_tail -> '|' expr : '$2'.
-expr_tail -> ',' expr expr_tail : {cons, element(2, '$2'), '$2', '$3'}.
-expr_tail -> '$empty' : {nil, 0}.
-
-tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
-tuple -> '{' exprs '}' : {tuple, element(2, '$1'), '$2'}.
-
-
-function_call -> atom '(' parameter_list ')' :
- case erl_parse:erlang_bif(element(3, '$1'), length('$3')) of
- true ->
- {bif, element(2, '$1'), element(3, '$1'), '$3'};
- false ->
- {call, element(2, '$1'), [], element(3, '$1'), '$3'}
- end.
-function_call -> atom ':' atom '(' parameter_list ')' :
- {call, element(2, '$1'), element(3, '$1'), element(3, '$3'), '$5'}.
-
-parameter_list -> exprs : '$1'.
-parameter_list -> '$empty' : [].
-
-
-case_expr -> 'case' expr 'of' cr_clauses 'end' :
- {'case', element(2, '$1'), '$2', '$4'}.
-
-cr_clause -> pattern clause_guard clause_body :
- {clause, element(2, '$1'), ['$1'], '$2', '$3'}.
-
-cr_clauses -> cr_clause : ['$1'].
-cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3'].
-
-if_expr -> 'if' if_clauses 'end' : {'if', element(2, '$1'), '$2'}.
-
-if_clause -> guard clause_body : {clause, element(2, hd('$2')), '$1', '$2'}.
-
-if_clauses -> if_clause : ['$1'].
-if_clauses -> if_clause ';' if_clauses : ['$1' | '$3'].
-
-receive_expr -> 'receive' 'after' expr clause_body 'end' :
- {'receive', element(2, '$1'), [], '$3', '$4'}.
-receive_expr -> 'receive' cr_clauses 'end' :
- {'receive', element(2, '$1'), '$2'}.
-receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' :
- {'receive', element(2, '$1'), '$2', '$4', '$5'}.
-
-
-match_expr -> expr '=' expr :
- case erl_parse:is_term('$1') of
- true ->
- {match, element(2, '$1'), '$1', '$3'};
- false ->
- throw({error, {element(2, '$1'), yecc, "illegal lhs in match **"}})
- end.
-
-send_expr -> expr '!' expr :
- Pos = element(2, '$1'),
- {send, Pos, '$1', '$3'}.
-
-
-exprs -> expr : ['$1'].
-exprs -> expr ',' exprs : ['$1' | '$3'].
-
-
-guard_expr -> basic_type : '$1'.
-guard_expr -> guard_expr_list : '$1'.
-guard_expr -> guard_expr_tuple : '$1'.
-guard_expr -> guard_call : '$1'.
-guard_expr -> '(' guard_expr ')' : '$2'.
-guard_expr -> guard_expr add_op guard_expr :
- {Op, Pos} = '$2',
- {arith, Pos, Op, '$1', '$3'}.
-guard_expr -> guard_expr mult_op guard_expr :
- {Op, Pos} = '$2',
- {arith, Pos, Op, '$1', '$3'}.
-guard_expr -> prefix_op guard_expr:
- case '$2' of
- {float, Pos, N} ->
- case '$1' of
- {'-', _} ->
- {float, Pos, -N};
- {'+', _} ->
- {float, Pos, N};
- {Op, Pos1} ->
- {arith, Pos1, Op, {float, Pos, N}}
- end;
- {integer, Pos, N} ->
- case '$1' of
- {'-', _} ->
- {integer, Pos, -N};
- {'+', _} ->
- {integer, Pos, N};
- {Op, Pos1} ->
- {arith, Pos1, Op, {integer, Pos, N}}
- end;
- _ ->
- {Op, Pos} = '$1',
- {arith, Pos, Op, '$2'}
- end.
-
-guard_expr_list -> '[' ']' : {nil, element(2, '$1')}.
-guard_expr_list -> '[' guard_expr guard_expr_tail ']' :
- {cons, element(2, '$1'), '$2', '$3'}.
-
-guard_expr_tail -> '|' guard_expr : '$2'.
-guard_expr_tail -> ',' guard_expr guard_expr_tail :
- {cons, element(2, '$2'), '$2', '$3'}.
-guard_expr_tail -> '$empty' : {nil, 0}.
-
-guard_expr_tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
-guard_expr_tuple -> '{' guard_exprs '}' : {tuple, element(2, '$1'), '$2'}.
-
-guard_exprs -> guard_expr : ['$1'].
-guard_exprs -> guard_expr ',' guard_exprs : ['$1' | '$3'].
-
-
-guard_call -> atom '(' guard_parameter_list ')' :
- case erl_parse:erlang_guard_bif(element(3, '$1'), length('$3')) of
- true ->
- {bif, element(2, '$1'), element(3, '$1'), '$3'};
- false ->
- throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
- end.
-
-guard_parameter_list -> guard_exprs : '$1'.
-guard_parameter_list -> '$empty' : [].
-
-
-bif_test -> atom '(' guard_parameter_list ')' :
- case erl_parse:erlang_guard_test(element(3, '$1'), length('$3')) of
- true ->
- {test, element(2, '$1'), element(3, '$1'), '$3'};
- false ->
- throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
- end.
-
-
-guard_test -> bif_test : '$1'.
-guard_test -> guard_expr comp_op guard_expr :
- {Op, Pos} = '$2',
- {comp, Pos, Op, '$1', '$3'}.
-
-guard_tests -> guard_test : ['$1'].
-guard_tests -> guard_test ',' guard_tests : ['$1' | '$3'].
-
-% guard -> 'true' : [].
-guard -> atom :
- case '$1' of
- {atom, _, true} ->
- [];
- _ ->
- throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
- end.
-guard -> guard_tests : '$1'.
-
-
-function_clause -> clause_head clause_guard clause_body :
- {Name, Line, Arity, Parameters} = '$1',
- {function, Line, Name, Arity,
- [{clause, element(2, hd('$3')), Parameters, '$2', '$3'}]}.
-
-clause_head -> atom '(' formal_parameter_list ')' :
- {element(3, '$1'), element(2, '$1'), length('$3'), '$3'}.
-
-formal_parameter_list -> patterns : '$1'.
-formal_parameter_list -> '$empty' : [].
-
-clause_guard -> 'when' guard : '$2'.
-clause_guard -> '$empty' : [].
-
-clause_body -> '->' exprs: '$2'.
-
-
-function -> function_clause : '$1'.
-function -> function_clause ';' function :
- case '$1' of
- {function, Pos1, Name1, Arity1, [Clause]} ->
- case '$3' of
- {function, _, Name1, Arity2, Clauses} ->
- if
- Arity1 /= Arity2 ->
- throw({error, {Pos1, yecc,
- io_lib:format('arity conflict in definition of ~w',
- [Name1])}});
- true ->
- {function, Pos1, Name1, Arity1, [Clause | Clauses]}
- end;
- _ ->
- throw({error, {Pos1, yecc,
- io_lib:format('missing final dot in def of ~w/~w',
- [Name1, Arity1])}})
- end
- end.
-
-
-attribute -> atom : element(3, '$1').
-attribute -> '[' farity_list ']' : '$2'.
-
-farity_list -> farity : ['$1'].
-farity_list -> farity ',' farity_list : ['$1' | '$3'].
-
-farity -> atom '/' integer : {element(3, '$1'), element(3, '$3')}.
-
-
-form -> '-' atom '(' attribute ')' :
- {attribute, element(2, '$2'), element(3, '$2'), '$4'}.
-form -> function : '$1'.
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index cdf20461d9..bbef4053b4 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -58,6 +58,7 @@
gfile=[], % Graph file
module, % Module name
opts=[], % Options
+ encoding=none, % Encoding of Xrl file
% posix=false, % POSIX regular expressions
errors=[],
warnings=[]
@@ -146,7 +147,9 @@ format_error({regexp,E})->
end,
["bad regexp `",Es,"'"];
format_error(ignored_characters) ->
- "ignored characters".
+ "ignored characters";
+format_error(cannot_parse) ->
+ io_lib:fwrite("cannot parse; probably encoding mismatch", []).
%%%
%%% Local functions
@@ -298,10 +301,10 @@ pack_warnings([]) ->
report_errors(St) ->
when_opt(fun () ->
foreach(fun({File,{none,Mod,E}}) ->
- io:fwrite("~s: ~s\n",
+ io:fwrite("~s: ~ts\n",
[File,Mod:format_error(E)]);
({File,{Line,Mod,E}}) ->
- io:fwrite("~s:~w: ~s\n",
+ io:fwrite("~s:~w: ~ts\n",
[File,Line,Mod:format_error(E)])
end, sort(St#leex.errors))
end, report_errors, St#leex.opts).
@@ -316,11 +319,11 @@ report_warnings(St) ->
ShouldReport = member(report_warnings, St#leex.opts) orelse ReportWerror,
when_bool(fun () ->
foreach(fun({File,{none,Mod,W}}) ->
- io:fwrite("~s: ~s~s\n",
+ io:fwrite("~s: ~s~ts\n",
[File,Prefix,
Mod:format_error(W)]);
({File,{Line,Mod,W}}) ->
- io:fwrite("~s:~w: ~s~s\n",
+ io:fwrite("~s:~w: ~s~ts\n",
[File,Line,Prefix,
Mod:format_error(W)])
end, sort(St#leex.warnings))
@@ -396,17 +399,18 @@ verbose_print(St, Format, Args) ->
parse_file(St0) ->
case file:open(St0#leex.xfile, [read]) of
{ok,Xfile} ->
+ St1 = St0#leex{encoding = epp:set_encoding(Xfile)},
try
- verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]),
+ verbose_print(St1, "Parsing file ~s, ", [St1#leex.xfile]),
%% We KNOW that errors throw so we can ignore them here.
- {ok,Line1,St1} = parse_head(Xfile, St0),
- {ok,Line2,Macs,St2} = parse_defs(Xfile, Line1, St1),
- {ok,Line3,REAs,Actions,St3} =
- parse_rules(Xfile, Line2, Macs, St2),
- {ok,Code,St4} = parse_code(Xfile, Line3, St3),
- verbose_print(St1, "contained ~w rules.~n", [length(REAs)]),
- {ok,REAs,Actions,Code,St4}
- after file:close(Xfile)
+ {ok,Line1,St2} = parse_head(Xfile, St1),
+ {ok,Line2,Macs,St3} = parse_defs(Xfile, Line1, St2),
+ {ok,Line3,REAs,Actions,St4} =
+ parse_rules(Xfile, Line2, Macs, St3),
+ {ok,Code,St5} = parse_code(Xfile, Line3, St4),
+ verbose_print(St5, "contained ~w rules.~n", [length(REAs)]),
+ {ok,REAs,Actions,Code,St5}
+ after ok = file:close(Xfile)
end;
{error,Error} ->
add_error({none,leex,{file_error,Error}}, St0)
@@ -415,7 +419,7 @@ parse_file(St0) ->
%% parse_head(File, State) -> {ok,NextLine,State}.
%% Parse the head of the file. Skip all comments and blank lines.
-parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}.
+parse_head(Ifile, St) -> {ok,nextline(Ifile, 0, St),St}.
%% parse_defs(File, Line, State) -> {ok,NextLine,Macros,State}.
%% Parse the macro definition section of a file. This must exist.
@@ -423,7 +427,7 @@ parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}.
parse_defs(Ifile, {ok,?DEFS_HEAD ++ Rest,L}, St) ->
St1 = warn_ignored_chars(L, Rest, St),
- parse_defs(Ifile, nextline(Ifile, L), [], St1);
+ parse_defs(Ifile, nextline(Ifile, L, St), [], St1);
parse_defs(_, {ok,_,L}, St) ->
add_error({L,leex,missing_defs}, St);
parse_defs(_, {eof,L}, St) ->
@@ -435,7 +439,7 @@ parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) ->
case re:run(Chars, MS, [{capture,all_but_first,list}]) of
{match,[Name,Def]} ->
%%io:fwrite("~p = ~p\n", [Name,Def]),
- parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms], St);
+ parse_defs(Ifile, nextline(Ifile, L, St), [{Name,Def}|Ms], St);
_ -> {ok,Line,Ms,St} % Anything else
end;
parse_defs(_, Line, Ms, St) ->
@@ -446,7 +450,7 @@ parse_defs(_, Line, Ms, St) ->
parse_rules(Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms, St) ->
St1 = warn_ignored_chars(L, Rest, St),
- parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0, St1);
+ parse_rules(Ifile, nextline(Ifile, L, St), Ms, [], [], 0, St1);
parse_rules(_, {ok,_,L}, _, St) ->
add_error({L,leex,missing_rules}, St);
parse_rules(_, {eof,L}, _, St) ->
@@ -464,7 +468,7 @@ parse_rules(Ifile, NextLine, Ms, REAs, As, N, St) ->
case collect_rule(Ifile, Chars, L0) of
{ok,Re,Atoks,L1} ->
{ok,REA,A,St1} = parse_rule(Re, L0, Atoks, Ms, N, St),
- parse_rules(Ifile, nextline(Ifile, L1), Ms,
+ parse_rules(Ifile, nextline(Ifile, L1, St), Ms,
[REA|REAs], [A|As], N+1, St1);
{error,E} -> add_error(E, St)
end;
@@ -497,8 +501,10 @@ collect_rule(Ifile, Chars, L0) ->
{error,E,_} -> {error,E}
end.
+collect_action(_Ifile, {error, _}, L, _Cont0) ->
+ {error, {L, leex, cannot_parse}, ignored_end_line};
collect_action(Ifile, Chars, L0, Cont0) ->
- case erl_scan:tokens(Cont0, Chars, L0) of
+ case erl_scan:tokens(Cont0, Chars, L0, [unicode]) of
{done,{ok,Toks,_},_} -> {ok,Toks,L0};
{done,{eof,_},_} -> {eof,L0};
{done,{error,E,_},_} -> {error,E,L0};
@@ -560,29 +566,32 @@ parse_code(Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, St) ->
St1 = warn_ignored_chars(CodeL, Rest, St),
{ok, CodePos} = file:position(Ifile, cur),
%% Just count the lines; copy the code from file to file later.
- NCodeLines = count_lines(Ifile, 0),
+ EndCodeLine = count_lines(Ifile, CodeL, St),
+ NCodeLines = EndCodeLine - CodeL,
{ok,{CodeL,CodePos,NCodeLines},St1};
parse_code(_, {ok,_,L}, St) ->
add_error({L,leex,missing_code}, St);
parse_code(_, {eof,L}, St) ->
add_error({L,leex,missing_code}, St).
-count_lines(File, N) ->
+count_lines(File, N, St) ->
case io:get_line(File, leex) of
eof -> N;
- _Line -> count_lines(File, N+1)
+ {error, _} -> add_error({N+1, leex, cannot_parse}, St);
+ _Line -> count_lines(File, N+1, St)
end.
-%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}.
+%% nextline(InputFile, PrevLineNo, State) -> {ok,Chars,LineNo} | {eof,LineNo}.
%% Get the next line skipping comment lines and blank lines.
-nextline(Ifile, L) ->
+nextline(Ifile, L, St) ->
case io:get_line(Ifile, leex) of
eof -> {eof,L};
+ {error, _} -> add_error({L+1, leex, cannot_parse}, St);
Chars ->
case substr(Chars, span(Chars, " \t\n")+1) of
- [$%|_Rest] -> nextline(Ifile, L+1);
- [] -> nextline(Ifile, L+1);
+ [$%|_Rest] -> nextline(Ifile, L+1, St);
+ [] -> nextline(Ifile, L+1, St);
_Other -> {ok,Chars,L+1}
end
end.
@@ -1289,19 +1298,21 @@ out_file(St0, DFA, DF, Actions, Code) ->
try
case file:open(St0#leex.efile, [write]) of
{ok,Ofile} ->
+ set_encoding(St0, Ofile),
try
+ output_encoding_comment(Ofile, St0),
output_file_directive(Ofile, St0#leex.ifile, 0),
out_file(Ifile, Ofile, St0, DFA, DF, Actions,
Code, 1),
verbose_print(St0, "ok~n", []),
St0
- after file:close(Ofile)
+ after ok = file:close(Ofile)
end;
{error,Error} ->
verbose_print(St0, "error~n", []),
add_error({none,leex,{file_error,Error}}, St0)
end
- after file:close(Ifile)
+ after ok = file:close(Ifile)
end;
{{error,Error},Ifile} ->
add_error(Ifile, {none,leex,{file_error,Error}}, St0)
@@ -1310,7 +1321,9 @@ out_file(St0, DFA, DF, Actions, Code) ->
open_inc_file(State) ->
Ifile = State#leex.ifile,
case file:open(Ifile, [read]) of
- {ok,F} -> {ok,F};
+ {ok,F} ->
+ _ = epp:set_encoding(F),
+ {ok,F};
Error -> {Error,Ifile}
end.
@@ -1328,6 +1341,7 @@ inc_file_name(Filename) ->
out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
case io:get_line(Ifile, leex) of
eof -> output_file_directive(Ofile, St#leex.ifile, L);
+ {error, _} -> add_error(St#leex.ifile, {L, leex, cannot_parse}, St);
Line ->
case substr(Line, 1, 5) of
"##mod" -> out_module(Ofile, St);
@@ -1347,14 +1361,23 @@ out_erlang_code(File, St, Code, L) ->
output_file_directive(File, St#leex.xfile, CodeL),
{ok,Xfile} = file:open(St#leex.xfile, [read]),
try
+ set_encoding(St, Xfile),
{ok,_} = file:position(Xfile, CodePos),
- {ok,_} = file:copy(Xfile, File)
+ ok = file_copy(Xfile, File)
after
- file:close(Xfile)
+ ok = file:close(Xfile)
end,
io:nl(File),
output_file_directive(File, St#leex.ifile, L).
+file_copy(From, To) ->
+ case io:get_line(From, leex) of
+ eof -> ok;
+ Line when is_list(Line) ->
+ io:fwrite(To, "~ts", [Line]),
+ file_copy(From, To)
+ end.
+
out_dfa(File, St, DFA, Code, DF, L) ->
{_CodeL,_CodePos,NCodeLines} = Code,
%% Three file attributes before this one...
@@ -1569,7 +1592,7 @@ out_dfa_graph(St, DFA, DF) ->
io:fwrite(Gfile, "}~n", []),
verbose_print(St, "ok~n", []),
St
- after file:close(Gfile)
+ after ok = file:close(Gfile)
end;
{error,Error} ->
verbose_print(St, "error~n", []),
@@ -1610,6 +1633,16 @@ dfa_edgelabel(Cranges) ->
(C) -> [quote(C)]
end, Cranges) ++ "]".
+set_encoding(#leex{encoding = none}, File) ->
+ ok = io:setopts(File, [{encoding, epp:default_encoding()}]);
+set_encoding(#leex{encoding = E}, File) ->
+ ok = io:setopts(File, [{encoding, E}]).
+
+output_encoding_comment(_File, #leex{encoding = none}) ->
+ ok;
+output_encoding_comment(File, #leex{encoding = Encoding}) ->
+ io:fwrite(File, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]).
+
output_file_directive(File, Filename, Line) ->
io:fwrite(File, <<"-file(~s, ~w).\n">>,
[format_filename(Filename), Line]).
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl
index b0792a6ed8..dbb7d025ae 100644
--- a/lib/parsetools/src/yecc.erl
+++ b/lib/parsetools/src/yecc.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2011. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -42,6 +42,7 @@
includefile,
includefile_version,
module,
+ encoding = none,
options = [],
verbose = false,
file_attrs = true,
@@ -224,7 +225,11 @@ format_error({unused_nonterminal, Nonterminal}) ->
[format_symbol(Nonterminal)]);
format_error({unused_terminal, Terminal}) ->
io_lib:fwrite("terminal symbol ~s not used",
- [format_symbol(Terminal)]).
+ [format_symbol(Terminal)]);
+format_error({bad_symbol, String}) ->
+ io_lib:fwrite("bad symbol ~ts", [String]);
+format_error(cannot_parse) ->
+ io_lib:fwrite("cannot parse; possibly encoding mismatch", []).
file(File) ->
file(File, [report_errors, report_warnings]).
@@ -257,7 +262,7 @@ yecc(Infile, Outfile, Verbose) ->
yecc(Infile, Outfile, Verbose, []).
yecc(Infilex, Outfilex, Verbose, Includefilex) ->
- statistics(runtime),
+ _ = statistics(runtime),
case file(Infilex, [{parserfile, Outfilex},
{verbose, Verbose},
{report, true},
@@ -407,7 +412,9 @@ infile(Parent, Infilex, Options) ->
St = case file:open(St0#yecc.infile, [read, read_ahead]) of
{ok, Inport} ->
try
- outfile(St0#yecc{inport = Inport})
+ Encoding = epp:set_encoding(Inport),
+ St1 = St0#yecc{inport = Inport, encoding = Encoding},
+ outfile(St1)
after
ok = file:close(Inport)
end;
@@ -428,6 +435,8 @@ outfile(St0) ->
case file:open(St0#yecc.outfile, [write, delayed_write]) of
{ok, Outport} ->
try
+ %% Set the same encoding as infile:
+ set_encoding(St0, Outport),
generate(St0#yecc{outport = Outport, line = 1})
catch
throw: St1 ->
@@ -466,13 +475,14 @@ timeit(Name, Fun, St0) ->
-define(PASS(P), {P, fun P/1}).
generate(St0) ->
+ St1 = output_encoding_comment(St0),
Passes = [?PASS(parse_grammar), ?PASS(check_grammar),
?PASS(states_and_goto_table), ?PASS(parse_actions),
?PASS(action_conflicts), ?PASS(write_file)],
- F = case member(time, St0#yecc.options) of
+ F = case member(time, St1#yecc.options) of
true ->
io:fwrite(<<"Generating parser from grammar in ~s\n">>,
- [format_filename(St0#yecc.infile)]),
+ [format_filename(St1#yecc.infile)]),
fun timeit/3;
false ->
fun(_Name, Fn, St) -> Fn(St) end
@@ -484,13 +494,13 @@ generate(St0) ->
true -> throw(St2)
end
end,
- foldl(Fun, St0, Passes).
+ foldl(Fun, St1, Passes).
parse_grammar(St) ->
parse_grammar(St#yecc.inport, 1, St).
parse_grammar(Inport, Line, St) ->
- {NextLine, Grammar} = read_grammar(Inport, Line),
+ {NextLine, Grammar} = read_grammar(Inport, St, Line),
parse_grammar(Grammar, Inport, NextLine, St).
parse_grammar(eof, _Inport, _NextLine, St) ->
@@ -523,6 +533,8 @@ parse_grammar({rule, Rule, Tokens}, St0) ->
St#yecc{rules_list = [RuleDef | St#yecc.rules_list]};
parse_grammar({prec, Prec}, St) ->
St#yecc{prec = Prec ++ St#yecc.prec};
+parse_grammar({#symbol{}, [{string,Line,String}]}, St) ->
+ add_error(Line, {bad_symbol, String}, St);
parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) ->
CF = fun(I) ->
case element(I, St) of
@@ -543,12 +555,17 @@ parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) ->
_ -> add_warning(Line, bad_declaration, St)
end.
-read_grammar(Inport, Line) ->
+read_grammar(Inport, St, Line) ->
case yeccscan:scan(Inport, '', Line) of
{eof, NextLine} ->
{NextLine, eof};
{error, {ErrorLine, Mod, What}, NextLine} ->
{NextLine, {error, ErrorLine, {error, Mod, What}}};
+ {error, terminated} ->
+ throw(St);
+ {error, _} ->
+ File = St#yecc.infile,
+ throw(add_error(File, none, cannot_parse, St));
{ok, Input, NextLine} ->
{NextLine, case yeccparser:parse(Input) of
{error, {ErrorLine, Mod, Message}} ->
@@ -738,9 +755,9 @@ states_and_goto_table(St0) ->
create_precedence_table(St).
parse_actions(St) ->
- erase(), % the pd is used when decoding lookahead sets
+ _ = erase(), % the pd is used when decoding lookahead sets
ParseActions = compute_parse_actions(St#yecc.n_states, St, []),
- erase(),
+ _ = erase(),
St#yecc{parse_actions = ParseActions, state_tab = []}.
action_conflicts(St0) ->
@@ -841,10 +858,10 @@ report_errors(St) ->
case member(report_errors, St#yecc.options) of
true ->
foreach(fun({File,{none,Mod,E}}) ->
- io:fwrite(<<"~s: ~s\n">>,
+ io:fwrite(<<"~s: ~ts\n">>,
[File,Mod:format_error(E)]);
({File,{Line,Mod,E}}) ->
- io:fwrite(<<"~s:~w: ~s\n">>,
+ io:fwrite(<<"~s:~w: ~ts\n">>,
[File,Line,Mod:format_error(E)])
end, sort(St#yecc.errors));
false ->
@@ -861,11 +878,11 @@ report_warnings(St) ->
case member(report_warnings, St#yecc.options) orelse ReportWerror of
true ->
foreach(fun({File,{none,Mod,W}}) ->
- io:fwrite(<<"~s: ~s~s\n">>,
+ io:fwrite(<<"~s: ~s~ts\n">>,
[File,Prefix,
Mod:format_error(W)]);
({File,{Line,Mod,W}}) ->
- io:fwrite(<<"~s:~w: ~s~s\n">>,
+ io:fwrite(<<"~s:~w: ~s~ts\n">>,
[File,Line,Prefix,
Mod:format_error(W)])
end, sort(St#yecc.warnings));
@@ -1024,7 +1041,7 @@ compute_states(St0) ->
rp_info = RulePointerInfo,
goto = GotoTab},
- erase(),
+ _ = erase(),
EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab),
{StateId, State0} = compute_state([{EndsymCode, 1}], Tables),
@@ -1923,9 +1940,10 @@ output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
{St20, 0, no_erlang_code};
Next_line ->
St_10 = output_file_directive(St20, Infile, Next_line-1),
- Nmbr_of_lines = include1([], Inport, Outport),
- {St_10, Nmbr_of_lines,
- {last_erlang_code_line, Next_line+Nmbr_of_lines}}
+ Last_line = include1([], Inport, Outport, Infile,
+ Next_line, St_10),
+ Nmbr_of_lines = Last_line - Next_line,
+ {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}}
end,
St30 = nl(St25),
IncludeFile =
@@ -1946,13 +1964,13 @@ output_prelude(Outport, Inport, St0) ->
{St30, N_lines_1, no_erlang_code};
Next_line ->
St = output_file_directive(St30, Infile, Next_line-1),
- Nmbr_of_lines = include1([], Inport, Outport),
- {St, Nmbr_of_lines + N_lines_1,
- {last_erlang_code_line, Next_line+Nmbr_of_lines}}
+ Last_line = include1([], Inport, Outport, Infile, Next_line, St),
+ Nmbr_of_lines = Last_line - Next_line,
+ {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}}
end.
output_header(St0) ->
- lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str])
+ lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str])
end, St0, St0#yecc.header).
output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
@@ -2250,8 +2268,8 @@ output_inlined(St0, FunctionName, Reduce, Infile) ->
[append(["[", tl(A), " | __Stack]"])])
end,
St = St40#yecc{line = St40#yecc.line + NLines},
- fwrite(St, <<" [begin\n ~s\n end | ~s].\n\n">>,
- [pp_tokens(Tokens, Line0), Stack]).
+ fwrite(St, <<" [begin\n ~ts\n end | ~s].\n\n">>,
+ [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]).
inlined_function_name(State, "Cat") ->
inlined_function_name(State, "");
@@ -2421,24 +2439,24 @@ include(St, File, Outport) ->
{error, Reason} ->
throw(add_error(File, none, {file_error, Reason}, St));
{ok, Inport} ->
+ _ = epp:set_encoding(Inport),
Line = io:get_line(Inport, ''),
- N_lines = include1(Line, Inport, Outport),
- file:close(Inport),
- N_lines
+ try include1(Line, Inport, Outport, File, 1, St) - 1
+ after ok = file:close(Inport)
+ end
end.
-include1(Line, Inport, Outport) ->
- include1(Line, Inport, Outport, 0).
-
-include1(eof, _, _, Nmbr_of_lines) ->
- Nmbr_of_lines;
-include1(Line, Inport, Outport, Nmbr_of_lines) ->
+include1(eof, _, _, _File, L, _St) ->
+ L;
+include1({error, _}=_Error, _Inport, _Outport, File, L, St) ->
+ throw(add_error(File, L, cannot_parse, St));
+include1(Line, Inport, Outport, File, L, St) ->
Incr = case member($\n, Line) of
true -> 1;
false -> 0
end,
io:put_chars(Outport, Line),
- include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr).
+ include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St).
includefile_version([]) ->
{1,4};
@@ -2465,18 +2483,22 @@ parse_file(Epp) ->
end.
%% Keeps the line breaks of the original code.
-pp_tokens(Tokens, Line0) ->
- concat(pp_tokens1(Tokens, Line0, [])).
+pp_tokens(Tokens, Line0, Enc) ->
+ concat(pp_tokens1(Tokens, Line0, Enc, [])).
-pp_tokens1([], _Line0, _T0) ->
+pp_tokens1([], _Line0, _Enc, _T0) ->
[];
-pp_tokens1([T | Ts], Line0, T0) ->
+pp_tokens1([T | Ts], Line0, Enc, T0) ->
Line = element(2, T),
- [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)].
+ [pp_sep(Line, Line0, T0), pp_symbol(T, Enc)|pp_tokens1(Ts, Line, Enc, T)].
-pp_symbol({var,_,Var}) -> Var;
-pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]);
-pp_symbol({Symbol, _}) -> Symbol.
+pp_symbol({var,_,Var}, _Enc) -> Var;
+pp_symbol({string,_,String}, latin1) ->
+ io_lib:write_unicode_string_as_latin1(String);
+pp_symbol({string,_,String}, _Enc) -> io_lib:write_unicode_string(String);
+pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]);
+pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]);
+pp_symbol({Symbol, _}, _Enc) -> Symbol.
pp_sep(Line, Line0, T0) when Line > Line0 ->
["\n " | pp_sep(Line - 1, Line0, T0)];
@@ -2485,6 +2507,16 @@ pp_sep(_Line, _Line0, {'.',_}) ->
pp_sep(_Line, _Line0, _T0) ->
" ".
+set_encoding(#yecc{encoding = none}, Port) ->
+ ok = io:setopts(Port, [{encoding, epp:default_encoding()}]);
+set_encoding(#yecc{encoding = E}, Port) ->
+ ok = io:setopts(Port, [{encoding, E}]).
+
+output_encoding_comment(#yecc{encoding = none}=St) ->
+ St;
+output_encoding_comment(#yecc{encoding = Encoding}=St) ->
+ fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]).
+
output_file_directive(St, Filename, Line) when St#yecc.file_attrs ->
fwrite(St, <<"-file(~s, ~w).\n">>,
[format_filename(Filename), Line]);
@@ -2529,7 +2561,7 @@ format_assoc(nonassoc) ->
format_symbol(Symbol) ->
String = concat([Symbol]),
- case erl_scan:string(String) of
+ case erl_scan:string(String, 1, [unicode]) of
{ok, [{atom, _, _}], _} ->
io_lib:fwrite(<<"~w">>, [Symbol]);
{ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' ->
diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl
index d7ec3ba8d3..9e0e85143a 100644
--- a/lib/parsetools/src/yeccscan.erl
+++ b/lib/parsetools/src/yeccscan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -24,7 +24,7 @@ scan(Inport) ->
scan(Inport, '', 1).
scan(Inport, Prompt, Line1) ->
- case catch io:scan_erl_form(Inport, Prompt, Line1) of
+ case catch io:scan_erl_form(Inport, Prompt, Line1, [unicode]) of
{eof, Line2} ->
{eof, Line2};
{ok, Tokens, Line2} ->
@@ -34,6 +34,8 @@ scan(Inport, Prompt, Line1) ->
_ ->
{ok, lex(Tokens), Line2}
end;
+ {error, Reason} ->
+ {error, Reason};
{error, Descriptor, Line2} ->
{error, Descriptor, Line2};
{'EXIT', Why} ->
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 1e50aedf07..a0d4fd7c48 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -1,7 +1,8 @@
+%% -*- coding: latin-1 -*-
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -42,7 +43,9 @@
-export([
file/1, compile/1, syntax/1,
- pt/1, man/1, ex/1, ex2/1, not_yet/1]).
+ pt/1, man/1, ex/1, ex2/1, not_yet/1,
+
+ otp_10302/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -63,7 +66,8 @@ all() ->
groups() ->
[{checks, [], [file, compile, syntax]},
- {examples, [], [pt, man, ex, ex2, not_yet]}].
+ {examples, [], [pt, man, ex, ex2, not_yet]},
+ {tickets, [], [otp_10302]}].
init_per_suite(Config) ->
Config.
@@ -875,6 +879,111 @@ not_yet(Config) when is_list(Config) ->
ok.
+otp_10302(doc) ->
+ "OTP-10302. Unicode characters scanner/parser.";
+otp_10302(suite) -> [];
+otp_10302(Config) when is_list(Config) ->
+ Dir = ?privdir,
+ Filename = filename:join(Dir, "file.xrl"),
+ Ret = [return, {report, true}],
+
+ ok = file:write_file(Filename,<<
+ "%% coding: UTF-8\n"
+ "�"
+ >>),
+ {error,[{_,[{2,leex,cannot_parse}]}],[]} =
+ leex:file(Filename, Ret),
+
+ ok = file:write_file(Filename,<<
+ "%% coding: UTF-8\n"
+ "Definitions.\n"
+ "�"
+ >>),
+ {error,[{_,[{3,leex,cannot_parse}]}],[]} = leex:file(Filename, Ret),
+
+ ok = file:write_file(Filename,<<
+ "%% coding: UTF-8\n"
+ "Definitions.\n"
+ "A = a\n"
+ "L = [{A}-{Z}]\n"
+ "Z = z\n"
+ "Rules.\n"
+ "{L}+ : {token,{list_to_atom(TokenChars),H�pp}}.\n"
+ >>),
+ {error,[{_,[{7,leex,cannot_parse}]}],[]} = leex:file(Filename, Ret),
+
+ ok = file:write_file(Filename,<<
+ "%% coding: UTF-8\n"
+ "Definitions.\n"
+ "A = a\n"
+ "L = [{A}-{Z}]\n"
+ "Z = z\n"
+ "Rules.\n"
+ "{L}+ : {token,{list_to_atom(TokenChars)}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " H�pp\n"
+ >>),
+ {error,[{_,[{11,leex,cannot_parse}]}],[]} = leex:file(Filename, Ret),
+
+ Mini = <<"Definitions.\n"
+ "D = [0-9]\n"
+ "Rules.\n"
+ "{L}+ : {token,{word,TokenLine,TokenChars}}.\n"
+ "Erlang code.\n">>,
+ LeexPre = filename:join(Dir, "leexinc.hrl"),
+ ?line ok = file:write_file(LeexPre, <<"%% coding: UTF-8\n �">>),
+ PreErrors = run_test(Config, Mini, LeexPre),
+ {error,[{IncludeFile,[{2,leex,cannot_parse}]}],[]} = PreErrors,
+ "leexinc.hrl" = filename:basename(IncludeFile),
+
+ Ts = [{uni_1,
+ <<"%% coding: UTF-8\n"
+ "Definitions.\n"
+ "A = a\n"
+ "L = [{A}-{Z}]\n"
+ "Z = z\n"
+ "Rules.\n"
+ "{L}+ : {token,{list_to_atom(TokenChars),\n"
+ "begin Häpp = foo, Häpp end,"
+ " 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " %% Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"\n"
+ " {ok, [R], 1} = string(\"tip\"),\n"
+ " {tip,foo,'Häpp',[1024,66],[246,114,110,95,1024]} = R,\n"
+ " Häpp = foo,\n"
+ " {tip, Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"} = R,\n"
+ " ok.\n">>,
+ default,
+ ok},
+ {uni_2,
+ <<"%% coding: Latin-1\n"
+ "Definitions.\n"
+ "A = a\n"
+ "L = [{A}-{Z}]\n"
+ "Z = z\n"
+ "Rules.\n"
+ "{L}+ : {token,{list_to_atom(TokenChars),\n"
+ "begin H�pp = foo, H�pp end,"
+ " 'H�pp',\"\\x{400}B\",\"örn_Ѐ\"}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " %% H�pp, 'H�pp',\"\\x{400}B\",\"örn_Ѐ\"\n"
+ " {ok, [R], 1} = string(\"tip\"),\n"
+ " {tip,foo,'H�pp',[1024,66],[195,182,114,110,95,208,128]} = R,\n"
+ " H�pp = foo,\n"
+ " {tip, H�pp, 'H�pp',\"\\x{400}B\",\"örn_Ѐ\"} = R,\n"
+ " ok.\n">>,
+ default,
+ ok}],
+ run(Config, Ts),
+
+ ok.
+
unwritable(Fname) ->
{ok, Info} = file:read_file_info(Fname),
Mode = Info#file_info.mode - 8#00200,
diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl
index 3d26adf1be..c306dbe833 100644
--- a/lib/parsetools/test/yecc_SUITE.erl
+++ b/lib/parsetools/test/yecc_SUITE.erl
@@ -1,7 +1,8 @@
+%% -*- coding: latin-1 -*-
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2005-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2005-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -48,7 +49,7 @@
otp_5369/1, otp_6362/1, otp_7945/1, otp_8483/1, otp_8486/1,
- otp_7292/1, otp_7969/1, otp_8919/1]).
+ otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -75,7 +76,7 @@ groups() ->
[empty, prec, yeccpre, lalr, old_yecc, other_examples]},
{bugs, [],
[otp_5369, otp_6362, otp_7945, otp_8483, otp_8486]},
- {improvements, [], [otp_7292, otp_7969, otp_8919]}].
+ {improvements, [], [otp_7292, otp_7969, otp_8919, otp_10302]}].
init_per_suite(Config) ->
Config.
@@ -1815,6 +1816,153 @@ otp_8919(Config) when is_list(Config) ->
"syntax error before: \"hello\"" = lists:flatten(Mod:format_error(Mess)),
ok.
+otp_10302(doc) ->
+ "OTP-10302. Unicode characters scanner/parser.";
+otp_10302(suite) -> [];
+otp_10302(Config) when is_list(Config) ->
+ Dir = ?privdir,
+ Filename = filename:join(Dir, "OTP-10302.yrl"),
+ Ret = [return, {report, true}],
+ Mini1 = <<"%% coding: utf-8
+ Nonterminals H�pp.
+ nt -> t.">>,
+ ok = file:write_file(Filename, Mini1),
+ %% This could (and should) be refined:
+ {error,[{Filename,[{2,Mod1,Err1}]}],[]} =
+ yecc:file(Filename, Ret),
+ "cannot translate from UTF-8" = Mod1:format_error(Err1),
+
+ Mini2 = <<"%% coding: Utf-8
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol Hopp.
+
+ Hopp -> t.
+
+ Erlang code.
+
+ t() ->
+ H�pp.">>,
+ ok = file:write_file(Filename, Mini2),
+ {error,[{Filename,[{11,Mod2,Err2}]}],[]} =
+ yecc:file(Filename, Ret),
+ "cannot parse; possibly encoding mismatch" = Mod2:format_error(Err2),
+
+ Mini3 = <<"%% coding: latin-1
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol Hopp.
+
+ Hopp -> t.
+
+ Erlang code.
+
+ t() ->
+ H�pp.">>,
+ ok = file:write_file(Filename, Mini3),
+ YeccPre = filename:join(Dir, "yeccpre.hrl"),
+ ok = file:write_file(YeccPre, [<<"%% coding: UTF-8\n �.\n">>]),
+ Inc = [{includefile,YeccPre}],
+ {error,[{_,[{2,yecc,cannot_parse}]}],[]} =
+ yecc:file(Filename, Inc ++ Ret),
+
+ ok = file:write_file(Filename,
+ <<"%% coding: UTF-8
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol \"örn_Ѐ\".
+ Hopp -> t : '$1'.">>),
+ {error,[{Filename,[{4,yecc,{bad_symbol,"�rn_"++[1024]}}]}],[]} =
+ yecc:file(Filename, Ret),
+
+ ok = file:write_file(Filename,
+ <<"%% coding: UTF-8
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol Hopp.
+ Endsymbol \"örn_Ѐ\".
+ Hopp -> t : '$1'.">>),
+ {error,[{Filename,[{5,yecc,{bad_symbol,"�rn_"++[1024]}}]}],[]} =
+ yecc:file(Filename, Ret),
+
+ ok = file:write_file(Filename,
+ <<"%% coding: UTF-8
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol Hopp.
+ Expect \"örn_Ѐ\".
+ Hopp -> t : '$1'.">>),
+ {error,[{Filename,[{5,yecc,{bad_symbol,"�rn_"++[1024]}}]}],[]} =
+ yecc:file(Filename, Ret),
+
+ ok = file:write_file(Filename,
+ <<"%% coding: UTF-8
+ Nonterminals Hopp.
+ Terminals t.
+ Rootsymbol Hopp.
+ States \"örn_Ѐ\".
+ Hopp -> t : '$1'.">>),
+ {error,[{Filename,[{5,yecc,{bad_symbol,"�rn_"++[1024]}}]}],[]} =
+ yecc:file(Filename, Ret),
+
+ Ts = [{otp_10302_1,<<"
+ %% coding: UTF-8
+ Header \"%% örn_Ѐ\" \"%% \\x{400}B\".
+ Nonterminals Häpp list.
+ Terminals element.
+ Rootsymbol Häpp.
+
+ Häpp -> list : '$1'.
+
+ list -> element : '$1'.
+ list -> list element :
+ begin
+ Häpp = foo,
+ {Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"}
+ end.
+
+ Erlang code.
+
+ -export([t/0]).
+
+ t() ->
+ L = [{element, 1}, {element,2}],
+ {ok, R} = parse(L),
+ Häpp = foo,
+ {_,_,[1024,66],[246,114,110,95,1024]} = R,
+ {Häpp,'Häpp',\"\\x{400}B\",\"örn_Ѐ\"} = R,
+ ok.
+ ">>,default,ok},
+ {otp_10302_2,<<"
+ %% coding: Latin-1
+ Nonterminals H�pp list.
+ Terminals element.
+ Rootsymbol H�pp.
+
+ H�pp -> list : '$1'.
+
+ list -> element : '$1'.
+ list -> list element :
+ begin
+ H�pp = foo,
+ {H�pp, 'H�pp',\"\\x{400}B\",\"örn_Ѐ\"}
+ end.
+
+ Erlang code.
+
+ -export([t/0]).
+
+ t() ->
+ L = [{element, 1}, {element,2}],
+ {ok, R} = parse(L),
+ H�pp = foo,
+ {_,_,[1024,66],[195,182,114,110,95,208,128]} = R,
+ {H�pp,'H�pp',\"\\x{400}B\",\"örn_Ѐ\"} = R,
+ ok.
+ ">>,default,ok}],
+ run(Config, Ts),
+ ok.
+
yeccpre_size() ->
yeccpre_size(default_yeccpre()).