diff options
author | Hans Bolinder <[email protected]> | 2012-10-04 15:58:26 +0200 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2013-01-02 10:15:17 +0100 |
commit | 300c5466a7c9cfe3ed22bba2a88ba21058406402 (patch) | |
tree | b8c30800b17d5ae98255de2fd2818d8b5d4d6eba /lib/parsetools/src/yecc.erl | |
parent | 7a884a31cfcaaf23f7920ba1a006aa2855529030 (diff) | |
download | otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.gz otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.tar.bz2 otp-300c5466a7c9cfe3ed22bba2a88ba21058406402.zip |
[stdlib, kernel] Introduce Unicode support for Erlang source files
Expect modifications, additions and corrections.
There is a kludge in file_io_server and
erl_scan:continuation_location() that's not so pleasing.
Diffstat (limited to 'lib/parsetools/src/yecc.erl')
-rw-r--r-- | lib/parsetools/src/yecc.erl | 120 |
1 file changed, 76 insertions, 44 deletions
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index b0792a6ed8..dbb7d025ae 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -42,6 +42,7 @@ includefile, includefile_version, module, + encoding = none, options = [], verbose = false, file_attrs = true, @@ -224,7 +225,11 @@ format_error({unused_nonterminal, Nonterminal}) -> [format_symbol(Nonterminal)]); format_error({unused_terminal, Terminal}) -> io_lib:fwrite("terminal symbol ~s not used", - [format_symbol(Terminal)]). + [format_symbol(Terminal)]); +format_error({bad_symbol, String}) -> + io_lib:fwrite("bad symbol ~ts", [String]); +format_error(cannot_parse) -> + io_lib:fwrite("cannot parse; possibly encoding mismatch", []). file(File) -> file(File, [report_errors, report_warnings]). @@ -257,7 +262,7 @@ yecc(Infile, Outfile, Verbose) -> yecc(Infile, Outfile, Verbose, []). 
yecc(Infilex, Outfilex, Verbose, Includefilex) -> - statistics(runtime), + _ = statistics(runtime), case file(Infilex, [{parserfile, Outfilex}, {verbose, Verbose}, {report, true}, @@ -407,7 +412,9 @@ infile(Parent, Infilex, Options) -> St = case file:open(St0#yecc.infile, [read, read_ahead]) of {ok, Inport} -> try - outfile(St0#yecc{inport = Inport}) + Encoding = epp:set_encoding(Inport), + St1 = St0#yecc{inport = Inport, encoding = Encoding}, + outfile(St1) after ok = file:close(Inport) end; @@ -428,6 +435,8 @@ outfile(St0) -> case file:open(St0#yecc.outfile, [write, delayed_write]) of {ok, Outport} -> try + %% Set the same encoding as infile: + set_encoding(St0, Outport), generate(St0#yecc{outport = Outport, line = 1}) catch throw: St1 -> @@ -466,13 +475,14 @@ timeit(Name, Fun, St0) -> -define(PASS(P), {P, fun P/1}). generate(St0) -> + St1 = output_encoding_comment(St0), Passes = [?PASS(parse_grammar), ?PASS(check_grammar), ?PASS(states_and_goto_table), ?PASS(parse_actions), ?PASS(action_conflicts), ?PASS(write_file)], - F = case member(time, St0#yecc.options) of + F = case member(time, St1#yecc.options) of true -> io:fwrite(<<"Generating parser from grammar in ~s\n">>, - [format_filename(St0#yecc.infile)]), + [format_filename(St1#yecc.infile)]), fun timeit/3; false -> fun(_Name, Fn, St) -> Fn(St) end @@ -484,13 +494,13 @@ generate(St0) -> true -> throw(St2) end end, - foldl(Fun, St0, Passes). + foldl(Fun, St1, Passes). parse_grammar(St) -> parse_grammar(St#yecc.inport, 1, St). parse_grammar(Inport, Line, St) -> - {NextLine, Grammar} = read_grammar(Inport, Line), + {NextLine, Grammar} = read_grammar(Inport, St, Line), parse_grammar(Grammar, Inport, NextLine, St). 
parse_grammar(eof, _Inport, _NextLine, St) -> @@ -523,6 +533,8 @@ parse_grammar({rule, Rule, Tokens}, St0) -> St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; parse_grammar({prec, Prec}, St) -> St#yecc{prec = Prec ++ St#yecc.prec}; +parse_grammar({#symbol{}, [{string,Line,String}]}, St) -> + add_error(Line, {bad_symbol, String}, St); parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) -> CF = fun(I) -> case element(I, St) of @@ -543,12 +555,17 @@ parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) -> _ -> add_warning(Line, bad_declaration, St) end. -read_grammar(Inport, Line) -> +read_grammar(Inport, St, Line) -> case yeccscan:scan(Inport, '', Line) of {eof, NextLine} -> {NextLine, eof}; {error, {ErrorLine, Mod, What}, NextLine} -> {NextLine, {error, ErrorLine, {error, Mod, What}}}; + {error, terminated} -> + throw(St); + {error, _} -> + File = St#yecc.infile, + throw(add_error(File, none, cannot_parse, St)); {ok, Input, NextLine} -> {NextLine, case yeccparser:parse(Input) of {error, {ErrorLine, Mod, Message}} -> @@ -738,9 +755,9 @@ states_and_goto_table(St0) -> create_precedence_table(St). parse_actions(St) -> - erase(), % the pd is used when decoding lookahead sets + _ = erase(), % the pd is used when decoding lookahead sets ParseActions = compute_parse_actions(St#yecc.n_states, St, []), - erase(), + _ = erase(), St#yecc{parse_actions = ParseActions, state_tab = []}. 
action_conflicts(St0) -> @@ -841,10 +858,10 @@ report_errors(St) -> case member(report_errors, St#yecc.options) of true -> foreach(fun({File,{none,Mod,E}}) -> - io:fwrite(<<"~s: ~s\n">>, + io:fwrite(<<"~s: ~ts\n">>, [File,Mod:format_error(E)]); ({File,{Line,Mod,E}}) -> - io:fwrite(<<"~s:~w: ~s\n">>, + io:fwrite(<<"~s:~w: ~ts\n">>, [File,Line,Mod:format_error(E)]) end, sort(St#yecc.errors)); false -> @@ -861,11 +878,11 @@ report_warnings(St) -> case member(report_warnings, St#yecc.options) orelse ReportWerror of true -> foreach(fun({File,{none,Mod,W}}) -> - io:fwrite(<<"~s: ~s~s\n">>, + io:fwrite(<<"~s: ~s~ts\n">>, [File,Prefix, Mod:format_error(W)]); ({File,{Line,Mod,W}}) -> - io:fwrite(<<"~s:~w: ~s~s\n">>, + io:fwrite(<<"~s:~w: ~s~ts\n">>, [File,Line,Prefix, Mod:format_error(W)]) end, sort(St#yecc.warnings)); @@ -1024,7 +1041,7 @@ compute_states(St0) -> rp_info = RulePointerInfo, goto = GotoTab}, - erase(), + _ = erase(), EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), @@ -1923,9 +1940,10 @@ output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> {St20, 0, no_erlang_code}; Next_line -> St_10 = output_file_directive(St20, Infile, Next_line-1), - Nmbr_of_lines = include1([], Inport, Outport), - {St_10, Nmbr_of_lines, - {last_erlang_code_line, Next_line+Nmbr_of_lines}} + Last_line = include1([], Inport, Outport, Infile, + Next_line, St_10), + Nmbr_of_lines = Last_line - Next_line, + {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}} end, St30 = nl(St25), IncludeFile = @@ -1946,13 +1964,13 @@ output_prelude(Outport, Inport, St0) -> {St30, N_lines_1, no_erlang_code}; Next_line -> St = output_file_directive(St30, Infile, Next_line-1), - Nmbr_of_lines = include1([], Inport, Outport), - {St, Nmbr_of_lines + N_lines_1, - {last_erlang_code_line, Next_line+Nmbr_of_lines}} + Last_line = include1([], Inport, Outport, Infile, Next_line, St), + Nmbr_of_lines = Last_line - 
Next_line, + {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}} end. output_header(St0) -> - lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str]) + lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str]) end, St0, St0#yecc.header). output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> @@ -2250,8 +2268,8 @@ output_inlined(St0, FunctionName, Reduce, Infile) -> [append(["[", tl(A), " | __Stack]"])]) end, St = St40#yecc{line = St40#yecc.line + NLines}, - fwrite(St, <<" [begin\n ~s\n end | ~s].\n\n">>, - [pp_tokens(Tokens, Line0), Stack]). + fwrite(St, <<" [begin\n ~ts\n end | ~s].\n\n">>, + [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]). inlined_function_name(State, "Cat") -> inlined_function_name(State, ""); @@ -2421,24 +2439,24 @@ include(St, File, Outport) -> {error, Reason} -> throw(add_error(File, none, {file_error, Reason}, St)); {ok, Inport} -> + _ = epp:set_encoding(Inport), Line = io:get_line(Inport, ''), - N_lines = include1(Line, Inport, Outport), - file:close(Inport), - N_lines + try include1(Line, Inport, Outport, File, 1, St) - 1 + after ok = file:close(Inport) + end end. -include1(Line, Inport, Outport) -> - include1(Line, Inport, Outport, 0). - -include1(eof, _, _, Nmbr_of_lines) -> - Nmbr_of_lines; -include1(Line, Inport, Outport, Nmbr_of_lines) -> +include1(eof, _, _, _File, L, _St) -> + L; +include1({error, _}=_Error, _Inport, _Outport, File, L, St) -> + throw(add_error(File, L, cannot_parse, St)); +include1(Line, Inport, Outport, File, L, St) -> Incr = case member($\n, Line) of true -> 1; false -> 0 end, io:put_chars(Outport, Line), - include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr). + include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St). includefile_version([]) -> {1,4}; @@ -2465,18 +2483,22 @@ parse_file(Epp) -> end. %% Keeps the line breaks of the original code. -pp_tokens(Tokens, Line0) -> - concat(pp_tokens1(Tokens, Line0, [])). 
+pp_tokens(Tokens, Line0, Enc) -> + concat(pp_tokens1(Tokens, Line0, Enc, [])). -pp_tokens1([], _Line0, _T0) -> +pp_tokens1([], _Line0, _Enc, _T0) -> []; -pp_tokens1([T | Ts], Line0, T0) -> +pp_tokens1([T | Ts], Line0, Enc, T0) -> Line = element(2, T), - [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)]. + [pp_sep(Line, Line0, T0), pp_symbol(T, Enc)|pp_tokens1(Ts, Line, Enc, T)]. -pp_symbol({var,_,Var}) -> Var; -pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]); -pp_symbol({Symbol, _}) -> Symbol. +pp_symbol({var,_,Var}, _Enc) -> Var; +pp_symbol({string,_,String}, latin1) -> + io_lib:write_unicode_string_as_latin1(String); +pp_symbol({string,_,String}, _Enc) -> io_lib:write_unicode_string(String); +pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]); +pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]); +pp_symbol({Symbol, _}, _Enc) -> Symbol. pp_sep(Line, Line0, T0) when Line > Line0 -> ["\n " | pp_sep(Line - 1, Line0, T0)]; @@ -2485,6 +2507,16 @@ pp_sep(_Line, _Line0, {'.',_}) -> pp_sep(_Line, _Line0, _T0) -> " ". +set_encoding(#yecc{encoding = none}, Port) -> + ok = io:setopts(Port, [{encoding, epp:default_encoding()}]); +set_encoding(#yecc{encoding = E}, Port) -> + ok = io:setopts(Port, [{encoding, E}]). + +output_encoding_comment(#yecc{encoding = none}=St) -> + St; +output_encoding_comment(#yecc{encoding = Encoding}=St) -> + fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]). + output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> fwrite(St, <<"-file(~s, ~w).\n">>, [format_filename(Filename), Line]); @@ -2529,7 +2561,7 @@ format_assoc(nonassoc) -> format_symbol(Symbol) -> String = concat([Symbol]), - case erl_scan:string(String) of + case erl_scan:string(String, 1, [unicode]) of {ok, [{atom, _, _}], _} -> io_lib:fwrite(<<"~w">>, [Symbol]); {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> |