aboutsummaryrefslogtreecommitdiffstats
path: root/lib/parsetools/src/leex.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parsetools/src/leex.erl')
-rw-r--r--lib/parsetools/src/leex.erl130
1 files changed, 72 insertions, 58 deletions
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index 602e47404d..8a4a5e8d86 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -37,7 +37,6 @@
-import(lists, [member/2,reverse/1,sort/1,delete/2,
keysort/2,keydelete/3,
map/2,foldl/3,foreach/2,flatmap/2]).
--import(string, [substr/2,substr/3,span/2]).
-import(ordsets, [is_element/2,add_element/2,union/2]).
-import(orddict, [store/3]).
@@ -251,10 +250,10 @@ is_filename(T) ->
shorten_filename(Name0) ->
{ok,Cwd} = file:get_cwd(),
- case lists:prefix(Cwd, Name0) of
- false -> Name0;
- true ->
- case lists:nthtail(length(Cwd), Name0) of
+ case string:prefix(Name0, Cwd) of
+ nomatch -> Name0;
+ Rest ->
+ case unicode:characters_to_list(Rest) of
"/"++N -> N;
N -> N
end
@@ -490,12 +489,9 @@ parse_rules_end(_, NextLine, REAs, As, St) ->
%% action has been read. Keep track of line number.
collect_rule(Ifile, Chars, L0) ->
- %% Erlang strings are 1 based, but re 0 :-(
- {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+", [unicode]),
- St = St0 + 1,
- %%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]),
- case collect_action(Ifile, substr(Chars, St+Len), L0, []) of
- {ok,[{':',_}|Toks],L1} -> {ok,substr(Chars, St, Len),Toks,L1};
+ {RegExp,Rest} = string:take(Chars, " \t\r\n", true),
+ case collect_action(Ifile, Rest, L0, []) of
+ {ok,[{':',_}|Toks],L1} -> {ok,RegExp,Toks,L1};
{ok,_,_} -> {error,{L0,leex,bad_rule}};
{eof,L1} -> {error,{L1,leex,bad_rule}};
{error,E,_} -> {error,E}
@@ -549,7 +545,7 @@ var_used(Name, Toks) ->
parse_rule_regexp(RE0, [{M,Exp}|Ms], St) ->
Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list},unicode]),
- RE1 = string:join(Split, Exp),
+ RE1 = lists:append(lists:join(Exp, Split)),
parse_rule_regexp(RE1, Ms, St);
parse_rule_regexp(RE, [], St) ->
%%io:fwrite("RE = ~p~n", [RE]),
@@ -589,9 +585,9 @@ nextline(Ifile, L, St) ->
eof -> {eof,L};
{error, _} -> add_error({L+1, leex, cannot_parse}, St);
Chars ->
- case substr(Chars, span(Chars, " \t\n")+1) of
- [$%|_Rest] -> nextline(Ifile, L+1, St);
- [] -> nextline(Ifile, L+1, St);
+ case string:take(Chars, " \t\n") of
+ {_, [$%|_Rest]} -> nextline(Ifile, L+1, St);
+ {_, []} -> nextline(Ifile, L+1, St);
_Other -> {ok,Chars,L+1}
end
end.
@@ -824,7 +820,7 @@ re_char_class(Cs, Cc, _) -> {reverse(Cc),Cs}. % Preserve order
%% posix_cc("space" ++ Cs) -> {space,Cs};
%% posix_cc("upper" ++ Cs) -> {upper,Cs};
%% posix_cc("xdigit" ++ Cs) -> {xdigit,Cs};
-%% posix_cc(Cs) -> parse_error({posix_cc,substr(Cs, 1, 5)}).
+%% posix_cc(Cs) -> parse_error({posix_cc,string:slice(Cs, 0, 5)}).
escape_char($n) -> $\n; % \n = LF
escape_char($r) -> $\r; % \r = CR
@@ -863,7 +859,7 @@ escape_char(C) -> C. % Pass it straight through
%% re_number(Cs, Acc) -> {Acc,Cs}.
string_between(Cs1, Cs2) ->
- substr(Cs1, 1, length(Cs1)-length(Cs2)).
+ string:slice(Cs1, 0, string:length(Cs1)-string:length(Cs2)).
%% We use standard methods, Thompson's construction and subset
%% construction, to create first an NFA and then a DFA from the
@@ -1264,7 +1260,7 @@ pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}.
%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State}.
%% The return CurrTokLen is always the current number of characters
-%% scanned in the current token. The returns have the follwoing
+%% scanned in the current token. The returns have the following
%% meanings:
%% {Action, AcceptLength, RestChars, Line} -
%% The scanner has reached an accepting end-state, for example after
@@ -1281,7 +1277,7 @@ pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}.
%%
%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} -
%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State} -
-%% The scanner has reached a non-accepting transistion state. If
+%% The scanner has reached a non-accepting transition state. If
%% RestChars == [] we need to get more characters to continue.
%% Otherwise if 'reject' then no accepting state has been reached it
%% is an error. If we have an Action and AcceptLength then these are
@@ -1343,7 +1339,7 @@ out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
eof -> output_file_directive(Ofile, St#leex.ifile, L);
{error, _} -> add_error(St#leex.ifile, {L, leex, cannot_parse}, St);
Line ->
- case substr(Line, 1, 5) of
+ case string:slice(Line, 0, 5) of
"##mod" -> out_module(Ofile, St);
"##cod" -> out_erlang_code(Ofile, St, Code, L);
"##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L);
@@ -1523,7 +1519,7 @@ prep_out_actions(As) ->
Name = list_to_atom(lists:concat([yyaction_,A])),
[Chars,Len,Line,_,_] = Vars,
Args = [V || V <- [Chars,Len,Line], V =/= "_"],
- ArgsChars = string:join(Args, ", "),
+ ArgsChars = lists:join(", ", Args),
{A,Code,Vars,Name,Args,ArgsChars}
end, As).
@@ -1548,22 +1544,23 @@ out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
L = erl_scan:line(hd(Code)),
output_file_directive(File, XrlFile, L-2),
io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
- io:fwrite(File, " ~s\n", [pp_tokens(Code, L)]).
+ io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]).
-%% pp_tokens(Tokens, Line) -> [char()].
+%% pp_tokens(Tokens, Line, File) -> [char()].
%% Prints the tokens keeping the line breaks of the original code.
-pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none).
+pp_tokens(Tokens, Line0, File) -> pp_tokens(Tokens, Line0, File, none).
-pp_tokens([], _Line0, _) -> [];
-pp_tokens([T | Ts], Line0, Prev) ->
+pp_tokens([], _Line0, _, _) -> [];
+pp_tokens([T | Ts], Line0, File, Prev) ->
Line = erl_scan:line(T),
- [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)].
+ [pp_sep(Line, Line0, Prev, T),
+ pp_symbol(T, File) | pp_tokens(Ts, Line, File, T)].
-pp_symbol({var,_,Var}) -> atom_to_list(Var);
-pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]);
-pp_symbol({dot, _}) -> ".";
-pp_symbol({Symbol, _}) -> atom_to_list(Symbol).
+pp_symbol({var,_,Var}, _) -> atom_to_list(Var);
+pp_symbol({_,_,Symbol}, File) -> format_symbol(Symbol, File);
+pp_symbol({dot, _}, _) -> ".";
+pp_symbol({Symbol, _}, _) -> atom_to_list(Symbol).
pp_sep(Line, Line0, Prev, T) when Line > Line0 ->
["\n " | pp_sep(Line - 1, Line0, Prev, T)];
@@ -1622,17 +1619,17 @@ out_dfa_edges(File, DFA) ->
end, orddict:new(), Pt),
foreach(fun (T) ->
Crs = orddict:fetch(T, Tdict),
- Edgelab = dfa_edgelabel(Crs),
+ Edgelab = dfa_edgelabel(Crs, File),
io:fwrite(File, " ~b -> ~b [label=\"~ts\"];~n",
[S,T,Edgelab])
end, sort(orddict:fetch_keys(Tdict)))
end, DFA).
-dfa_edgelabel([C]) when is_integer(C) -> quote(C);
-dfa_edgelabel(Cranges) ->
+dfa_edgelabel([C], File) when is_integer(C) -> quote(C, File);
+dfa_edgelabel(Cranges, File) ->
%% io:fwrite("el: ~p\n", [Cranges]),
- "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)];
- (C) -> [quote(C)]
+ "[" ++ map(fun ({A,B}) -> [quote(A, File), "-", quote(B, File)];
+ (C) -> [quote(C, File)]
end, Cranges) ++ "]".
set_encoding(#leex{encoding = none}, File) ->
@@ -1651,33 +1648,50 @@ output_file_directive(File, Filename, Line) ->
format_filename(Filename0, File) ->
Filename = filename:flatten(Filename0),
+ case enc(File) of
+ unicode -> io_lib:write_string(Filename);
+ latin1 -> io_lib:write_string_as_latin1(Filename)
+ end.
+
+format_symbol(Symbol, File) ->
+ Format = case enc(File) of
+ latin1 -> "~p";
+ unicode -> "~tp"
+ end,
+ io_lib:fwrite(Format, [Symbol]).
+
+enc(File) ->
case lists:keyfind(encoding, 1, io:getopts(File)) of
- {encoding, unicode} -> io_lib:write_string(Filename);
- _ -> io_lib:write_string_as_latin1(Filename)
+ false -> latin1; % should never happen
+ {encoding, Enc} -> Enc
end.
-quote($^) -> "\\^";
-quote($.) -> "\\.";
-quote($$) -> "\\$";
-quote($-) -> "\\-";
-quote($[) -> "\\[";
-quote($]) -> "\\]";
-quote($\s) -> "\\\\s";
-quote($\") -> "\\\"";
-quote($\b) -> "\\\\b";
-quote($\f) -> "\\\\f";
-quote($\n) -> "\\\\n";
-quote($\r) -> "\\\\r";
-quote($\t) -> "\\\\t";
-quote($\e) -> "\\\\e";
-quote($\v) -> "\\\\v";
-quote($\d) -> "\\\\d";
-quote($\\) -> "\\\\";
-quote(C) when is_integer(C) ->
+quote($^, _File) -> "\\^";
+quote($., _File) -> "\\.";
+quote($$, _File) -> "\\$";
+quote($-, _File) -> "\\-";
+quote($[, _File) -> "\\[";
+quote($], _File) -> "\\]";
+quote($\s, _File) -> "\\\\s";
+quote($\", _File) -> "\\\"";
+quote($\b, _File) -> "\\\\b";
+quote($\f, _File) -> "\\\\f";
+quote($\n, _File) -> "\\\\n";
+quote($\r, _File) -> "\\\\r";
+quote($\t, _File) -> "\\\\t";
+quote($\e, _File) -> "\\\\e";
+quote($\v, _File) -> "\\\\v";
+quote($\d, _File) -> "\\\\d";
+quote($\\, _File) -> "\\\\";
+quote(C, File) when is_integer(C) ->
%% Must remove the $ and get the \'s right.
- case io_lib:write_char(C) of
+ S = case enc(File) of
+ unicode -> io_lib:write_char(C);
+ latin1 -> io_lib:write_char_as_latin1(C)
+ end,
+ case S of
[$$,$\\|Cs] -> "\\\\" ++ Cs;
[$$|Cs] -> Cs
end;
-quote(maxchar) ->
+quote(maxchar, _File) ->
"MAXCHAR".