aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorDan Gudmundsson <dgud@erlang.org>2017-08-17 12:13:40 +0200
committerDan Gudmundsson <dgud@erlang.org>2017-08-17 12:13:40 +0200
commit0cf60db18f71f727a28ba726e0338ef41ec17542 (patch)
treeac22a7a16254f1fd80832449e33bb1e40da139b1 /lib/stdlib/src
parent33b3697ae98147e0470d8d08bca6c51042dcb3d1 (diff)
parent5f56b49c752a16ee28244981ca9b197ffd5fa691 (diff)
downloadotp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.gz
otp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.bz2
otp-0cf60db18f71f727a28ba726e0338ef41ec17542.zip
Merge branch 'dgud/stdlib/edit-unicode' into maint
* dgud/stdlib/edit-unicode: stdlib: Improve edlin handling of unicode chars OTP-14542
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/edlin.erl132
1 files changed, 80 insertions, 52 deletions
diff --git a/lib/stdlib/src/edlin.erl b/lib/stdlib/src/edlin.erl
index 71e8471c45..64d5a71f3c 100644
--- a/lib/stdlib/src/edlin.erl
+++ b/lib/stdlib/src/edlin.erl
@@ -83,7 +83,7 @@ edit_line(Cs, {line,P,L,M}) ->
edit_line1(Cs, {line,P,L,{blink,N}}) ->
edit(Cs, P, L, none, [{move_rel,N}]);
edit_line1(Cs, {line,P,{[],[]},none}) ->
- {more_chars, {line,P,{lists:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
+ {more_chars, {line,P,{string:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
edit_line1(Cs, {line,P,L,M}) ->
edit(Cs, P, L, M, []).
@@ -93,14 +93,14 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
case key_map(C, Prefix) of
meta ->
edit(Cs, P, {Bef,Aft}, meta, Rs0);
- meta_o ->
- edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
- meta_csi ->
- edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
- meta_meta ->
- edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
- {csi, _} = Csi ->
- edit(Cs, P, {Bef,Aft}, Csi, Rs0);
+ meta_o ->
+ edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
+ meta_csi ->
+ edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
+ meta_meta ->
+ edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
+ {csi, _} = Csi ->
+ edit(Cs, P, {Bef,Aft}, Csi, Rs0);
meta_left_sq_bracket ->
edit(Cs, P, {Bef,Aft}, meta_left_sq_bracket, Rs0);
search_meta ->
@@ -110,8 +110,8 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
ctlx ->
edit(Cs, P, {Bef,Aft}, ctlx, Rs0);
new_line ->
- {done, reverse(Bef, Aft ++ "\n"), Cs,
- reverse(Rs0, [{move_rel,length(Aft)},{put_chars,unicode,"\n"}])};
+ {done, get_line(Bef, Aft ++ "\n"), Cs,
+ reverse(Rs0, [{move_rel,cp_len(Aft)},{put_chars,unicode,"\n"}])};
redraw_line ->
Rs1 = erase(P, Bef, Aft, Rs0),
Rs = redraw(P, Bef, Aft, Rs1),
@@ -157,7 +157,7 @@ edit([], P, L, {blink,N}, Rs) ->
edit([], P, L, Prefix, Rs) ->
{more_chars,{line,P,L,Prefix},reverse(Rs)};
edit(eof, _, {Bef,Aft}, _, Rs) ->
- {done,reverse(Bef, Aft),[],reverse(Rs, [{move_rel,length(Aft)}])}.
+ {done,get_line(Bef, Aft),[],reverse(Rs, [{move_rel,cp_len(Aft)}])}.
%% %% Assumes that arg is a string
%% %% Horizontal whitespace only.
@@ -279,11 +279,21 @@ key_map(C, search) -> {insert_search,C};
key_map(C, _) -> {undefined,C}.
%% do_op(Action, Before, After, Requests)
-
-do_op({insert,C}, Bef, [], Rs) ->
- {{[C|Bef],[]},[{put_chars, unicode,[C]}|Rs]};
-do_op({insert,C}, Bef, Aft, Rs) ->
- {{[C|Bef],Aft},[{insert_chars, unicode, [C]}|Rs]};
+%% Before and After are of lists of type string:grapheme_cluster()
+do_op({insert,C}, [], [], Rs) ->
+ {{[C],[]},[{put_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], [], Rs) ->
+ case string:to_graphemes([Bef,C]) of
+ [GC] -> {{[GC|Bef0],[]},[{put_chars, unicode,[C]}|Rs]};
+ _ -> {{[C,Bef|Bef0],[]},[{put_chars, unicode,[C]}|Rs]}
+ end;
+do_op({insert,C}, [], Aft, Rs) ->
+ {{[C],Aft},[{insert_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], Aft, Rs) ->
+ case string:to_graphemes([Bef,C]) of
+ [GC] -> {{[GC|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]};
+ _ -> {{[C,Bef|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]}
+ end;
%% Search mode prompt always looks like (search)`$TERMS': $RESULT.
%% the {insert_search, _} handlings allow to share this implementation
%% correctly with group.erl. This module provides $TERMS, and group.erl
@@ -299,13 +309,13 @@ do_op({insert_search, C}, Bef, [], Rs) ->
[{insert_chars, unicode, [C]++Aft}, {delete_chars,-3} | Rs],
search};
do_op({insert_search, C}, Bef, Aft, Rs) ->
- Offset= length(Aft),
+ Offset= cp_len(Aft),
NAft = "': ",
{{[C|Bef],NAft},
[{insert_chars, unicode, [C]++NAft}, {delete_chars,-Offset} | Rs],
search};
do_op({search, backward_delete_char}, [_|Bef], Aft, Rs) ->
- Offset= length(Aft)+1,
+ Offset= cp_len(Aft)+1,
NAft = "': ",
{{Bef,NAft},
[{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -314,13 +324,13 @@ do_op({search, backward_delete_char}, [], _Aft, Rs) ->
Aft="': ",
{{[],Aft}, Rs, search};
do_op({search, skip_up}, Bef, Aft, Rs) ->
- Offset= length(Aft),
+ Offset= cp_len(Aft),
NAft = "': ",
{{[$\^R|Bef],NAft}, % we insert ^R as a flag to whoever called us
[{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
search};
do_op({search, skip_down}, Bef, Aft, Rs) ->
- Offset= length(Aft),
+ Offset= cp_len(Aft),
NAft = "': ",
{{[$\^S|Bef],NAft}, % we insert ^S as a flag to whoever called us
[{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -328,12 +338,12 @@ do_op({search, skip_down}, Bef, Aft, Rs) ->
do_op({search, search_found}, _Bef, Aft, Rs) ->
"': "++NAft = Aft,
{{[],NAft},
- [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+ [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
search_found};
do_op({search, search_quit}, _Bef, Aft, Rs) ->
"': "++NAft = Aft,
{{[],NAft},
- [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+ [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
search_quit};
%% do blink after $$
do_op({blink,C,M}, Bef=[$$,$$|_], Aft, Rs) ->
@@ -361,14 +371,16 @@ do_op(auto_blink, Bef, Aft, Rs) ->
N -> {blink,N+1,{Bef,Aft},
[{move_rel,-(N+1)}|Rs]}
end;
-do_op(forward_delete_char, Bef, [_|Aft], Rs) ->
- {{Bef,Aft},[{delete_chars,1}|Rs]};
-do_op(backward_delete_char, [_|Bef], Aft, Rs) ->
- {{Bef,Aft},[{delete_chars,-1}|Rs]};
+do_op(forward_delete_char, Bef, [GC|Aft], Rs) ->
+ {{Bef,Aft},[{delete_chars,gc_len(GC)}|Rs]};
+do_op(backward_delete_char, [GC|Bef], Aft, Rs) ->
+ {{Bef,Aft},[{delete_chars,-gc_len(GC)}|Rs]};
do_op(transpose_char, [C1,C2|Bef], [], Rs) ->
- {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-2}|Rs]};
+ Len = gc_len(C1)+gc_len(C2),
+ {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
do_op(transpose_char, [C2|Bef], [C1|Aft], Rs) ->
- {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-1}|Rs]};
+ Len = gc_len(C2),
+ {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
do_op(kill_word, Bef, Aft0, Rs) ->
{Aft1,Kill0,N0} = over_non_word(Aft0, [], 0),
{Aft,Kill,N} = over_word(Aft1, Kill0, N0),
@@ -381,7 +393,7 @@ do_op(backward_kill_word, Bef0, Aft, Rs) ->
{{Bef,Aft},[{delete_chars,-N}|Rs]};
do_op(kill_line, Bef, Aft, Rs) ->
put(kill_buffer, Aft),
- {{Bef,[]},[{delete_chars,length(Aft)}|Rs]};
+ {{Bef,[]},[{delete_chars,cp_len(Aft)}|Rs]};
do_op(yank, Bef, [], Rs) ->
Kill = get(kill_buffer),
{{reverse(Kill, Bef),[]},[{put_chars, unicode,Kill}|Rs]};
@@ -389,9 +401,9 @@ do_op(yank, Bef, Aft, Rs) ->
Kill = get(kill_buffer),
{{reverse(Kill, Bef),Aft},[{insert_chars, unicode,Kill}|Rs]};
do_op(forward_char, Bef, [C|Aft], Rs) ->
- {{[C|Bef],Aft},[{move_rel,1}|Rs]};
+ {{[C|Bef],Aft},[{move_rel,gc_len(C)}|Rs]};
do_op(backward_char, [C|Bef], Aft, Rs) ->
- {{Bef,[C|Aft]},[{move_rel,-1}|Rs]};
+ {{Bef,[C|Aft]},[{move_rel,-gc_len(C)}|Rs]};
do_op(forward_word, Bef0, Aft0, Rs) ->
{Aft1,Bef1,N0} = over_non_word(Aft0, Bef0, 0),
{Aft,Bef,N} = over_word(Aft1, Bef1, N0),
@@ -401,16 +413,16 @@ do_op(backward_word, Bef0, Aft0, Rs) ->
{Bef,Aft,N} = over_word(Bef1, Aft1, N0),
{{Bef,Aft},[{move_rel,-N}|Rs]};
do_op(beginning_of_line, [C|Bef], Aft, Rs) ->
- {{[],reverse(Bef, [C|Aft])},[{move_rel,-(length(Bef)+1)}|Rs]};
+ {{[],reverse(Bef, [C|Aft])},[{move_rel,-(cp_len(Bef)+1)}|Rs]};
do_op(beginning_of_line, [], Aft, Rs) ->
{{[],Aft},Rs};
do_op(end_of_line, Bef, [C|Aft], Rs) ->
- {{reverse(Aft, [C|Bef]),[]},[{move_rel,length(Aft)+1}|Rs]};
+ {{reverse(Aft, [C|Bef]),[]},[{move_rel,cp_len(Aft)+1}|Rs]};
do_op(end_of_line, Bef, [], Rs) ->
{{Bef,[]},Rs};
do_op(ctlu, Bef, Aft, Rs) ->
put(kill_buffer, reverse(Bef)),
- {{[], Aft}, [{delete_chars, -length(Bef)} | Rs]};
+ {{[], Aft}, [{delete_chars, -cp_len(Bef)} | Rs]};
do_op(beep, Bef, Aft, Rs) ->
{{Bef,Aft},[beep|Rs]};
do_op(_, Bef, Aft, Rs) ->
@@ -436,7 +448,7 @@ over_word(Cs, Stack, N) ->
until_quote([$\'|Cs], Stack, N) ->
{Cs, [$\'|Stack], N+1};
until_quote([C|Cs], Stack, N) ->
- until_quote(Cs, [C|Stack], N+1).
+ until_quote(Cs, [C|Stack], N+gc_len(C)).
over_word1([$\'=C|Cs], Stack, N) ->
until_quote(Cs, [C|Stack], N+1);
@@ -445,7 +457,7 @@ over_word1(Cs, Stack, N) ->
over_word2([C|Cs], Stack, N) ->
case word_char(C) of
- true -> over_word2(Cs, [C|Stack], N+1);
+ true -> over_word2(Cs, [C|Stack], N+gc_len(C));
false -> {[C|Cs],Stack,N}
end;
over_word2([], Stack, N) when is_integer(N) ->
@@ -454,7 +466,7 @@ over_word2([], Stack, N) when is_integer(N) ->
over_non_word([C|Cs], Stack, N) ->
case word_char(C) of
true -> {[C|Cs],Stack,N};
- false -> over_non_word(Cs, [C|Stack], N+1)
+ false -> over_non_word(Cs, [C|Stack], N+gc_len(C))
end;
over_non_word([], Stack, N) ->
{[],Stack,N}.
@@ -465,6 +477,7 @@ word_char(C) when C >= $a, C =< $z -> true;
word_char(C) when C >= $ß, C =< $ÿ, C =/= $÷ -> true;
word_char(C) when C >= $0, C =< $9 -> true;
word_char(C) when C =:= $_ -> true;
+word_char([_|_]) -> true; %% Is grapheme
word_char(_) -> false.
%% over_white(Chars, InitialStack, InitialCount) ->
@@ -488,8 +501,8 @@ over_paren(Chars, Paren, Match) ->
over_paren([C,$$,$$|Cs], Paren, Match, D, N, L) ->
over_paren([C|Cs], Paren, Match, D, N+2, L);
-over_paren([_,$$|Cs], Paren, Match, D, N, L) ->
- over_paren(Cs, Paren, Match, D, N+2, L);
+over_paren([GC,$$|Cs], Paren, Match, D, N, L) ->
+ over_paren(Cs, Paren, Match, D, N+1+gc_len(GC), L);
over_paren([Match|_], _Paren, Match, 1, N, _) ->
N;
over_paren([Match|Cs], Paren, Match, D, N, [Match|L]) ->
@@ -518,8 +531,8 @@ over_paren([$[|_], _, _, _, _, _) ->
over_paren([${|_], _, _, _, _, _) ->
beep;
-over_paren([_|Cs], Paren, Match, D, N, L) ->
- over_paren(Cs, Paren, Match, D, N+1, L);
+over_paren([GC|Cs], Paren, Match, D, N, L) ->
+ over_paren(Cs, Paren, Match, D, N+gc_len(GC), L);
over_paren([], _, _, _, _, _) ->
0.
@@ -529,8 +542,8 @@ over_paren_auto(Chars) ->
over_paren_auto([C,$$,$$|Cs], D, N, L) ->
over_paren_auto([C|Cs], D, N+2, L);
-over_paren_auto([_,$$|Cs], D, N, L) ->
- over_paren_auto(Cs, D, N+2, L);
+over_paren_auto([GC,$$|Cs], D, N, L) ->
+ over_paren_auto(Cs, D, N+1+gc_len(GC), L);
over_paren_auto([$(|_], _, N, []) ->
{N, $)};
@@ -553,8 +566,8 @@ over_paren_auto([$[|Cs], D, N, [$[|L]) ->
over_paren_auto([${|Cs], D, N, [${|L]) ->
over_paren_auto(Cs, D, N+1, L);
-over_paren_auto([_|Cs], D, N, L) ->
- over_paren_auto(Cs, D, N+1, L);
+over_paren_auto([GC|Cs], D, N, L) ->
+ over_paren_auto(Cs, D, N+gc_len(GC), L);
over_paren_auto([], _, _, _) ->
0.
@@ -574,28 +587,43 @@ erase_inp({line,_,{Bef,Aft},_}) ->
reverse(erase([], Bef, Aft, [])).
erase(Pbs, Bef, Aft, Rs) ->
- [{delete_chars,-length(Pbs)-length(Bef)},{delete_chars,length(Aft)}|Rs].
+ [{delete_chars,-cp_len(Pbs)-cp_len(Bef)},{delete_chars,cp_len(Aft)}|Rs].
redraw_line({line,Pbs,{Bef,Aft},_}) ->
reverse(redraw(Pbs, Bef, Aft, [])).
redraw(Pbs, Bef, Aft, Rs) ->
- [{move_rel,-length(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
+ [{move_rel,-cp_len(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
length_before({line,Pbs,{Bef,_Aft},_}) ->
- length(Pbs) + length(Bef).
+ cp_len(Pbs) + cp_len(Bef).
length_after({line,_,{_Bef,Aft},_}) ->
- length(Aft).
+ cp_len(Aft).
prompt({line,Pbs,_,_}) ->
Pbs.
current_line({line,_,{Bef, Aft},_}) ->
- reverse(Bef, Aft ++ "\n").
+ get_line(Bef, Aft ++ "\n").
current_chars({line,_,{Bef,Aft},_}) ->
- reverse(Bef, Aft).
+ get_line(Bef, Aft).
+
+get_line(Bef, Aft) ->
+ unicode:characters_to_list(reverse(Bef, Aft)).
+
+%% Grapheme length in codepoints
+gc_len(CP) when is_integer(CP) -> 1;
+gc_len(CPs) when is_list(CPs) -> length(CPs).
+
+%% String length in codepoints
+cp_len(Str) ->
+ cp_len(Str, 0).
+
+cp_len([GC|R], Len) ->
+ cp_len(R, Len + gc_len(GC));
+cp_len([], Len) -> Len.
%% %% expand(CurrentBefore) ->
%% %% {yes,Expansion} | no