Merge branch 'dgud/stdlib/edit-unicode' into maint

* dgud/stdlib/edit-unicode: stdlib: Improve edlin handling of unicode chars OTP-14542
author: Dan Gudmundsson <[email protected]> 2017-08-17 12:13:40 +0200
committer: Dan Gudmundsson <[email protected]> 2017-08-17 12:13:40 +0200
commit: 0cf60db18f71f727a28ba726e0338ef41ec17542 (patch)
tree: ac22a7a16254f1fd80832449e33bb1e40da139b1
parent: 33b3697ae98147e0470d8d08bca6c51042dcb3d1 (diff)
parent: 5f56b49c752a16ee28244981ca9b197ffd5fa691 (diff)
download: otp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.gz
otp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.bz2
otp-0cf60db18f71f727a28ba726e0338ef41ec17542.zip
2 files changed, 130 insertions, 95 deletions
diff --git a/erts/emulator/drivers/unix/ttsl_drv.c b/erts/emulator/drivers/unix/ttsl_drv.c
index e425b99f16..bce097d944 100644
--- a/erts/emulator/drivers/unix/ttsl_drv.c
+++ b/erts/emulator/drivers/unix/ttsl_drv.c
@@ -108,16 +108,15 @@ static int lbuf_size = BUFSIZ;
 static Uint32 *lbuf;		/* The current line buffer */
 static int llen;		/* The current line length */
 static int lpos;                /* The current "cursor position" in the line buffer */
-
+                                /* NOTE: not the same as the column position; a char may not
+                                 * take a column to display, or it might take many columns
+                                 */
 /* 
  * Tags used in line buffer to show that these bytes represent special characters,
  * Max unicode is 0x0010ffff, so we have lots of place for meta tags... 
  */
 #define CONTROL_TAG 0x10000000U /* Control character, value in first position */
 #define ESCAPED_TAG 0x01000000U /* Escaped character, value in first position */
-#ifdef HAVE_WCWIDTH
-#define WIDE_TAG    0x02000000U /* Wide character, value in first position    */
-#endif
 #define TAG_MASK    0xFF000000U
 
 #define MAXSIZE (1 << 16)
@@ -156,6 +155,8 @@ static int insert_buf(byte*,int);
 static int write_buf(Uint32 *,int);
 static int outc(int c);
 static int move_cursor(int,int);
+static int cp_pos_to_col(int cp_pos);
+
 
 /* Termcap functions. */
 static int start_termcap(void);
@@ -991,24 +992,26 @@ static int del_chars(int n)
 {
     int i, l, r;
     int pos;
+    int gcs; /* deleted grapheme characters */
 
     update_cols();
 
     /* Step forward or backwards over n logical characters. */
     pos = step_over_chars(n);
-
+    DEBUGLOG(("del_chars: %d from %d %d %d\n", n, lpos, pos, llen));
     if (pos > lpos) {
 	l = pos - lpos;		/* Buffer characters to delete */
 	r = llen - lpos - l;	/* Characters after deleted */
+        gcs = cp_pos_to_col(pos) - cp_pos_to_col(lpos);
 	/* Fix up buffer and buffer pointers. */
 	if (r > 0)
 	    memmove(lbuf + lpos, lbuf + pos, r * sizeof(Uint32));
 	llen -= l;
 	/* Write out characters after, blank the tail and jump back to lpos. */
 	write_buf(lbuf + lpos, r);
-	for (i = l ; i > 0; --i)
+	for (i = gcs ; i > 0; --i)
 	  outc(' ');
-	if (COL(llen+l) == 0 && xn)
+	if (xn && COL(cp_pos_to_col(llen)+gcs) == 0)
 	{
 	   outc(' ');
 	   move_left(1);
@@ -1018,7 +1021,7 @@ static int del_chars(int n)
     else if (pos < lpos) {
 	l = lpos - pos;		/* Buffer characters */
 	r = llen - lpos;	/* Characters after deleted */
-	move_cursor(lpos, lpos-l);	/* Move back */
+	gcs = -move_cursor(lpos, lpos-l);	/* Move back */
 	/* Fix up buffer and buffer pointers. */
 	if (r > 0)
 	    memmove(lbuf + pos, lbuf + lpos, r * sizeof(Uint32));
@@ -1026,14 +1029,14 @@ static int del_chars(int n)
 	llen -= l;
 	/* Write out characters after, blank the tail and jump back to lpos. */
 	write_buf(lbuf + lpos, r);
-	for (i = l ; i > 0; --i)
-	  outc(' ');
-	if (COL(llen+l) == 0 && xn)
+	for (i = gcs ; i > 0; --i)
+          outc(' ');
+        if (xn && COL(cp_pos_to_col(llen)+gcs) == 0)
 	{
-	   outc(' ');
-	   move_left(1);
+          outc(' ');
+          move_left(1);
 	}
-	move_cursor(llen + l, lpos);
+        move_cursor(llen + l, lpos);
     }
     return TRUE;
 }
@@ -1047,22 +1050,12 @@ static int step_over_chars(int n)
     end = lbuf + llen;
     c = lbuf + lpos;
     for ( ; n > 0 && c < end; --n) {
-#ifdef HAVE_WCWIDTH
-	while (*c & WIDE_TAG) {
-	    c++;
-	}
-#endif
 	c++;
 	while (c < end && (*c & TAG_MASK) && ((*c & ~TAG_MASK) == 0))
 	    c++;
     }
     for ( ; n < 0 && c > beg; n++) {
 	--c;
-#ifdef HAVE_WCWIDTH
-	while (c > beg + 1 && (c[-1] & WIDE_TAG)) {
-	    --c;
-	}
-#endif
 	while (c > beg && (*c & TAG_MASK) && ((*c & ~TAG_MASK) == 0))
 	    --c;
     }
@@ -1088,15 +1081,6 @@ static int insert_buf(byte *s, int n)
 	    ++pos;
 	}
 	if ((utf8_mode && (ch >= 128 || isprint(ch))) || (ch <= 255 && isprint(ch))) {
-#ifdef HAVE_WCWIDTH
-	    int width;
-	    if ((width = wcwidth(ch)) > 1) {
-		while (--width) {
-		    DEBUGLOG(("insert_buf: Wide(UTF-8):%d,%d",width,ch));
-		    lbuf[lpos++] = (WIDE_TAG | ((Uint32) ch));
-		}
-	    }
-#endif
 	    DEBUGLOG(("insert_buf: Printable(UTF-8):%d",ch));
 	    lbuf[lpos++] = (Uint32) ch;
 	} else if (ch >= 128) { /* not utf8 mode */
@@ -1204,10 +1188,6 @@ static int write_buf(Uint32 *s, int n)
 	    if (octbuff != octtmp) {
 		driver_free(octbuff);
 	    }
-#ifdef HAVE_WCWIDTH
-	} else if (*s & WIDE_TAG) {
-	    --n; s++;
-#endif
 	} else {
 	    DEBUGLOG(("write_buf: Very unexpected character %d",(int) *s));
 	    ++n;
@@ -1216,7 +1196,7 @@ static int write_buf(Uint32 *s, int n)
     }
     /* Check landed in first column of new line and have 'xn' bug. */
     n = s - lbuf;
-    if (COL(n) == 0 && xn && n != 0) {
+    if (xn && n != 0 && COL(cp_pos_to_col(n)) == 0) {
 	if (n >= llen) {
 	    outc(' ');
 	} else if (lastput == 0) { /* A multibyte UTF8 character */
@@ -1246,14 +1226,19 @@ static int outc(int c)
     return 1;
 }
 
-static int move_cursor(int from, int to)
+static int move_cursor(int from_pos, int to_pos)
 {
+    int from_col, to_col;
     int dc, dl;
-
     update_cols();
 
-    dc = COL(to) - COL(from);
-    dl = LINE(to) - LINE(from);
+    from_col = cp_pos_to_col(from_pos);
+    to_col = cp_pos_to_col(to_pos);
+
+    dc = COL(to_col) - COL(from_col);
+    dl = LINE(to_col) - LINE(from_col);
+    DEBUGLOG(("move_cursor: from %d %d to %d %d => %d %d\n",
+              from_pos, from_col, to_pos, to_col, dl, dc));
     if (dl > 0)
       move_down(dl);
     else if (dl < 0)
@@ -1262,7 +1247,29 @@ static int move_cursor(int from, int to)
       move_right(dc);
     else if (dc < 0)
       move_left(-dc);
-    return TRUE;
+    return to_col-from_col;
+}
+
+static int cp_pos_to_col(int cp_pos)
+{
+#ifdef HAVE_WCWIDTH
+    int i;
+    int col = 0;
+
+    for (i = 0; i < cp_pos; i++) {
+        int w = wcwidth(lbuf[i]);
+        if (w > 0) {
+            col += w;
+        }
+    }
+    return col;
+#else
+    /*
+     * We don't have any character width information. Assume that
+     * code points are one column wide.
+     */
+    return cp_pos;
+#endif
 }
 
 static int start_termcap(void)
diff --git a/lib/stdlib/src/edlin.erl b/lib/stdlib/src/edlin.erl
index 71e8471c45..64d5a71f3c 100644
--- a/lib/stdlib/src/edlin.erl
+++ b/lib/stdlib/src/edlin.erl
@@ -83,7 +83,7 @@ edit_line(Cs, {line,P,L,M}) ->
 edit_line1(Cs, {line,P,L,{blink,N}}) ->
     edit(Cs, P, L, none, [{move_rel,N}]);
 edit_line1(Cs, {line,P,{[],[]},none}) ->
-    {more_chars, {line,P,{lists:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
+    {more_chars, {line,P,{string:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
 edit_line1(Cs, {line,P,L,M}) ->
     edit(Cs, P, L, M, []).
 
@@ -93,14 +93,14 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
     case key_map(C, Prefix) of
 	meta ->
 	    edit(Cs, P, {Bef,Aft}, meta, Rs0);
-    meta_o ->
-        edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
-    meta_csi ->
-        edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
-    meta_meta ->
-        edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
-    {csi, _} = Csi ->
-        edit(Cs, P, {Bef,Aft}, Csi, Rs0);
+        meta_o ->
+            edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
+        meta_csi ->
+            edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
+        meta_meta ->
+            edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
+        {csi, _} = Csi ->
+            edit(Cs, P, {Bef,Aft}, Csi, Rs0);
 	meta_left_sq_bracket ->
 	    edit(Cs, P, {Bef,Aft}, meta_left_sq_bracket, Rs0);
 	search_meta ->
@@ -110,8 +110,8 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
 	ctlx ->
 	    edit(Cs, P, {Bef,Aft}, ctlx, Rs0);
 	new_line ->
-	    {done, reverse(Bef, Aft ++ "\n"), Cs,
-	     reverse(Rs0, [{move_rel,length(Aft)},{put_chars,unicode,"\n"}])};
+	    {done, get_line(Bef, Aft ++ "\n"), Cs,
+	     reverse(Rs0, [{move_rel,cp_len(Aft)},{put_chars,unicode,"\n"}])};
 	redraw_line ->
 	    Rs1 = erase(P, Bef, Aft, Rs0),
 	    Rs = redraw(P, Bef, Aft, Rs1),
@@ -157,7 +157,7 @@ edit([], P, L, {blink,N}, Rs) ->
 edit([], P, L, Prefix, Rs) ->
     {more_chars,{line,P,L,Prefix},reverse(Rs)};
 edit(eof, _, {Bef,Aft}, _, Rs) ->
-    {done,reverse(Bef, Aft),[],reverse(Rs, [{move_rel,length(Aft)}])}.
+    {done,get_line(Bef, Aft),[],reverse(Rs, [{move_rel,cp_len(Aft)}])}.
 
 %% %% Assumes that arg is a string
 %% %% Horizontal whitespace only.
@@ -279,11 +279,21 @@ key_map(C, search) -> {insert_search,C};
 key_map(C, _) -> {undefined,C}.
 
 %% do_op(Action, Before, After, Requests)
-
-do_op({insert,C}, Bef, [], Rs) ->
-    {{[C|Bef],[]},[{put_chars, unicode,[C]}|Rs]};
-do_op({insert,C}, Bef, Aft, Rs) ->
-    {{[C|Bef],Aft},[{insert_chars, unicode, [C]}|Rs]};
+%% Before and After are lists of type string:grapheme_cluster()
+do_op({insert,C}, [], [], Rs) ->
+    {{[C],[]},[{put_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], [], Rs) ->
+    case string:to_graphemes([Bef,C]) of
+        [GC] -> {{[GC|Bef0],[]},[{put_chars, unicode,[C]}|Rs]};
+        _ -> {{[C,Bef|Bef0],[]},[{put_chars, unicode,[C]}|Rs]}
+    end;
+do_op({insert,C}, [], Aft, Rs) ->
+    {{[C],Aft},[{insert_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], Aft, Rs) ->
+    case string:to_graphemes([Bef,C]) of
+        [GC] -> {{[GC|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]};
+        _ -> {{[C,Bef|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]}
+    end;
 %% Search mode prompt always looks like (search)`$TERMS': $RESULT.
 %% the {insert_search, _} handlings allow to share this implementation
 %% correctly with group.erl. This module provides $TERMS, and group.erl
@@ -299,13 +309,13 @@ do_op({insert_search, C}, Bef, [], Rs) ->
      [{insert_chars, unicode, [C]++Aft}, {delete_chars,-3} | Rs],
      search};
 do_op({insert_search, C}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[C|Bef],NAft},
      [{insert_chars, unicode, [C]++NAft}, {delete_chars,-Offset} | Rs],
      search};
 do_op({search, backward_delete_char}, [_|Bef], Aft, Rs) ->
-    Offset= length(Aft)+1,
+    Offset= cp_len(Aft)+1,
     NAft = "': ",
     {{Bef,NAft},
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -314,13 +324,13 @@ do_op({search, backward_delete_char}, [], _Aft, Rs) ->
     Aft="': ",
     {{[],Aft}, Rs, search};
 do_op({search, skip_up}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[$\^R|Bef],NAft}, % we insert ^R as a flag to whoever called us
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
      search};
 do_op({search, skip_down}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[$\^S|Bef],NAft}, % we insert ^S as a flag to whoever called us
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -328,12 +338,12 @@ do_op({search, skip_down}, Bef, Aft, Rs) ->
 do_op({search, search_found}, _Bef, Aft, Rs) ->
     "': "++NAft = Aft,
     {{[],NAft},
-     [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+     [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
      search_found};
 do_op({search, search_quit}, _Bef, Aft, Rs) ->
     "': "++NAft = Aft,
     {{[],NAft},
-     [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+     [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
      search_quit};
 %% do blink after $$
 do_op({blink,C,M}, Bef=[$$,$$|_], Aft, Rs) ->
@@ -361,14 +371,16 @@ do_op(auto_blink, Bef, Aft, Rs) ->
 	N -> {blink,N+1,{Bef,Aft},
 	      [{move_rel,-(N+1)}|Rs]}
     end;
-do_op(forward_delete_char, Bef, [_|Aft], Rs) ->
-    {{Bef,Aft},[{delete_chars,1}|Rs]};
-do_op(backward_delete_char, [_|Bef], Aft, Rs) ->
-    {{Bef,Aft},[{delete_chars,-1}|Rs]};
+do_op(forward_delete_char, Bef, [GC|Aft], Rs) ->
+    {{Bef,Aft},[{delete_chars,gc_len(GC)}|Rs]};
+do_op(backward_delete_char, [GC|Bef], Aft, Rs) ->
+    {{Bef,Aft},[{delete_chars,-gc_len(GC)}|Rs]};
 do_op(transpose_char, [C1,C2|Bef], [], Rs) ->
-    {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-2}|Rs]};
+    Len = gc_len(C1)+gc_len(C2),
+    {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
 do_op(transpose_char, [C2|Bef], [C1|Aft], Rs) ->
-    {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-1}|Rs]};
+    Len = gc_len(C2),
+    {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
 do_op(kill_word, Bef, Aft0, Rs) ->
     {Aft1,Kill0,N0} = over_non_word(Aft0, [], 0),
     {Aft,Kill,N} = over_word(Aft1, Kill0, N0),
@@ -381,7 +393,7 @@ do_op(backward_kill_word, Bef0, Aft, Rs) ->
     {{Bef,Aft},[{delete_chars,-N}|Rs]};
 do_op(kill_line, Bef, Aft, Rs) ->
     put(kill_buffer, Aft),
-    {{Bef,[]},[{delete_chars,length(Aft)}|Rs]};
+    {{Bef,[]},[{delete_chars,cp_len(Aft)}|Rs]};
 do_op(yank, Bef, [], Rs) ->
     Kill = get(kill_buffer),
     {{reverse(Kill, Bef),[]},[{put_chars, unicode,Kill}|Rs]};
@@ -389,9 +401,9 @@ do_op(yank, Bef, Aft, Rs) ->
     Kill = get(kill_buffer),
     {{reverse(Kill, Bef),Aft},[{insert_chars, unicode,Kill}|Rs]};
 do_op(forward_char, Bef, [C|Aft], Rs) ->
-    {{[C|Bef],Aft},[{move_rel,1}|Rs]};
+    {{[C|Bef],Aft},[{move_rel,gc_len(C)}|Rs]};
 do_op(backward_char, [C|Bef], Aft, Rs) ->
-    {{Bef,[C|Aft]},[{move_rel,-1}|Rs]};
+    {{Bef,[C|Aft]},[{move_rel,-gc_len(C)}|Rs]};
 do_op(forward_word, Bef0, Aft0, Rs) ->
     {Aft1,Bef1,N0} = over_non_word(Aft0, Bef0, 0),
     {Aft,Bef,N} = over_word(Aft1, Bef1, N0),
@@ -401,16 +413,16 @@ do_op(backward_word, Bef0, Aft0, Rs) ->
     {Bef,Aft,N} = over_word(Bef1, Aft1, N0),
     {{Bef,Aft},[{move_rel,-N}|Rs]};
 do_op(beginning_of_line, [C|Bef], Aft, Rs) ->
-    {{[],reverse(Bef, [C|Aft])},[{move_rel,-(length(Bef)+1)}|Rs]};
+    {{[],reverse(Bef, [C|Aft])},[{move_rel,-(cp_len(Bef)+1)}|Rs]};
 do_op(beginning_of_line, [], Aft, Rs) ->
     {{[],Aft},Rs};
 do_op(end_of_line, Bef, [C|Aft], Rs) ->
-    {{reverse(Aft, [C|Bef]),[]},[{move_rel,length(Aft)+1}|Rs]};
+    {{reverse(Aft, [C|Bef]),[]},[{move_rel,cp_len(Aft)+1}|Rs]};
 do_op(end_of_line, Bef, [], Rs) ->
     {{Bef,[]},Rs};
 do_op(ctlu, Bef, Aft, Rs) ->
     put(kill_buffer, reverse(Bef)),
-    {{[], Aft}, [{delete_chars, -length(Bef)} | Rs]};
+    {{[], Aft}, [{delete_chars, -cp_len(Bef)} | Rs]};
 do_op(beep, Bef, Aft, Rs) ->
     {{Bef,Aft},[beep|Rs]};
 do_op(_, Bef, Aft, Rs) ->
@@ -436,7 +448,7 @@ over_word(Cs, Stack, N) ->
 until_quote([$\'|Cs], Stack, N) ->
     {Cs, [$\'|Stack], N+1};
 until_quote([C|Cs], Stack, N) ->
-    until_quote(Cs, [C|Stack], N+1).
+    until_quote(Cs, [C|Stack], N+gc_len(C)).
 
 over_word1([$\'=C|Cs], Stack, N) ->
     until_quote(Cs, [C|Stack], N+1);
@@ -445,7 +457,7 @@ over_word1(Cs, Stack, N) ->
 
 over_word2([C|Cs], Stack, N) ->
     case word_char(C) of
-	true -> over_word2(Cs, [C|Stack], N+1);
+	true -> over_word2(Cs, [C|Stack], N+gc_len(C));
 	false -> {[C|Cs],Stack,N}
     end;
 over_word2([], Stack, N) when is_integer(N) ->
@@ -454,7 +466,7 @@ over_word2([], Stack, N) when is_integer(N) ->
 over_non_word([C|Cs], Stack, N) ->
     case word_char(C) of
 	true -> {[C|Cs],Stack,N};
-	false -> over_non_word(Cs, [C|Stack], N+1)
+	false -> over_non_word(Cs, [C|Stack], N+gc_len(C))
     end;
 over_non_word([], Stack, N) ->
     {[],Stack,N}.
@@ -465,6 +477,7 @@ word_char(C) when C >= $a, C =< $z -> true;
 word_char(C) when C >= $ß, C =< $ÿ, C =/= $÷ -> true;
 word_char(C) when C >= $0, C =< $9 -> true;
 word_char(C) when C =:= $_ -> true;
+word_char([_|_]) -> true; %% Is grapheme
 word_char(_) -> false.
 
 %% over_white(Chars, InitialStack, InitialCount) ->
@@ -488,8 +501,8 @@ over_paren(Chars, Paren, Match) ->
 
 over_paren([C,$$,$$|Cs], Paren, Match, D, N, L)  ->
     over_paren([C|Cs], Paren, Match, D, N+2, L);
-over_paren([_,$$|Cs], Paren, Match, D, N, L)  ->
-    over_paren(Cs, Paren, Match, D, N+2, L);
+over_paren([GC,$$|Cs], Paren, Match, D, N, L)  ->
+    over_paren(Cs, Paren, Match, D, N+1+gc_len(GC), L);
 over_paren([Match|_], _Paren, Match, 1, N, _) ->
     N;
 over_paren([Match|Cs], Paren, Match, D, N, [Match|L]) ->
@@ -518,8 +531,8 @@ over_paren([$[|_], _, _, _, _, _)  ->
 over_paren([${|_], _, _, _, _, _)  ->
     beep;
 
-over_paren([_|Cs], Paren, Match, D, N, L)  ->
-    over_paren(Cs, Paren, Match, D, N+1, L);
+over_paren([GC|Cs], Paren, Match, D, N, L)  ->
+    over_paren(Cs, Paren, Match, D, N+gc_len(GC), L);
 over_paren([], _, _, _, _, _) ->
     0.
 
@@ -529,8 +542,8 @@ over_paren_auto(Chars) ->
 
 over_paren_auto([C,$$,$$|Cs], D, N, L)  ->
     over_paren_auto([C|Cs], D, N+2, L);
-over_paren_auto([_,$$|Cs], D, N, L)  ->
-    over_paren_auto(Cs, D, N+2, L);
+over_paren_auto([GC,$$|Cs], D, N, L)  ->
+    over_paren_auto(Cs, D, N+1+gc_len(GC), L);
 
 over_paren_auto([$(|_], _, N, [])  ->
     {N, $)};
@@ -553,8 +566,8 @@ over_paren_auto([$[|Cs], D, N, [$[|L])  ->
 over_paren_auto([${|Cs], D, N, [${|L])  ->
     over_paren_auto(Cs, D, N+1, L);
 
-over_paren_auto([_|Cs], D, N, L)  ->
-    over_paren_auto(Cs, D, N+1, L);
+over_paren_auto([GC|Cs], D, N, L)  ->
+    over_paren_auto(Cs, D, N+gc_len(GC), L);
 over_paren_auto([], _, _, _) ->
     0.
 
@@ -574,28 +587,43 @@ erase_inp({line,_,{Bef,Aft},_}) ->
     reverse(erase([], Bef, Aft, [])).
 
 erase(Pbs, Bef, Aft, Rs) ->
-    [{delete_chars,-length(Pbs)-length(Bef)},{delete_chars,length(Aft)}|Rs].
+    [{delete_chars,-cp_len(Pbs)-cp_len(Bef)},{delete_chars,cp_len(Aft)}|Rs].
 
 redraw_line({line,Pbs,{Bef,Aft},_}) ->
     reverse(redraw(Pbs, Bef, Aft, [])).
 
 redraw(Pbs, Bef, Aft, Rs) ->
-    [{move_rel,-length(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
+    [{move_rel,-cp_len(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
 
 length_before({line,Pbs,{Bef,_Aft},_}) ->
-    length(Pbs) + length(Bef).
+    cp_len(Pbs) + cp_len(Bef).
 
 length_after({line,_,{_Bef,Aft},_}) ->
-    length(Aft).
+    cp_len(Aft).
 
 prompt({line,Pbs,_,_}) ->
     Pbs.
 
 current_line({line,_,{Bef, Aft},_}) ->
-    reverse(Bef, Aft ++ "\n").
+    get_line(Bef, Aft ++ "\n").
 
 current_chars({line,_,{Bef,Aft},_}) ->
-    reverse(Bef, Aft).
+    get_line(Bef, Aft).
+
+get_line(Bef, Aft) ->
+    unicode:characters_to_list(reverse(Bef, Aft)).
+
+%% Grapheme length in codepoints
+gc_len(CP) when is_integer(CP) -> 1;
+gc_len(CPs) when is_list(CPs) -> length(CPs).
+
+%% String length in codepoints
+cp_len(Str) ->
+    cp_len(Str, 0).
+
+cp_len([GC|R], Len) ->
+    cp_len(R, Len + gc_len(GC));
+cp_len([], Len) -> Len.
 
 %% %% expand(CurrentBefore) ->
 %% %%	{yes,Expansion} | no
author	Dan Gudmundsson <[email protected]>	2017-08-17 12:13:40 +0200
committer	Dan Gudmundsson <[email protected]>	2017-08-17 12:13:40 +0200
commit	0cf60db18f71f727a28ba726e0338ef41ec17542 (patch)
tree	ac22a7a16254f1fd80832449e33bb1e40da139b1
parent	33b3697ae98147e0470d8d08bca6c51042dcb3d1 (diff)
parent	5f56b49c752a16ee28244981ca9b197ffd5fa691 (diff)
download	otp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.gz otp-0cf60db18f71f727a28ba726e0338ef41ec17542.tar.bz2 otp-0cf60db18f71f727a28ba726e0338ef41ec17542.zip