stdlib: Improve edlin handling of unicode chars

Let edlin handle grapheme clusters instead of codepoints to improve the handling of multi-codepoint characters. The ttsl driver (and protocol) still expects all lengths in codepoints. Previously it was assumed that each codepoint occupied (at least) one terminal column, and a hack was made for wide (multicolumn) characters by patching in TAGGED characters to occupy the extra space, so that the codepoint index was equal to the column index. This didn't work at all for combining codepoints, which do not occupy any more space than the previous character. Improved this handling by calculating column positions in move_cursor. This is based on wcwidth() and is not perfect: wcwidth() is wrong for some codepoints, and wcwidth() cannot know the width of, for example, Hangul graphemes. But it works better than before without making a major change in the protocol.
author: Dan Gudmundsson <[email protected]> 2017-06-02 14:25:02 +0200
committer: Dan Gudmundsson <[email protected]> 2017-08-16 10:03:28 +0200
commit: 5f56b49c752a16ee28244981ca9b197ffd5fa691 (patch)
tree: 17a9563e03ecb40ba4cfe96cea2b89209ed1875b
parent: b182febe36aa63eb8290f24ba4b7932673a9a9bc (diff)
download: otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.tar.gz
otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.tar.bz2
otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.zip
2 files changed, 130 insertions, 95 deletions
diff --git a/erts/emulator/drivers/unix/ttsl_drv.c b/erts/emulator/drivers/unix/ttsl_drv.c
index e425b99f16..bce097d944 100644
--- a/erts/emulator/drivers/unix/ttsl_drv.c
+++ b/erts/emulator/drivers/unix/ttsl_drv.c
@@ -108,16 +108,15 @@ static int lbuf_size = BUFSIZ;
 static Uint32 *lbuf;		/* The current line buffer */
 static int llen;		/* The current line length */
 static int lpos;                /* The current "cursor position" in the line buffer */
-
+                                /* NOTE: not the same as column position a char may not take a"
+                                 * column to display or it might take many columns
+                                 */
 /* 
  * Tags used in line buffer to show that these bytes represent special characters,
  * Max unicode is 0x0010ffff, so we have lots of place for meta tags... 
  */
 #define CONTROL_TAG 0x10000000U /* Control character, value in first position */
 #define ESCAPED_TAG 0x01000000U /* Escaped character, value in first position */
-#ifdef HAVE_WCWIDTH
-#define WIDE_TAG    0x02000000U /* Wide character, value in first position    */
-#endif
 #define TAG_MASK    0xFF000000U
 
 #define MAXSIZE (1 << 16)
@@ -156,6 +155,8 @@ static int insert_buf(byte*,int);
 static int write_buf(Uint32 *,int);
 static int outc(int c);
 static int move_cursor(int,int);
+static int cp_pos_to_col(int cp_pos);
+
 
 /* Termcap functions. */
 static int start_termcap(void);
@@ -991,24 +992,26 @@ static int del_chars(int n)
 {
     int i, l, r;
     int pos;
+    int gcs; /* deleted grapheme characters */
 
     update_cols();
 
     /* Step forward or backwards over n logical characters. */
     pos = step_over_chars(n);
-
+    DEBUGLOG(("del_chars: %d from %d %d %d\n", n, lpos, pos, llen));
     if (pos > lpos) {
 	l = pos - lpos;		/* Buffer characters to delete */
 	r = llen - lpos - l;	/* Characters after deleted */
+        gcs = cp_pos_to_col(pos) - cp_pos_to_col(lpos);
 	/* Fix up buffer and buffer pointers. */
 	if (r > 0)
 	    memmove(lbuf + lpos, lbuf + pos, r * sizeof(Uint32));
 	llen -= l;
 	/* Write out characters after, blank the tail and jump back to lpos. */
 	write_buf(lbuf + lpos, r);
-	for (i = l ; i > 0; --i)
+	for (i = gcs ; i > 0; --i)
 	  outc(' ');
-	if (COL(llen+l) == 0 && xn)
+	if (xn && COL(cp_pos_to_col(llen)+gcs) == 0)
 	{
 	   outc(' ');
 	   move_left(1);
@@ -1018,7 +1021,7 @@ static int del_chars(int n)
     else if (pos < lpos) {
 	l = lpos - pos;		/* Buffer characters */
 	r = llen - lpos;	/* Characters after deleted */
-	move_cursor(lpos, lpos-l);	/* Move back */
+	gcs = -move_cursor(lpos, lpos-l);	/* Move back */
 	/* Fix up buffer and buffer pointers. */
 	if (r > 0)
 	    memmove(lbuf + pos, lbuf + lpos, r * sizeof(Uint32));
@@ -1026,14 +1029,14 @@ static int del_chars(int n)
 	llen -= l;
 	/* Write out characters after, blank the tail and jump back to lpos. */
 	write_buf(lbuf + lpos, r);
-	for (i = l ; i > 0; --i)
-	  outc(' ');
-	if (COL(llen+l) == 0 && xn)
+	for (i = gcs ; i > 0; --i)
+          outc(' ');
+        if (xn && COL(cp_pos_to_col(llen)+gcs) == 0)
 	{
-	   outc(' ');
-	   move_left(1);
+          outc(' ');
+          move_left(1);
 	}
-	move_cursor(llen + l, lpos);
+        move_cursor(llen + l, lpos);
     }
     return TRUE;
 }
@@ -1047,22 +1050,12 @@ static int step_over_chars(int n)
     end = lbuf + llen;
     c = lbuf + lpos;
     for ( ; n > 0 && c < end; --n) {
-#ifdef HAVE_WCWIDTH
-	while (*c & WIDE_TAG) {
-	    c++;
-	}
-#endif
 	c++;
 	while (c < end && (*c & TAG_MASK) && ((*c & ~TAG_MASK) == 0))
 	    c++;
     }
     for ( ; n < 0 && c > beg; n++) {
 	--c;
-#ifdef HAVE_WCWIDTH
-	while (c > beg + 1 && (c[-1] & WIDE_TAG)) {
-	    --c;
-	}
-#endif
 	while (c > beg && (*c & TAG_MASK) && ((*c & ~TAG_MASK) == 0))
 	    --c;
     }
@@ -1088,15 +1081,6 @@ static int insert_buf(byte *s, int n)
 	    ++pos;
 	}
 	if ((utf8_mode && (ch >= 128 || isprint(ch))) || (ch <= 255 && isprint(ch))) {
-#ifdef HAVE_WCWIDTH
-	    int width;
-	    if ((width = wcwidth(ch)) > 1) {
-		while (--width) {
-		    DEBUGLOG(("insert_buf: Wide(UTF-8):%d,%d",width,ch));
-		    lbuf[lpos++] = (WIDE_TAG | ((Uint32) ch));
-		}
-	    }
-#endif
 	    DEBUGLOG(("insert_buf: Printable(UTF-8):%d",ch));
 	    lbuf[lpos++] = (Uint32) ch;
 	} else if (ch >= 128) { /* not utf8 mode */
@@ -1204,10 +1188,6 @@ static int write_buf(Uint32 *s, int n)
 	    if (octbuff != octtmp) {
 		driver_free(octbuff);
 	    }
-#ifdef HAVE_WCWIDTH
-	} else if (*s & WIDE_TAG) {
-	    --n; s++;
-#endif
 	} else {
 	    DEBUGLOG(("write_buf: Very unexpected character %d",(int) *s));
 	    ++n;
@@ -1216,7 +1196,7 @@ static int write_buf(Uint32 *s, int n)
     }
     /* Check landed in first column of new line and have 'xn' bug. */
     n = s - lbuf;
-    if (COL(n) == 0 && xn && n != 0) {
+    if (xn && n != 0 && COL(cp_pos_to_col(n)) == 0) {
 	if (n >= llen) {
 	    outc(' ');
 	} else if (lastput == 0) { /* A multibyte UTF8 character */
@@ -1246,14 +1226,19 @@ static int outc(int c)
     return 1;
 }
 
-static int move_cursor(int from, int to)
+static int move_cursor(int from_pos, int to_pos)
 {
+    int from_col, to_col;
     int dc, dl;
-
     update_cols();
 
-    dc = COL(to) - COL(from);
-    dl = LINE(to) - LINE(from);
+    from_col = cp_pos_to_col(from_pos);
+    to_col = cp_pos_to_col(to_pos);
+
+    dc = COL(to_col) - COL(from_col);
+    dl = LINE(to_col) - LINE(from_col);
+    DEBUGLOG(("move_cursor: from %d %d to %d %d => %d %d\n",
+              from_pos, from_col, to_pos, to_col, dl, dc));
     if (dl > 0)
       move_down(dl);
     else if (dl < 0)
@@ -1262,7 +1247,29 @@ static int move_cursor(int from, int to)
       move_right(dc);
     else if (dc < 0)
       move_left(-dc);
-    return TRUE;
+    return to_col-from_col;
+}
+
+static int cp_pos_to_col(int cp_pos)
+{
+#ifdef HAVE_WCWIDTH
+    int i;
+    int col = 0;
+
+    for (i = 0; i < cp_pos; i++) {
+        int w = wcwidth(lbuf[i]);
+        if (w > 0) {
+            col += w;
+        }
+    }
+    return col;
+#else
+    /*
+     * We dont' have any character width information. Assume that
+     * code points are one column wide.
+     */
+    return cp_pos;
+#endif
 }
 
 static int start_termcap(void)
diff --git a/lib/stdlib/src/edlin.erl b/lib/stdlib/src/edlin.erl
index 71e8471c45..64d5a71f3c 100644
--- a/lib/stdlib/src/edlin.erl
+++ b/lib/stdlib/src/edlin.erl
@@ -83,7 +83,7 @@ edit_line(Cs, {line,P,L,M}) ->
 edit_line1(Cs, {line,P,L,{blink,N}}) ->
     edit(Cs, P, L, none, [{move_rel,N}]);
 edit_line1(Cs, {line,P,{[],[]},none}) ->
-    {more_chars, {line,P,{lists:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
+    {more_chars, {line,P,{string:reverse(Cs),[]},none},[{put_chars, unicode, Cs}]};
 edit_line1(Cs, {line,P,L,M}) ->
     edit(Cs, P, L, M, []).
 
@@ -93,14 +93,14 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
     case key_map(C, Prefix) of
 	meta ->
 	    edit(Cs, P, {Bef,Aft}, meta, Rs0);
-    meta_o ->
-        edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
-    meta_csi ->
-        edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
-    meta_meta ->
-        edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
-    {csi, _} = Csi ->
-        edit(Cs, P, {Bef,Aft}, Csi, Rs0);
+        meta_o ->
+            edit(Cs, P, {Bef,Aft}, meta_o, Rs0);
+        meta_csi ->
+            edit(Cs, P, {Bef,Aft}, meta_csi, Rs0);
+        meta_meta ->
+            edit(Cs, P, {Bef,Aft}, meta_meta, Rs0);
+        {csi, _} = Csi ->
+            edit(Cs, P, {Bef,Aft}, Csi, Rs0);
 	meta_left_sq_bracket ->
 	    edit(Cs, P, {Bef,Aft}, meta_left_sq_bracket, Rs0);
 	search_meta ->
@@ -110,8 +110,8 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) ->
 	ctlx ->
 	    edit(Cs, P, {Bef,Aft}, ctlx, Rs0);
 	new_line ->
-	    {done, reverse(Bef, Aft ++ "\n"), Cs,
-	     reverse(Rs0, [{move_rel,length(Aft)},{put_chars,unicode,"\n"}])};
+	    {done, get_line(Bef, Aft ++ "\n"), Cs,
+	     reverse(Rs0, [{move_rel,cp_len(Aft)},{put_chars,unicode,"\n"}])};
 	redraw_line ->
 	    Rs1 = erase(P, Bef, Aft, Rs0),
 	    Rs = redraw(P, Bef, Aft, Rs1),
@@ -157,7 +157,7 @@ edit([], P, L, {blink,N}, Rs) ->
 edit([], P, L, Prefix, Rs) ->
     {more_chars,{line,P,L,Prefix},reverse(Rs)};
 edit(eof, _, {Bef,Aft}, _, Rs) ->
-    {done,reverse(Bef, Aft),[],reverse(Rs, [{move_rel,length(Aft)}])}.
+    {done,get_line(Bef, Aft),[],reverse(Rs, [{move_rel,cp_len(Aft)}])}.
 
 %% %% Assumes that arg is a string
 %% %% Horizontal whitespace only.
@@ -279,11 +279,21 @@ key_map(C, search) -> {insert_search,C};
 key_map(C, _) -> {undefined,C}.
 
 %% do_op(Action, Before, After, Requests)
-
-do_op({insert,C}, Bef, [], Rs) ->
-    {{[C|Bef],[]},[{put_chars, unicode,[C]}|Rs]};
-do_op({insert,C}, Bef, Aft, Rs) ->
-    {{[C|Bef],Aft},[{insert_chars, unicode, [C]}|Rs]};
+%% Before and After are of lists of type string:grapheme_cluster()
+do_op({insert,C}, [], [], Rs) ->
+    {{[C],[]},[{put_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], [], Rs) ->
+    case string:to_graphemes([Bef,C]) of
+        [GC] -> {{[GC|Bef0],[]},[{put_chars, unicode,[C]}|Rs]};
+        _ -> {{[C,Bef|Bef0],[]},[{put_chars, unicode,[C]}|Rs]}
+    end;
+do_op({insert,C}, [], Aft, Rs) ->
+    {{[C],Aft},[{insert_chars, unicode,[C]}|Rs]};
+do_op({insert,C}, [Bef|Bef0], Aft, Rs) ->
+    case string:to_graphemes([Bef,C]) of
+        [GC] -> {{[GC|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]};
+        _ -> {{[C,Bef|Bef0],Aft},[{insert_chars, unicode,[C]}|Rs]}
+    end;
 %% Search mode prompt always looks like (search)`$TERMS': $RESULT.
 %% the {insert_search, _} handlings allow to share this implementation
 %% correctly with group.erl. This module provides $TERMS, and group.erl
@@ -299,13 +309,13 @@ do_op({insert_search, C}, Bef, [], Rs) ->
      [{insert_chars, unicode, [C]++Aft}, {delete_chars,-3} | Rs],
      search};
 do_op({insert_search, C}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[C|Bef],NAft},
      [{insert_chars, unicode, [C]++NAft}, {delete_chars,-Offset} | Rs],
      search};
 do_op({search, backward_delete_char}, [_|Bef], Aft, Rs) ->
-    Offset= length(Aft)+1,
+    Offset= cp_len(Aft)+1,
     NAft = "': ",
     {{Bef,NAft},
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -314,13 +324,13 @@ do_op({search, backward_delete_char}, [], _Aft, Rs) ->
     Aft="': ",
     {{[],Aft}, Rs, search};
 do_op({search, skip_up}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[$\^R|Bef],NAft}, % we insert ^R as a flag to whoever called us
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
      search};
 do_op({search, skip_down}, Bef, Aft, Rs) ->
-    Offset= length(Aft),
+    Offset= cp_len(Aft),
     NAft = "': ",
     {{[$\^S|Bef],NAft}, % we insert ^S as a flag to whoever called us
      [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs],
@@ -328,12 +338,12 @@ do_op({search, skip_down}, Bef, Aft, Rs) ->
 do_op({search, search_found}, _Bef, Aft, Rs) ->
     "': "++NAft = Aft,
     {{[],NAft},
-     [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+     [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
      search_found};
 do_op({search, search_quit}, _Bef, Aft, Rs) ->
     "': "++NAft = Aft,
     {{[],NAft},
-     [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs],
+     [{put_chars, unicode, "\n"}, {move_rel,-cp_len(Aft)} | Rs],
      search_quit};
 %% do blink after $$
 do_op({blink,C,M}, Bef=[$$,$$|_], Aft, Rs) ->
@@ -361,14 +371,16 @@ do_op(auto_blink, Bef, Aft, Rs) ->
 	N -> {blink,N+1,{Bef,Aft},
 	      [{move_rel,-(N+1)}|Rs]}
     end;
-do_op(forward_delete_char, Bef, [_|Aft], Rs) ->
-    {{Bef,Aft},[{delete_chars,1}|Rs]};
-do_op(backward_delete_char, [_|Bef], Aft, Rs) ->
-    {{Bef,Aft},[{delete_chars,-1}|Rs]};
+do_op(forward_delete_char, Bef, [GC|Aft], Rs) ->
+    {{Bef,Aft},[{delete_chars,gc_len(GC)}|Rs]};
+do_op(backward_delete_char, [GC|Bef], Aft, Rs) ->
+    {{Bef,Aft},[{delete_chars,-gc_len(GC)}|Rs]};
 do_op(transpose_char, [C1,C2|Bef], [], Rs) ->
-    {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-2}|Rs]};
+    Len = gc_len(C1)+gc_len(C2),
+    {{[C2,C1|Bef],[]},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
 do_op(transpose_char, [C2|Bef], [C1|Aft], Rs) ->
-    {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-1}|Rs]};
+    Len = gc_len(C2),
+    {{[C2,C1|Bef],Aft},[{put_chars, unicode,[C1,C2]},{move_rel,-Len}|Rs]};
 do_op(kill_word, Bef, Aft0, Rs) ->
     {Aft1,Kill0,N0} = over_non_word(Aft0, [], 0),
     {Aft,Kill,N} = over_word(Aft1, Kill0, N0),
@@ -381,7 +393,7 @@ do_op(backward_kill_word, Bef0, Aft, Rs) ->
     {{Bef,Aft},[{delete_chars,-N}|Rs]};
 do_op(kill_line, Bef, Aft, Rs) ->
     put(kill_buffer, Aft),
-    {{Bef,[]},[{delete_chars,length(Aft)}|Rs]};
+    {{Bef,[]},[{delete_chars,cp_len(Aft)}|Rs]};
 do_op(yank, Bef, [], Rs) ->
     Kill = get(kill_buffer),
     {{reverse(Kill, Bef),[]},[{put_chars, unicode,Kill}|Rs]};
@@ -389,9 +401,9 @@ do_op(yank, Bef, Aft, Rs) ->
     Kill = get(kill_buffer),
     {{reverse(Kill, Bef),Aft},[{insert_chars, unicode,Kill}|Rs]};
 do_op(forward_char, Bef, [C|Aft], Rs) ->
-    {{[C|Bef],Aft},[{move_rel,1}|Rs]};
+    {{[C|Bef],Aft},[{move_rel,gc_len(C)}|Rs]};
 do_op(backward_char, [C|Bef], Aft, Rs) ->
-    {{Bef,[C|Aft]},[{move_rel,-1}|Rs]};
+    {{Bef,[C|Aft]},[{move_rel,-gc_len(C)}|Rs]};
 do_op(forward_word, Bef0, Aft0, Rs) ->
     {Aft1,Bef1,N0} = over_non_word(Aft0, Bef0, 0),
     {Aft,Bef,N} = over_word(Aft1, Bef1, N0),
@@ -401,16 +413,16 @@ do_op(backward_word, Bef0, Aft0, Rs) ->
     {Bef,Aft,N} = over_word(Bef1, Aft1, N0),
     {{Bef,Aft},[{move_rel,-N}|Rs]};
 do_op(beginning_of_line, [C|Bef], Aft, Rs) ->
-    {{[],reverse(Bef, [C|Aft])},[{move_rel,-(length(Bef)+1)}|Rs]};
+    {{[],reverse(Bef, [C|Aft])},[{move_rel,-(cp_len(Bef)+1)}|Rs]};
 do_op(beginning_of_line, [], Aft, Rs) ->
     {{[],Aft},Rs};
 do_op(end_of_line, Bef, [C|Aft], Rs) ->
-    {{reverse(Aft, [C|Bef]),[]},[{move_rel,length(Aft)+1}|Rs]};
+    {{reverse(Aft, [C|Bef]),[]},[{move_rel,cp_len(Aft)+1}|Rs]};
 do_op(end_of_line, Bef, [], Rs) ->
     {{Bef,[]},Rs};
 do_op(ctlu, Bef, Aft, Rs) ->
     put(kill_buffer, reverse(Bef)),
-    {{[], Aft}, [{delete_chars, -length(Bef)} | Rs]};
+    {{[], Aft}, [{delete_chars, -cp_len(Bef)} | Rs]};
 do_op(beep, Bef, Aft, Rs) ->
     {{Bef,Aft},[beep|Rs]};
 do_op(_, Bef, Aft, Rs) ->
@@ -436,7 +448,7 @@ over_word(Cs, Stack, N) ->
 until_quote([$\'|Cs], Stack, N) ->
     {Cs, [$\'|Stack], N+1};
 until_quote([C|Cs], Stack, N) ->
-    until_quote(Cs, [C|Stack], N+1).
+    until_quote(Cs, [C|Stack], N+gc_len(C)).
 
 over_word1([$\'=C|Cs], Stack, N) ->
     until_quote(Cs, [C|Stack], N+1);
@@ -445,7 +457,7 @@ over_word1(Cs, Stack, N) ->
 
 over_word2([C|Cs], Stack, N) ->
     case word_char(C) of
-	true -> over_word2(Cs, [C|Stack], N+1);
+	true -> over_word2(Cs, [C|Stack], N+gc_len(C));
 	false -> {[C|Cs],Stack,N}
     end;
 over_word2([], Stack, N) when is_integer(N) ->
@@ -454,7 +466,7 @@ over_word2([], Stack, N) when is_integer(N) ->
 over_non_word([C|Cs], Stack, N) ->
     case word_char(C) of
 	true -> {[C|Cs],Stack,N};
-	false -> over_non_word(Cs, [C|Stack], N+1)
+	false -> over_non_word(Cs, [C|Stack], N+gc_len(C))
     end;
 over_non_word([], Stack, N) ->
     {[],Stack,N}.
@@ -465,6 +477,7 @@ word_char(C) when C >= $a, C =< $z -> true;
 word_char(C) when C >= $ß, C =< $ÿ, C =/= $÷ -> true;
 word_char(C) when C >= $0, C =< $9 -> true;
 word_char(C) when C =:= $_ -> true;
+word_char([_|_]) -> true; %% Is grapheme
 word_char(_) -> false.
 
 %% over_white(Chars, InitialStack, InitialCount) ->
@@ -488,8 +501,8 @@ over_paren(Chars, Paren, Match) ->
 
 over_paren([C,$$,$$|Cs], Paren, Match, D, N, L)  ->
     over_paren([C|Cs], Paren, Match, D, N+2, L);
-over_paren([_,$$|Cs], Paren, Match, D, N, L)  ->
-    over_paren(Cs, Paren, Match, D, N+2, L);
+over_paren([GC,$$|Cs], Paren, Match, D, N, L)  ->
+    over_paren(Cs, Paren, Match, D, N+1+gc_len(GC), L);
 over_paren([Match|_], _Paren, Match, 1, N, _) ->
     N;
 over_paren([Match|Cs], Paren, Match, D, N, [Match|L]) ->
@@ -518,8 +531,8 @@ over_paren([$[|_], _, _, _, _, _)  ->
 over_paren([${|_], _, _, _, _, _)  ->
     beep;
 
-over_paren([_|Cs], Paren, Match, D, N, L)  ->
-    over_paren(Cs, Paren, Match, D, N+1, L);
+over_paren([GC|Cs], Paren, Match, D, N, L)  ->
+    over_paren(Cs, Paren, Match, D, N+gc_len(GC), L);
 over_paren([], _, _, _, _, _) ->
     0.
 
@@ -529,8 +542,8 @@ over_paren_auto(Chars) ->
 
 over_paren_auto([C,$$,$$|Cs], D, N, L)  ->
     over_paren_auto([C|Cs], D, N+2, L);
-over_paren_auto([_,$$|Cs], D, N, L)  ->
-    over_paren_auto(Cs, D, N+2, L);
+over_paren_auto([GC,$$|Cs], D, N, L)  ->
+    over_paren_auto(Cs, D, N+1+gc_len(GC), L);
 
 over_paren_auto([$(|_], _, N, [])  ->
     {N, $)};
@@ -553,8 +566,8 @@ over_paren_auto([$[|Cs], D, N, [$[|L])  ->
 over_paren_auto([${|Cs], D, N, [${|L])  ->
     over_paren_auto(Cs, D, N+1, L);
 
-over_paren_auto([_|Cs], D, N, L)  ->
-    over_paren_auto(Cs, D, N+1, L);
+over_paren_auto([GC|Cs], D, N, L)  ->
+    over_paren_auto(Cs, D, N+gc_len(GC), L);
 over_paren_auto([], _, _, _) ->
     0.
 
@@ -574,28 +587,43 @@ erase_inp({line,_,{Bef,Aft},_}) ->
     reverse(erase([], Bef, Aft, [])).
 
 erase(Pbs, Bef, Aft, Rs) ->
-    [{delete_chars,-length(Pbs)-length(Bef)},{delete_chars,length(Aft)}|Rs].
+    [{delete_chars,-cp_len(Pbs)-cp_len(Bef)},{delete_chars,cp_len(Aft)}|Rs].
 
 redraw_line({line,Pbs,{Bef,Aft},_}) ->
     reverse(redraw(Pbs, Bef, Aft, [])).
 
 redraw(Pbs, Bef, Aft, Rs) ->
-    [{move_rel,-length(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
+    [{move_rel,-cp_len(Aft)},{put_chars, unicode,reverse(Bef, Aft)},{put_chars, unicode,Pbs}|Rs].
 
 length_before({line,Pbs,{Bef,_Aft},_}) ->
-    length(Pbs) + length(Bef).
+    cp_len(Pbs) + cp_len(Bef).
 
 length_after({line,_,{_Bef,Aft},_}) ->
-    length(Aft).
+    cp_len(Aft).
 
 prompt({line,Pbs,_,_}) ->
     Pbs.
 
 current_line({line,_,{Bef, Aft},_}) ->
-    reverse(Bef, Aft ++ "\n").
+    get_line(Bef, Aft ++ "\n").
 
 current_chars({line,_,{Bef,Aft},_}) ->
-    reverse(Bef, Aft).
+    get_line(Bef, Aft).
+
+get_line(Bef, Aft) ->
+    unicode:characters_to_list(reverse(Bef, Aft)).
+
+%% Grapheme length in codepoints
+gc_len(CP) when is_integer(CP) -> 1;
+gc_len(CPs) when is_list(CPs) -> length(CPs).
+
+%% String length in codepoints
+cp_len(Str) ->
+    cp_len(Str, 0).
+
+cp_len([GC|R], Len) ->
+    cp_len(R, Len + gc_len(GC));
+cp_len([], Len) -> Len.
 
 %% %% expand(CurrentBefore) ->
 %% %%	{yes,Expansion} | no
author	Dan Gudmundsson <[email protected]>	2017-06-02 14:25:02 +0200
committer	Dan Gudmundsson <[email protected]>	2017-08-16 10:03:28 +0200
commit	5f56b49c752a16ee28244981ca9b197ffd5fa691 (patch)
tree	17a9563e03ecb40ba4cfe96cea2b89209ed1875b
parent	b182febe36aa63eb8290f24ba4b7932673a9a9bc (diff)
download	otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.tar.gz otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.tar.bz2 otp-5f56b49c752a16ee28244981ca9b197ffd5fa691.zip