16 files changed, 1826 insertions, 252 deletions
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index ed3dfb342c..0864cfeff6 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -119,6 +119,7 @@ MODULES= \
 	sys \
 	timer \
 	unicode \
+	unicode_util \
 	win32reg \
 	zip
 
@@ -200,6 +201,9 @@ $(APP_TARGET): $(APP_SRC) ../vsn.mk
 $(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
 	$(vsn_verbose)sed -e 's;%VSN%;$(VSN);' $< > $@
 
+unicode_util.erl: ../uc_spec/*
+	escript ../uc_spec/gen_unicode_mod.escript
+
 # ----------------------------------------------------
 # Release Target
 # ---------------------------------------------------- 
diff --git a/lib/stdlib/src/erl_internal.erl b/lib/stdlib/src/erl_internal.erl
index 006e7946af..9a1b17fdb7 100644
--- a/lib/stdlib/src/erl_internal.erl
+++ b/lib/stdlib/src/erl_internal.erl
@@ -331,6 +331,8 @@ bif(list_to_float, 1) -> true;
 bif(list_to_integer, 1) -> true;
 bif(list_to_integer, 2) -> true;
 bif(list_to_pid, 1) -> true;
+bif(list_to_port, 1) -> true;
+bif(list_to_ref, 1) -> true;
 bif(list_to_tuple, 1) -> true;
 bif(load_module, 2) -> true;
 bif(make_ref, 0) -> true;
@@ -348,6 +350,7 @@ bif(nodes, 1) -> true;
 bif(now, 0) -> true;
 bif(open_port, 2) -> true;
 bif(pid_to_list, 1) -> true;
+bif(port_to_list, 1) -> true;
 bif(port_close, 1) -> true;
 bif(port_command, 2) -> true;
 bif(port_command, 3) -> true;
@@ -361,6 +364,7 @@ bif(process_info, 2) -> true;
 bif(processes, 0) -> true;
 bif(purge_module, 1) -> true;
 bif(put, 2) -> true;
+bif(ref_to_list, 1) -> true;
 bif(register, 2) -> true;
 bif(registered, 0) -> true;
 bif(round, 1) -> true;
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 0789f5dfb7..78b7a0e751 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -404,6 +404,10 @@ format_error({not_exported_opaque, {TypeName, Arity}}) ->
 format_error({underspecified_opaque, {TypeName, Arity}}) ->
     io_lib:format("opaque type ~w~s is underspecified and therefore meaningless",
                   [TypeName, gen_type_paren(Arity)]);
+format_error({bad_dialyzer_attribute,Term}) ->
+    io_lib:format("badly formed dialyzer attribute: ~w", [Term]);
+format_error({bad_dialyzer_option,Term}) ->
+    io_lib:format("unknown dialyzer warning option: ~w", [Term]);
 %% --- obsolete? unused? ---
 format_error({format_error, {Fmt, Args}}) ->
     io_lib:format(Fmt, Args).
@@ -796,8 +800,7 @@ attribute_state(Form, St) ->
 %%      State'
 %%  Allow for record, type and opaque type definitions and spec
 %%  declarations to be intersperced within function definitions.
-%%  Dialyzer attributes are also allowed everywhere, but are not
-%%  checked at all.
+%%  Dialyzer attributes are also allowed everywhere.
 
 function_state({attribute,L,record,{Name,Fields}}, St) ->
     record_def(L, Name, Fields, St);
@@ -883,7 +886,8 @@ post_traversal_check(Forms, St0) ->
     StD = check_on_load(StC),
     StE = check_unused_records(Forms, StD),
     StF = check_local_opaque_types(StE),
-    check_callback_information(StF).
+    StG = check_dialyzer_attribute(Forms, StF),
+    check_callback_information(StG).
 
 %% check_behaviour(State0) -> State
 %% Check that the behaviour attribute is valid.
@@ -3116,6 +3120,59 @@ check_local_opaque_types(St) ->
         end,
     dict:fold(FoldFun, St, Ts).
 
+check_dialyzer_attribute(Forms, St0) ->  % lint all -dialyzer attributes
+    Vals = [{L,V} ||
+               {attribute,L,dialyzer,Val} <- Forms,
+               V0 <- lists:flatten([Val]),  % one value or a deep list
+               V <- case V0 of
+                        {O,F} ->  % expand to all {Option,FA} pairs
+                            [{A,B} ||
+                                A <- lists:flatten([O]),
+                                B <- lists:flatten([F])];
+                        T -> [T]
+                    end],
+    {Wellformed, Bad} =  % wellformed: atom, or {atom,FA} with is_fa(FA)
+        lists:partition(fun ({_,{Option,FA}}) when is_atom(Option) ->
+                                is_fa(FA);
+                            ({_,Option}) when is_atom(Option) -> true;
+                            (_) -> false
+                        end, Vals),
+    St1 = foldl(fun ({L,Term}, St) ->  % one error per malformed value
+		  add_error(L, {bad_dialyzer_attribute,Term}, St)
+	  end, St0, Bad),
+    DefFunctions = (gb_sets:to_list(St0#lint.defined) -- pseudolocals()),
+    Fun = fun ({L,{Option,FA}}, St) ->  % option given for a function
+                  case is_function_dialyzer_option(Option) of
+                      true ->
+                          case lists:member(FA, DefFunctions) of
+                              true -> St;
+                              false ->  % FA is not defined in the module
+                                  add_error(L, {undefined_function,FA}, St)
+                          end;
+                      false ->
+                          add_error(L, {bad_dialyzer_option,Option}, St)
+                  end;
+              ({L,Option}, St) ->  % bare option
+                  case is_module_dialyzer_option(Option) of
+                      true -> St;
+                      false ->
+                          add_error(L, {bad_dialyzer_option,Option}, St)
+                  end
+          end,
+    foldl(Fun, St1, Wellformed).  % returns the updated lint state
+
+is_function_dialyzer_option(nowarn_function) -> true;  % function-only
+is_function_dialyzer_option(Option) ->  % else same as the module options
+    is_module_dialyzer_option(Option).
+
+is_module_dialyzer_option(Option) ->  % true iff Option is listed below
+    lists:member(Option,
+                 [no_return,no_unused,no_improper_lists,no_fun_app,
+                  no_match,no_opaque,no_fail_call,no_contracts,
+                  no_behaviours,no_undefined_callbacks,unmatched_returns,
+                  error_handling,race_conditions,no_missing_calls,
+                  specdiffs,overspecs,underspecs,unknown]).
+
 %% icrt_clauses(Clauses, In, ImportVarTable, State) ->
 %%      {UpdVt,State}.
 
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index 6068afb293..ee5e7a11bf 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -44,7 +44,7 @@
                 | {encoding, latin1 | unicode | utf8}).
 -type(options() :: hook_function() | [option()]).
 
--record(pp, {string_fun, char_fun}).
+-record(pp, {value_fun, string_fun, char_fun}).
 
 -record(options, {hook, encoding, opts}).
 
@@ -214,11 +214,15 @@ state(_Hook) ->
     state().
 
 state() ->
-    #pp{string_fun = fun io_lib:write_string_as_latin1/1,
+    Options = [{encoding,latin1}],  % given to io_lib_pretty by value_fun
+    #pp{value_fun  = fun(V) -> io_lib_pretty:print(V, Options) end,
+        string_fun = fun io_lib:write_string_as_latin1/1,
         char_fun   = fun io_lib:write_char_as_latin1/1}.
 
 unicode_state() ->
-    #pp{string_fun = fun io_lib:write_string/1,
+    Options = [{encoding,unicode}],  % given to io_lib_pretty by value_fun
+    #pp{value_fun  = fun(V) -> io_lib_pretty:print(V, Options) end,
+        string_fun = fun io_lib:write_string/1,
         char_fun   = fun io_lib:write_char/1}.
 
 encoding(Options) ->
@@ -253,31 +257,30 @@ lattribute({attribute,_Line,Name,Arg}, Opts) ->
 
 lattribute(module, {M,Vs}, _Opts) ->
     A = a0(),
-    attr("module",[{var,A,pname(M)},
-                   foldr(fun(V, C) -> {cons,A,{var,A,V},C}
-                         end, {nil,A}, Vs)]);
+    attr(module,[{var,A,pname(M)},
+                 foldr(fun(V, C) -> {cons,A,{var,A,V},C}
+                       end, {nil,A}, Vs)]);
 lattribute(module, M, _Opts) ->
-    attr("module", [{var,a0(),pname(M)}]);
+    attr(module, [{var,a0(),pname(M)}]);
 lattribute(export, Falist, _Opts) ->
-    call({var,a0(),"-export"}, [falist(Falist)], 0, options(none));
+    attrib(export, falist(Falist));
 lattribute(import, Name, _Opts) when is_list(Name) ->
-    attr("import", [{var,a0(),pname(Name)}]);
+    attr(import, [{var,a0(),pname(Name)}]);
 lattribute(import, {From,Falist}, _Opts) ->
-    attr("import",[{var,a0(),pname(From)},falist(Falist)]);
+    attrib(import, [leaf(pname(From)),falist(Falist)]);
 lattribute(export_type, Talist, _Opts) ->
-    call({var,a0(),"-export_type"}, [falist(Talist)], 0, options(none));
+    attrib(export_type, falist(Talist));
 lattribute(optional_callbacks, Falist, Opts) ->
-    ArgL = try falist(Falist)
-           catch _:_ -> abstract(Falist, Opts)
-           end,
-    call({var,a0(),"-optional_callbacks"}, [ArgL], 0, options(none));
+    try attrib(optional_callbacks, falist(Falist))
+    catch _:_ -> attr(optional_callbacks, [abstract(Falist, Opts)])
+    end;
 lattribute(file, {Name,Line}, _Opts) ->
-    attr("file", [{string,a0(),Name},{integer,a0(),Line}]);
+    attr(file, [{string,a0(),Name},{integer,a0(),Line}]);
 lattribute(record, {Name,Is}, Opts) ->
-    Nl = leaf(format("-record(~w,", [Name])),
+    Nl = [leaf("-record("),{atom,Name},$,],
     [{first,Nl,record_fields(Is, Opts)},$)];
 lattribute(Name, Arg, Options) ->
-    attr(write(Name), [abstract(Arg, Options)]).
+    attr(Name, [abstract(Arg, Options)]).
 
 abstract(Arg, #options{encoding = Encoding}) ->
     erl_parse:abstract(Arg, [{encoding,Encoding}]).
@@ -340,7 +343,7 @@ ltype({user_type,Line,T,Ts}, _) ->
 ltype({remote_type,Line,[M,F,Ts]}, _) ->
     simple_type({remote,Line,M,F}, Ts);
 ltype({atom,_,T}, _) ->
-    leaf(write(T));
+    {atom,T};
 ltype(E, P) ->
     lexpr(E, P, options(none)).
 
@@ -382,12 +385,12 @@ tuple_type(Ts, F) ->
 specattr(SpecKind, {FuncSpec,TypeSpecs}) ->
     Func = case FuncSpec of
                {F,_A} ->
-                   format("~w", [F]);
+                   {atom,F};
                {M,F,_A} ->
-                   format("~w:~w", [M, F])
+                   [{atom,M},$:,{atom,F}]
            end,
     {first,leaf(lists:concat(["-", SpecKind, " "])),
-     {list,[{first,leaf(Func),spec_clauses(TypeSpecs)}]}}.
+     {list,[{first,Func,spec_clauses(TypeSpecs)}]}}.
 
 spec_clauses(TypeSpecs) ->
     {prefer_nl,[$;],[sig_type(T) || T <- TypeSpecs]}.
@@ -429,7 +432,10 @@ ltypes(Ts, F, Prec) ->
     [F(T, Prec) || T <- Ts].
 
 attr(Name, Args) ->
-    call({var,a0(),format("-~s", [Name])}, Args, 0, options(none)).
+    {first,[$-,{atom,Name}],args(Args, options(none))}.
+
+attrib(Name, Args) ->  % "-" and the atom Name first, then a seq of Args
+    {first,[$-,{atom,Name}],[{seq,$(,$),[$,],Args}]}.
 
 pname(['' | As]) ->
     [$. | pname(As)];
@@ -441,10 +447,13 @@ pname(A) when is_atom(A) ->
     write(A).
 
 falist([]) ->
-    {nil,a0()};
-falist([{Name,Arity}|Falist]) ->
-    A = a0(),
-    {cons,A,{var,A,format("~w/~w", [Name,Arity])},falist(Falist)}.
+    [leaf("[]")];
+falist(Falist) ->
+    L = [begin
+             {Name,Arity} = Fa,  % badmatch unless Fa is a pair
+             [{atom,Name},leaf(format("/~w", [Arity]))]  % Name, "/Arity"
+         end || Fa <- Falist],
+    [{seq,$[,$],$,,L}].
 
 lfunction({function,_Line,Name,_Arity,Cs}, Opts) ->
     Cll = nl_clauses(fun (C, H) -> func_clause(Name, C, H) end, $;, Opts, Cs),
@@ -489,7 +498,7 @@ lexpr({var,_,V}, _, _) -> leaf(format("~ts", [V]));
 lexpr({char,_,C}, _, _) -> {char,C};
 lexpr({integer,_,N}, _, _) -> leaf(write(N));
 lexpr({float,_,F}, _, _) -> leaf(write(F));
-lexpr({atom,_,A}, _, _) -> leaf(write(A));
+lexpr({atom,_,A}, _, _) -> {atom,A};
 lexpr({string,_,S}, _, _) -> {string,S};
 lexpr({nil,_}, _, _) -> '[]';
 lexpr({cons,_,H,T}, _, Opts) ->
@@ -519,7 +528,7 @@ lexpr({record, _, Name, Fs}, Prec, Opts) ->
 lexpr({record_field, _, Rec, Name, F}, Prec, Opts) ->
     {L,P,R} = inop_prec('#'),
     Rl = lexpr(Rec, L, Opts),
-    Nl = leaf(format("#~w.", [Name])),
+    Nl = [$#,{atom,Name},$.],
     El = [Rl,Nl,lexpr(F, R, Opts)],
     maybe_paren(P, Prec, El);
 lexpr({record, _, Rec, Name, Fs}, Prec, Opts) ->
@@ -538,12 +547,12 @@ lexpr({record_field, _, Rec, F}, Prec, Opts) ->
     maybe_paren(P, Prec, El);
 lexpr({map, _, Fs}, Prec, Opts) ->
     {P,_R} = preop_prec('#'),
-    El = {first,leaf("#"),map_fields(Fs, Opts)},
+    El = {first,$#,map_fields(Fs, Opts)},
     maybe_paren(P, Prec, El);
 lexpr({map, _, Map, Fs}, Prec, Opts) ->
     {L,P,_R} = inop_prec('#'),
     Rl = lexpr(Map, L, Opts),
-    El = {first,[Rl,leaf("#")],map_fields(Fs, Opts)},
+    El = {first,[Rl,$#],map_fields(Fs, Opts)},
     maybe_paren(P, Prec, El);
 lexpr({block,_,Es}, _, Opts) ->
     {list,[{step,'begin',body(Es, Opts)},'end']};
@@ -563,13 +572,16 @@ lexpr({'receive',_,Cs,To,ToOpt}, _, Opts) ->
            {step,'after',Al},
            'end']};
 lexpr({'fun',_,{function,F,A}}, _Prec, _Opts) ->
-    leaf(format("fun ~w/~w", [F,A]));
-lexpr({'fun',_,{function,F,A},Extra}, _Prec, _Opts) ->
-    {force_nl,fun_info(Extra),leaf(format("fun ~w/~w", [F,A]))};
-lexpr({'fun',_,{function,M,F,A}}, _Prec, _Opts)
+    [leaf("fun "),{atom,F},leaf(format("/~w", [A]))];
+lexpr({'fun',L,{function,_,_}=Func,Extra}, Prec, Opts) ->
+    {force_nl,fun_info(Extra),lexpr({'fun',L,Func}, Prec, Opts)};
+lexpr({'fun',L,{function,M,F,A}}, Prec, Opts)
   when is_atom(M), is_atom(F), is_integer(A) ->
     %% For backward compatibility with pre-R15 abstract format.
-    leaf(format("fun ~w:~w/~w", [M,F,A]));
+    Mod = erl_parse:abstract(M),
+    Fun = erl_parse:abstract(F),
+    Arity = erl_parse:abstract(A),
+    lexpr({'fun',L,{function,Mod,Fun,Arity}}, Prec, Opts);
 lexpr({'fun',_,{function,M,F,A}}, _Prec, Opts) ->
     %% New format in R15.
     NameItem = lexpr(M, Opts),
@@ -660,7 +672,7 @@ lexpr({bin,_,Fs}, _, Opts) ->
     bit_grp(Fs, Opts);
 %% Special case for straight values.
 lexpr({value,_,Val}, _,_) ->
-    leaf(write(Val));
+    {value,Val};
 %% Now do the hook.
 lexpr(Other, _Precedence, #options{hook = none}) ->
     leaf(format("INVALID-FORM:~w:",[Other]));
@@ -676,7 +688,7 @@ call(Name, Args, Prec, Opts) ->
     maybe_paren(P, Prec, Item).
 
 fun_info(Extra) ->
-    leaf(format("% fun-info: ~w", [Extra])).
+    [leaf("% fun-info: "),{value,Extra}].
 
 %% BITS:
 
@@ -717,7 +729,7 @@ bit_elem_type(T) ->
 %% end of BITS
 
 record_name(Name) ->
-    leaf(format("#~w", [Name])).
+    [$#,{atom,Name}].
 
 record_fields(Fs, Opts) ->
     tuple(Fs, fun record_field/2, Opts).
@@ -919,8 +931,10 @@ frmt(Item, I, PP) ->
 %%% - {force_nl,ExtraInfo,I}: fun-info (a comment) forces linebreak before I.
 %%% - {prefer_nl,Sep,IPs}: forces linebreak between Is unlesss negative
 %%%   indentation.
+%%% - {atom,A}: an atom
 %%% - {char,C}: a character
 %%% - {string,S}: a string.
+%%% - {value,T}: a term.
 %%% - {hook,...}, {ehook,...}: hook expressions.
 %%%
 %%% list, first, seq, force_nl, and prefer_nl all accept IPs, where each
@@ -981,6 +995,10 @@ f({prefer_nl,Sep,LItems}, I0, ST, WT, PP) ->
         true ->
             {insert_newlines(CharsSize2L, I0, ST),nsz(lists:last(Sizes), I0)}
     end;
+f({value,V}, I, ST, WT, PP) ->
+    f(write_a_value(V, PP), I, ST, WT, PP);
+f({atom,A}, I, ST, WT, PP) ->
+    f(write_an_atom(A, PP), I, ST, WT, PP);
 f({char,C}, I, ST, WT, PP) ->
     f(write_a_char(C, PP), I, ST, WT, PP);
 f({string,S}, I, ST, WT, PP) ->
@@ -1119,6 +1137,12 @@ has_nl([C|Cs]) ->
 has_nl([]) ->
     false.
 
+write_a_value(V, PP) ->  % leaf item for the term V
+    flat_leaf(write_value(V, PP)).
+
+write_an_atom(A, PP) ->  % leaf item for the atom A
+    flat_leaf(write_atom(A, PP)).
+
 write_a_char(C, PP) ->
     flat_leaf(write_char(C, PP)).
 
@@ -1135,7 +1159,7 @@ write_a_string([], _N, _Len, _PP) ->
 write_a_string(S, N, Len, PP) ->
     SS = string:sub_string(S, 1, N),
     Sl = write_string(SS, PP),
-    case (length(Sl) > Len) and (N > ?MIN_SUBSTRING) of
+    case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of
         true ->
             write_a_string(S, N-1, Len, PP);
         false ->
@@ -1147,11 +1171,17 @@ flat_leaf(S) ->
     L = lists:flatten(S),
     {leaf,length(L),L}.
 
+write_value(V, PP) ->  % print V with the value_fun stored in PP
+    (PP#pp.value_fun)(V).
+
+write_atom(A, PP) ->  % atoms are printed with value_fun as well
+    (PP#pp.value_fun)(A).
+
 write_string(S, PP) ->
-    lists:flatten((PP#pp.string_fun)(S)).
+    (PP#pp.string_fun)(S).
 
 write_char(C, PP) ->
-    lists:flatten((PP#pp.char_fun)(C)).
+    (PP#pp.char_fun)(C).
 
 %%
 %% Utilities
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index a54df939bf..168ea4002c 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -69,6 +69,8 @@ format_error(invalid_gnu_1_0_sparsemap) ->
     "Invalid GNU sparse map (version 1.0)";
 format_error({invalid_gnu_0_1_sparsemap, Format}) ->
     lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
+format_error(unsafe_path) ->
+    "The path points above the current working directory";
 format_error({Name,Reason}) ->
     lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
 format_error(Atom) when is_atom(Atom) ->
@@ -120,26 +122,38 @@ do_extract(Handle, Opts) when is_list(Opts) ->
 
 extract1(eof, Reader, _, Acc) when is_list(Acc) ->
     {ok, {ok, lists:reverse(Acc)}, Reader};
+extract1(eof, Reader, _, leading_slash) ->  % Acc value set by extract2/4
+    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
+    {ok, ok, Reader};  % the marker itself is not returned
 extract1(eof, Reader, _, Acc) ->
     {ok, Acc, Reader};
-extract1(#tar_header{name=Name,size=Size}=Header, Reader, Opts, Acc) ->
+extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
     case check_extract(Name, Opts) of
         true ->
-            case do_read(Reader, Size) of
-                {ok, Bin, Reader2} ->
-                    case write_extracted_element(Header, Bin, Opts) of
-                        ok ->
-                            {ok, Acc, Reader2};
-                        {ok, NameBin} when is_list(Acc) ->
-                            {ok, [NameBin | Acc], Reader2};
-                        {error, _} = Err ->
-                            throw(Err)
-                    end;
+            case do_read(Reader0, Size) of
+                {ok, Bin, Reader1} ->
+                    Acc = extract2(Header, Bin, Opts, Acc0),  % throws on error
+                    {ok, Acc, Reader1};
                 {error, _} = Err ->
                     throw(Err)
             end;
         false ->
-            {ok, Acc, skip_file(Reader)}
+            {ok, Acc0, skip_file(Reader0)}
+    end.
+
+extract2(Header, Bin, Opts, Acc) ->  % write one member; returns new Acc
+    case write_extracted_element(Header, Bin, Opts) of
+        ok ->
+            case Header of
+                #tar_header{name="/"++_} ->
+                    leading_slash;  % extract1/4 logs a message at eof
+                #tar_header{} ->
+                    Acc
+            end;
+        {ok, NameBin} when is_list(Acc) ->
+            [NameBin | Acc];  % collect the result in Acc
+        {error, _} = Err ->
+            throw(Err)
     end.
 
 %% Checks if the file Name should be extracted.
@@ -1052,14 +1066,11 @@ unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
 
 
 safe_join_path([], Name) ->
-    strip_slashes(Name, both);
+    filename:join([Name]);
 safe_join_path(Prefix, []) ->
-    strip_slashes(Prefix, right);
+    filename:join([Prefix]);
 safe_join_path(Prefix, Name) ->
-    filename:join(strip_slashes(Prefix, right), strip_slashes(Name, both)).
-
-strip_slashes(Str, Direction) ->
-    string:strip(Str, Direction, $/).
+    filename:join(Prefix, Name).
 
 new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
     true = validate_sparse_entries(Sparsemap, RealSize),
@@ -1557,7 +1568,7 @@ write_extracted_element(#tar_header{name=Name,typeflag=Type},
             ok
     end;
 write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
-    Name1 = filename:absname(Name0, Opts#read_opts.cwd),
+    Name1 = make_safe_path(Name0, Opts),
     Created =
         case typeflag(Header#tar_header.typeflag) of
             regular ->
@@ -1585,6 +1596,16 @@ write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
         not_written -> ok
     end.
 
+make_safe_path([$/|Path], Opts) ->  % drop every leading slash
+    make_safe_path(Path, Opts);
+make_safe_path(Path, #read_opts{cwd=Cwd}) ->
+    case filename:safe_relative_path(Path) of
+        unsafe ->  % format_error/1 has a text for unsafe_path
+            throw({error,{Path,unsafe_path}});
+        SafePath ->
+            filename:absname(SafePath, Cwd)  % absolute, relative to Cwd
+    end.
+
 create_regular(Name, NameInArchive, Bin, Opts) ->
     case write_extracted_file(Name, Bin, Opts) of
         not_written ->
diff --git a/lib/stdlib/src/gen_event.erl b/lib/stdlib/src/gen_event.erl
index 0aebf1bdc5..0c50b2aa08 100644
--- a/lib/stdlib/src/gen_event.erl
+++ b/lib/stdlib/src/gen_event.erl
@@ -109,7 +109,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
+-optional_callbacks(
+    [handle_info/2, terminate/2, code_change/3, format_status/2]).
 
 %%---------------------------------------------------------------------------
 
@@ -577,6 +578,10 @@ server_update(Handler1, Func, Event, SName) ->
 	    do_terminate(Mod1, Handler1, remove_handler, State,
 			 remove, SName, normal),
 	    no;
+        {'EXIT', {undef, [{Mod1, handle_info, [_,_], _}|_]}} ->
+            error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                     "** Unhandled message: ~p~n", [Mod1, Event]),
+           {ok, Handler1};
 	Other ->
 	    do_terminate(Mod1, Handler1, {error, Other}, State,
 			 Event, SName, crash),
@@ -698,9 +703,15 @@ server_call_update(Handler1, Query, SName) ->
     end.
 
 do_terminate(Mod, Handler, Args, State, LastIn, SName, Reason) ->
-    Res = (catch Mod:terminate(Args, State)),
-    report_terminate(Handler, Reason, Args, State, LastIn, SName, Res),
-    Res.
+    case erlang:function_exported(Mod, terminate, 2) of
+	true ->
+	    Res = (catch Mod:terminate(Args, State)),
+	    report_terminate(Handler, Reason, Args, State, LastIn, SName, Res),
+	    Res;
+	false ->  % terminate/2 is optional: report as if it returned ok
+	    report_terminate(Handler, Reason, Args, State, LastIn, SName, ok),
+	    ok
+    end.
 
 report_terminate(Handler, crash, {error, Why}, State, LastIn, SName, _) ->
     report_terminate(Handler, Why, State, LastIn, SName);
diff --git a/lib/stdlib/src/gen_fsm.erl b/lib/stdlib/src/gen_fsm.erl
index e925a75fe8..39a8fd42fe 100644
--- a/lib/stdlib/src/gen_fsm.erl
+++ b/lib/stdlib/src/gen_fsm.erl
@@ -169,7 +169,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
+-optional_callbacks(
+    [handle_info/3, terminate/3, code_change/4, format_status/2]).
 
 %%% ---------------------------------------------------
 %%% Starts a generic state machine.
@@ -466,6 +467,10 @@ handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time) -> %No debug her
 					   StateName, NStateData, [])),
 	    reply(From, Reply),
 	    exit(R);
+        {'EXIT', {undef, [{Mod, handle_info, [_,_,_], _}|_]}} ->
+            error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                     "** Unhandled message: ~p~n", [Mod, Msg]),
+            loop(Parent, Name, StateName, StateData, Mod, infinity, []);
 	{'EXIT', What} ->
 	    terminate(What, Name, Msg, Mod, StateName, StateData, []);
 	Reply ->
@@ -540,24 +545,30 @@ reply(Name, {To, Tag}, Reply, Debug, StateName) ->
 -spec terminate(term(), _, _, atom(), _, _, _) -> no_return().
 
 terminate(Reason, Name, Msg, Mod, StateName, StateData, Debug) ->
-    case catch Mod:terminate(Reason, StateName, StateData) of
-	{'EXIT', R} ->
-	    FmtStateData = format_status(terminate, Mod, get(), StateData),
-	    error_info(R, Name, Msg, StateName, FmtStateData, Debug),
-	    exit(R);
-	_ ->
-	    case Reason of
-		normal ->
-		    exit(normal);
-		shutdown ->
-		    exit(shutdown);
- 		{shutdown,_}=Shutdown ->
- 		    exit(Shutdown);
+    case erlang:function_exported(Mod, terminate, 3) of
+	true ->
+	    case catch Mod:terminate(Reason, StateName, StateData) of
+		{'EXIT', R} ->
+		    FmtStateData = format_status(terminate, Mod, get(), StateData),
+		    error_info(R, Name, Msg, StateName, FmtStateData, Debug),
+		    exit(R);
 		_ ->
-                    FmtStateData = format_status(terminate, Mod, get(), StateData),
-		    error_info(Reason,Name,Msg,StateName,FmtStateData,Debug),
-		    exit(Reason)
-	    end
+		    ok
+	    end;
+	false ->  % terminate/3 is optional; nothing to call
+	    ok
+    end,
+    case Reason of
+	normal ->
+	    exit(normal);
+	shutdown ->
+	    exit(shutdown);
+ 	{shutdown,_}=Shutdown ->
+ 	    exit(Shutdown);
+	_ ->  % any other reason is reported with error_info/6
+	    FmtStateData1 = format_status(terminate, Mod, get(), StateData),
+	    error_info(Reason,Name,Msg,StateName,FmtStateData1,Debug),
+	    exit(Reason)
     end.
 
 error_info(Reason, Name, Msg, StateName, StateData, Debug) ->
diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl
index 284810c971..8504af86f8 100644
--- a/lib/stdlib/src/gen_server.erl
+++ b/lib/stdlib/src/gen_server.erl
@@ -146,8 +146,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
-
+-optional_callbacks(
+    [handle_info/2, terminate/2, code_change/3, format_status/2]).
 
 %%%  -----------------------------------------------------------------
 %%% Starts a generic server.
@@ -602,6 +602,17 @@ try_dispatch(Mod, Func, Msg, State) ->
     catch
 	throw:R ->
 	    {ok, R};
+        error:undef = R when Func == handle_info ->
+            case erlang:function_exported(Mod, handle_info, 2) of
+                false ->
+                    error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                             "** Unhandled message: ~p~n",
+                                             [Mod, Msg]),
+                    {ok, {noreply, State}};
+                true ->
+                    Stacktrace = erlang:get_stacktrace(),
+                    {'EXIT', {R, Stacktrace}, {R, Stacktrace}}
+            end;
 	error:R ->
 	    Stacktrace = erlang:get_stacktrace(),
 	    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
@@ -625,17 +636,22 @@ try_handle_call(Mod, Msg, From, State) ->
     end.
 
 try_terminate(Mod, Reason, State) ->
-    try
-	{ok, Mod:terminate(Reason, State)}
-    catch
-	throw:R ->
-	    {ok, R};
-	error:R ->
-	    Stacktrace = erlang:get_stacktrace(),
-	    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
-	exit:R ->
-	    Stacktrace = erlang:get_stacktrace(),
-	    {'EXIT', R, {R, Stacktrace}}
+    case erlang:function_exported(Mod, terminate, 2) of
+	true ->
+	    try
+		{ok, Mod:terminate(Reason, State)}
+	    catch
+		throw:R ->
+		    {ok, R};
+		error:R ->
+		    Stacktrace = erlang:get_stacktrace(),
+		    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
+		exit:R ->
+		    Stacktrace = erlang:get_stacktrace(),
+		    {'EXIT', R, {R, Stacktrace}}
+	   end;
+	false ->  % no terminate/2 exported: same as a callback returning ok
+	    {ok, ok}
     end.
 
 
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index a91143a764..28e5007e5a 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -68,8 +68,8 @@
 -export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1,
          write_latin1_string/2, write_char/1, write_latin1_char/1]).
 
--export([write_string_as_latin1/1, write_string_as_latin1/2,
-         write_char_as_latin1/1]).
+-export([write_atom_as_latin1/1, write_string_as_latin1/1,
+         write_string_as_latin1/2, write_char_as_latin1/1]).
 
 -export([quote_atom/2, char_list/1, latin1_char_list/1,
 	 deep_char_list/1, deep_latin1_char_list/1,
@@ -344,6 +344,11 @@ write_binary_body(B, _D) ->
     <<X:L>> = B,
     [integer_to_list(X),$:,integer_to_list(L)].
 
+%%% There are two functions to write Unicode atoms:
+%%% - they both escape control characters < 160;
+%%% - write_atom() never escapes characters >= 160;
+%%% - write_atom_as_latin1() also escapes characters > 255.
+
 %% write_atom(Atom) -> [Char]
 %%  Generate the list of characters needed to print an atom.
 
@@ -351,17 +356,26 @@ write_binary_body(B, _D) ->
       Atom :: atom().
 
 write_atom(Atom) ->
+    write_possibly_quoted_atom(Atom, fun write_string/2).  % quote if needed
+
+-spec write_atom_as_latin1(Atom) -> latin1_string() when
+      Atom :: atom().
+
+write_atom_as_latin1(Atom) ->  % quoting done by write_string_as_latin1/2
+    write_possibly_quoted_atom(Atom, fun write_string_as_latin1/2).
+
+write_possibly_quoted_atom(Atom, PFun) ->  % PFun(Chars, Quote) does quoting
     Chars = atom_to_list(Atom),
     case quote_atom(Atom, Chars) of
 	true ->
-	    write_string(Chars, $');   %'
+            PFun(Chars, $');   %'
 	false ->
 	    Chars
     end.
 
 %% quote_atom(Atom, CharList)
 %%  Return 'true' if atom with chars in CharList needs to be quoted, else
-%%  return 'false'.
+%%  return 'false'. Notice that characters >= 160 are always quoted.
 
 -spec quote_atom(atom(), chars()) -> boolean().
 
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index aabccfc5d9..ff368d02da 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -105,6 +105,8 @@ print(_, _, _, 0, _M, _RF, _Enc, _Str) -> "...";
 print(Term, Col, Ll, D, M, RecDefFun, Enc, Str) when Col =< 0 ->
     %% ensure Col is at least 1
     print(Term, 1, Ll, D, M, RecDefFun, Enc, Str);
+print(Atom, _Col, _Ll, _D, _M, _RF, Enc, _Str) when is_atom(Atom) ->
+    write_atom(Atom, Enc);
 print(Term, Col, Ll, D, M0, RecDefFun, Enc, Str) when is_tuple(Term);
                                                       is_list(Term);
                                                       is_map(Term);
@@ -407,6 +409,9 @@ print_length({}, _D, _RF, _Enc, _Str) ->
     {"{}", 2};
 print_length(#{}=M, _D, _RF, _Enc, _Str) when map_size(M) =:= 0 ->
     {"#{}", 3};
+print_length(Atom, _D, _RF, Enc, _Str) when is_atom(Atom) ->
+    S = write_atom(Atom, Enc),
+    {S, lists:flatlength(S)};
 print_length(List, D, RF, Enc, Str) when is_list(List) ->
     %% only flat lists are "printable"
     case Str andalso printable_list(List, D, Enc) of
@@ -500,7 +505,7 @@ print_length_tuple(Tuple, D, RF, Enc, Str) ->
 print_length_record(_Tuple, 1, _RF, _RDefs, _Enc, _Str) ->
     {"{...}", 5};
 print_length_record(Tuple, D, RF, RDefs, Enc, Str) ->
-    Name = [$# | io_lib:write_atom(element(1, Tuple))],
+    Name = [$# | write_atom(element(1, Tuple), Enc)],
     NameL = length(Name),
     Elements = tl(tuple_to_list(Tuple)),
     L = print_length_fields(RDefs, D - 1, Elements, RF, Enc, Str),
@@ -515,7 +520,7 @@ print_length_fields([Def | Defs], D, [E | Es], RF, Enc, Str) ->
      print_length_fields(Defs, D - 1, Es, RF, Enc, Str)].
 
 print_length_field(Def, D, E, RF, Enc, Str) ->
-    Name = io_lib:write_atom(Def),
+    Name = write_atom(Def, Enc),
     {S, L} = print_length(E, D, RF, Enc, Str),
     NameL = length(Name) + 3,
     {{field, Name, NameL, {S, L}}, NameL + L}.
@@ -664,6 +669,11 @@ printable_char(C,unicode) ->
     C > 16#DFFF andalso C < 16#FFFE orelse
     C > 16#FFFF andalso C =< 16#10FFFF.
 
+write_atom(A, latin1) ->  % pick the io_lib atom writer for the encoding
+    io_lib:write_atom_as_latin1(A);
+write_atom(A, _Uni) ->  % any other encoding value
+    io_lib:write_atom(A).
+
 write_string(S, latin1) ->
     io_lib:write_latin1_string(S, $"); %"
 write_string(S, _Uni) ->
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 2a130dcb43..7a8a5e6d4a 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -31,7 +31,7 @@
 	 export_seed/0, export_seed_s/1,
          uniform/0, uniform/1, uniform_s/1, uniform_s/2,
          jump/0, jump/1,
-	 normal/0, normal_s/1
+	     normal/0, normal/2, normal_s/1, normal_s/3
 	]).
 
 -compile({inline, [exs64_next/1, exsplus_next/1,
@@ -358,6 +358,13 @@ normal() ->
     _ = seed_put(Seed),
     X.
 
+%% normal/2: returns a random float with N(μ, σ²) normal distribution,
+%% updating the state in the process dictionary. Variance must be > 0.
+
+-spec normal(Mean :: number(), Variance :: number()) -> float().
+normal(Mean, Variance) when Variance > 0 ->  % same guard as normal_s/3
+    Mean + (math:sqrt(Variance) * normal()).
+
 %% normal_s/1: returns a random float with standard normal distribution
 %% The Ziggurat Method for generating random variables - Marsaglia and Tsang
 %% Paper and reference code: http://www.jstatsoft.org/v05/i08/
@@ -378,6 +385,13 @@ normal_s(State0) ->
 	false -> normal_s(Idx, Sign, -X, State)
     end.
 
+%% normal_s/3: returns {X, NewState}, X drawn from N(μ, σ²); σ² =:= 0 gives float(Mean)
+
+-spec normal_s(Mean :: number(), Variance :: number(), state()) -> {float(), NewS :: state()}.
+normal_s(Mean, Variance, State0) when Variance >= 0 ->
+    {X, State} = normal_s(State0),
+    {Mean + (math:sqrt(Variance) * X), State}.
+
 %% =====================================================================
 %% Internal functions
 
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 52d3c35608..28aab7b590 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,7 +33,12 @@
 
 %%% BIFs
 
--export([compile/1, compile/2, run/2, run/3, inspect/2]).
+-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
+
+-spec version() -> binary().
+
+version() ->
+    erlang:nif_error(undef).
 
 -spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when
       Regexp :: iodata(),
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index 28f37ef8bf..961f5f8a30 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -967,10 +967,11 @@ local_func(f, [{var,_,Name}], Bs, _Shell, _RT, _Lf, _Ef) ->
     {value,ok,erl_eval:del_binding(Name, Bs)};
 local_func(f, [_Other], _Bs, _Shell, _RT, _Lf, _Ef) ->
     erlang:raise(error, function_clause, [{shell,f,1}]);
-local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
+local_func(rd, [{atom,_,RecName0},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
     RecDef = expand_value(RecDef0),
     RDs = lists:flatten(erl_pp:expr(RecDef)),
-    Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]),
+    RecName = io_lib:write_atom_as_latin1(RecName0),
+    Attr = lists:concat(["-record(", RecName, ",", RDs, ")."]),
     {ok, Tokens, _} = erl_scan:string(Attr),
     case erl_parse:parse_form(Tokens) of
         {ok,AttrForm} ->
@@ -1417,9 +1418,11 @@ columns() ->
         {ok,N} -> N;
         _ -> 80
     end.
+
 encoding() ->
     [{encoding, Encoding}] = enc(),
     Encoding.
+
 enc() ->
     case lists:keyfind(encoding, 1, io:getopts()) of
 	false -> [{encoding,latin1}]; % should never happen
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 82ab484ea6..c8900d74e8 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -99,6 +99,7 @@
 	     sys,
 	     timer,
 	     unicode,
+             unicode_util,
 	     win32reg,
 	     zip]},
   {registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index c659db78bd..a0fa60fbc7 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -17,43 +17,1322 @@
 %% 
 %% %CopyrightEnd%
 %%
+%% A string library that works on grapheme clusters, with the exception
+%% of codepoints of class 'prepend' and non modern (or decomposed) Hangul.
+%% If these codepoints appear, functions like 'find/2' may return a string
+%% which starts inside a grapheme cluster.
+%% These exceptions are made because these codepoint classes are
+%% seldom used and require that we are able to look at previous codepoints
+%% in the stream, which makes them hard to implement effectively.
+%%
+%% GC (grapheme cluster) implies that the length of string 'ß↑e̊' is 3 though
+%% it is represented by the codepoints [223,8593,101,778] or the
+%% utf8 binary <<195,159,226,134,145,101,204,138>>
+%%
+%% And that searching for strings or graphemes finds the correct positions:
+%%
+%% find("eeeee̊eee", "e̊") -> "e̊ee"
+%% find("1£4e̊abcdef", "e") -> "ef"
+%%
+%% Most functions expect all input to be normalized to one form,
+%% see unicode:characters_to_nfc and unicode:characters_to_nfd functions.
+%% When appending strings no checking is done to verify that the
+%% result is valid unicode strings.
+%%
+%% The functions may crash for invalid utf-8 input.
+%%
+%% The return value should be kept consistent when the return type is
+%% unicode:chardata(), i.e. binary input => binary output,
+%% list input => list output, mixed input => mixed output.
+%%
 -module(string).
 
--export([len/1,equal/2,concat/2,chr/2,rchr/2,str/2,rstr/2,
-	 span/2,cspan/2,substr/2,substr/3,tokens/2,chars/2,chars/3]).
+-export([is_empty/1, length/1, to_graphemes/1,
+         reverse/1,
+         equal/2, equal/3, equal/4,
+         slice/2, slice/3,
+         pad/2, pad/3, pad/4, trim/1, trim/2, trim/3, chomp/1,
+         take/2, take/3, take/4,
+         lexemes/2, nth_lexeme/3,
+         uppercase/1, lowercase/1, titlecase/1,casefold/1,
+         prefix/2,
+         split/2,split/3,replace/3,replace/4,
+         find/2,find/3,
+         next_codepoint/1, next_grapheme/1
+        ]).
+
+-export([to_float/1, to_integer/1]).
+
+%% Old (will be deprecated) lists/string API kept for backwards compatibility
+-export([len/1, concat/2, % equal/2, (extended in the new api)
+         chr/2,rchr/2,str/2,rstr/2,
+	 span/2,cspan/2,substr/2,substr/3, tokens/2,
+         chars/2,chars/3]).
 -export([copies/2,words/1,words/2,strip/1,strip/2,strip/3,
 	 sub_word/2,sub_word/3,left/2,left/3,right/2,right/3,
 	 sub_string/2,sub_string/3,centre/2,centre/3, join/2]).
 -export([to_upper/1, to_lower/1]).
+%%
+-import(lists,[member/2]).
 
--import(lists,[reverse/1,member/2]).
+-compile({no_auto_import,[length/1]}).
 
-%%---------------------------------------------------------------------------
+-export_type([grapheme_cluster/0]).
 
-%%% BIFs
+-type grapheme_cluster() :: char() | [char()].
+-type direction() :: 'leading' | 'trailing'.
 
--export([to_float/1, to_integer/1]).
+-dialyzer({no_improper_lists, stack/2}).
+%%% Internal (undocumented) BIFs; should not be used outside of this module.
+%%% May be removed.
+-export([list_to_float/1, list_to_integer/1]).
 
--spec to_float(String) -> {Float, Rest} | {error, Reason} when
+%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
+-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
       String :: string(),
       Float :: float(),
       Rest :: string(),
-      Reason :: no_float | not_a_list.
+      Reason :: 'no_float' | 'not_a_list'.
 
-to_float(_) ->
+list_to_float(_) ->
     erlang:nif_error(undef).
 
--spec to_integer(String) -> {Int, Rest} | {error, Reason} when
+-spec list_to_integer(String) -> {Int, Rest} | {'error', Reason} when
       String :: string(),
       Int :: integer(),
       Rest :: string(),
-      Reason :: no_integer | not_a_list.
+      Reason :: 'no_integer' | 'not_a_list'.
 
-to_integer(_) ->
+list_to_integer(_) ->
     erlang:nif_error(undef).
 
 %%% End of BIFs
 
+%% Check if string is the empty string
+-spec is_empty(String::unicode:chardata()) -> boolean().
+is_empty([]) -> true;
+is_empty(<<>>) -> true;
+is_empty([L|R]) -> is_empty(L) andalso is_empty(R);
+is_empty(_) -> false.
+
+%% Count the number of grapheme clusters in chardata
+-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(CD) ->
+    length_1(unicode_util:gc(CD), 0).
+
+%% Convert a string to a list of grapheme clusters
+-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
+to_graphemes(CD0) ->
+    case unicode_util:gc(CD0) of
+        [GC|CD] -> [GC|to_graphemes(CD)];
+        [] -> []
+    end.
+
+%% Compare two strings and return a boolean; assumes that the inputs are
+%% normalized to the same form, see unicode:characters_to_nfX_xxx(..)
+-spec equal(A, B) -> boolean() when
+      A::unicode:chardata(),
+      B::unicode:chardata().
+equal(A,B) when is_binary(A), is_binary(B) ->
+    A =:= B;
+equal(A,B) ->
+    equal_1(A,B).
+
+%% Compare two strings and return a boolean; assumes that the inputs are
+%% normalized to the same form, see unicode:characters_to_nfX_xxx(..);
+%% does casefold on the fly when IgnoreCase is true
+-spec equal(A, B, IgnoreCase) -> boolean() when
+      A::unicode:chardata(),
+      B::unicode:chardata(),
+      IgnoreCase :: boolean().
+equal(A, B, false) ->
+    equal(A,B);
+equal(A, B, true) ->
+    equal_nocase(A,B).
+
+%% Compare two strings return boolean
+%% if specified does casefold and normalization on the fly
+-spec equal(A, B, IgnoreCase, Norm) -> boolean() when
+      A :: unicode:chardata(),
+      B :: unicode:chardata(),
+      IgnoreCase :: boolean(),
+      Norm :: 'none' | 'nfc' | 'nfd' | 'nfkc' | 'nfkd'.
+equal(A, B, Case, none) ->
+    equal(A,B,Case);
+equal(A, B, false, Norm) ->
+    equal_norm(A, B, Norm);
+equal(A, B, true, Norm) ->
+    equal_norm_nocase(A, B, Norm).
+
+%% Reverse grapheme clusters
+-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(CD) ->
+    reverse_1(CD, []).
+
+%% Slice a string and return rest of string
+%% Note: counts grapheme_clusters
+-spec slice(String, Start) -> Slice when
+      String::unicode:chardata(),
+      Start :: non_neg_integer(),
+      Slice :: unicode:chardata().
+slice(CD, N) when is_integer(N), N >= 0 ->
+    slice_l(CD, N, is_binary(CD)).
+
+-spec slice(String, Start, Length) -> Slice when
+      String::unicode:chardata(),
+      Start :: non_neg_integer(),
+      Length :: 'infinity' | non_neg_integer(),
+      Slice :: unicode:chardata().
+slice(CD, N, Length)
+  when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
+    slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+slice(CD, N, infinity) ->
+    slice_l(CD, N, is_binary(CD));
+slice(CD, _, 0) ->
+    case is_binary(CD) of
+        true  -> <<>>;
+        false -> []
+    end.
+
+%% Pad a string to desired length
+-spec pad(String, Length) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer().
+pad(CD, Length) ->
+    pad(CD, Length, trailing, $\s).
+
+-spec pad(String, Length, Dir) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer(),
+      Dir :: direction() | 'both'.
+pad(CD, Length, Dir) ->
+    pad(CD, Length, Dir, $\s).
+
+-spec pad(String, Length, Dir, Char) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer(),
+      Dir :: direction() | 'both',
+      Char :: grapheme_cluster().
+pad(CD, Length, leading, Char) when is_integer(Length) ->
+    Len = length(CD),
+    [lists:duplicate(max(0, Length-Len), Char), CD];
+pad(CD, Length, trailing, Char) when is_integer(Length) ->
+    Len = length(CD),
+    [CD|lists:duplicate(max(0, Length-Len), Char)];
+pad(CD, Length, both, Char) when is_integer(Length) ->
+    Len = length(CD),
+    Size = max(0, Length-Len),
+    Pre = lists:duplicate(Size div 2, Char),
+    Post = case Size rem 2 of
+               1 -> [Char];
+               _ -> []
+           end,
+    [Pre, CD, Pre|Post].
+
+%% Trim whitespace (or the given separator grapheme clusters) in direction Dir
+-spec trim(String) -> unicode:chardata() when
+      String :: unicode:chardata().
+trim(Str) ->
+    trim(Str, both, unicode_util:whitespace()).
+
+-spec trim(String, Dir) -> unicode:chardata() when
+      String :: unicode:chardata(),
+      Dir :: direction() | 'both'.
+trim(Str, Dir) ->
+    trim(Str, Dir, unicode_util:whitespace()).
+
+-spec trim(String, Dir, Characters) -> unicode:chardata() when
+      String :: unicode:chardata(),
+      Dir :: direction() | 'both',
+      Characters :: [grapheme_cluster()].
+trim(Str, _, []) -> Str;
+trim(Str, leading, Sep) when is_list(Sep) ->
+    trim_l(Str, search_pattern(Sep));
+trim(Str, trailing, Sep) when is_list(Sep) ->
+    trim_t(Str, 0, search_pattern(Sep));
+trim(Str, both, Sep0) when is_list(Sep0) ->
+    Sep = search_pattern(Sep0),
+    trim_t(trim_l(Str,Sep), 0, Sep).
+
+%% Delete trailing newlines or \r\n
+-spec chomp(String::unicode:chardata()) -> unicode:chardata().
+chomp(Str) ->
+    trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+
+%% Split String into two parts where the leading part consists of Characters
+-spec take(String, Characters) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, Sep) ->
+    take(Str, Sep, false, leading).
+-spec take(String, Characters, Complement) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Complement::boolean(),
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, Sep, Complement) ->
+    take(Str, Sep, Complement, leading).
+-spec take(String, Characters, Complement, Dir) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Complement::boolean(),
+      Dir::direction(),
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, [], Complement, Dir) ->
+    Empty = case is_binary(Str) of true -> <<>>; false -> [] end,
+    case {Complement,Dir} of
+        {false, leading} -> {Empty, Str};
+        {false, trailing} -> {Str, Empty};
+        {true,  leading} -> {Str, Empty};
+        {true,  trailing} -> {Empty, Str}
+    end;
+take(Str, Sep0, false, leading) ->
+    Sep = search_pattern(Sep0),
+    take_l(Str, Sep, []);
+take(Str, Sep0, true, leading) ->
+    Sep = search_pattern(Sep0),
+    take_lc(Str, Sep, []);
+take(Str, Sep0, false, trailing) ->
+    Sep = search_pattern(Sep0),
+    take_t(Str, 0, Sep);
+take(Str, Sep0, true, trailing) ->
+    Sep = search_pattern(Sep0),
+    take_tc(Str, 0, Sep).
+
+%% Uppercase all chars in Str
+-spec uppercase(String::unicode:chardata()) -> unicode:chardata().
+uppercase(CD) when is_list(CD) ->
+    uppercase_list(CD);
+uppercase(CD) when is_binary(CD) ->
+    uppercase_bin(CD,<<>>).
+
+%% Lowercase all chars in Str
+-spec lowercase(String::unicode:chardata()) -> unicode:chardata().
+lowercase(CD) when is_list(CD) ->
+    lowercase_list(CD);
+lowercase(CD) when is_binary(CD) ->
+    lowercase_bin(CD,<<>>).
+
+%% Make a titlecase of the first char in Str
+-spec titlecase(String::unicode:chardata()) -> unicode:chardata().
+titlecase(CD) when is_list(CD) ->
+    case unicode_util:titlecase(CD) of
+        [GC|Tail] -> append(GC,Tail);
+        Empty -> Empty
+    end;
+titlecase(CD) when is_binary(CD) ->
+    case unicode_util:titlecase(CD) of
+        [CP|Chars] when is_integer(CP) -> <<CP/utf8,Chars/binary>>;
+        [CPs|Chars] ->
+            << << <<CP/utf8>> || CP <- CPs>>/binary, Chars/binary>>;
+        [] -> <<>>
+    end.
+
+%% Make a case-folded, comparable copy of Str; use it for equality tests only
+-spec casefold(String::unicode:chardata()) -> unicode:chardata().
+casefold(CD) when is_list(CD) ->
+    casefold_list(CD);
+casefold(CD) when is_binary(CD) ->
+    casefold_bin(CD,<<>>).
+
+-spec to_integer(String) -> {Int, Rest} | {'error', Reason} when
+      String :: unicode:chardata(),
+      Int :: integer(),
+      Rest :: unicode:chardata(),
+      Reason :: 'no_integer' | badarg.
+
+to_integer(String) ->
+    try take(String, "+-0123456789") of
+        {Head, Tail} ->
+            case is_empty(Head) of
+                true -> {error, no_integer};
+                false ->
+                    List = unicode:characters_to_list(Head),
+                    case string:list_to_integer(List) of
+                        {error, _} = Err -> Err;
+                        {Int, Rest} ->
+                            to_number(String, Int, Rest, List, Tail)
+                    end
+            end
+    catch _:_ -> {error, badarg}
+    end.
+
+-spec to_float(String) -> {Float, Rest} | {'error', Reason} when
+      String :: unicode:chardata(),
+      Float :: float(),
+      Rest :: unicode:chardata(),
+      Reason :: 'no_float' | 'badarg'.
+
+to_float(String) ->
+    try take(String, "+-0123456789eE.,") of
+        {Head, Tail} ->
+            case is_empty(Head) of
+                true -> {error, no_float};
+                false ->
+                    List = unicode:characters_to_list(Head),
+                    case string:list_to_float(List) of
+                        {error, _} = Err -> Err;
+                        {Float, Rest} ->
+                            to_number(String, Float, Rest, List, Tail)
+                    end
+            end
+    catch _:_ -> {error, badarg}
+    end.
+
+to_number(String, Number, Rest, List, _Tail) when is_binary(String) ->
+    BSz = length(List)-length(Rest),
+    <<_:BSz/binary, Cont/binary>> = String,
+    {Number, Cont};
+to_number(_, Number, Rest, _, Tail) ->
+    {Number, concat(Rest,Tail)}.
+
+%% Return the remaining string with prefix removed or else nomatch
+-spec prefix(String::unicode:chardata(), Prefix::unicode:chardata()) ->
+                    'nomatch' | unicode:chardata().
+prefix(Str, []) -> Str;
+prefix(Str, Prefix0) ->
+    Prefix = unicode:characters_to_list(Prefix0),
+    case prefix_1(Str, Prefix) of
+        [] when is_binary(Str) -> <<>>;
+        Res -> Res
+    end.
+
+%% split String with the first occurrence of SearchPattern, return list of splits
+-spec split(String, SearchPattern) -> [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata().
+split(String, SearchPattern) ->
+    split(String, SearchPattern, leading).
+
+%% split String with SearchPattern, return list of splits
+-spec split(String, SearchPattern, Where) -> [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Where :: direction() | 'all'.
+split(String, SearchPattern, Where) ->
+    case is_empty(SearchPattern) of
+        true -> [String];
+        false ->
+            SearchPatternCPs = unicode:characters_to_list(SearchPattern),
+            case split_1(String, SearchPatternCPs, 0, Where, [], []) of
+                {_Curr, []} -> [String];
+                {_Curr, Acc} when Where =:= trailing -> Acc;
+                {Curr, Acc} when Where =:= all -> lists:reverse([Curr|Acc]);
+                Acc when is_list(Acc) -> Acc
+            end
+    end.
+
+%% Replace the first SearchPattern in String with Replacement
+-spec replace(String, SearchPattern, Replacement) ->
+                     [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Replacement :: unicode:chardata().
+replace(String, SearchPattern, Replacement) ->
+    lists:join(Replacement, split(String, SearchPattern)).
+
+%% Replace Where SearchPattern in String with Replacement
+-spec replace(String, SearchPattern, Replacement, Where) ->
+                     [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Replacement :: unicode:chardata(),
+      Where :: direction() | 'all'.
+replace(String, SearchPattern, Replacement, Where) ->
+    lists:join(Replacement, split(String, SearchPattern, Where)).
+
+%% Split Str into a list of chardata separated by one of the grapheme
+%% clusters in Seps
+-spec lexemes(String::unicode:chardata(),
+              SeparatorList::[grapheme_cluster()]) ->
+                     [unicode:chardata()].
+lexemes([], _) -> [];
+lexemes(Str, Seps0) when is_list(Seps0) ->
+    Seps = search_pattern(Seps0),
+    lexemes_m(Str, Seps, []).
+
+%% Return lexeme number N (counting from 1) of String, split on SeparatorList
+-spec nth_lexeme(String, N, SeparatorList) -> unicode:chardata() when
+      String::unicode:chardata(),
+      N::pos_integer(),
+      SeparatorList::[grapheme_cluster()].
+nth_lexeme(Str, 1, []) -> Str;
+nth_lexeme(Str, N, Seps0) when is_list(Seps0), is_integer(N), N > 0 ->
+    Seps = search_pattern(Seps0),
+    nth_lexeme_m(Str, Seps, N).
+
+%% find first SearchPattern in String return rest of string
+-spec find(String, SearchPattern) -> unicode:chardata() | 'nomatch' when
+      String::unicode:chardata(),
+      SearchPattern::unicode:chardata().
+find(String, SearchPattern) ->
+    find(String, SearchPattern, leading).
+
+%% find SearchPattern in String (search in Dir direction) return rest of string
+-spec find(String, SearchPattern, Dir) -> unicode:chardata() | 'nomatch' when
+      String::unicode:chardata(),
+      SearchPattern::unicode:chardata(),
+      Dir::direction().
+find(String, "", _) -> String;
+find(String, <<>>, _) -> String;
+find(String, SearchPattern, leading) ->
+    find_l(String, unicode:characters_to_list(SearchPattern));
+find(String, SearchPattern, trailing) ->
+    find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
+
+%% Fetch first codepoint and return rest in tail
+-spec next_grapheme(String::unicode:chardata()) ->
+                           maybe_improper_list(grapheme_cluster(),unicode:chardata()).
+next_grapheme(CD) -> unicode_util:gc(CD).
+
+%% Fetch first grapheme cluster and return rest in tail
+-spec next_codepoint(String::unicode:chardata()) ->
+                            maybe_improper_list(char(),unicode:chardata()).
+next_codepoint(CD) -> unicode_util:cp(CD).
+
+%% Internals
+
+length_1([_|Rest], N) ->
+    length_1(unicode_util:gc(Rest), N+1);
+length_1([], N) ->
+    N.
+
+equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
+    A =:= B andalso equal_1(AR, BR);
+equal_1([], BR) -> is_empty(BR);
+equal_1(A0,B0) ->
+    case {unicode_util:cp(A0), unicode_util:cp(B0)} of
+        {[CP|A],[CP|B]} -> equal_1(A,B);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_nocase(A, A) -> true;
+equal_nocase(A0, B0) ->
+    case {unicode_util:cp(unicode_util:casefold(A0)),
+          unicode_util:cp(unicode_util:casefold(B0))} of
+        {[CP|A],[CP|B]} -> equal_nocase(A,B);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_norm(A, A, _Norm) -> true;
+equal_norm(A0, B0, Norm) ->
+    case {unicode_util:cp(unicode_util:Norm(A0)),
+          unicode_util:cp(unicode_util:Norm(B0))} of
+        {[CP|A],[CP|B]} -> equal_norm(A,B, Norm);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_norm_nocase(A, A, _Norm) -> true;
+equal_norm_nocase(A0, B0, Norm) ->
+    case {unicode_util:cp(unicode_util:casefold(unicode_util:Norm(A0))),
+          unicode_util:cp(unicode_util:casefold(unicode_util:Norm(B0)))} of
+        {[CP|A],[CP|B]} -> equal_norm_nocase(A,B, Norm);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+reverse_1(CD, Acc) ->
+    case unicode_util:gc(CD) of
+        [GC|Rest] -> reverse_1(Rest, [GC|Acc]);
+        [] -> Acc
+    end.
+
+slice_l(CD, N, Binary) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [_|Cont] -> slice_l(Cont, N-1, Binary);
+        [] when Binary -> <<>>;
+        [] -> []
+    end;
+slice_l(Cont, 0, Binary) ->
+    case is_empty(Cont) of
+        true when Binary -> <<>>;
+        _ -> Cont
+    end.
+
+slice_trail(CD, N) when is_list(CD) ->
+    slice_list(CD, N);
+slice_trail(CD, N) when is_binary(CD) ->
+    slice_bin(CD, N, CD).
+
+slice_list(CD, N) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [GC|Cont] -> append(GC, slice_list(Cont, N-1));
+        [] -> []
+    end;
+slice_list(_, 0) ->
+    [].
+
+slice_bin(CD, N, Orig) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [_|Cont] -> slice_bin(Cont, N-1, Orig);
+        [] -> Orig
+    end;
+slice_bin([], 0, Orig) ->
+    Orig;
+slice_bin(CD, 0, Orig) ->
+    Sz = byte_size(Orig) - byte_size(CD),
+    <<Keep:Sz/binary, _/binary>> = Orig,
+    Keep.
+
+uppercase_list(CPs0) ->
+    case unicode_util:uppercase(CPs0) of
+        [Char|CPs] -> append(Char,uppercase_list(CPs));
+        [] -> []
+    end.
+
+uppercase_bin(CPs0, Acc) ->
+    case unicode_util:uppercase(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            uppercase_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            uppercase_bin(CPs, <<Acc/binary,
+                                 << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+lowercase_list(CPs0) ->
+    case unicode_util:lowercase(CPs0) of
+        [Char|CPs] -> append(Char,lowercase_list(CPs));
+        [] -> []
+    end.
+
+lowercase_bin(CPs0, Acc) ->
+    case unicode_util:lowercase(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            lowercase_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            lowercase_bin(CPs, <<Acc/binary,
+                                 << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+casefold_list(CPs0) ->
+    case unicode_util:casefold(CPs0) of
+        [Char|CPs] -> append(Char, casefold_list(CPs));
+        [] -> []
+    end.
+
+casefold_bin(CPs0, Acc) ->
+    case unicode_util:casefold(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            casefold_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            casefold_bin(CPs, <<Acc/binary,
+                                << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+
+trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Sep) of
+        {nomatch, Cont} -> trim_l(Cont, Sep);
+        Keep -> Keep
+    end;
+trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true -> trim_l(Cs, Sep);
+                false -> Str
+            end;
+        [] -> []
+    end;
+trim_l(Bin, Sep) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Sep) of
+        {nomatch,_} -> <<>>;
+        [Keep] -> Keep
+    end.
+
+trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Cont0, Sep) of
+        {nomatch,_} ->
+            stack(Bin, trim_t(Cont0, 0, Sep));
+        [SepStart|Cont1] ->
+            case bin_search_inv(SepStart, Cont1, Sep) of
+                {nomatch, Cont} ->
+                    Tail = trim_t(Cont, 0, Sep),
+                    case is_empty(Tail) of
+                        true ->
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, _/binary>> = Bin,
+                            Keep;
+                        false ->
+                            Used = cp_prefix(Cont0, Cont),
+                            stack(Bin, stack(Used, Tail))
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    trim_t([Bin|Cont], KeepSz, Sep)
+            end
+    end;
+trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str),
+                    case lists:member(GC, GCs) of
+                        true ->
+                            Tail = trim_t(Cs1, 0, Sep),
+                            case is_empty(Tail) of
+                                true -> [];
+                                false -> append(GC,Tail)
+                            end;
+                        false ->
+                            append(GC,trim_t(Cs1, 0, Sep))
+                    end;
+                false ->
+                    append(CP,trim_t(Cs, 0, Sep))
+            end;
+        [] -> []
+    end;
+trim_t(Bin, N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Sep) of
+        {nomatch,_} -> Bin;
+        [SepStart] ->
+            case bin_search_inv(SepStart, [], Sep) of
+                {nomatch,_} ->
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Keep:KeepSz/binary, _/binary>> = Bin,
+                    Keep;
+                [NonSep] ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    trim_t(Bin, KeepSz, Sep)
+            end
+    end.
+
+take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Sep) of
+        {nomatch, Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+        [Bin1|_]=After when is_binary(Bin1) ->
+            First = byte_size(Bin) - byte_size(Bin1),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep,Acc), After}
+    end;
+take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true -> take_l(Cs, Sep, append(rev(C),Acc));
+                false -> {rev(Acc), Str}
+            end;
+        [] -> {rev(Acc), []}
+    end;
+take_l(Bin, Sep, Acc) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Sep) of
+        {nomatch,_} ->
+            {btoken(Bin, Acc), <<>>};
+        [After] ->
+            First = byte_size(Bin) - byte_size(After),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep, Acc), After}
+    end.
+
+take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Sep) of
+        {nomatch, Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+        [Bin1|_]=After when is_binary(Bin1) ->
+            First = byte_size(Bin) - byte_size(Bin1),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep,Acc), After}
+    end;
+take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                false -> take_lc(Cs, Sep, append(rev(C),Acc));
+                true  -> {rev(Acc), Str}
+            end;
+        [] -> {rev(Acc), []}
+    end;
+take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
+    case bin_search(Bin, [], Sep) of
+        {nomatch,_} ->
+            {btoken(Bin, Acc), <<>>};
+        [After] ->
+            First = byte_size(Bin) - byte_size(After),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep, Acc), After}
+    end.
+
+take_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> % chardata list headed by a binary
+    <<_:N/binary, Rest/binary>> = Bin, % skip the first N bytes of Bin
+    case bin_search(Rest, Cont0, Sep) of % look for the next separator
+        {nomatch,Cont} -> % none found; Cont is what the search left over
+            Used = cp_prefix(Cont0, Cont), % part of Cont0 that precedes Cont
+            {Head, Tail} = take_t(Cont, 0, Sep),
+            {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
+        [SepStart|Cont1] -> % SepStart begins at the separator that was found
+            case bin_search_inv(SepStart, Cont1, Sep) of % first non-separator after it
+                {nomatch, Cont} -> % only separators were seen up to Cont
+                    {Head, Tail} = take_t(Cont, 0, Sep),
+                    Used = cp_prefix(Cont0, Cont),
+                    case equal(Tail, Cont) of % equal/2 lives outside this chunk; presumably a comparison
+                        true -> % all of Cont is tail: the run from SepStart joins it
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, End/binary>> = Bin,
+                            {stack(Keep,Head), stack(stack(End,Used),Tail)};
+                        false -> % otherwise Bin and Used both belong to the head
+                            {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) -> % the run ended on a binary
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_t([Bin|Cont], KeepSz, Sep) % scan again from the offset of NonSep
+            end
+    end;
+take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> % list input, one code point at a time
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of % quick test on the code point only
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str), % then test the whole grapheme cluster
+                    case lists:member(GC, GCs) of
+                        true ->
+                            {Head, Tail} = take_t(Cs1, 0, Sep),
+                            case equal(Tail, Cs1) of
+                                true -> {Head, append(GC,Tail)}; % GC goes in front of the tail
+                                false -> {append(GC,Head), Tail} % GC goes in front of the head
+                            end;
+                        false ->
+                            {Head, Tail} = take_t(Cs, 0, Sep),
+                            {append(CP,Head), Tail}
+                    end;
+                false ->
+                    {Head, Tail} = take_t(Cs, 0, Sep),
+                    {append(CP,Head), Tail}
+            end;
+        [] -> {[],[]}
+    end;
+take_t(Bin, N, Sep) when is_binary(Bin) -> % plain binary: returns {Head, Tail} as two binaries
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Sep) of
+        {nomatch,_} -> {Bin, <<>>}; % no separator after offset N: empty tail
+        [SepStart] ->
+            case bin_search_inv(SepStart, [], Sep) of
+                {nomatch,_} -> % separators run to the end: split at SepStart
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Before:KeepSz/binary, End/binary>> = Bin,
+                    {Before, End};
+                [NonSep] -> % run ended early: keep scanning from NonSep
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_t(Bin, KeepSz, Sep)
+            end
+    end.
+
+take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) -> % like take_t/3 with the two searches swapped
+    <<_:N/binary, Rest/binary>> = Bin, % skip the first N bytes of Bin
+    case bin_search_inv(Rest, Cont0, Sep) of % look for the next non-separator
+        {nomatch,Cont} -> % none found; Cont is what the search left over
+            Used = cp_prefix(Cont0, Cont), % part of Cont0 that precedes Cont
+            {Head, Tail} = take_tc(Cont, 0, Sep),
+            {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
+        [SepStart|Cont1] -> % NOTE: despite the name, this is a bin_search_inv/3 result
+            case bin_search(SepStart, Cont1, Sep) of % next separator after it
+                {nomatch, Cont} -> % no separator was seen up to Cont
+                    {Head, Tail} = take_tc(Cont, 0, Sep),
+                    Used = cp_prefix(Cont0, Cont),
+                    case equal(Tail, Cont) of % equal/2 lives outside this chunk; presumably a comparison
+                        true -> % all of Cont is tail: the run from SepStart joins it
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, End/binary>> = Bin,
+                            {stack(Keep,Head), stack(stack(End,Used),Tail)};
+                        false -> % otherwise Bin and Used both belong to the head
+                            {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) -> % NOTE: here this is a bin_search/3 result
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_tc([Bin|Cont], KeepSz, Sep) % scan again from that offset
+            end
+    end;
+take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> % list input, one code point at a time
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of % quick test on the code point only
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str), % then test the whole grapheme cluster
+                    case lists:member(GC, GCs) of
+                        false -> % not in GCs: may go in front of the tail
+                            {Head, Tail} = take_tc(Cs1, 0, Sep),
+                            case equal(Tail, Cs1) of
+                                true -> {Head, append(GC,Tail)};
+                                false -> {append(GC,Head), Tail}
+                            end;
+                        true -> % in GCs: always goes in front of the head
+                            {Head, Tail} = take_tc(Cs1, 0, Sep),
+                            {append(GC,Head), Tail}
+                    end;
+                false ->
+                    {Head, Tail} = take_tc(Cs, 0, Sep),
+                    case equal(Tail, Cs) of
+                        true  -> {Head, append(CP,Tail)};
+                        false -> {append(CP,Head), Tail}
+                    end
+            end;
+        [] -> {[],[]}
+    end;
+take_tc(Bin, N, Sep) when is_binary(Bin) -> % plain binary: returns {Head, Tail} as two binaries
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search_inv(Rest, [], Sep) of
+        {nomatch,_} -> {Bin, <<>>}; % no non-separator after offset N: empty tail
+        [SepStart] -> % start of a non-separator run (bin_search_inv/3 result)
+            case bin_search(SepStart, [], Sep) of
+                {nomatch,_} -> % no separator follows: split at SepStart
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Before:KeepSz/binary, End/binary>> = Bin,
+                    {Before, End};
+                [NonSep] -> % a separator follows: keep scanning from there
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_tc(Bin, KeepSz, Sep)
+            end
+    end.
+
+prefix_1(Str, []) -> Str; % prefix consumed: return what follows it
+prefix_1(Str, [_]=Last) -> % last prefix element is compared as a grapheme cluster
+    prefix_2(unicode_util:gc(Str), Last);
+prefix_1(Str, Prefix) -> % earlier elements are compared code point by code point
+    prefix_2(unicode_util:cp(Str), Prefix).
+
+prefix_2([Same|Str], [Same|Prefix]) ->
+    prefix_1(Str, Prefix);
+prefix_2(_, _) -> % mismatch, or the string ran out
+    nomatch.
+
+split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc) % Where = leading | trailing | all
+  when is_binary(Bin) ->
+    case bin_search_str(Bin, Start, Cont0, Needle) of % search Bin from byte offset Start
+        {nomatch,Sz,Cont} ->
+            <<Keep:Sz/binary, _/binary>> = Bin, % keep the first Sz bytes of Bin
+            split_1(Cont, Needle, 0, Where, [Keep|Curr0], Acc);
+        {Before, [Cs0|Cont], After} -> % Cs0 starts at the match, After follows the needle
+            Curr = add_non_empty(Before,Curr0),
+            case Where of
+                leading ->
+                    [rev(Curr),After]; % the first match is the result
+                trailing ->
+                    <<_/utf8, Cs/binary>> = Cs0, % step one code point past the match start
+                    Next = byte_size(Bin) - byte_size(Cs),
+                    split_1([Bin|Cont], Needle, Next, Where,
+                            Curr0, [rev(Curr),After]); % Acc holds the latest match so far
+                all ->
+                    split_1(After, Needle, 0, Where, [], [rev(Curr)|Acc])
+            end
+    end;
+split_1(Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] -> % next code point equals the needle's first one
+            case prefix_1(Cs0, Needle) of
+                nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+                Rest when Where =:= leading ->
+                    [rev(Curr), Rest];
+                Rest when Where =:= trailing ->
+                    split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+                Rest when Where =:= all ->
+                    split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+            end;
+        [Other|Cs] ->
+            split_1(Cs, Needle, 0, Where, append(Other,Curr), Acc);
+        [] ->
+            {rev(Curr), Acc} % input exhausted: current piece plus Acc, as a tuple
+    end;
+split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) -> % remaining case: Bin is used as a binary
+    case bin_search_str(Bin, Start, [], Needle) of
+        {nomatch,_,_} ->
+            <<_:Start/binary, Keep/binary>> = Bin, % everything from Start onwards
+            {rev([Keep|Curr0]), Acc};
+        {Before, [Cs0], After} ->
+            case Where of
+                leading ->
+                    [rev([Before|Curr0]),After];
+                trailing ->
+                    <<_/utf8, Cs/binary>> = Cs0, % step one code point past the match start
+                    Next = byte_size(Bin) - byte_size(Cs),
+                    split_1(Bin, Needle, Next, Where, Curr0,
+                            [btoken(Before,Curr0),After]);
+                all ->
+                    Next = byte_size(Bin) - byte_size(After), % offset just after the needle
+                    <<_:Start/binary, Keep/binary>> = Before, % drop what earlier rounds emitted
+                    Curr = [Keep|Curr0],
+                    split_1(Bin, Needle, Next, Where, [], [rev(Curr)|Acc])
+            end
+    end.
+
+lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) -> % Ts collects lexemes in reverse order
+    case bin_search_inv(Bin, Cont0, Seps) of % skip leading separators
+        {nomatch,Cont} ->
+            lexemes_m(Cont, Seps, Ts);
+        Cs -> % Cs starts at a non-separator
+            {Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
+            lexemes_m(Rest, Seps, [Lexeme|Ts])
+    end;
+lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+    case unicode_util:gc(Cs0) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true  -> % separator cluster: drop it
+                    lexemes_m(Cs, Seps, Ts);
+                false -> % start of a lexeme
+                    {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+                    lexemes_m(Rest, Seps, [Lexeme|Ts])
+            end;
+        [] ->
+            lists:reverse(Ts) % restore input order
+    end;
+lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Seps) of
+        {nomatch,_} ->
+            lists:reverse(Ts);
+        [Cs] ->
+            {Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
+            lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts)) % an empty binary is not added
+    end.
+
+lexeme_pick([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps, Tkn) when is_integer(CP) -> % returns {Lexeme, Rest}
+    case lists:member(CP, CPs) of % quick test on the code point only
+        true  ->
+            [GC|Cs2] = unicode_util:gc(Cs0), % then test the whole grapheme cluster
+            case lists:member(GC, GCs) of
+                true -> {rev(Tkn), Cs2}; % separator reached; Rest starts after it
+                false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
+            end;
+        false -> lexeme_pick(Cs1, Seps, [CP|Tkn]) % Tkn is built in reverse
+    end;
+lexeme_pick([Bin|Cont0], Seps, Tkn) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Seps) of
+        {nomatch,_} -> % no separator found: all of Bin is added to the lexeme
+            lexeme_pick(Cont0, Seps, [Bin|Tkn]);
+        [Left|_Cont] = Cs -> % Left starts at the separator
+            Bytes = byte_size(Bin) - byte_size(Left),
+            <<Lexeme:Bytes/binary, _/binary>> = Bin,
+            {btoken(Lexeme, Tkn), Cs}
+    end;
+lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs2] = unicode_util:gc(Cs0),
+                    case lists:member(GC, GCs) of
+                        true -> {rev(Tkn), Cs0}; % separator reached; Rest still includes it
+                        false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
+                    end;
+                false ->
+                    lexeme_pick(Cs, Seps, append(CP,Tkn))
+            end;
+        [] ->
+            {rev(Tkn), []}
+    end;
+lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
+    case bin_search(Bin, Seps) of
+        {nomatch,_} -> % no separator: the lexeme runs to the end of Bin
+            {btoken(Bin,Tkn), []};
+        [Left] -> % Left starts at the separator
+            Bytes = byte_size(Bin) - byte_size(Left),
+            <<Lexeme:Bytes/binary, _/binary>> = Bin,
+            {btoken(Lexeme, Tkn), Left}
+    end.
+
+nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) -> % N counts down to the wanted lexeme
+    case bin_search_inv(Bin, Cont0, Seps) of % skip leading separators
+        {nomatch,Cont} ->
+            nth_lexeme_m(Cont, Seps, N);
+        Cs when N > 1 -> % not the wanted lexeme yet: skip over it
+            Rest = lexeme_skip(Cs, Seps),
+            nth_lexeme_m(Rest, Seps, N-1);
+        Cs -> % the wanted lexeme starts here
+            {Lexeme,_} = lexeme_pick(Cs, Seps, []),
+            Lexeme
+    end;
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+    case unicode_util:gc(Cs0) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true -> % separator cluster: drop it
+                    nth_lexeme_m(Cs, Seps, N);
+                false when N > 1 -> % C starts a lexeme that is skipped
+                    Cs1 = lexeme_skip(Cs, Seps),
+                    nth_lexeme_m(Cs1, Seps, N-1);
+                false ->
+                    {Lexeme,_} = lexeme_pick(Cs0, Seps, []),
+                    Lexeme
+            end;
+        [] ->
+            [] % input ran out before the wanted lexeme
+    end;
+nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Seps) of
+        [Cs] when N > 1 ->
+            Cs1 = lexeme_skip(Cs, Seps),
+            nth_lexeme_m(Cs1, Seps, N-1);
+        [Cs] ->
+            {Lexeme,_} = lexeme_pick(Cs, Seps, []),
+            Lexeme;
+        {nomatch,_} ->
+            <<>> % input ran out before the wanted lexeme
+    end.
+
+lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) -> % returns input from the next separator
+    case lists:member(CP, CPs) of % quick test on the code point only
+        true  ->
+            [GC|Cs2] = unicode_util:gc(Cs0), % then test the whole grapheme cluster
+            case lists:member(GC, GCs) of
+                true -> Cs0; % separator reached; it is left in the result
+                false -> lexeme_skip(Cs2, Seps)
+            end;
+        false ->
+            lexeme_skip(Cs1, Seps)
+    end;
+lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Seps) of
+        {nomatch,_} -> lexeme_skip(Cont0, Seps); % no separator found: move on to Cont0
+        Cs -> Cs
+    end;
+lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs2] = unicode_util:gc(Cs0),
+                    case lists:member(GC, GCs) of
+                        true -> Cs0;
+                        false -> lexeme_skip(Cs2, Seps)
+                    end;
+                false ->
+                    lexeme_skip(Cs, Seps)
+            end;
+        [] ->
+            [] % no separator before the end of input
+    end;
+lexeme_skip(Bin, Seps) when is_binary(Bin) ->
+    case bin_search(Bin, Seps) of
+        {nomatch,_} -> <<>>; % no separator before the end of input
+        [Left] -> Left
+    end.
+
+find_l([Bin|Cont0], Needle) when is_binary(Bin) -> % leftmost match: input from the match, or nomatch
+    case bin_search_str(Bin, 0, Cont0, Needle) of
+        {nomatch, _, Cont} ->
+            find_l(Cont, Needle);
+        {_Before, Cs, _After} -> % Cs is a list whose head starts at the match
+            Cs
+    end;
+find_l(Cs0, [C|_]=Needle) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] -> % first code point matches: test the whole needle
+            case prefix_1(Cs0, Needle) of
+                nomatch -> find_l(Cs, Needle);
+                _ -> Cs0
+            end;
+        [_C|Cs] ->
+            find_l(Cs, Needle);
+        [] -> nomatch
+    end;
+find_l(Bin, Needle) -> % remaining case: Bin is used as a binary
+    case bin_search_str(Bin, 0, [], Needle) of
+        {nomatch,_,_} -> nomatch;
+        {_Before, [Cs], _After} -> Cs
+    end.
+
+find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) -> % Res: latest match found so far
+    case bin_search_str(Bin, 0, Cont0, Needle) of
+        {nomatch,_,Cont} ->
+            find_r(Cont, Needle, Res);
+        {_, Cs0, _} ->
+            [_|Cs] = unicode_util:gc(Cs0), % step one grapheme cluster past the match start
+            find_r(Cs, Needle, Cs0) % this match replaces Res; look for a later one
+    end;
+find_r(Cs0, [C|_]=Needle, Res) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] -> % first code point matches: test the whole needle
+            case prefix_1(Cs0, Needle) of
+                nomatch -> find_r(Cs, Needle, Res);
+                _ -> find_r(Cs, Needle, Cs0)
+            end;
+        [_C|Cs] ->
+            find_r(Cs, Needle, Res);
+        [] -> Res % end of input: return the last recorded match
+    end;
+find_r(Bin, Needle, Res) -> % remaining case: Bin is used as a binary
+    case bin_search_str(Bin, 0, [], Needle) of
+        {nomatch,_,_} -> Res;
+        {_Before, [Cs0], _After} ->
+            <<_/utf8, Cs/binary>> = Cs0, % step one code point past the match start
+            find_r(Cs, Needle, Cs0)
+    end.
+
+%% The helpers below avoid wrapping a lone binary in a list.
+%% They might be unnecessary; is there a better solution?
+btoken(Bin, []) -> Bin;
+btoken(Bin, [CP]) when is_integer(CP) -> <<CP/utf8, Bin/binary>>;
+btoken(<<>>, RevAcc) -> lists:reverse(RevAcc);
+btoken(Bin, RevAcc) -> [lists:reverse(RevAcc),Bin].
+
+rev([Bin]) when is_binary(Bin) -> Bin; % a lone binary is returned unwrapped
+rev(Items) when is_list(Items) -> lists:reverse(Items);
+rev(CP) when is_integer(CP) -> CP. % a single code point is returned as is
+
+append(CP, <<>>) when is_integer(CP) -> [CP]; % an empty binary tail is dropped
+append(Cluster, <<>>) when is_list(Cluster) -> Cluster;
+append(Front, Tail) when is_binary(Tail) -> [Front,Tail];
+append(CP, Tail) when is_integer(CP) -> [CP|Tail];
+append(Cluster, Tail) when is_list(Cluster) -> Cluster ++ Tail.
+
+stack(Top, []) -> Top; % nothing below: return the element unwrapped
+stack(<<>>, Below) -> Below; % an empty binary is not pushed
+stack([], Below) -> Below; % nor is an empty list
+stack(Top, Below) -> [Top|Below].
+
+add_non_empty(<<>>, Acc) -> Acc; % an empty binary is not added
+add_non_empty(Piece, Acc) -> [Piece|Acc].
+
+cp_prefix(Whole, Remainder) -> % code points of Whole that precede Remainder
+    case unicode_util:cp(Remainder) of
+        [Next|After] -> cp_prefix_1(Whole, Next, After);
+        [] -> Whole % empty remainder: Whole is returned unchanged
+    end.
+
+cp_prefix_1(Source, Stop, After) -> % collect code points until Stop is followed by After
+    case unicode_util:cp(Source) of
+        [Stop|Tail] ->
+            case equal(Tail, After) of
+                false-> [Stop|cp_prefix_1(Tail, Stop, After)];
+                true -> []
+            end;
+        [Other|Tail] -> [Other|cp_prefix_1(Tail, Stop, After)]
+    end.
+
+
+%% Binary special: search a binary that has no continuation
+bin_search(Subject, Pattern) ->
+    bin_search(Subject, [], Pattern).
+
+bin_search(_Subject, Rest, {[],_,_}) -> % no separators, so nothing can match
+    {nomatch, Rest};
+bin_search(Subject, Rest, {GCs,_,BinPats}) ->
+    bin_search_loop(Subject, 0, BinPats, Rest, GCs).
+
+%% The search must work with input such as [<<$a>>, <<778/utf8>>],
+%% i.e. å in NFD form: $a followed by "COMBINING RING ABOVE".
+%% PREPEND characters, like "ARABIC NUMBER SIGN" 1536 <<216,128>>,
+%% combined with other characters are currently ignored.
+search_pattern(GCs) ->
+    FirstCPs = search_cp(GCs),
+    BinPats = bin_pattern(FirstCPs),
+    {GCs, FirstCPs, BinPats}.
+
+search_cp([]) -> [];
+search_cp([Sep|Rest]) when is_integer(Sep) -> % a bare code point is kept as is
+    [Sep|search_cp(Rest)];
+search_cp([Cluster|Rest]) -> % otherwise keep only its first code point
+    [First|_] = unicode_util:cp(Cluster),
+    [First|search_cp(Rest)].
+
+bin_pattern([]) -> [];
+bin_pattern([First|Rest]) -> % one UTF-8 encoded binary per code point
+    [<<First/utf8>>|bin_pattern(Rest)].
+
+bin_search_loop(Bin0, Start, _, Cont, _Seps) % find a separator in Bin0 at or after byte Start
+  when byte_size(Bin0) =< Start; Start < 0 ->
+    {nomatch, Cont}; % offset outside Bin0: nothing left to search
+bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
+    <<_:Start/binary, Bin/binary>> = Bin0,
+    case binary:match(Bin, BinSeps) of % byte-level search for a candidate position
+        nomatch ->
+            {nomatch,Cont};
+        {Where, _CL} ->
+            <<_:Where/binary, Cont0/binary>> = Bin,
+            Cont1 = stack(Cont0, Cont), % input from the candidate onwards
+            [GC|Cont2] = unicode_util:gc(Cont1), % whole grapheme cluster at the candidate
+            case lists:member(GC, Seps) of
+                false -> % the cluster is not a separator: resume after it
+                    case Cont2 of
+                        [BinR|Cont] when is_binary(BinR) -> % rest is a binary followed by Cont
+                            Next = byte_size(Bin0) - byte_size(BinR),
+                            bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+                        BinR when is_binary(BinR), Cont =:= [] -> % rest is a bare binary
+                            Next = byte_size(Bin0) - byte_size(BinR),
+                            bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+                        _ -> % anything else: report no match, continuing at Cont2
+                            {nomatch, Cont2}
+                    end;
+                true when is_list(Cont1) ->
+                    Cont1;
+                true -> % always answer with a list
+                    [Cont1]
+            end
+    end.
+
+bin_search_inv(Head, Rest, {[], _, _}) -> % no separators: the input is returned unchanged
+    [Head|Rest];
+bin_search_inv(Head, Rest, {[Only], _, _}) -> % single separator: bin_search_inv_1/2
+    bin_search_inv_1([Head|Rest], Only);
+bin_search_inv(Head, Rest, {Many, _, _}) -> % several separators: bin_search_inv_n/2
+    bin_search_inv_n([Head|Rest], Many).
+
+bin_search_inv_1([<<>>|CPs], _) -> % leading binary is empty: no match in it
+    {nomatch, CPs};
+bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) -> % skip leading clusters equal to Sep
+    case unicode_util:gc(CPs) of
+        [Sep|Bin] when is_binary(Bin), Cont =:= [] -> % rest is a bare binary
+            bin_search_inv_1([Bin], Sep);
+        [Sep|[Bin|Cont]=Cs] when is_binary(Bin) -> % Cont is already bound: same continuation
+            bin_search_inv_1(Cs, Sep);
+        [Sep|Cs] -> % any other rest after Sep: no match, continue at Cs
+            {nomatch, Cs};
+        _ -> CPs % first cluster is not Sep: the input is returned as is
+    end.
+
+bin_search_inv_n([<<>>|CPs], _) -> % leading binary is empty: no match in it
+    {nomatch, CPs};
+bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) -> % skip leading clusters found in Seps
+    [C|Cs0] = unicode_util:gc(CPs),
+    case {lists:member(C, Seps), Cs0} of
+        {true, Cs} when is_binary(Cs), Cont =:= [] -> % rest is a bare binary
+            bin_search_inv_n([Cs], Seps);
+        {true, [Bin|Cont]=Cs} when is_binary(Bin) -> % Cont is already bound: same continuation
+            bin_search_inv_n(Cs, Seps);
+        {true, Cs} -> {nomatch, Cs}; % any other rest after a separator
+        {false, _} -> CPs % first cluster is not a separator
+    end.
+
+bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> % find SearchCPs in Bin0 from byte Start
+    <<_:Start/binary, Bin/binary>> = Bin0,
+    case binary:match(Bin, <<CP/utf8>>) of % locate the first code point of the needle
+        nomatch -> {nomatch, byte_size(Bin0), Cont};
+        {Where0, _} ->
+            Where = Start+Where0, % offset of the candidate within Bin0
+            <<Keep:Where/binary, Cs0/binary>> = Bin0,
+            [GC|Cs]=unicode_util:gc(Cs0),
+            case prefix_1(stack(Cs0,Cont), SearchCPs) of % test the whole needle here
+                nomatch when is_binary(Cs) -> % retry after the cluster at the candidate
+                    KeepSz = byte_size(Bin0) - byte_size(Cs),
+                    bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+                nomatch ->
+                    {nomatch, Where, stack([GC|Cs],Cont)};
+                [] -> % nothing follows the match: After is given as an empty binary
+                    {Keep, [Cs0|Cont], <<>>};
+                Rest -> % {Before, [FromMatch|Cont], After}
+                    {Keep, [Cs0|Cont], Rest}
+            end
+    end.
+
+
+%%---------------------------------------------------------------------------
+%% OLD lists API kept for backwards compatibility
+%%---------------------------------------------------------------------------
+
 %% Robert's bit
 
 %% len(String)
@@ -68,12 +1347,12 @@ len(S) -> length(S).
 %% equal(String1, String2)
 %%  Test if 2 strings are equal.
 
--spec equal(String1, String2) -> boolean() when
-      String1 :: string(),
-      String2 :: string().
+%% -spec equal(String1, String2) -> boolean() when
+%%       String1 :: string(),
+%%       String2 :: string().
 
-equal(S, S) -> true;
-equal(_, _) -> false.
+%% equal(S, S) -> true;
+%% equal(_, _) -> false.
 
 %% concat(String1, String2)
 %%  Concatenate 2 strings.
@@ -127,7 +1406,7 @@ rchr([], _C, _I, L) -> L.
 str(S, Sub) when is_list(Sub) -> str(S, Sub, 1).
 
 str([C|S], [C|Sub], I) ->
-    case prefix(Sub, S) of
+    case l_prefix(Sub, S) of
 	true -> I;
 	false -> str(S, [C|Sub], I+1)
     end;
@@ -142,16 +1421,16 @@ str([], _Sub, _I) -> 0.
 rstr(S, Sub) when is_list(Sub) -> rstr(S, Sub, 1, 0).
 
 rstr([C|S], [C|Sub], I, L) ->
-    case prefix(Sub, S) of
+    case l_prefix(Sub, S) of
 	true -> rstr(S, [C|Sub], I+1, I);
 	false -> rstr(S, [C|Sub], I+1, L)
     end;
 rstr([_|S], Sub, I, L) -> rstr(S, Sub, I+1, L);
 rstr([], _Sub, _I, L) -> L.
 
-prefix([C|Pre], [C|String]) -> prefix(Pre, String);
-prefix([], String) when is_list(String) -> true;
-prefix(Pre, String) when is_list(Pre), is_list(String) -> false.
+l_prefix([C|Pre], [C|String]) -> l_prefix(Pre, String);
+l_prefix([], String) when is_list(String) -> true;
+l_prefix(Pre, String) when is_list(Pre), is_list(String) -> false.
 
 %% span(String, Chars) -> Length.
 %% cspan(String, Chars) -> Length.
@@ -229,9 +1508,9 @@ tokens(S, Seps) ->
 		[_|_] -> [S]
 	    end;
 	[C] ->
-	    tokens_single_1(reverse(S), C, []);
+	    tokens_single_1(lists:reverse(S), C, []);
 	[_|_] ->
-	    tokens_multiple_1(reverse(S), Seps, [])
+	    tokens_multiple_1(lists:reverse(S), Seps, [])
     end.
 
 tokens_single_1([Sep|S], Sep, Toks) ->
@@ -342,8 +1621,8 @@ sub_word(String, Index, Char) when is_integer(Index), is_integer(Char) ->
 	    s_word(strip(String, left, Char), Index, Char, 1, [])
     end.
 
-s_word([], _, _, _,Res) -> reverse(Res);
-s_word([Char|_],Index,Char,Index,Res) -> reverse(Res);
+s_word([], _, _, _,Res) -> lists:reverse(Res);
+s_word([Char|_],Index,Char,Index,Res) -> lists:reverse(Res);
 s_word([H|T],Index,Char,Index,Res) -> s_word(T,Index,Char,Index,[H|Res]);
 s_word([Char|T],Stop,Char,Index,Res) when Index < Stop -> 
     s_word(strip(T,left,Char),Stop,Char,Index+1,Res);
@@ -359,7 +1638,7 @@ strip(String) -> strip(String, both).
 -spec strip(String, Direction) -> Stripped when
       String :: string(),
       Stripped :: string(),
-      Direction :: left | right | both.
+      Direction :: 'left' | 'right' | 'both'.
 
 strip(String, left) -> strip_left(String, $\s);
 strip(String, right) -> strip_right(String, $\s);
@@ -369,7 +1648,7 @@ strip(String, both) ->
 -spec strip(String, Direction, Character) -> Stripped when
       String :: string(),
       Stripped :: string(),
-      Direction :: left | right | both,
+      Direction :: 'left' | 'right' | 'both',
       Character :: char().
 
 strip(String, right, Char) -> strip_right(String, Char);
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 617da11ba8..59499021cb 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -1,8 +1,8 @@
 %%
 %% %CopyrightBegin%
-%% 
+%%
 %% Copyright Ericsson AB 2008-2016. All Rights Reserved.
-%% 
+%%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
 %% You may obtain a copy of the License at
@@ -14,7 +14,7 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 -module(unicode).
@@ -22,7 +22,12 @@
 -export([characters_to_list/1, characters_to_list_int/2,
 	 characters_to_binary/1, characters_to_binary_int/2,
 	 characters_to_binary/3,
-	 bom_to_encoding/1, encoding_to_bom/1]).
+	 bom_to_encoding/1, encoding_to_bom/1,
+         characters_to_nfd_list/1, characters_to_nfd_binary/1,
+         characters_to_nfc_list/1, characters_to_nfc_binary/1,
+         characters_to_nfkd_list/1, characters_to_nfkd_binary/1,
+         characters_to_nfkc_list/1, characters_to_nfkc_binary/1
+        ]).
 
 -export_type([chardata/0, charlist/0, encoding/0, external_chardata/0,
               external_charlist/0, latin1_char/0, latin1_chardata/0,
@@ -102,35 +107,6 @@ characters_to_list(_, _) ->
 characters_to_list(ML) ->
     unicode:characters_to_list(ML,unicode).
 
-characters_to_list_int(ML, Encoding) ->
-    try
-	do_characters_to_list(ML,Encoding)
-    catch
-	error:AnyError ->
-	    TheError = case AnyError of
-			   system_limit ->
-			       system_limit;
-			   _ ->
-			       badarg
-		       end,
-	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
-		(catch erlang:error(new_stacktrace,
-				    [ML,Encoding])),
-	    erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest])
-    end.
-
-% XXX: Optimize me!
-do_characters_to_list(ML, Encoding) -> 
-    case unicode:characters_to_binary(ML,Encoding) of
-	Bin when is_binary(Bin) ->
-	    unicode:characters_to_list(Bin,utf8); 
-	{error,Encoded,Rest} ->
-	    {error,unicode:characters_to_list(Encoded,utf8),Rest};
-	{incomplete, Encoded2, Rest2} ->
-	    {incomplete,unicode:characters_to_list(Encoded2,utf8),Rest2}
-    end.
-
-
 -spec characters_to_binary(Data) -> Result when
       Data :: latin1_chardata() | chardata() | external_chardata(),
       Result :: binary()
@@ -154,24 +130,6 @@ characters_to_binary(ML) ->
 				    [ML])),
 	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
     end.
-	
-
-characters_to_binary_int(ML,InEncoding) ->
-    try
-	characters_to_binary_int(ML,InEncoding,unicode)
-    catch
-	error:AnyError ->
-	    TheError = case AnyError of
-			   system_limit ->
-			       system_limit;
-			   _ ->
-			       badarg
-		       end,
-	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
-		(catch erlang:error(new_stacktrace,
-				    [ML,InEncoding])),
-	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
-    end.
 
 -spec characters_to_binary(Data, InEncoding, OutEncoding) -> Result when
       Data :: latin1_chardata() | chardata() | external_chardata(),
@@ -192,7 +150,7 @@ characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or
 	        try
 		    characters_to_binary_int(ML,latin1,utf8)
 		catch
-		    error:AnyError ->	    
+		    error:AnyError ->
 			TheError = case AnyError of
 				       system_limit ->
 					   system_limit;
@@ -228,7 +186,7 @@ characters_to_binary(ML,Uni,latin1) when is_binary(ML) and ((Uni =:= utf8) or
 				     [{Mod,characters_to_binary,L}|Rest])
 		end
     end;
-    
+
 characters_to_binary(ML, InEncoding, OutEncoding) ->
     try
 	characters_to_binary_int(ML,InEncoding,OutEncoding)
@@ -246,53 +204,6 @@ characters_to_binary(ML, InEncoding, OutEncoding) ->
 	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
     end.
 
-characters_to_binary_int(ML, InEncoding, OutEncoding) when 
-    InEncoding =:= latin1, OutEncoding =:= unicode; 
-    InEncoding =:= latin1, OutEncoding =:= utf8;
-    InEncoding =:= unicode, OutEncoding =:= unicode; 
-    InEncoding =:= unicode, OutEncoding =:= utf8; 
-    InEncoding =:= utf8, OutEncoding =:= unicode; 
-    InEncoding =:= utf8, OutEncoding =:= utf8 -> 
-    unicode:characters_to_binary(ML,InEncoding);
-
-characters_to_binary_int(ML, InEncoding, OutEncoding) ->
-    {InTrans,Limit} = case OutEncoding of
-		  latin1 -> {i_trans_chk(InEncoding),255};
-		  _ -> {i_trans(InEncoding),case InEncoding of latin1 -> 255; _ -> 16#10FFFF end}
-	      end,
-    OutTrans = o_trans(OutEncoding),
-    Res = 
-	ml_map(ML,
-	       fun(Part,Accum) when is_binary(Part) ->
-		       case InTrans(Part) of
-			   List when is_list(List) ->
-			       Tail = OutTrans(List),
-			       <<Accum/binary, Tail/binary>>;
-			   {error, Translated, Rest} -> 
-			       Tail = OutTrans(Translated),
-			       {error, <<Accum/binary,Tail/binary>>, Rest};
-			   {incomplete, Translated, Rest, Missing}  ->
-			       Tail = OutTrans(Translated),
-			       {incomplete, <<Accum/binary,Tail/binary>>, Rest,
-				Missing}
-		       end;
-		  (Part, Accum) when is_integer(Part), Part =< Limit ->
-		       case OutTrans([Part]) of
-			   Binary when is_binary(Binary) ->
-			       <<Accum/binary, Binary/binary>>;
-			   {error, _, [Part]} ->
-			       {error,Accum,[Part]}
-		       end;
-		  (Part, Accum) ->
-		       {error, Accum, [Part]}
-	       end,<<>>),
-    case Res of
-	{incomplete,A,B,_} ->
-	    {incomplete,A,B};
-	_ ->
-	    Res
-    end.
-
 -spec bom_to_encoding(Bin) -> {Encoding, Length} when
       Bin :: binary(),
       Encoding ::  'latin1' | 'utf8'
@@ -335,11 +246,194 @@ encoding_to_bom({utf32,little}) ->
     <<255,254,0,0>>;
 encoding_to_bom(latin1) ->
     <<>>.
-	    
 
-cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> 
+-define(GC_N, 200). %% arbitrary number
+
+%% Canonically decompose (NFD) chardata into a flat list of code points.
+-spec characters_to_nfd_list(chardata()) -> [char()].
+characters_to_nfd_list(Chars) ->
+    case unicode_util:nfd(Chars) of
+        [] -> [];
+        [Cluster|Rest] when is_list(Cluster) -> Cluster ++ characters_to_nfd_list(Rest);
+        [CodePoint|Rest] -> [CodePoint|characters_to_nfd_list(Rest)]
+    end.
+
+-spec characters_to_nfd_binary(chardata()) -> unicode_binary().
+characters_to_nfd_binary(Chars) -> % NFD-normalize chardata into a single binary
+    list_to_binary(characters_to_nfd_binary(Chars, ?GC_N, [])).
+
+characters_to_nfd_binary(Chars, Left, RevRow) when Left > 0 -> % room left in the current row
+    case unicode_util:nfd(Chars) of
+        [] -> [characters_to_binary(lists:reverse(RevRow))];
+        [GC|Rest] -> characters_to_nfd_binary(Rest, Left-1, [GC|RevRow])
+    end;
+characters_to_nfd_binary(Chars, _, RevRow) -> % row is full: convert it and start a new one
+    [characters_to_binary(lists:reverse(RevRow))|characters_to_nfd_binary(Chars,?GC_N,[])].
+
+%% Compatibility decompose (NFKD) chardata into a flat list of code points.
+-spec characters_to_nfkd_list(chardata()) -> [char()].
+characters_to_nfkd_list(Chars) ->
+    case unicode_util:nfkd(Chars) of
+        [] -> [];
+        [Cluster|Rest] when is_list(Cluster) -> Cluster ++ characters_to_nfkd_list(Rest);
+        [CodePoint|Rest] -> [CodePoint|characters_to_nfkd_list(Rest)]
+    end.
+
+-spec characters_to_nfkd_binary(chardata()) -> unicode_binary().
+characters_to_nfkd_binary(Chars) -> % NFKD-normalize chardata into a single binary
+    list_to_binary(characters_to_nfkd_binary(Chars, ?GC_N, [])).
+
+characters_to_nfkd_binary(Chars, Left, RevRow) when Left > 0 -> % room left in the current row
+    case unicode_util:nfkd(Chars) of
+        [] -> [characters_to_binary(lists:reverse(RevRow))];
+        [GC|Rest] -> characters_to_nfkd_binary(Rest, Left-1, [GC|RevRow])
+    end;
+characters_to_nfkd_binary(Chars, _, RevRow) -> % row is full: convert it and start a new one
+    [characters_to_binary(lists:reverse(RevRow))|characters_to_nfkd_binary(Chars,?GC_N,[])].
+
+
+%% Canonically compose (NFC) chardata into a flat list of code points.
+-spec characters_to_nfc_list(chardata()) -> [char()].
+characters_to_nfc_list(Chars) ->
+    case unicode_util:nfc(Chars) of
+        [] -> [];
+        [Cluster|Rest] when is_list(Cluster) -> Cluster ++ characters_to_nfc_list(Rest);
+        [CodePoint|Rest] -> [CodePoint|characters_to_nfc_list(Rest)]
+    end.
+
+-spec characters_to_nfc_binary(chardata()) -> unicode_binary().
+characters_to_nfc_binary(Chars) -> % NFC-normalize chardata into a single binary
+    list_to_binary(characters_to_nfc_binary(Chars, ?GC_N, [])).
+
+characters_to_nfc_binary(Chars, Left, RevRow) when Left > 0 -> % room left in the current row
+    case unicode_util:nfc(Chars) of
+        [] -> [characters_to_binary(lists:reverse(RevRow))];
+        [GC|Rest] -> characters_to_nfc_binary(Rest, Left-1, [GC|RevRow])
+    end;
+characters_to_nfc_binary(Chars, _, RevRow) -> % row is full: convert it and start a new one
+    [characters_to_binary(lists:reverse(RevRow))|characters_to_nfc_binary(Chars,?GC_N,[])].
+
+%% Compatibility compose (NFKC) chardata into a flat list of code points.
+-spec characters_to_nfkc_list(chardata()) -> [char()].
+characters_to_nfkc_list(Chars) ->
+    case unicode_util:nfkc(Chars) of
+        [] -> [];
+        [Cluster|Rest] when is_list(Cluster) -> Cluster ++ characters_to_nfkc_list(Rest);
+        [CodePoint|Rest] -> [CodePoint|characters_to_nfkc_list(Rest)]
+    end.
+
+-spec characters_to_nfkc_binary(chardata()) -> unicode_binary().
+characters_to_nfkc_binary(Chars) -> % NFKC-normalize chardata into a single binary
+    list_to_binary(characters_to_nfkc_binary(Chars, ?GC_N, [])).
+
+characters_to_nfkc_binary(Chars, Left, RevRow) when Left > 0 -> % room left in the current row
+    case unicode_util:nfkc(Chars) of
+        [] -> [characters_to_binary(lists:reverse(RevRow))];
+        [GC|Rest] -> characters_to_nfkc_binary(Rest, Left-1, [GC|RevRow])
+    end;
+characters_to_nfkc_binary(Chars, _, RevRow) -> % row is full: convert it and start a new one
+    [characters_to_binary(lists:reverse(RevRow))|characters_to_nfkc_binary(Chars,?GC_N,[])].
+
+%% internals
+
+characters_to_list_int(ML, Encoding) -> % wraps do_characters_to_list/2 and rewrites its errors
+    try
+	do_characters_to_list(ML,Encoding)
+    catch
+	error:AnyError ->
+	    TheError = case AnyError of % only system_limit is passed on; the rest become badarg
+			   system_limit ->
+			       system_limit;
+			   _ ->
+			       badarg
+		       end,
+	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = % capture a stacktrace from this point
+		(catch erlang:error(new_stacktrace,
+				    [ML,Encoding])),
+	    erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest]) % top frame renamed
+    end.
+
+%% XXX: Optimize me!
+do_characters_to_list(ML, Encoding) -> % converts via an intermediate binary
+    case unicode:characters_to_binary(ML,Encoding) of
+	Bin when is_binary(Bin) ->
+	    unicode:characters_to_list(Bin,utf8);
+	{error,Encoded,Rest} -> % the converted part is turned into a list; Rest is kept as is
+	    {error,unicode:characters_to_list(Encoded,utf8),Rest};
+	{incomplete, Encoded2, Rest2} ->
+	    {incomplete,unicode:characters_to_list(Encoded2,utf8),Rest2}
+    end.
+
+
+characters_to_binary_int(ML,InEncoding) -> % three-argument variant with OutEncoding = unicode
+    try
+	characters_to_binary_int(ML,InEncoding,unicode)
+    catch
+	error:AnyError ->
+	    TheError = case AnyError of % only system_limit is passed on; the rest become badarg
+			   system_limit ->
+			       system_limit;
+			   _ ->
+			       badarg
+		       end,
+	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = % capture a stacktrace from this point
+		(catch erlang:error(new_stacktrace,
+				    [ML,InEncoding])),
+	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) % top frame renamed
+    end.
+
+
+characters_to_binary_int(ML, InEncoding, OutEncoding) when % latin1/unicode/utf8 in, unicode/utf8 out
+      InEncoding =:= latin1, OutEncoding =:= unicode;
+      InEncoding =:= latin1, OutEncoding =:= utf8;
+      InEncoding =:= unicode, OutEncoding =:= unicode;
+      InEncoding =:= unicode, OutEncoding =:= utf8;
+      InEncoding =:= utf8, OutEncoding =:= unicode;
+      InEncoding =:= utf8, OutEncoding =:= utf8 ->
+    unicode:characters_to_binary(ML,InEncoding); % handed to characters_to_binary/2
+
+characters_to_binary_int(ML, InEncoding, OutEncoding) -> % every other encoding pair
+    {InTrans,Limit} = case OutEncoding of % input decoder and largest integer accepted
+                          latin1 -> {i_trans_chk(InEncoding),255};
+                          _ -> {i_trans(InEncoding),case InEncoding of latin1 -> 255; _ -> 16#10FFFF end}
+                      end,
+    OutTrans = o_trans(OutEncoding), % output encoder
+    Res =
+	ml_map(ML,
+	       fun(Part,Accum) when is_binary(Part) -> % binary part: decode, then re-encode
+		       case InTrans(Part) of
+			   List when is_list(List) ->
+			       Tail = OutTrans(List),
+			       <<Accum/binary, Tail/binary>>;
+			   {error, Translated, Rest} ->
+			       Tail = OutTrans(Translated),
+			       {error, <<Accum/binary,Tail/binary>>, Rest};
+			   {incomplete, Translated, Rest, Missing}  ->
+			       Tail = OutTrans(Translated),
+			       {incomplete, <<Accum/binary,Tail/binary>>, Rest,
+				Missing}
+		       end;
+		  (Part, Accum) when is_integer(Part), Part =< Limit -> % integer within the limit
+		       case OutTrans([Part]) of
+			   Binary when is_binary(Binary) ->
+			       <<Accum/binary, Binary/binary>>;
+			   {error, _, [Part]} ->
+			       {error,Accum,[Part]}
+		       end;
+		  (Part, Accum) -> % any other part is reported as an error
+		       {error, Accum, [Part]}
+	       end,<<>>),
+    case Res of
+	{incomplete,A,B,_} -> % the fourth element is dropped from the result
+	    {incomplete,A,B};
+	_ ->
+	    Res
+    end.
+
+
+cbv(utf8,<<1:1,1:1,0:1,_:5>>) ->
     1;
-cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> 
+cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) ->
     case R of
 	<<>> ->
 	    2;
@@ -386,18 +480,18 @@ cbv({utf32,big}, <<0:8>>) ->
     3;
 cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 ->
     2;
-cbv({utf32,big}, <<0:8,X:8,Y:8>>) 
+cbv({utf32,big}, <<0:8,X:8,Y:8>>)
   when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
     1;
 cbv({utf32,big},_) ->
     false;
 cbv({utf32,little},<<_:8>>) ->
     3;
-cbv({utf32,little},<<_:8,_:8>>) -> 
+cbv({utf32,little},<<_:8,_:8>>) ->
     2;
 cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 ->
     false;
-cbv({utf32,little},<<_:8,Y:8,X:8>>) 
+cbv({utf32,little},<<_:8,Y:8,X:8>>)
   when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
     1;
 cbv({utf32,little},_) ->
@@ -417,8 +511,8 @@ ml_map([Part|T],Fun,Accum) when is_integer(Part) ->
 		Bin2 when is_binary(Bin2) ->
 		    Bin2;
 		{error, Converted, Rest} ->
-		    {error, Converted, Rest}; 
-		{incomplete, Converted, Rest,X} -> 
+		    {error, Converted, Rest};
+		{incomplete, Converted, Rest,X} ->
 		    {incomplete, Converted, Rest,X}
 	    end;
 	% Can not be incomplete - it's an integer
@@ -471,7 +565,7 @@ ml_map(Part,Fun,Accum) when is_binary(Part), byte_size(Part) > 8192 ->
 ml_map(Bin,Fun,Accum) when is_binary(Bin) ->
     Fun(Bin,Accum).
 
- 
+
 
 
 
@@ -523,7 +617,7 @@ o_trans(utf8) ->
 				<<One/utf8>>
 			end, L)
     end;
-    
+
 o_trans(utf16) ->
     fun(L) ->
 	    do_o_binary(fun(One) ->
@@ -577,9 +671,9 @@ do_o_binary2(F,[H|T]) ->
 		    [Bin|Bin3]
 	    end
     end.
- 
+
 %% Specific functions only allowing codepoints in latin1 range
-	
+
 do_i_utf8_chk(<<>>) ->
     [];
 do_i_utf8_chk(<<U/utf8,R/binary>>) when U =< 255 ->