19 files changed, 2324 insertions, 374 deletions
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index ed3dfb342c..a7d53af7bc 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -58,6 +58,7 @@ MODULES= \
 	edlin \
 	edlin_expand \
 	epp \
+	erl_abstract_code \
 	erl_anno \
 	erl_bits \
 	erl_compile \
@@ -119,6 +120,7 @@ MODULES= \
 	sys \
 	timer \
 	unicode \
+	unicode_util \
 	win32reg \
 	zip
 
@@ -200,6 +202,9 @@ $(APP_TARGET): $(APP_SRC) ../vsn.mk
 $(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
 	$(vsn_verbose)sed -e 's;%VSN%;$(VSN);' $< > $@
 
+unicode_util.erl: ../uc_spec/*
+	escript ../uc_spec/gen_unicode_mod.escript
+
 # ----------------------------------------------------
 # Release Target
 # ---------------------------------------------------- 
diff --git a/lib/stdlib/src/beam_lib.erl b/lib/stdlib/src/beam_lib.erl
index 461acf03be..9e5e7b2e7e 100644
--- a/lib/stdlib/src/beam_lib.erl
+++ b/lib/stdlib/src/beam_lib.erl
@@ -54,6 +54,7 @@
 %%-------------------------------------------------------------------------
 
 -type beam() :: module() | file:filename() | binary().
+-type debug_info() :: {DbgiVersion :: atom(), Backend :: module(), Data :: term()} | 'no_debug_info'.
 
 -type forms()     :: [erl_parse:abstract_form() | erl_parse:form_info()].
 
@@ -63,8 +64,9 @@
 -type label()     :: integer().
 
 -type chunkid()   :: nonempty_string(). % approximation of the strings below
-%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom" | "AtU8".
--type chunkname() :: 'abstract_code' | 'attributes' | 'compile_info'
+%% "Abst" | "Dbgi" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom" | "AtU8".
+-type chunkname() :: 'abstract_code' | 'debug_info'
+                   | 'attributes' | 'compile_info'
                    | 'exports' | 'labeled_exports'
                    | 'imports' | 'indexed_imports'
                    | 'locals' | 'labeled_locals'
@@ -77,6 +79,7 @@
 
 -type chunkdata() :: {chunkid(), dataB()}
                    | {'abstract_code', abst_code()}
+                   | {'debug_info', debug_info()}
                    | {'attributes', [attrib_entry()]}
                    | {'compile_info', [compinfo_entry()]}
                    | {'exports', [{atom(), arity()}]}
@@ -99,7 +102,7 @@
                    | {'file_error', file:filename(), file:posix()}.
 -type chnk_rsn()  :: {'unknown_chunk', file:filename(), atom()}
                    | {'key_missing_or_invalid', file:filename(),
-		      'abstract_code'}
+		      'abstract_code' | 'debug_info'}
                    | info_rsn().
 -type cmp_rsn()   :: {'modules_different', module(), module()}
                    | {'chunks_different', chunkid()}
@@ -267,9 +270,9 @@ format_error({modules_different, Module1, Module2}) ->
 		  [Module1, Module2]);
 format_error({not_a_directory, Name}) ->
     io_lib:format("~tp: Not a directory~n", [Name]);
-format_error({key_missing_or_invalid, File, abstract_code}) ->
-    io_lib:format("~tp: Cannot decrypt abstract code because key is missing or invalid",
-		  [File]);
+format_error({key_missing_or_invalid, File, ChunkId}) ->
+    io_lib:format("~tp: Cannot decrypt ~ts because key is missing or invalid",
+		  [File, ChunkId]);
 format_error(badfun) ->
     "not a fun or the fun has the wrong arity";
 format_error(exists) ->
@@ -510,9 +513,9 @@ read_chunk_data(File0, ChunkNames) ->
 read_chunk_data(File0, ChunkNames0, Options)
   when is_atom(File0); is_list(File0); is_binary(File0) ->
     File = beam_filename(File0),
-    {ChunkIds, Names} = check_chunks(ChunkNames0, File, [], []),
+    {ChunkIds, Names, Optional} = check_chunks(ChunkNames0, File, [], [], []),
     AllowMissingChunks = member(allow_missing_chunks, Options),
-    {ok, Module, Chunks} = scan_beam(File, ChunkIds, AllowMissingChunks),
+    {ok, Module, Chunks} = scan_beam(File, ChunkIds, AllowMissingChunks, Optional),
     AT = ets:new(beam_symbols, []),
     T = {empty, AT},
     try chunks_to_data(Names, Chunks, File, Chunks, Module, T, [])
@@ -520,31 +523,34 @@ read_chunk_data(File0, ChunkNames0, Options)
     end.
 
 %% -> {ok, list()} | throw(Error)
-check_chunks([atoms | Ids], File, IL, L) ->
-    check_chunks(Ids, File, ["Atom", "AtU8" | IL], [{atom_chunk, atoms} | L]);
-check_chunks([ChunkName | Ids], File, IL, L) when is_atom(ChunkName) ->
+check_chunks([atoms | Ids], File, IL, L, O) ->
+    check_chunks(Ids, File, ["Atom", "AtU8" | IL],
+		 [{atom_chunk, atoms} | L], ["Atom", "AtU8" | O]);
+check_chunks([abstract_code | Ids], File, IL, L, O) ->
+    check_chunks(Ids, File, ["Abst", "Dbgi" | IL],
+		 [{abst_chunk, abstract_code} | L], ["Abst", "Dbgi" | O]);
+check_chunks([ChunkName | Ids], File, IL, L, O) when is_atom(ChunkName) ->
     ChunkId = chunk_name_to_id(ChunkName, File),
-    check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkName} | L]);
-check_chunks([ChunkId | Ids], File, IL, L) -> % when is_list(ChunkId)
-    check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkId} | L]);
-check_chunks([], _File, IL, L) ->
-    {lists:usort(IL), reverse(L)}.
+    check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkName} | L], O);
+check_chunks([ChunkId | Ids], File, IL, L, O) -> % when is_list(ChunkId)
+    check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkId} | L], O);
+check_chunks([], _File, IL, L, O) ->
+    {lists:usort(IL), reverse(L), O}.
 
 %% -> {ok, Module, Data} | throw(Error)
 scan_beam(File, What) ->
-    scan_beam(File, What, false).
+    scan_beam(File, What, false, []).
 
 %% -> {ok, Module, Data} | throw(Error)
-scan_beam(File, What0, AllowMissingChunks) ->
+scan_beam(File, What0, AllowMissingChunks, OptionalChunks) ->
     case scan_beam1(File, What0) of
 	{missing, _FD, Mod, Data, What} when AllowMissingChunks ->
 	    {ok, Mod, [{Id, missing_chunk} || Id <- What] ++ Data};
-	{missing, _FD, Mod, Data, ["Atom"]} ->
-	    {ok, Mod, Data};
-	{missing, _FD, Mod, Data, ["AtU8"]} ->
-	    {ok, Mod, Data};
-	{missing, FD, _Mod, _Data, What} ->
-	    error({missing_chunk, filename(FD), hd(What)});
+	{missing, FD, Mod, Data, What} ->
+	    case What -- OptionalChunks of
+		[] -> {ok, Mod, Data};
+		[Missing | _] -> error({missing_chunk, filename(FD), Missing})
+	    end;
 	R ->
 	    R
     end.
@@ -638,6 +644,22 @@ get_chunk(Id, Pos, Size, FD) ->
 chunks_to_data([{atom_chunk, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) ->
     {NewAtoms, Ret} = chunk_to_data(Name, <<"">>, File, Cs, Atoms, Module),
     chunks_to_data(CNs, Chunks, File, Cs, Module, NewAtoms, [Ret | L]);
+chunks_to_data([{abst_chunk, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) ->
+    DbgiChunk = proplists:get_value("Dbgi", Chunks, <<"">>),
+    {NewAtoms, Ret} =
+	case catch chunk_to_data(debug_info, DbgiChunk, File, Cs, Atoms, Module) of
+	    {DbgiAtoms, {debug_info, {debug_info_v1, Backend, Metadata}}} ->
+		case Backend:debug_info(erlang_v1, Module, Metadata, []) of
+		    {ok, Code} -> {DbgiAtoms, {abstract_code, {raw_abstract_v1, Code}}};
+		    {error, _} -> {DbgiAtoms, {abstract_code, no_abstract_code}}
+		end;
+            {error,beam_lib,{key_missing_or_invalid,Path,debug_info}} ->
+                error({key_missing_or_invalid,Path,abstract_code});
+	    _ ->
+		AbstChunk = proplists:get_value("Abst", Chunks, <<"">>),
+		chunk_to_data(Name, AbstChunk, File, Cs, Atoms, Module)
+	end,
+    chunks_to_data(CNs, Chunks, File, Cs, Module, NewAtoms, [Ret | L]);
 chunks_to_data([{Id, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) ->
     {_Id, Chunk} = lists:keyfind(Id, 1, Chunks),
     {NewAtoms, Ret} = chunk_to_data(Name, Chunk, File, Cs, Atoms, Module),
@@ -660,13 +682,30 @@ chunk_to_data(compile_info=Id, Chunk, File, _Cs, AtomTable, _Mod) ->
 	error:badarg ->
 	    error({invalid_chunk, File, chunk_name_to_id(Id, File)})
     end;
+chunk_to_data(debug_info=Id, Chunk, File, _Cs, AtomTable, Mod) ->
+    case Chunk of
+	<<>> ->
+	    {AtomTable, {Id, no_debug_info}};
+	<<0:8,N:8,Mode0:N/binary,Rest/binary>> ->
+	    Mode = binary_to_atom(Mode0, utf8),
+	    Term = decrypt_chunk(Mode, Mod, File, Id, Rest),
+	    {AtomTable, {Id, Term}};
+	_ ->
+	    case catch binary_to_term(Chunk) of
+		{'EXIT', _} ->
+		    error({invalid_chunk, File, chunk_name_to_id(Id, File)});
+		Term ->
+                    {AtomTable, {Id, Term}}
+	    end
+    end;
 chunk_to_data(abstract_code=Id, Chunk, File, _Cs, AtomTable, Mod) ->
     case Chunk of
 	<<>> ->
 	    {AtomTable, {Id, no_abstract_code}};
 	<<0:8,N:8,Mode0:N/binary,Rest/binary>> ->
 	    Mode = binary_to_atom(Mode0, utf8),
-	    decrypt_abst(Mode, Mod, File, Id, AtomTable, Rest);
+	    Term = decrypt_chunk(Mode, Mod, File, Id, Rest),
+	    {AtomTable, {Id, anno_from_term(Term)}};
 	_ ->
 	    case catch binary_to_term(Chunk) of
 		{'EXIT', _} ->
@@ -705,6 +744,7 @@ chunk_name_to_id(locals, _)          -> "LocT";
 chunk_name_to_id(labeled_locals, _)  -> "LocT";
 chunk_name_to_id(attributes, _)      -> "Attr";
 chunk_name_to_id(abstract_code, _)   -> "Abst";
+chunk_name_to_id(debug_info, _)      -> "Dbgi";
 chunk_name_to_id(compile_info, _)    -> "CInf";
 chunk_name_to_id(Other, File) -> 
     error({unknown_chunk, File, Other}).
@@ -894,23 +934,18 @@ mandatory_chunks() ->
 
 -define(CRYPTO_KEY_SERVER, beam_lib__crypto_key_server).
 
-decrypt_abst(Type, Module, File, Id, AtomTable, Bin) ->
+decrypt_chunk(Type, Module, File, Id, Bin) ->
     try
 	KeyString = get_crypto_key({debug_info, Type, Module, File}),
-	Key = make_crypto_key(Type, KeyString),
-	Term = decrypt_abst_1(Key, Bin),
-	{AtomTable, {Id, Term}}
+	{Type,Key,IVec,_BlockSize} = make_crypto_key(Type, KeyString),
+	ok = start_crypto(),
+	NewBin = crypto:block_decrypt(Type, Key, IVec, Bin),
+	binary_to_term(NewBin)
     catch
 	_:_ ->
 	    error({key_missing_or_invalid, File, Id})
     end.
 
-decrypt_abst_1({Type,Key,IVec,_BlockSize}, Bin) ->
-    ok = start_crypto(),
-    NewBin = crypto:block_decrypt(Type, Key, IVec, Bin),
-    Term = binary_to_term(NewBin),
-    anno_from_term(Term).
-
 anno_from_term({raw_abstract_v1, Forms}) ->
     {raw_abstract_v1, anno_from_forms(Forms)};
 anno_from_term({Tag, Forms}) when Tag =:= abstract_v1; Tag =:= abstract_v2 ->
diff --git a/lib/stdlib/src/erl_abstract_code.erl b/lib/stdlib/src/erl_abstract_code.erl
new file mode 100644
index 0000000000..6e45f11aa3
--- /dev/null
+++ b/lib/stdlib/src/erl_abstract_code.erl
@@ -0,0 +1,28 @@
+-module(erl_abstract_code).
+-export([debug_info/4]).
+
+debug_info(_Format, _Module, {none,_CompilerOpts}, _Opts) ->
+    {error, missing};
+debug_info(erlang_v1, _Module, {AbstrCode,_CompilerOpts}, _Opts) ->
+    {ok, AbstrCode};
+debug_info(core_v1, _Module, {AbstrCode,CompilerOpts}, Opts) ->
+    CoreOpts = add_core_returns(delete_reports(CompilerOpts ++ Opts)),
+    try compile:noenv_forms(AbstrCode, CoreOpts) of
+	{ok, _, Core, _} -> {ok, Core};
+	_What -> {error, failed_conversion}
+    catch
+	error:_ -> {error, failed_conversion}
+    end;
+debug_info(_, _, _, _) ->
+    {error, unknown_format}.
+
+delete_reports(Opts) ->
+    [Opt || Opt <- Opts, not is_report_option(Opt)].
+
+is_report_option(report) -> true;
+is_report_option(report_errors) -> true;
+is_report_option(report_warnings) -> true;
+is_report_option(_) -> false.
+
+add_core_returns(Opts) ->
+    [to_core, return_errors, return_warnings] ++ Opts.
diff --git a/lib/stdlib/src/erl_internal.erl b/lib/stdlib/src/erl_internal.erl
index 006e7946af..9a1b17fdb7 100644
--- a/lib/stdlib/src/erl_internal.erl
+++ b/lib/stdlib/src/erl_internal.erl
@@ -331,6 +331,8 @@ bif(list_to_float, 1) -> true;
 bif(list_to_integer, 1) -> true;
 bif(list_to_integer, 2) -> true;
 bif(list_to_pid, 1) -> true;
+bif(list_to_port, 1) -> true;
+bif(list_to_ref, 1) -> true;
 bif(list_to_tuple, 1) -> true;
 bif(load_module, 2) -> true;
 bif(make_ref, 0) -> true;
@@ -348,6 +350,7 @@ bif(nodes, 1) -> true;
 bif(now, 0) -> true;
 bif(open_port, 2) -> true;
 bif(pid_to_list, 1) -> true;
+bif(port_to_list, 1) -> true;
 bif(port_close, 1) -> true;
 bif(port_command, 2) -> true;
 bif(port_command, 3) -> true;
@@ -361,6 +364,7 @@ bif(process_info, 2) -> true;
 bif(processes, 0) -> true;
 bif(purge_module, 1) -> true;
 bif(put, 2) -> true;
+bif(ref_to_list, 1) -> true;
 bif(register, 2) -> true;
 bif(registered, 0) -> true;
 bif(round, 1) -> true;
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 0789f5dfb7..7c40058dd8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -404,6 +404,10 @@ format_error({not_exported_opaque, {TypeName, Arity}}) ->
 format_error({underspecified_opaque, {TypeName, Arity}}) ->
     io_lib:format("opaque type ~w~s is underspecified and therefore meaningless",
                   [TypeName, gen_type_paren(Arity)]);
+format_error({bad_dialyzer_attribute,Term}) ->
+    io_lib:format("badly formed dialyzer attribute: ~w", [Term]);
+format_error({bad_dialyzer_option,Term}) ->
+    io_lib:format("unknown dialyzer warning option: ~w", [Term]);
 %% --- obsolete? unused? ---
 format_error({format_error, {Fmt, Args}}) ->
     io_lib:format(Fmt, Args).
@@ -796,8 +800,7 @@ attribute_state(Form, St) ->
 %%      State'
 %%  Allow for record, type and opaque type definitions and spec
 %%  declarations to be intersperced within function definitions.
-%%  Dialyzer attributes are also allowed everywhere, but are not
-%%  checked at all.
+%%  Dialyzer attributes are also allowed everywhere.
 
 function_state({attribute,L,record,{Name,Fields}}, St) ->
     record_def(L, Name, Fields, St);
@@ -883,7 +886,8 @@ post_traversal_check(Forms, St0) ->
     StD = check_on_load(StC),
     StE = check_unused_records(Forms, StD),
     StF = check_local_opaque_types(StE),
-    check_callback_information(StF).
+    StG = check_dialyzer_attribute(Forms, StF),
+    check_callback_information(StG).
 
 %% check_behaviour(State0) -> State
 %% Check that the behaviour attribute is valid.
@@ -3116,6 +3120,59 @@ check_local_opaque_types(St) ->
         end,
     dict:fold(FoldFun, St, Ts).
 
+check_dialyzer_attribute(Forms, St0) ->
+    Vals = [{L,V} ||
+               {attribute,L,dialyzer,Val} <- Forms,
+               V0 <- lists:flatten([Val]),
+               V <- case V0 of
+                        {O,F} ->
+                            [{A,B} ||
+                                A <- lists:flatten([O]),
+                                B <- lists:flatten([F])];
+                        T -> [T]
+                    end],
+    {Wellformed, Bad} =
+        lists:partition(fun ({_,{Option,FA}}) when is_atom(Option) ->
+                                is_fa(FA);
+                            ({_,Option}) when is_atom(Option) -> true;
+                            (_) -> false
+                        end, Vals),
+    St1 = foldl(fun ({L,Term}, St) ->
+		  add_error(L, {bad_dialyzer_attribute,Term}, St)
+	  end, St0, Bad),
+    DefFunctions = (gb_sets:to_list(St0#lint.defined) -- pseudolocals()),
+    Fun = fun ({L,{Option,FA}}, St) ->
+                  case is_function_dialyzer_option(Option) of
+                      true ->
+                          case lists:member(FA, DefFunctions) of
+                              true -> St;
+                              false ->
+                                  add_error(L, {undefined_function,FA}, St)
+                          end;
+                      false ->
+                          add_error(L, {bad_dialyzer_option,Option}, St)
+                  end;
+              ({L,Option}, St) ->
+                  case is_module_dialyzer_option(Option) of
+                      true -> St;
+                      false ->
+                          add_error(L, {bad_dialyzer_option,Option}, St)
+                  end
+          end,
+    foldl(Fun, St1, Wellformed).
+
+is_function_dialyzer_option(nowarn_function) -> true;
+is_function_dialyzer_option(Option) ->
+    is_module_dialyzer_option(Option).
+
+is_module_dialyzer_option(Option) ->
+    lists:member(Option,
+                 [no_return,no_unused,no_improper_lists,no_fun_app,
+                  no_match,no_opaque,no_fail_call,no_contracts,
+                  no_behaviours,no_undefined_callbacks,unmatched_returns,
+                  error_handling,race_conditions,no_missing_calls,
+                  specdiffs,overspecs,underspecs,unknown]).
+
 %% icrt_clauses(Clauses, In, ImportVarTable, State) ->
 %%      {UpdVt,State}.
 
@@ -3826,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) ->
     extract_sequence(5, [$p|Fmt], Need);
 extract_sequence(4, [$t, $P | Fmt], Need) ->
     extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$t, $w | Fmt], Need) ->
+    extract_sequence(5, [$w|Fmt], Need);
+extract_sequence(4, [$t, $W | Fmt], Need) ->
+    extract_sequence(5, [$W|Fmt], Need);
 extract_sequence(4, [$t, C | _Fmt], _Need) ->
     {error,"invalid control ~t" ++ [C]};
 extract_sequence(4, [$l, $p | Fmt], Need) ->
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index a54df939bf..168ea4002c 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -69,6 +69,8 @@ format_error(invalid_gnu_1_0_sparsemap) ->
     "Invalid GNU sparse map (version 1.0)";
 format_error({invalid_gnu_0_1_sparsemap, Format}) ->
     lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
+format_error(unsafe_path) ->
+    "The path points above the current working directory";
 format_error({Name,Reason}) ->
     lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
 format_error(Atom) when is_atom(Atom) ->
@@ -120,26 +122,38 @@ do_extract(Handle, Opts) when is_list(Opts) ->
 
 extract1(eof, Reader, _, Acc) when is_list(Acc) ->
     {ok, {ok, lists:reverse(Acc)}, Reader};
+extract1(eof, Reader, _, leading_slash) ->
+    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
+    {ok, ok, Reader};
 extract1(eof, Reader, _, Acc) ->
     {ok, Acc, Reader};
-extract1(#tar_header{name=Name,size=Size}=Header, Reader, Opts, Acc) ->
+extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
     case check_extract(Name, Opts) of
         true ->
-            case do_read(Reader, Size) of
-                {ok, Bin, Reader2} ->
-                    case write_extracted_element(Header, Bin, Opts) of
-                        ok ->
-                            {ok, Acc, Reader2};
-                        {ok, NameBin} when is_list(Acc) ->
-                            {ok, [NameBin | Acc], Reader2};
-                        {error, _} = Err ->
-                            throw(Err)
-                    end;
+            case do_read(Reader0, Size) of
+                {ok, Bin, Reader1} ->
+                    Acc = extract2(Header, Bin, Opts, Acc0),
+                    {ok, Acc, Reader1};
                 {error, _} = Err ->
                     throw(Err)
             end;
         false ->
-            {ok, Acc, skip_file(Reader)}
+            {ok, Acc0, skip_file(Reader0)}
+    end.
+
+extract2(Header, Bin, Opts, Acc) ->
+    case write_extracted_element(Header, Bin, Opts) of
+        ok ->
+            case Header of
+                #tar_header{name="/"++_} ->
+                    leading_slash;
+                #tar_header{} ->
+                    Acc
+            end;
+        {ok, NameBin} when is_list(Acc) ->
+            [NameBin | Acc];
+        {error, _} = Err ->
+            throw(Err)
     end.
 
 %% Checks if the file Name should be extracted.
@@ -1052,14 +1066,11 @@ unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
 
 
 safe_join_path([], Name) ->
-    strip_slashes(Name, both);
+    filename:join([Name]);
 safe_join_path(Prefix, []) ->
-    strip_slashes(Prefix, right);
+    filename:join([Prefix]);
 safe_join_path(Prefix, Name) ->
-    filename:join(strip_slashes(Prefix, right), strip_slashes(Name, both)).
-
-strip_slashes(Str, Direction) ->
-    string:strip(Str, Direction, $/).
+    filename:join(Prefix, Name).
 
 new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
     true = validate_sparse_entries(Sparsemap, RealSize),
@@ -1557,7 +1568,7 @@ write_extracted_element(#tar_header{name=Name,typeflag=Type},
             ok
     end;
 write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
-    Name1 = filename:absname(Name0, Opts#read_opts.cwd),
+    Name1 = make_safe_path(Name0, Opts),
     Created =
         case typeflag(Header#tar_header.typeflag) of
             regular ->
@@ -1585,6 +1596,16 @@ write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
         not_written -> ok
     end.
 
+make_safe_path([$/|Path], Opts) ->
+    make_safe_path(Path, Opts);
+make_safe_path(Path, #read_opts{cwd=Cwd}) ->
+    case filename:safe_relative_path(Path) of
+        unsafe ->
+            throw({error,{Path,unsafe_path}});
+        SafePath ->
+            filename:absname(SafePath, Cwd)
+    end.
+
 create_regular(Name, NameInArchive, Bin, Opts) ->
     case write_extracted_file(Name, Bin, Opts) of
         not_written ->
diff --git a/lib/stdlib/src/gen_event.erl b/lib/stdlib/src/gen_event.erl
index 0aebf1bdc5..0c50b2aa08 100644
--- a/lib/stdlib/src/gen_event.erl
+++ b/lib/stdlib/src/gen_event.erl
@@ -109,7 +109,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
+-optional_callbacks(
+    [handle_info/2, terminate/2, code_change/3, format_status/2]).
 
 %%---------------------------------------------------------------------------
 
@@ -577,6 +578,10 @@ server_update(Handler1, Func, Event, SName) ->
 	    do_terminate(Mod1, Handler1, remove_handler, State,
 			 remove, SName, normal),
 	    no;
+        {'EXIT', {undef, [{Mod1, handle_info, [_,_], _}|_]}} ->
+            error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                     "** Unhandled message: ~p~n", [Mod1, Event]),
+           {ok, Handler1};
 	Other ->
 	    do_terminate(Mod1, Handler1, {error, Other}, State,
 			 Event, SName, crash),
@@ -698,9 +703,15 @@ server_call_update(Handler1, Query, SName) ->
     end.
 
 do_terminate(Mod, Handler, Args, State, LastIn, SName, Reason) ->
-    Res = (catch Mod:terminate(Args, State)),
-    report_terminate(Handler, Reason, Args, State, LastIn, SName, Res),
-    Res.
+    case erlang:function_exported(Mod, terminate, 2) of
+	true ->
+	    Res = (catch Mod:terminate(Args, State)),
+	    report_terminate(Handler, Reason, Args, State, LastIn, SName, Res),
+	    Res;
+	false ->
+	    report_terminate(Handler, Reason, Args, State, LastIn, SName, ok),
+	    ok
+    end.
 
 report_terminate(Handler, crash, {error, Why}, State, LastIn, SName, _) ->
     report_terminate(Handler, Why, State, LastIn, SName);
diff --git a/lib/stdlib/src/gen_fsm.erl b/lib/stdlib/src/gen_fsm.erl
index e925a75fe8..d413da3ea1 100644
--- a/lib/stdlib/src/gen_fsm.erl
+++ b/lib/stdlib/src/gen_fsm.erl
@@ -124,6 +124,26 @@
 	 system_replace_state/2,
 	 format_status/2]).
 
+-deprecated({start, 3, next_major_release}).
+-deprecated({start, 4, next_major_release}).
+-deprecated({start_link, 3, next_major_release}).
+-deprecated({start_link, 4, next_major_release}).
+-deprecated({stop, 1, next_major_release}).
+-deprecated({stop, 3, next_major_release}).
+-deprecated({send_event, 2, next_major_release}).
+-deprecated({sync_send_event, 2, next_major_release}).
+-deprecated({sync_send_event, 3, next_major_release}).
+-deprecated({send_all_state_event, 2, next_major_release}).
+-deprecated({sync_send_all_state_event, 2, next_major_release}).
+-deprecated({sync_send_all_state_event, 3, next_major_release}).
+-deprecated({reply, 2, next_major_release}).
+-deprecated({start_timer, 2, next_major_release}).
+-deprecated({send_event_after, 2, next_major_release}).
+-deprecated({cancel_timer, 1, next_major_release}).
+-deprecated({enter_loop, 4, next_major_release}).
+-deprecated({enter_loop, 5, next_major_release}).
+-deprecated({enter_loop, 6, next_major_release}).
+
 -import(error_logger, [format/2]).
 
 %%% ---------------------------------------------------
@@ -169,7 +189,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
+-optional_callbacks(
+    [handle_info/3, terminate/3, code_change/4, format_status/2]).
 
 %%% ---------------------------------------------------
 %%% Starts a generic state machine.
@@ -466,6 +487,10 @@ handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time) -> %No debug her
 					   StateName, NStateData, [])),
 	    reply(From, Reply),
 	    exit(R);
+        {'EXIT', {undef, [{Mod, handle_info, [_,_,_], _}|_]}} ->
+            error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                     "** Unhandled message: ~p~n", [Mod, Msg]),
+            loop(Parent, Name, StateName, StateData, Mod, infinity, []);
 	{'EXIT', What} ->
 	    terminate(What, Name, Msg, Mod, StateName, StateData, []);
 	Reply ->
@@ -540,24 +565,30 @@ reply(Name, {To, Tag}, Reply, Debug, StateName) ->
 -spec terminate(term(), _, _, atom(), _, _, _) -> no_return().
 
 terminate(Reason, Name, Msg, Mod, StateName, StateData, Debug) ->
-    case catch Mod:terminate(Reason, StateName, StateData) of
-	{'EXIT', R} ->
-	    FmtStateData = format_status(terminate, Mod, get(), StateData),
-	    error_info(R, Name, Msg, StateName, FmtStateData, Debug),
-	    exit(R);
-	_ ->
-	    case Reason of
-		normal ->
-		    exit(normal);
-		shutdown ->
-		    exit(shutdown);
- 		{shutdown,_}=Shutdown ->
- 		    exit(Shutdown);
+    case erlang:function_exported(Mod, terminate, 3) of
+	true ->
+	    case catch Mod:terminate(Reason, StateName, StateData) of
+		{'EXIT', R} ->
+		    FmtStateData = format_status(terminate, Mod, get(), StateData),
+		    error_info(R, Name, Msg, StateName, FmtStateData, Debug),
+		    exit(R);
 		_ ->
-                    FmtStateData = format_status(terminate, Mod, get(), StateData),
-		    error_info(Reason,Name,Msg,StateName,FmtStateData,Debug),
-		    exit(Reason)
-	    end
+		    ok
+	    end;
+	false ->
+	    ok
+    end,
+    case Reason of
+	normal ->
+	    exit(normal);
+	shutdown ->
+	    exit(shutdown);
+ 	{shutdown,_}=Shutdown ->
+ 	    exit(Shutdown);
+	_ ->
+	    FmtStateData1 = format_status(terminate, Mod, get(), StateData),
+	    error_info(Reason,Name,Msg,StateName,FmtStateData1,Debug),
+	    exit(Reason)
     end.
 
 error_info(Reason, Name, Msg, StateName, StateData, Debug) ->
diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl
index 284810c971..8504af86f8 100644
--- a/lib/stdlib/src/gen_server.erl
+++ b/lib/stdlib/src/gen_server.erl
@@ -146,8 +146,8 @@
       State :: term(),
       Status :: term().
 
--optional_callbacks([format_status/2]).
-
+-optional_callbacks(
+    [handle_info/2, terminate/2, code_change/3, format_status/2]).
 
 %%%  -----------------------------------------------------------------
 %%% Starts a generic server.
@@ -602,6 +602,17 @@ try_dispatch(Mod, Func, Msg, State) ->
     catch
 	throw:R ->
 	    {ok, R};
+        error:undef = R when Func == handle_info ->
+            case erlang:function_exported(Mod, handle_info, 2) of
+                false ->
+                    error_logger:warning_msg("** Undefined handle_info in ~p~n"
+                                             "** Unhandled message: ~p~n",
+                                             [Mod, Msg]),
+                    {ok, {noreply, State}};
+                true ->
+                    Stacktrace = erlang:get_stacktrace(),
+                    {'EXIT', {R, Stacktrace}, {R, Stacktrace}}
+            end;
 	error:R ->
 	    Stacktrace = erlang:get_stacktrace(),
 	    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
@@ -625,17 +636,22 @@ try_handle_call(Mod, Msg, From, State) ->
     end.
 
 try_terminate(Mod, Reason, State) ->
-    try
-	{ok, Mod:terminate(Reason, State)}
-    catch
-	throw:R ->
-	    {ok, R};
-	error:R ->
-	    Stacktrace = erlang:get_stacktrace(),
-	    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
-	exit:R ->
-	    Stacktrace = erlang:get_stacktrace(),
-	    {'EXIT', R, {R, Stacktrace}}
+    case erlang:function_exported(Mod, terminate, 2) of
+	true ->
+	    try
+		{ok, Mod:terminate(Reason, State)}
+	    catch
+		throw:R ->
+		    {ok, R};
+		error:R ->
+		    Stacktrace = erlang:get_stacktrace(),
+		    {'EXIT', {R, Stacktrace}, {R, Stacktrace}};
+		exit:R ->
+		    Stacktrace = erlang:get_stacktrace(),
+		    {'EXIT', R, {R, Stacktrace}}
+	   end;
+	false ->
+	    {ok, ok}
     end.
 
 
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 28e5007e5a..5ed2f4d888 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -268,47 +268,61 @@ write(Term, D, false) ->
 
 -spec write(Term, Depth) -> chars() when
       Term :: term(),
+      Depth :: depth();
+           (Term, Options) -> chars() when
+      Term :: term(),
+      Options :: [Option],
+      Option :: {'depth', Depth}
+              | {'encoding', 'latin1' | 'utf8' | 'unicode'},
       Depth :: depth().
 
-write(_Term, 0) -> "...";
-write(Term, _D) when is_integer(Term) -> integer_to_list(Term);
-write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term);
-write(Atom, _D) when is_atom(Atom) -> write_atom(Atom);
-write(Term, _D) when is_port(Term) -> write_port(Term);
-write(Term, _D) when is_pid(Term) -> pid_to_list(Term);
-write(Term, _D) when is_reference(Term) -> write_ref(Term);
-write(<<_/bitstring>>=Term, D) -> write_binary(Term, D);
-write([], _D) -> "[]";
-write({}, _D) -> "{}";
-write([H|T], D) ->
+write(Term, Options) when is_list(Options) ->
+    Depth = get_option(depth, Options, -1),
+    Encoding = get_option(encoding, Options, epp:default_encoding()),
+    write1(Term, Depth, Encoding);
+write(Term, Depth) ->
+    write1(Term, Depth, latin1).
+
+write1(_Term, 0, _E) -> "...";
+write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term);
+write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term);
+write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom);
+write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom);
+write1(Term, _D, _E) when is_port(Term) -> write_port(Term);
+write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term);
+write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term);
+write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D);
+write1([], _D, _E) -> "[]";
+write1({}, _D, _E) -> "{}";
+write1([H|T], D, E) ->
     if
 	D =:= 1 -> "[...]";
 	true ->
-	    [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]]
+	    [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]]
     end;
-write(F, _D) when is_function(F) ->
+write1(F, _D, _E) when is_function(F) ->
     erlang:fun_to_list(F);
-write(Term, D) when is_map(Term) ->
-    write_map(Term, D);
-write(T, D) when is_tuple(T) ->
+write1(Term, D, E) when is_map(Term) ->
+    write_map(Term, D, E);
+write1(T, D, E) when is_tuple(T) ->
     if
 	D =:= 1 -> "{...}";
 	true ->
 	    [${,
-	     [write(element(1, T), D-1)|
-              write_tail(tl(tuple_to_list(T)), D-1, $,)],
+	     [write1(element(1, T), D-1, E)|
+              write_tail(tl(tuple_to_list(T)), D-1, E, $,)],
 	     $}]
     end.
 
 %% write_tail(List, Depth, CharacterBeforeDots)
 %%  Test the terminating case first as this looks better with depth.
 
-write_tail([], _D, _S) -> "";
-write_tail(_, 1, S) -> [S | "..."];
-write_tail([H|T], D, S) ->
-    [$,,write(H, D-1)|write_tail(T, D-1, S)];
-write_tail(Other, D, S) ->
-    [S,write(Other, D-1)].
+write_tail([], _D, _E, _S) -> "";
+write_tail(_, 1, _E, S) -> [S | "..."];
+write_tail([H|T], D, E, S) ->
+    [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)];
+write_tail(Other, D, E, S) ->
+    [S,write1(Other, D-1, E)].
 
 write_port(Port) ->
     erlang:port_to_list(Port).
@@ -316,17 +330,17 @@ write_port(Port) ->
 write_ref(Ref) ->
     erlang:ref_to_list(Ref).
 
-write_map(Map, D) when is_integer(D) ->
-    [$#,${,write_map_body(maps:to_list(Map), D),$}].
+write_map(Map, D, E) when is_integer(D) ->
+    [$#,${,write_map_body(maps:to_list(Map), D, E),$}].
 
-write_map_body(_, 0) -> "...";
-write_map_body([],_) -> [];
-write_map_body([{K,V}],D) -> write_map_assoc(K,V,D);
-write_map_body([{K,V}|KVs], D) ->
-    [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)].
+write_map_body(_, 0, _E) -> "...";
+write_map_body([], _, _E) -> [];
+write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E);
+write_map_body([{K,V}|KVs], D, E) ->
+    [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)].
 
-write_map_assoc(K,V,D) ->
-    [write(K,D - 1),"=>",write(V,D-1)].
+write_map_assoc(K, V, D, E) ->
+    [write1(K, D - 1, E),"=>",write1(V, D-1, E)].
 
 write_binary(B, D) when is_integer(D) ->
     [$<,$<,write_binary_body(B, D),$>,$>].
@@ -344,6 +358,13 @@ write_binary_body(B, _D) ->
     <<X:L>> = B,
     [integer_to_list(X),$:,integer_to_list(L)].
 
+get_option(Key, TupleList, Default) ->
+    case lists:keyfind(Key, 1, TupleList) of
+	false -> Default;
+	{Key, Value} -> Value;
+	_ -> Default
+    end.
+
 %%% There are two functions to write Unicode atoms:
 %%% - they both escape control characters < 160;
 %%% - write_atom() never escapes characters >= 160;
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 3113767614..14d925bacf 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -257,12 +257,12 @@ indentation([], I) -> I.
 %%  This is the main dispatch function for the various formatting commands.
 %%  Field widths and precisions have already been calculated.
 
-control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) ->
-    term(io_lib:write(A, -1), F, Adj, P, Pad);
+control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) ->
+    term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad);
 control($p, [A], F, Adj, P, Pad, Enc, Str, I) ->
     print(A, -1, F, Adj, P, Pad, Enc, Str, I);
-control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) ->
-    term(io_lib:write(A, Depth), F, Adj, P, Pad);
+control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) ->
+    term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad);
 control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) ->
     print(A, Depth, F, Adj, P, Pad, Enc, Str, I);
 control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) ->
diff --git a/lib/stdlib/src/io_lib_fread.erl b/lib/stdlib/src/io_lib_fread.erl
index 6a8f8f728e..983e8d4566 100644
--- a/lib/stdlib/src/io_lib_fread.erl
+++ b/lib/stdlib/src/io_lib_fread.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -159,8 +159,8 @@ fread_field([$t|Format], F, Sup, _Unic) ->
 fread_field(Format, F, Sup, Unic) ->
     {Format,F,Sup,Unic}.
 
-%% fread1(Format, FieldWidth, Suppress, Line, N, Results, AllFormat)
-%% fread1(Format, FieldWidth, Suppress, Line, N, Results)
+%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results, AllFormat)
+%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results)
 %%  The main dispatch function for the formatting commands. Done in two
 %%  stages so format commands that need no input can always be processed.
 
@@ -231,9 +231,8 @@ fread1([$s|Format], none, Sup, U, Line0, N0, Res) ->
 fread1([$s|Format], F, Sup, U, Line0, N, Res) ->
     {Line,Cs} = fread_chars(Line0, F, U),
     fread_string(Cs, Sup, U, Format, Line, N+F, Res);
-%% XXX:PaN Atoms still only latin1...
-fread1([$a|Format], none, Sup, false, Line0, N0, Res) ->
-    {Line,N,Cs} = fread_string_cs(Line0, N0, false),
+fread1([$a|Format], none, Sup, U, Line0, N0, Res) ->
+    {Line,N,Cs} = fread_string_cs(Line0, N0, U),
     fread_atom(Cs, Sup, Format, Line, N, Res);
 fread1([$a|Format], F, Sup, false, Line0, N, Res) ->
     {Line,Cs} = fread_chars(Line0, F, false),
diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl
index d89ff4a624..42094e3088 100644
--- a/lib/stdlib/src/otp_internal.erl
+++ b/lib/stdlib/src/otp_internal.erl
@@ -55,6 +55,55 @@ obsolete_1(erlang, now, 0) ->
 obsolete_1(calendar, local_time_to_universal_time, 1) ->
     {deprecated, {calendar, local_time_to_universal_time_dst, 1}};
 
+%% *** STDLIB added in OTP 20 ***
+
+obsolete_1(gen_fsm, start, 3) ->
+    {deprecated, {gen_statem, start, 3}};
+obsolete_1(gen_fsm, start, 4) ->
+    {deprecated, {gen_statem, start, 4}};
+
+obsolete_1(gen_fsm, start_link, 3) ->
+    {deprecated, {gen_statem, start, 3}};
+obsolete_1(gen_fsm, start_link, 4) ->
+    {deprecated, {gen_statem, start, 4}};
+
+obsolete_1(gen_fsm, stop, 1) ->
+    {deprecated, {gen_statem, stop, 1}};
+obsolete_1(gen_fsm, stop, 3) ->
+    {deprecated, {gen_statem, stop, 3}};
+
+obsolete_1(gen_fsm, enter_loop, 4) ->
+    {deprecated, {gen_statem, enter_loop, 4}};
+obsolete_1(gen_fsm, enter_loop, 5) ->
+    {deprecated, {gen_statem, enter_loop, 5}};
+obsolete_1(gen_fsm, enter_loop, 6) ->
+    {deprecated, {gen_statem, enter_loop, 6}};
+
+obsolete_1(gen_fsm, reply, 2) ->
+    {deprecated, {gen_statem, reply, 2}};
+
+obsolete_1(gen_fsm, send_event, 2) ->
+    {deprecated, {gen_statem, cast, 1}};
+obsolete_1(gen_fsm, send_all_state_event, 2) ->
+    {deprecated, {gen_statem, cast, 1}};
+
+obsolete_1(gen_fsm, sync_send_event, 2) ->
+    {deprecated, {gen_statem, call, 2}};
+obsolete_1(gen_fsm, sync_send_event, 3) ->
+    {deprecated, {gen_statem, call, 3}};
+
+obsolete_1(gen_fsm, sync_send_all_state_event, 2) ->
+    {deprecated, {gen_statem, call, 2}};
+obsolete_1(gen_fsm, sync_send_all_state_event, 3) ->
+    {deprecated, {gen_statem, call, 3}};
+
+obsolete_1(gen_fsm, start_timer, 2) ->
+    {deprecated, {erlang, start_timer, 2}};
+obsolete_1(gen_fsm, cancel_timer, 1) ->
+    {deprecated, {erlang, cancel_timer, 1}};
+obsolete_1(gen_fsm, send_event_after, 2) ->
+    {deprecated, {erlang, send_after, 2}};
+
 %% *** CRYPTO added in OTP 20 ***
 
 obsolete_1(crypto, rand_uniform, 2) ->
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index dfd102f9ef..7a8a5e6d4a 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2015-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2015-2017. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -20,6 +20,9 @@
 %% =====================================================================
 %% Multiple PRNG module for Erlang/OTP
 %% Copyright (c) 2015-2016 Kenji Rikitake
+%%
+%% exrop (xoroshiro116+) added and statistical distribution
+%% improvements by the Erlang/OTP team 2017
 %% =====================================================================
 
 -module(rand).
@@ -28,48 +31,179 @@
 	 export_seed/0, export_seed_s/1,
          uniform/0, uniform/1, uniform_s/1, uniform_s/2,
          jump/0, jump/1,
-	 normal/0, normal_s/1
+	     normal/0, normal/2, normal_s/1, normal_s/3
 	]).
 
 -compile({inline, [exs64_next/1, exsplus_next/1,
-		   exsplus_jump/1,
 		   exs1024_next/1, exs1024_calc/2,
-		   exs1024_jump/1,
+                   exrop_next/1, exrop_next_s/2,
 		   get_52/1, normal_kiwi/1]}).
 
--define(DEFAULT_ALG_HANDLER, exsplus).
+-define(DEFAULT_ALG_HANDLER, exrop).
 -define(SEED_DICT, rand_seed).
 
 %% =====================================================================
+%% Bit fiddling macros
+%% =====================================================================
+
+-define(BIT(Bits), (1 bsl (Bits))).
+-define(MASK(Bits), (?BIT(Bits) - 1)).
+-define(MASK(Bits, X), ((X) band ?MASK(Bits))).
+-define(
+   BSL(Bits, X, N),
+   %% N is evaluated 2 times
+   (?MASK((Bits)-(N), (X)) bsl (N))).
+-define(
+   ROTL(Bits, X, N),
+   %% Bits is evaluated 2 times
+   %% X is evaluated 2 times
+   %% N i evaluated 3 times
+   (?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))).
+
+%%-define(TWO_POW_MINUS53, (math:pow(2, -53))).
+-define(TWO_POW_MINUS53, 1.11022302462515657e-16).
+
+%% =====================================================================
 %% Types
 %% =====================================================================
 
+-type uint64() :: 0..?MASK(64).
+-type uint58() :: 0..?MASK(58).
+
 %% This depends on the algorithm handler function
 -type alg_state() ::
-        exs64_state() | exsplus_state() | exs1024_state() | term().
+        exs64_state() | exsplus_state() | exs1024_state() |
+        exrop_state() | term().
 
-%% This is the algorithm handler function within this module
+%% This is the algorithm handling definition within this module,
+%% and the type to use for plugins.
+%%
+%% The 'type' field must be recognized by the module that implements
+%% the algorithm, to interpret an exported state.
+%%
+%% The 'bits' field indicates how many bits the integer
+%% returned from 'next' has got, i.e 'next' shall return
+%% an random integer in the range 0..(2^Bits - 1).
+%% At least 53 bits is required for the floating point
+%% producing fallbacks.  This field is only used when
+%% the 'uniform' or 'uniform_n' fields are not defined.
+%%
+%% The fields 'next', 'uniform' and 'uniform_n'
+%% implement the algorithm.  If 'uniform' or 'uinform_n'
+%% is not present there is a fallback using 'next' and either
+%% 'bits' or the deprecated 'max'.
+%%
 -type alg_handler() ::
         #{type := alg(),
-          max  := integer() | infinity,
+          bits => non_neg_integer(),
+          weak_low_bits => non_neg_integer(),
+          max => non_neg_integer(), % Deprecated
           next :=
-              fun((alg_state()) -> {non_neg_integer(), alg_state()}),
-          uniform :=
-              fun((state()) -> {float(), state()}),
-          uniform_n :=
-              fun((pos_integer(), state()) -> {pos_integer(), state()}),
-          jump :=
-              fun((state()) -> state())}.
+              fun ((alg_state()) -> {non_neg_integer(), alg_state()}),
+          uniform =>
+              fun ((state()) -> {float(), state()}),
+          uniform_n =>
+              fun ((pos_integer(), state()) -> {pos_integer(), state()}),
+          jump =>
+              fun ((state()) -> state())}.
 
 %% Algorithm state
 -type state() :: {alg_handler(), alg_state()}.
--type builtin_alg() :: exs64 | exsplus | exs1024.
+-type builtin_alg() :: exs64 | exsplus | exsp | exs1024 | exs1024s | exrop.
 -type alg() :: builtin_alg() | atom().
 -type export_state() :: {alg(), alg_state()}.
 -export_type(
    [builtin_alg/0, alg/0, alg_handler/0, alg_state/0,
     state/0, export_state/0]).
--export_type([exs64_state/0, exsplus_state/0, exs1024_state/0]).
+-export_type(
+   [exs64_state/0, exsplus_state/0, exs1024_state/0, exrop_state/0]).
+
+%% =====================================================================
+%% Range macro and helper
+%% =====================================================================
+
+-define(
+   uniform_range(Range, Alg, R, V, MaxMinusRange, I),
+   if
+       0 =< (MaxMinusRange) ->
+           if
+               %% Really work saving in odd cases;
+               %% large ranges in particular
+               (V) < (Range) ->
+                   {(V) + 1, {(Alg), (R)}};
+               true ->
+                   (I) = (V) rem (Range),
+                   if
+                       (V) - (I) =< (MaxMinusRange) ->
+                           {(I) + 1, {(Alg), (R)}};
+                       true ->
+                           %% V in the truncated top range
+                           %% - try again
+                           ?FUNCTION_NAME((Range), {(Alg), (R)})
+                   end
+           end;
+       true ->
+           uniform_range((Range), (Alg), (R), (V))
+   end).
+
+%% For ranges larger than the algorithm bit size
+uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) ->
+    WeakLowBits =
+        case Alg of
+            #{weak_low_bits:=WLB} -> WLB;
+            #{} -> 0
+        end,
+    %% Maybe waste the lowest bit(s) when shifting in new bits
+    Shift = Bits - WeakLowBits,
+    ShiftMask = bnot ?MASK(WeakLowBits),
+    RangeMinus1 = Range - 1,
+    if
+        (Range band RangeMinus1) =:= 0 -> % Power of 2
+            %% Generate at least the number of bits for the range
+            {V1, R1, _} =
+                uniform_range(
+                  Range bsr Bits, Next, R, V, ShiftMask, Shift, Bits),
+            {(V1 band RangeMinus1) + 1, {Alg, R1}};
+        true ->
+            %% Generate a value with at least two bits more than the range
+            %% and try that for a fit, otherwise recurse
+            %%
+            %% Just one bit more should ensure that the generated
+            %% number range is at least twice the size of the requested
+            %% range, which would make the probability to draw a good
+            %% number better than 0.5.  And repeating that until
+            %% success i guess would take 2 times statistically amortized.
+            %% But since the probability for fairly many attemtpts
+            %% is not that low, use two bits more than the range which 
+            %% should make the probability to draw a bad number under 0.25,
+            %% which decreases the bad case probability a lot.
+            {V1, R1, B} =
+                uniform_range(
+                  Range bsr (Bits - 2), Next, R, V, ShiftMask, Shift, Bits),
+            I = V1 rem Range,
+            if
+                (V1 - I) =< (1 bsl B) - Range ->
+                    {I + 1, {Alg, R1}};
+                true ->
+                    %% V1 drawn from the truncated top range
+                    %% - try again
+                    {V2, R2} = Next(R1),
+                    uniform_range(Range, Alg, R2, V2)
+            end
+    end.
+%%
+uniform_range(Range, Next, R, V, ShiftMask, Shift, B) ->
+    if 
+        Range =< 1 ->
+            {V, R, B};
+        true ->
+            {V1, R1} = Next(R),
+            %% Waste the lowest bit(s) when shifting in new bits
+            uniform_range(
+              Range bsr Shift, Next, R1,
+              ((V band ShiftMask) bsl Shift) bor V1,
+              ShiftMask, Shift, B + Shift)
+    end.
 
 %% =====================================================================
 %% API
@@ -131,7 +265,7 @@ seed_s(Alg0, S0 = {_, _, _}) ->
 %%% uniform/0, uniform/1, uniform_s/1, uniform_s/2 are all
 %%% uniformly distributed random numbers.
 
-%% uniform/0: returns a random float X where 0.0 < X < 1.0,
+%% uniform/0: returns a random float X where 0.0 =< X < 1.0,
 %% updating the state in the process dictionary.
 
 -spec uniform() -> X :: float().
@@ -151,12 +285,21 @@ uniform(N) ->
     X.
 
 %% uniform_s/1: given a state, uniform_s/1
-%% returns a random float X where 0.0 < X < 1.0,
+%% returns a random float X where 0.0 =< X < 1.0,
 %% and a new state.
 
 -spec uniform_s(State :: state()) -> {X :: float(), NewState :: state()}.
 uniform_s(State = {#{uniform:=Uniform}, _}) ->
-    Uniform(State).
+    Uniform(State);
+uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
+    {V, R1} = Next(R0),
+    %% Produce floats on the form N * 2^(-53)
+    {(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}};
+uniform_s({#{max:=Max, next:=Next} = Alg, R0}) ->
+    {V, R1} = Next(R0),
+    %% Old broken algorithm with non-uniform density
+    {V / (Max + 1), {Alg, R1}}.
+
 
 %% uniform_s/2: given an integer N >= 1 and a state, uniform_s/2
 %% uniform_s/2 returns a random integer X where 1 =< X =< N,
@@ -164,13 +307,26 @@ uniform_s(State = {#{uniform:=Uniform}, _}) ->
 
 -spec uniform_s(N :: pos_integer(), State :: state()) ->
                        {X :: pos_integer(), NewState :: state()}.
-uniform_s(N, State = {#{uniform_n:=Uniform, max:=Max}, _})
-  when 0 < N, N =< Max ->
-    Uniform(N, State);
-uniform_s(N, State0 = {#{uniform:=Uniform}, _})
-  when is_integer(N), 0 < N ->
-    {F, State} = Uniform(State0),
-    {trunc(F * N) + 1, State}.
+uniform_s(N, State = {#{uniform_n:=UniformN}, _})
+  when is_integer(N), 1 =< N ->
+    UniformN(N, State);
+uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0})
+  when is_integer(N), 1 =< N ->
+    {V, R1} = Next(R0),
+    MaxMinusN = ?BIT(Bits) - N,
+    ?uniform_range(N, Alg, R1, V, MaxMinusN, I);
+uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
+  when is_integer(N), 1 =< N ->
+    %% Old broken algorithm with skewed probability
+    %% and gap in ranges > Max
+    {V, R1} = Next(R0),  
+    if
+        N =< Max ->
+            {(V rem N) + 1, {Alg, R1}};
+        true ->
+            F = V / (Max + 1),
+            {trunc(F * N) + 1, {Alg, R1}}
+    end.
 
 %% jump/1: given a state, jump/1
 %% returns a new state which is equivalent to that
@@ -179,7 +335,10 @@ uniform_s(N, State0 = {#{uniform:=Uniform}, _})
 
 -spec jump(state()) -> NewState :: state().
 jump(State = {#{jump:=Jump}, _}) ->
-    Jump(State).
+    Jump(State);
+jump({#{}, _}) ->
+    erlang:error(not_implemented).
+
 
 %% jump/0: read the internal state and
 %% apply the jump function for the state as in jump/1
@@ -187,7 +346,6 @@ jump(State = {#{jump:=Jump}, _}) ->
 %% then returns the new value.
 
 -spec jump() -> NewState :: state().
-
 jump() ->
     seed_put(jump(seed_get())).
 
@@ -200,6 +358,13 @@ normal() ->
     _ = seed_put(Seed),
     X.
 
+%% normal/2: returns a random float with N(μ, σ²) normal distribution
+%% updating the state in the process dictionary.
+
+-spec normal(Mean :: number(), Variance :: number()) -> float().
+normal(Mean, Variance) ->
+    Mean + (math:sqrt(Variance) * normal()).
+
 %% normal_s/1: returns a random float with standard normal distribution
 %% The Ziggurat Method for generating random variables - Marsaglia and Tsang
 %% Paper and reference code: http://www.jstatsoft.org/v05/i08/
@@ -207,7 +372,7 @@ normal() ->
 -spec normal_s(State :: state()) -> {float(), NewState :: state()}.
 normal_s(State0) ->
     {Sign, R, State} = get_52(State0),
-    Idx = R band 16#FF,
+    Idx = ?MASK(8, R),
     Idx1 = Idx+1,
     {Ki, Wi} = normal_kiwi(Idx1),
     X = R * Wi,
@@ -220,18 +385,15 @@ normal_s(State0) ->
 	false -> normal_s(Idx, Sign, -X, State)
     end.
 
-%% =====================================================================
-%% Internal functions
+%% normal_s/3: returns a random float with normal N(μ, σ²) distribution
 
--define(UINT21MASK, 16#00000000001fffff).
--define(UINT32MASK, 16#00000000ffffffff).
--define(UINT33MASK, 16#00000001ffffffff).
--define(UINT39MASK, 16#0000007fffffffff).
--define(UINT58MASK, 16#03ffffffffffffff).
--define(UINT64MASK, 16#ffffffffffffffff).
+-spec normal_s(Mean :: number(), Variance :: number(), state()) -> {float(), NewS :: state()}.
+normal_s(Mean, Variance, State0) when Variance > 0 ->
+    {X, State} = normal_s(State0),
+    {Mean + (math:sqrt(Variance) * X), State}.
 
--type uint64() :: 0..16#ffffffffffffffff.
--type uint58() :: 0..16#03ffffffffffffff.
+%% =====================================================================
+%% Internal functions
 
 -spec seed_put(state()) -> state().
 seed_put(Seed) ->
@@ -246,20 +408,30 @@ seed_get() ->
 
 %% Setup alg record
 mk_alg(exs64) ->
-    {#{type=>exs64, max=>?UINT64MASK, next=>fun exs64_next/1,
-       uniform=>fun exs64_uniform/1, uniform_n=>fun exs64_uniform/2,
-       jump=>fun exs64_jump/1},
+    {#{type=>exs64, max=>?MASK(64), next=>fun exs64_next/1},
      fun exs64_seed/1};
 mk_alg(exsplus) ->
-    {#{type=>exsplus, max=>?UINT58MASK, next=>fun exsplus_next/1,
-       uniform=>fun exsplus_uniform/1, uniform_n=>fun exsplus_uniform/2,
+    {#{type=>exsplus, max=>?MASK(58), next=>fun exsplus_next/1,
+       jump=>fun exsplus_jump/1},
+     fun exsplus_seed/1};
+mk_alg(exsp) ->
+    {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsplus_next/1,
+       uniform=>fun exsp_uniform/1, uniform_n=>fun exsp_uniform/2,
        jump=>fun exsplus_jump/1},
      fun exsplus_seed/1};
 mk_alg(exs1024) ->
-    {#{type=>exs1024, max=>?UINT64MASK, next=>fun exs1024_next/1,
-       uniform=>fun exs1024_uniform/1, uniform_n=>fun exs1024_uniform/2,
+    {#{type=>exs1024, max=>?MASK(64), next=>fun exs1024_next/1,
        jump=>fun exs1024_jump/1},
-     fun exs1024_seed/1}.
+     fun exs1024_seed/1};
+mk_alg(exs1024s) ->
+    {#{type=>exs1024s, bits=>64, weak_low_bits=>3, next=>fun exs1024_next/1,
+       jump=>fun exs1024_jump/1},
+     fun exs1024_seed/1};
+mk_alg(exrop) ->
+    {#{type=>exrop, bits=>58, weak_low_bits=>1, next=>fun exrop_next/1,
+       uniform=>fun exrop_uniform/1, uniform_n=>fun exrop_uniform/2,
+       jump=>fun exrop_jump/1},
+     fun exrop_seed/1}.
 
 %% =====================================================================
 %% exs64 PRNG: Xorshift64*
@@ -270,29 +442,18 @@ mk_alg(exs1024) ->
 -opaque exs64_state() :: uint64().
 
 exs64_seed({A1, A2, A3}) ->
-    {V1, _} = exs64_next(((A1 band ?UINT32MASK) * 4294967197 + 1)),
-    {V2, _} = exs64_next(((A2 band ?UINT32MASK) * 4294967231 + 1)),
-    {V3, _} = exs64_next(((A3 band ?UINT32MASK) * 4294967279 + 1)),
-    ((V1 * V2 * V3) rem (?UINT64MASK - 1)) + 1.
+    {V1, _} = exs64_next((?MASK(32, A1) * 4294967197 + 1)),
+    {V2, _} = exs64_next((?MASK(32, A2) * 4294967231 + 1)),
+    {V3, _} = exs64_next((?MASK(32, A3) * 4294967279 + 1)),
+    ((V1 * V2 * V3) rem (?MASK(64) - 1)) + 1.
 
 %% Advance xorshift64* state for one step and generate 64bit unsigned integer
 -spec exs64_next(exs64_state()) -> {uint64(), exs64_state()}.
 exs64_next(R) ->
     R1 = R bxor (R bsr 12),
-    R2 = R1 bxor ((R1 band ?UINT39MASK) bsl 25),
+    R2 = R1 bxor ?BSL(64, R1, 25),
     R3 = R2 bxor (R2 bsr 27),
-    {(R3 * 2685821657736338717) band ?UINT64MASK, R3}.
-
-exs64_uniform({Alg, R0}) ->
-    {V, R1} = exs64_next(R0),
-    {V / 18446744073709551616, {Alg, R1}}.
-
-exs64_uniform(Max, {Alg, R}) ->
-    {V, R1} = exs64_next(R),
-    {(V rem Max) + 1, {Alg, R1}}.
-
-exs64_jump(_) ->
-    erlang:error(not_implemented).
+    {?MASK(64, R3 * 2685821657736338717), R3}.
 
 %% =====================================================================
 %% exsplus PRNG: Xorshift116+
@@ -307,10 +468,12 @@ exs64_jump(_) ->
 -dialyzer({no_improper_lists, exsplus_seed/1}).
 
 exsplus_seed({A1, A2, A3}) ->
-    {_, R1} = exsplus_next([(((A1 * 4294967197) + 1) band ?UINT58MASK)|
-			    (((A2 * 4294967231) + 1) band ?UINT58MASK)]),
-    {_, R2} = exsplus_next([(((A3 * 4294967279) + 1) band ?UINT58MASK)|
-			    tl(R1)]),
+    {_, R1} = exsplus_next(
+                [?MASK(58, (A1 * 4294967197) + 1)|
+                 ?MASK(58, (A2 * 4294967231) + 1)]),
+    {_, R2} = exsplus_next(
+                [?MASK(58, (A3 * 4294967279) + 1)|
+                 tl(R1)]),
     R2.
 
 -dialyzer({no_improper_lists, exsplus_next/1}).
@@ -319,17 +482,22 @@ exsplus_seed({A1, A2, A3}) ->
 -spec exsplus_next(exsplus_state()) -> {uint58(), exsplus_state()}.
 exsplus_next([S1|S0]) ->
     %% Note: members s0 and s1 are swapped here
-    S11 = (S1 bxor (S1 bsl 24)) band ?UINT58MASK,
+    S11 = S1 bxor ?BSL(58, S1, 24),
     S12 = S11 bxor S0 bxor (S11 bsr 11) bxor (S0 bsr 41),
-    {(S0 + S12) band ?UINT58MASK, [S0|S12]}.
+    {?MASK(58, S0 + S12), [S0|S12]}.
+
 
-exsplus_uniform({Alg, R0}) ->
+exsp_uniform({Alg, R0}) ->
     {I, R1} = exsplus_next(R0),
-    {I / (?UINT58MASK+1), {Alg, R1}}.
+    %% Waste the lowest bit since it is of lower
+    %% randomness quality than the others
+    {(I bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, R1}}.
 
-exsplus_uniform(Max, {Alg, R}) ->
+exsp_uniform(Range, {Alg, R}) ->
     {V, R1} = exsplus_next(R),
-    {(V rem Max) + 1, {Alg, R1}}.
+    MaxMinusRange = ?BIT(58) - Range,
+    ?uniform_range(Range, Alg, R1, V, MaxMinusRange, I).
+
 
 %% This is the jump function for the exsplus generator, equivalent
 %% to 2^64 calls to next/1; it can be used to generate 2^52
@@ -357,7 +525,7 @@ exsplus_jump(S, AS, _, 0) ->
     {S, AS};
 exsplus_jump(S, [AS0|AS1], J, N) ->
     {_, NS} = exsplus_next(S),
-    case (J band 1) of
+    case ?MASK(1, J) of
         1 ->
             [S0|S1] = S,
             exsplus_jump(NS, [(AS0 bxor S0)|(AS1 bxor S1)], J bsr 1, N-1);
@@ -374,9 +542,9 @@ exsplus_jump(S, [AS0|AS1], J, N) ->
 -opaque exs1024_state() :: {list(uint64()), list(uint64())}.
 
 exs1024_seed({A1, A2, A3}) ->
-    B1 = (((A1 band ?UINT21MASK) + 1) * 2097131) band ?UINT21MASK,
-    B2 = (((A2 band ?UINT21MASK) + 1) * 2097133) band ?UINT21MASK,
-    B3 = (((A3 band ?UINT21MASK) + 1) * 2097143) band ?UINT21MASK,
+    B1 = ?MASK(21, (?MASK(21, A1) + 1) * 2097131),
+    B2 = ?MASK(21, (?MASK(21, A2) + 1) * 2097133),
+    B3 = ?MASK(21, (?MASK(21, A3) + 1) * 2097143),
     {exs1024_gen1024((B1 bsl 43) bor (B2 bsl 22) bor (B3 bsl 1) bor 1),
      []}.
 
@@ -399,11 +567,11 @@ exs1024_gen1024(N, R, L) ->
 %% X: random number output
 -spec exs1024_calc(uint64(), uint64()) -> {uint64(), uint64()}.
 exs1024_calc(S0, S1) ->
-    S11 = S1 bxor ((S1 band ?UINT33MASK) bsl 31),
+    S11 = S1 bxor ?BSL(64, S1, 31),
     S12 = S11 bxor (S11 bsr 11),
     S01 = S0 bxor (S0 bsr 30),
     NS1 = S01 bxor S12,
-    {(NS1 * 1181783497276652981) band ?UINT64MASK, NS1}.
+    {?MASK(64, NS1 * 1181783497276652981), NS1}.
 
 %% Advance xorshift1024* state for one step and generate 64bit unsigned integer
 -spec exs1024_next(exs1024_state()) -> {uint64(), exs1024_state()}.
@@ -414,13 +582,6 @@ exs1024_next({[H], RL}) ->
     NL = [H|lists:reverse(RL)],
     exs1024_next({NL, []}).
 
-exs1024_uniform({Alg, R0}) ->
-    {V, R1} = exs1024_next(R0),
-    {V / 18446744073709551616, {Alg, R1}}.
-
-exs1024_uniform(Max, {Alg, R}) ->
-    {V, R1} = exs1024_next(R),
-    {(V rem Max) + 1, {Alg, R1}}.
 
 %% This is the jump function for the exs1024 generator, equivalent
 %% to 2^512 calls to next(); it can be used to generate 2^512
@@ -467,7 +628,7 @@ exs1024_jump(S, AS, [H|T], _, 0, TN) ->
     exs1024_jump(S, AS, T, H, ?JUMPELEMLEN, TN);
 exs1024_jump({L, RL}, AS, JL, J, N, TN) ->
     {_, NS} = exs1024_next({L, RL}),
-    case (J band 1) of
+    case ?MASK(1, J) of
         1 ->
             AS2 = lists:zipwith(fun(X, Y) -> X bxor Y end,
                         AS, L ++ lists:reverse(RL)),
@@ -477,15 +638,149 @@ exs1024_jump({L, RL}, AS, JL, J, N, TN) ->
     end.
 
 %% =====================================================================
+%% exrop PRNG: Xoroshiro116+
+%%
+%% Reference URL: http://xorshift.di.unimi.it/
+%%
+%% 58 bits fits into an immediate on 64bits Erlang and is thus much faster.
+%% In fact, an immediate number is 60 bits signed in Erlang so you can
+%% add two positive 58 bit numbers and get a 59 bit number that still is
+%% a positive immediate, which is a property we utilize here...
+%%
+%% Modification of the original Xororhiro128+ algorithm to 116 bits
+%% by Sebastiano Vigna.  A lot of thanks for his help and work.
+%% =====================================================================
+%% (a, b, c) = (24, 2, 35)
+%% JUMP Polynomial = 0x9863200f83fcd4a11293241fcb12a (116 bit)
+%%
+%% From http://xoroshiro.di.unimi.it/xoroshiro116plus.c:
+%% ---------------------------------------------------------------------
+%% /* Written in 2017 by Sebastiano Vigna ([email protected]).
+%%
+%% To the extent possible under law, the author has dedicated all copyright
+%% and related and neighboring rights to this software to the public domain
+%% worldwide. This software is distributed without any warranty.
+%%
+%% See <http://creativecommons.org/publicdomain/zero/1.0/>. */
+%%
+%% #include <stdint.h>
+%%
+%% #define UINT58MASK (uint64_t)((UINT64_C(1) << 58) - 1)
+%%
+%% uint64_t s[2];
+%%
+%% static inline uint64_t rotl58(const uint64_t x, int k) {
+%%     return (x << k) & UINT58MASK | (x >> (58 - k));
+%% }
+%% 
+%% uint64_t next(void) {
+%%     uint64_t s1 = s[1];
+%%     const uint64_t s0 = s[0];
+%%     const uint64_t result = (s0 + s1) & UINT58MASK;
+%%
+%%     s1 ^= s0;
+%%     s[0] = rotl58(s0, 24) ^ s1 ^ ((s1 << 2) & UINT58MASK); // a, b
+%%     s[1] = rotl58(s1, 35); // c
+%%     return result;
+%% }
+%%
+%% void jump(void) {
+%%     static const uint64_t JUMP[] =
+%%         { 0x4a11293241fcb12a, 0x0009863200f83fcd };
+%%
+%%     uint64_t s0 = 0;
+%%     uint64_t s1 = 0;
+%%     for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
+%%         for(int b = 0; b < 64; b++) {
+%%             if (JUMP[i] & UINT64_C(1) << b) {
+%%                 s0 ^= s[0];
+%% 	           s1 ^= s[1];
+%% 	       }
+%% 	       next();
+%% 	   }
+%%     s[0] = s0;
+%%     s[1] = s1;
+%% }
+
+-opaque exrop_state() :: nonempty_improper_list(uint58(), uint58()).
+
+-dialyzer({no_improper_lists, exrop_seed/1}).
+exrop_seed({A1, A2, A3}) ->
+    [_|S1] =
+        exrop_next_s(
+          ?MASK(58, (A1 * 4294967197) + 1),
+          ?MASK(58, (A2 * 4294967231) + 1)),
+    exrop_next_s(?MASK(58, (A3 * 4294967279) + 1), S1).
+
+-dialyzer({no_improper_lists, exrop_next_s/2}).
+%% Advance xoroshiro116+ state one step
+%% [a, b, c] = [24, 2, 35]
+-define(
+   exrop_next_s(S0, S1, S1_a),
+   begin
+       S1_a = S1 bxor S0,
+       [?ROTL(58, S0, 24) bxor S1_a bxor ?BSL(58, S1_a, 2)| % a, b
+        ?ROTL(58, S1_a, 35)] % c
+   end).
+exrop_next_s(S0, S1) ->
+    ?exrop_next_s(S0, S1, S1_a).
+
+-dialyzer({no_improper_lists, exrop_next/1}).
+%% Advance xoroshiro116+ state one step, generate 58 bit unsigned integer,
+%% and waste the lowest bit since it is of lower randomness quality
+exrop_next([S0|S1]) ->
+    {?MASK(58, S0 + S1), ?exrop_next_s(S0, S1, S1_a)}.
+
+exrop_uniform({Alg, R}) ->
+    {V, R1} = exrop_next(R),
+    %% Waste the lowest bit since it is of lower
+    %% randomness quality than the others
+    {(V bsr (58-53)) * ?TWO_POW_MINUS53, {Alg, R1}}.
+
+exrop_uniform(Range, {Alg, R}) ->
+    {V, R1} = exrop_next(R),
+    MaxMinusRange = ?BIT(58) - Range,
+    ?uniform_range(Range, Alg, R1, V, MaxMinusRange, I).
+
+%% Split a 116 bit constant into two 58 bit words,
+%% a top '1' marks the end of the low word.
+-define(
+   JUMP_116(Jump),
+   [?BIT(58) bor ?MASK(58, (Jump)),(Jump) bsr 58]).
+%%
+exrop_jump({Alg,S}) ->
+    [J|Js] = ?JUMP_116(16#9863200f83fcd4a11293241fcb12a),
+    {Alg, exrop_jump(S, 0, 0, J, Js)}.
+%%
+-dialyzer({no_improper_lists, exrop_jump/5}).
+exrop_jump(_S, S0, S1, 0, []) -> % End of jump constant
+    [S0|S1];
+exrop_jump(S, S0, S1, 1, [J|Js]) -> % End of word
+    exrop_jump(S, S0, S1, J, Js);
+exrop_jump([S__0|S__1] = _S, S0, S1, J, Js) ->
+    case ?MASK(1, J) of
+        1 ->
+            NewS = exrop_next_s(S__0, S__1),
+            exrop_jump(NewS, S0 bxor S__0, S1 bxor S__1, J bsr 1, Js);
+        0 ->
+            NewS = exrop_next_s(S__0, S__1),
+            exrop_jump(NewS, S0, S1, J bsr 1, Js)
+    end.
+
+%% =====================================================================
 %% Ziggurat cont
 %% =====================================================================
 -define(NOR_R, 3.6541528853610087963519472518).
 -define(NOR_INV_R, 1/?NOR_R).
 
 %% return a {sign, Random51bits, State}
+get_52({Alg=#{bits:=Bits, next:=Next}, S0}) ->
+    %% Use the high bits
+    {Int,S1} = Next(S0),
+    {?BIT(Bits - 51 - 1) band Int, Int bsr (Bits - 51), {Alg, S1}};
 get_52({Alg=#{next:=Next}, S0}) ->
     {Int,S1} = Next(S0),
-    {((1 bsl 51) band Int), Int band ((1 bsl 51)-1), {Alg, S1}}.
+    {?BIT(51) band Int, ?MASK(51, Int), {Alg, S1}}.
 
 %% Slow path
 normal_s(0, Sign, X0, State0) ->
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 52d3c35608..28aab7b590 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,7 +33,12 @@
 
 %%% BIFs
 
--export([compile/1, compile/2, run/2, run/3, inspect/2]).
+-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
+
+-spec version() -> binary().
+
+version() ->
+    erlang:nif_error(undef).
 
 -spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when
       Regexp :: iodata(),
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index 394f4f2fa4..961f5f8a30 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -349,16 +349,10 @@ default_prompt(N) ->
     %% Don't bother flattening the list irrespective of what the
     %% I/O-protocol states.
     case is_alive() of
-	true  -> io_lib:format(<<"(~ts)~w> ">>, [node_string(), N]);
+	true  -> io_lib:format(<<"(~s)~w> ">>, [node(), N]);
 	false -> io_lib:format(<<"~w> ">>, [N])
     end.
 
-node_string() ->
-    case encoding() of
-        latin1 -> io_lib:write_atom_as_latin1(node());
-        _ ->      io_lib:write_atom(node())
-    end.
-
 %% expand_hist(Expressions, CommandNumber)
 %%  Preprocess the expression list replacing all history list commands
 %%  with their expansions.
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 82ab484ea6..d56f27953f 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -39,6 +39,7 @@
 	     edlin_expand,
 	     epp,
 	     eval_bits,
+             erl_abstract_code,
              erl_anno,
 	     erl_bits,
 	     erl_compile,
@@ -99,6 +100,7 @@
 	     sys,
 	     timer,
 	     unicode,
+             unicode_util,
 	     win32reg,
 	     zip]},
   {registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index c659db78bd..a0fa60fbc7 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -17,43 +17,1322 @@
 %% 
 %% %CopyrightEnd%
 %%
+%% A string library that works on grapheme clusters, with the exception
+%% of codepoints of class 'prepend' and non modern (or decomposed) Hangul.
+%% If these codepoints appear, functions like 'find/2' may return a string
+%% which starts inside a grapheme cluster.
+%% These exceptions are made because the codepoints classes are
+%% seldom used and require that we are able look at previous codepoints in
+%% the stream and is thus hard to implement effectively.
+%%
+%% GC (grapheme cluster) implies that the length of string 'ß↑e̊' is 3 though
+%% it is represented by the codepoints [223,8593,101,778] or the
+%% utf8 binary <<195,159,226,134,145,101,204,138>>
+%%
+%% And that searching for strings or graphemes finds the correct positions:
+%%
+%% find("eeeee̊eee", "e̊") -> "e̊ee".:
+%% find("1£4e̊abcdef", "e") -> "ef"
+%%
+%% Most functions expect all input to be normalized to one form,
+%% see unicode:characters_to_nfc and unicode:characters_to_nfd functions.
+%% When appending strings no checking is done to verify that the
+%% result is valid unicode strings.
+%%
+%% The functions may crash for invalid utf-8 input.
+%%
+%% Return value should be kept consistent when return type is
+%% unicode:chardata() i.e. binary input => binary output,
+%% list input => list output mixed input => mixed output
+%%
 -module(string).
 
--export([len/1,equal/2,concat/2,chr/2,rchr/2,str/2,rstr/2,
-	 span/2,cspan/2,substr/2,substr/3,tokens/2,chars/2,chars/3]).
+-export([is_empty/1, length/1, to_graphemes/1,
+         reverse/1,
+         equal/2, equal/3, equal/4,
+         slice/2, slice/3,
+         pad/2, pad/3, pad/4, trim/1, trim/2, trim/3, chomp/1,
+         take/2, take/3, take/4,
+         lexemes/2, nth_lexeme/3,
+         uppercase/1, lowercase/1, titlecase/1,casefold/1,
+         prefix/2,
+         split/2,split/3,replace/3,replace/4,
+         find/2,find/3,
+         next_codepoint/1, next_grapheme/1
+        ]).
+
+-export([to_float/1, to_integer/1]).
+
+%% Old (will be deprecated) lists/string API kept for backwards compability
+-export([len/1, concat/2, % equal/2, (extended in the new api)
+         chr/2,rchr/2,str/2,rstr/2,
+	 span/2,cspan/2,substr/2,substr/3, tokens/2,
+         chars/2,chars/3]).
 -export([copies/2,words/1,words/2,strip/1,strip/2,strip/3,
 	 sub_word/2,sub_word/3,left/2,left/3,right/2,right/3,
 	 sub_string/2,sub_string/3,centre/2,centre/3, join/2]).
 -export([to_upper/1, to_lower/1]).
+%%
+-import(lists,[member/2]).
 
--import(lists,[reverse/1,member/2]).
+-compile({no_auto_import,[length/1]}).
 
-%%---------------------------------------------------------------------------
+-export_type([grapheme_cluster/0]).
 
-%%% BIFs
+-type grapheme_cluster() :: char() | [char()].
+-type direction() :: 'leading' | 'trailing'.
 
--export([to_float/1, to_integer/1]).
+-dialyzer({no_improper_lists, stack/2}).
+%%% BIFs internal (not documented) should not to be used outside of this module
+%%% May be removed
+-export([list_to_float/1, list_to_integer/1]).
 
--spec to_float(String) -> {Float, Rest} | {error, Reason} when
+%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
+-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
       String :: string(),
       Float :: float(),
       Rest :: string(),
-      Reason :: no_float | not_a_list.
+      Reason :: 'no_float' | 'not_a_list'.
 
-to_float(_) ->
+list_to_float(_) ->
     erlang:nif_error(undef).
 
--spec to_integer(String) -> {Int, Rest} | {error, Reason} when
+-spec list_to_integer(String) -> {Int, Rest} | {'error', Reason} when
       String :: string(),
       Int :: integer(),
       Rest :: string(),
-      Reason :: no_integer | not_a_list.
+      Reason :: 'no_integer' | 'not_a_list'.
 
-to_integer(_) ->
+list_to_integer(_) ->
     erlang:nif_error(undef).
 
 %%% End of BIFs
 
+%% Check if string is the empty string
+-spec is_empty(String::unicode:chardata()) -> boolean().
+is_empty([]) -> true;
+is_empty(<<>>) -> true;
+is_empty([L|R]) -> is_empty(L) andalso is_empty(R);
+is_empty(_) -> false.
+
+%% Count the number of grapheme clusters in chardata
+-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(CD) ->
+    length_1(unicode_util:gc(CD), 0).
+
+%% Convert a string to a list of grapheme clusters
+-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
+to_graphemes(CD0) ->
+    case unicode_util:gc(CD0) of
+        [GC|CD] -> [GC|to_graphemes(CD)];
+        [] -> []
+    end.
+
+%% Compare two strings return boolean, assumes that the input are
+%% normalized to same form, see unicode:characters_to_nfX_xxx(..)
+-spec equal(A, B) -> boolean() when
+      A::unicode:chardata(),
+      B::unicode:chardata().
+equal(A,B) when is_binary(A), is_binary(B) ->
+    A =:= B;
+equal(A,B) ->
+    equal_1(A,B).
+
+%% Compare two strings return boolean, assumes that the input are
+%% normalized to same form, see unicode:characters_to_nfX_xxx(..)
+%% does casefold on the fly
+-spec equal(A, B, IgnoreCase) -> boolean() when
+      A::unicode:chardata(),
+      B::unicode:chardata(),
+      IgnoreCase :: boolean().
+equal(A, B, false) ->
+    equal(A,B);
+equal(A, B, true) ->
+    equal_nocase(A,B).
+
+%% Compare two strings return boolean
+%% if specified does casefold and normalization on the fly
+-spec equal(A, B, IgnoreCase, Norm) -> boolean() when
+      A :: unicode:chardata(),
+      B :: unicode:chardata(),
+      IgnoreCase :: boolean(),
+      Norm :: 'none' | 'nfc' | 'nfd' | 'nfkc' | 'nfkd'.
+equal(A, B, Case, none) ->
+    equal(A,B,Case);
+equal(A, B, false, Norm) ->
+    equal_norm(A, B, Norm);
+equal(A, B, true, Norm) ->
+    equal_norm_nocase(A, B, Norm).
+
+%% Reverse grapheme clusters
+-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(CD) ->
+    reverse_1(CD, []).
+
+%% Slice a string and return rest of string
+%% Note: counts grapheme_clusters
+-spec slice(String, Start) -> Slice when
+      String::unicode:chardata(),
+      Start :: non_neg_integer(),
+      Slice :: unicode:chardata().
+slice(CD, N) when is_integer(N), N >= 0 ->
+    slice_l(CD, N, is_binary(CD)).
+
+-spec slice(String, Start, Length) -> Slice when
+      String::unicode:chardata(),
+      Start :: non_neg_integer(),
+      Length :: 'infinity' | non_neg_integer(),
+      Slice :: unicode:chardata().
+slice(CD, N, Length)
+  when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
+    slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+slice(CD, N, infinity) ->
+    slice_l(CD, N, is_binary(CD));
+slice(CD, _, 0) ->
+    case is_binary(CD) of
+        true  -> <<>>;
+        false -> []
+    end.
+
+%% Pad a string to desired length
+-spec pad(String, Length) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer().
+pad(CD, Length) ->
+    pad(CD, Length, trailing, $\s).
+
+-spec pad(String, Length, Dir) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer(),
+      Dir :: direction() | 'both'.
+pad(CD, Length, Dir) ->
+    pad(CD, Length, Dir, $\s).
+
+-spec pad(String, Length, Dir, Char) -> unicode:charlist() when
+      String ::unicode:chardata(),
+      Length :: integer(),
+      Dir :: direction() | 'both',
+      Char :: grapheme_cluster().
+pad(CD, Length, leading, Char) when is_integer(Length) ->
+    Len = length(CD),
+    [lists:duplicate(max(0, Length-Len), Char), CD];
+pad(CD, Length, trailing, Char) when is_integer(Length) ->
+    Len = length(CD),
+    [CD|lists:duplicate(max(0, Length-Len), Char)];
+pad(CD, Length, both, Char) when is_integer(Length) ->
+    Len = length(CD),
+    Size = max(0, Length-Len),
+    Pre = lists:duplicate(Size div 2, Char),
+    Post = case Size rem 2 of
+               1 -> [Char];
+               _ -> []
+           end,
+    [Pre, CD, Pre|Post].
+
+%%  Strip characters from whitespace or Separator in Direction
+-spec trim(String) -> unicode:chardata() when
+      String :: unicode:chardata().
+trim(Str) ->
+    trim(Str, both, unicode_util:whitespace()).
+
+-spec trim(String, Dir) -> unicode:chardata() when
+      String :: unicode:chardata(),
+      Dir :: direction() | 'both'.
+trim(Str, Dir) ->
+    trim(Str, Dir, unicode_util:whitespace()).
+
+-spec trim(String, Dir, Characters) -> unicode:chardata() when
+      String :: unicode:chardata(),
+      Dir :: direction() | 'both',
+      Characters :: [grapheme_cluster()].
+trim(Str, _, []) -> Str;
+trim(Str, leading, Sep) when is_list(Sep) ->
+    trim_l(Str, search_pattern(Sep));
+trim(Str, trailing, Sep) when is_list(Sep) ->
+    trim_t(Str, 0, search_pattern(Sep));
+trim(Str, both, Sep0) when is_list(Sep0) ->
+    Sep = search_pattern(Sep0),
+    trim_t(trim_l(Str,Sep), 0, Sep).
+
+%% Delete trailing newlines or \r\n
+-spec chomp(String::unicode:chardata()) -> unicode:chardata().
+chomp(Str) ->
+    trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+
+%% Split String into two parts where the leading part consists of Characters
+-spec take(String, Characters) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, Sep) ->
+    take(Str, Sep, false, leading).
+-spec take(String, Characters, Complement) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Complement::boolean(),
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, Sep, Complement) ->
+    take(Str, Sep, Complement, leading).
+-spec take(String, Characters, Complement, Dir) -> {Leading, Trailing} when
+      String::unicode:chardata(),
+      Characters::[grapheme_cluster()],
+      Complement::boolean(),
+      Dir::direction(),
+      Leading::unicode:chardata(),
+      Trailing::unicode:chardata().
+take(Str, [], Complement, Dir) ->
+    Empty = case is_binary(Str) of true -> <<>>; false -> [] end,
+    case {Complement,Dir} of
+        {false, leading} -> {Empty, Str};
+        {false, trailing} -> {Str, Empty};
+        {true,  leading} -> {Str, Empty};
+        {true,  trailing} -> {Empty, Str}
+    end;
+take(Str, Sep0, false, leading) ->
+    Sep = search_pattern(Sep0),
+    take_l(Str, Sep, []);
+take(Str, Sep0, true, leading) ->
+    Sep = search_pattern(Sep0),
+    take_lc(Str, Sep, []);
+take(Str, Sep0, false, trailing) ->
+    Sep = search_pattern(Sep0),
+    take_t(Str, 0, Sep);
+take(Str, Sep0, true, trailing) ->
+    Sep = search_pattern(Sep0),
+    take_tc(Str, 0, Sep).
+
+%% Uppercase all chars in Str
+-spec uppercase(String::unicode:chardata()) -> unicode:chardata().
+uppercase(CD) when is_list(CD) ->
+    uppercase_list(CD);
+uppercase(CD) when is_binary(CD) ->
+    uppercase_bin(CD,<<>>).
+
+%% Lowercase all chars in Str
+-spec lowercase(String::unicode:chardata()) -> unicode:chardata().
+lowercase(CD) when is_list(CD) ->
+    lowercase_list(CD);
+lowercase(CD) when is_binary(CD) ->
+    lowercase_bin(CD,<<>>).
+
+%% Make a titlecase of the first char in Str
+-spec titlecase(String::unicode:chardata()) -> unicode:chardata().
+titlecase(CD) when is_list(CD) ->
+    case unicode_util:titlecase(CD) of
+        [GC|Tail] -> append(GC,Tail);
+        Empty -> Empty
+    end;
+titlecase(CD) when is_binary(CD) ->
+    case unicode_util:titlecase(CD) of
+        [CP|Chars] when is_integer(CP) -> <<CP/utf8,Chars/binary>>;
+        [CPs|Chars] ->
+            << << <<CP/utf8>> || CP <- CPs>>/binary, Chars/binary>>;
+        [] -> <<>>
+    end.
+
+%% Make a comparable string of the Str should be used for equality tests only
+-spec casefold(String::unicode:chardata()) -> unicode:chardata().
+casefold(CD) when is_list(CD) ->
+    casefold_list(CD);
+casefold(CD) when is_binary(CD) ->
+    casefold_bin(CD,<<>>).
+
+-spec to_integer(String) -> {Int, Rest} | {'error', Reason} when
+      String :: unicode:chardata(),
+      Int :: integer(),
+      Rest :: unicode:chardata(),
+      Reason :: 'no_integer' | badarg.
+
+to_integer(String) ->
+    try take(String, "+-0123456789") of
+        {Head, Tail} ->
+            case is_empty(Head) of
+                true -> {error, no_integer};
+                false ->
+                    List = unicode:characters_to_list(Head),
+                    case string:list_to_integer(List) of
+                        {error, _} = Err -> Err;
+                        {Int, Rest} ->
+                            to_number(String, Int, Rest, List, Tail)
+                    end
+            end
+    catch _:_ -> {error, badarg}
+    end.
+
+-spec to_float(String) -> {Float, Rest} | {'error', Reason} when
+      String :: unicode:chardata(),
+      Float :: float(),
+      Rest :: unicode:chardata(),
+      Reason :: 'no_float' | 'badarg'.
+
+to_float(String) ->
+    try take(String, "+-0123456789eE.,") of
+        {Head, Tail} ->
+            case is_empty(Head) of
+                true -> {error, no_float};
+                false ->
+                    List = unicode:characters_to_list(Head),
+                    case string:list_to_float(List) of
+                        {error, _} = Err -> Err;
+                        {Float, Rest} ->
+                            to_number(String, Float, Rest, List, Tail)
+                    end
+            end
+    catch _:_ -> {error, badarg}
+    end.
+
+to_number(String, Number, Rest, List, _Tail) when is_binary(String) ->
+    BSz = length(List)-length(Rest),
+    <<_:BSz/binary, Cont/binary>> = String,
+    {Number, Cont};
+to_number(_, Number, Rest, _, Tail) ->
+    {Number, concat(Rest,Tail)}.
+
+%% Return the remaining string with prefix removed or else nomatch
+-spec prefix(String::unicode:chardata(), Prefix::unicode:chardata()) ->
+                    'nomatch' | unicode:chardata().
+prefix(Str, []) -> Str;
+prefix(Str, Prefix0) ->
+    Prefix = unicode:characters_to_list(Prefix0),
+    case prefix_1(Str, Prefix) of
+        [] when is_binary(Str) -> <<>>;
+        Res -> Res
+    end.
+
+%% split String with the first occurrence of SearchPattern, return list of splits
+-spec split(String, SearchPattern) -> [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata().
+split(String, SearchPattern) ->
+    split(String, SearchPattern, leading).
+
+%% split String with SearchPattern, return list of splits
+-spec split(String, SearchPattern, Where) -> [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Where :: direction() | 'all'.
+split(String, SearchPattern, Where) ->
+    case is_empty(SearchPattern) of
+        true -> [String];
+        false ->
+            SearchPatternCPs = unicode:characters_to_list(SearchPattern),
+            case split_1(String, SearchPatternCPs, 0, Where, [], []) of
+                {_Curr, []} -> [String];
+                {_Curr, Acc} when Where =:= trailing -> Acc;
+                {Curr, Acc} when Where =:= all -> lists:reverse([Curr|Acc]);
+                Acc when is_list(Acc) -> Acc
+            end
+    end.
+
+%% Replace the first SearchPattern in String with Replacement
+-spec replace(String, SearchPattern, Replacement) ->
+                     [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Replacement :: unicode:chardata().
+replace(String, SearchPattern, Replacement) ->
+    lists:join(Replacement, split(String, SearchPattern)).
+
+%% Replace Where SearchPattern in String with Replacement
+-spec replace(String, SearchPattern, Replacement, Where) ->
+                     [unicode:chardata()] when
+      String :: unicode:chardata(),
+      SearchPattern :: unicode:chardata(),
+      Replacement :: unicode:chardata(),
+      Where :: direction() | 'all'.
+replace(String, SearchPattern, Replacement, Where) ->
+    lists:join(Replacement, split(String, SearchPattern, Where)).
+
+%% Split Str into a list of chardata separated by one of the grapheme
+%% clusters in Seps
+-spec lexemes(String::unicode:chardata(),
+              SeparatorList::[grapheme_cluster()]) ->
+                     [unicode:chardata()].
+lexemes([], _) -> [];
+lexemes(Str, Seps0) when is_list(Seps0) ->
+    Seps = search_pattern(Seps0),
+    lexemes_m(Str, Seps, []).
+
+-spec nth_lexeme(String, N, SeparatorList) -> unicode:chardata() when
+      String::unicode:chardata(),
+      N::non_neg_integer(),
+      SeparatorList::[grapheme_cluster()].
+
+nth_lexeme(Str, 1, []) -> Str;
+nth_lexeme(Str, N, Seps0) when is_list(Seps0), is_integer(N), N > 0 ->
+    Seps = search_pattern(Seps0),
+    nth_lexeme_m(Str, Seps, N).
+
+%% find first SearchPattern in String return rest of string
+-spec find(String, SearchPattern) -> unicode:chardata() | 'nomatch' when
+      String::unicode:chardata(),
+      SearchPattern::unicode:chardata().
+find(String, SearchPattern) ->
+    find(String, SearchPattern, leading).
+
+%% find SearchPattern in String (search in Dir direction) return rest of string
+-spec find(String, SearchPattern, Dir) -> unicode:chardata() | 'nomatch' when
+      String::unicode:chardata(),
+      SearchPattern::unicode:chardata(),
+      Dir::direction().
+find(String, "", _) -> String;
+find(String, <<>>, _) -> String;
+find(String, SearchPattern, leading) ->
+    find_l(String, unicode:characters_to_list(SearchPattern));
+find(String, SearchPattern, trailing) ->
+    find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
+
+%% Fetch first codepoint and return rest in tail
+-spec next_grapheme(String::unicode:chardata()) ->
+                           maybe_improper_list(grapheme_cluster(),unicode:chardata()).
+next_grapheme(CD) -> unicode_util:gc(CD).
+
+%% Fetch first grapheme cluster and return rest in tail
+-spec next_codepoint(String::unicode:chardata()) ->
+                            maybe_improper_list(char(),unicode:chardata()).
+next_codepoint(CD) -> unicode_util:cp(CD).
+
+%% Internals
+
+length_1([_|Rest], N) ->
+    length_1(unicode_util:gc(Rest), N+1);
+length_1([], N) ->
+    N.
+
+equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
+    A =:= B andalso equal_1(AR, BR);
+equal_1([], BR) -> is_empty(BR);
+equal_1(A0,B0) ->
+    case {unicode_util:cp(A0), unicode_util:cp(B0)} of
+        {[CP|A],[CP|B]} -> equal_1(A,B);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_nocase(A, A) -> true;
+equal_nocase(A0, B0) ->
+    case {unicode_util:cp(unicode_util:casefold(A0)),
+          unicode_util:cp(unicode_util:casefold(B0))} of
+        {[CP|A],[CP|B]} -> equal_nocase(A,B);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_norm(A, A, _Norm) -> true;
+equal_norm(A0, B0, Norm) ->
+    case {unicode_util:cp(unicode_util:Norm(A0)),
+          unicode_util:cp(unicode_util:Norm(B0))} of
+        {[CP|A],[CP|B]} -> equal_norm(A,B, Norm);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+equal_norm_nocase(A, A, _Norm) -> true;
+equal_norm_nocase(A0, B0, Norm) ->
+    case {unicode_util:cp(unicode_util:casefold(unicode_util:Norm(A0))),
+          unicode_util:cp(unicode_util:casefold(unicode_util:Norm(B0)))} of
+        {[CP|A],[CP|B]} -> equal_norm_nocase(A,B, Norm);
+        {[], []} -> true;
+        _ -> false
+    end.
+
+reverse_1(CD, Acc) ->
+    case unicode_util:gc(CD) of
+        [GC|Rest] -> reverse_1(Rest, [GC|Acc]);
+        [] -> Acc
+    end.
+
+slice_l(CD, N, Binary) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [_|Cont] -> slice_l(Cont, N-1, Binary);
+        [] when Binary -> <<>>;
+        [] -> []
+    end;
+slice_l(Cont, 0, Binary) ->
+    case is_empty(Cont) of
+        true when Binary -> <<>>;
+        _ -> Cont
+    end.
+
+slice_trail(CD, N) when is_list(CD) ->
+    slice_list(CD, N);
+slice_trail(CD, N) when is_binary(CD) ->
+    slice_bin(CD, N, CD).
+
+slice_list(CD, N) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [GC|Cont] -> append(GC, slice_list(Cont, N-1));
+        [] -> []
+    end;
+slice_list(_, 0) ->
+    [].
+
+slice_bin(CD, N, Orig) when N > 0 ->
+    case unicode_util:gc(CD) of
+        [_|Cont] -> slice_bin(Cont, N-1, Orig);
+        [] -> Orig
+    end;
+slice_bin([], 0, Orig) ->
+    Orig;
+slice_bin(CD, 0, Orig) ->
+    Sz = byte_size(Orig) - byte_size(CD),
+    <<Keep:Sz/binary, _/binary>> = Orig,
+    Keep.
+
+uppercase_list(CPs0) ->
+    case unicode_util:uppercase(CPs0) of
+        [Char|CPs] -> append(Char,uppercase_list(CPs));
+        [] -> []
+    end.
+
+uppercase_bin(CPs0, Acc) ->
+    case unicode_util:uppercase(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            uppercase_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            uppercase_bin(CPs, <<Acc/binary,
+                                 << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+lowercase_list(CPs0) ->
+    case unicode_util:lowercase(CPs0) of
+        [Char|CPs] -> append(Char,lowercase_list(CPs));
+        [] -> []
+    end.
+
+lowercase_bin(CPs0, Acc) ->
+    case unicode_util:lowercase(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            lowercase_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            lowercase_bin(CPs, <<Acc/binary,
+                                 << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+casefold_list(CPs0) ->
+    case unicode_util:casefold(CPs0) of
+        [Char|CPs] -> append(Char, casefold_list(CPs));
+        [] -> []
+    end.
+
+casefold_bin(CPs0, Acc) ->
+    case unicode_util:casefold(CPs0) of
+        [Char|CPs] when is_integer(Char) ->
+            casefold_bin(CPs, <<Acc/binary, Char/utf8>>);
+        [Chars|CPs] ->
+            casefold_bin(CPs, <<Acc/binary,
+                                << <<CP/utf8>> || CP <- Chars>>/binary >>);
+        [] -> Acc
+    end.
+
+
+trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Sep) of
+        {nomatch, Cont} -> trim_l(Cont, Sep);
+        Keep -> Keep
+    end;
+trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true -> trim_l(Cs, Sep);
+                false -> Str
+            end;
+        [] -> []
+    end;
+trim_l(Bin, Sep) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Sep) of
+        {nomatch,_} -> <<>>;
+        [Keep] -> Keep
+    end.
+
+trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Cont0, Sep) of
+        {nomatch,_} ->
+            stack(Bin, trim_t(Cont0, 0, Sep));
+        [SepStart|Cont1] ->
+            case bin_search_inv(SepStart, Cont1, Sep) of
+                {nomatch, Cont} ->
+                    Tail = trim_t(Cont, 0, Sep),
+                    case is_empty(Tail) of
+                        true ->
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, _/binary>> = Bin,
+                            Keep;
+                        false ->
+                            Used = cp_prefix(Cont0, Cont),
+                            stack(Bin, stack(Used, Tail))
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    trim_t([Bin|Cont], KeepSz, Sep)
+            end
+    end;
+trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str),
+                    case lists:member(GC, GCs) of
+                        true ->
+                            Tail = trim_t(Cs1, 0, Sep),
+                            case is_empty(Tail) of
+                                true -> [];
+                                false -> append(GC,Tail)
+                            end;
+                        false ->
+                            append(GC,trim_t(Cs1, 0, Sep))
+                    end;
+                false ->
+                    append(CP,trim_t(Cs, 0, Sep))
+            end;
+        [] -> []
+    end;
+trim_t(Bin, N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Sep) of
+        {nomatch,_} -> Bin;
+        [SepStart] ->
+            case bin_search_inv(SepStart, [], Sep) of
+                {nomatch,_} ->
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Keep:KeepSz/binary, _/binary>> = Bin,
+                    Keep;
+                [NonSep] ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    trim_t(Bin, KeepSz, Sep)
+            end
+    end.
+
+take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Sep) of
+        {nomatch, Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+        [Bin1|_]=After when is_binary(Bin1) ->
+            First = byte_size(Bin) - byte_size(Bin1),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep,Acc), After}
+    end;
+take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true -> take_l(Cs, Sep, append(rev(C),Acc));
+                false -> {rev(Acc), Str}
+            end;
+        [] -> {rev(Acc), []}
+    end;
+take_l(Bin, Sep, Acc) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Sep) of
+        {nomatch,_} ->
+            {btoken(Bin, Acc), <<>>};
+        [After] ->
+            First = byte_size(Bin) - byte_size(After),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep, Acc), After}
+    end.
+
+take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Sep) of
+        {nomatch, Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+        [Bin1|_]=After when is_binary(Bin1) ->
+            First = byte_size(Bin) - byte_size(Bin1),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep,Acc), After}
+    end;
+take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+    case unicode_util:gc(Str) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                false -> take_lc(Cs, Sep, append(rev(C),Acc));
+                true  -> {rev(Acc), Str}
+            end;
+        [] -> {rev(Acc), []}
+    end;
+take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
+    case bin_search(Bin, [], Sep) of
+        {nomatch,_} ->
+            {btoken(Bin, Acc), <<>>};
+        [After] ->
+            First = byte_size(Bin) - byte_size(After),
+            <<Keep:First/binary, _/binary>> = Bin,
+            {btoken(Keep, Acc), After}
+    end.
+
+take_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Cont0, Sep) of
+        {nomatch,Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            {Head, Tail} = take_t(Cont, 0, Sep),
+            {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
+        [SepStart|Cont1] ->
+            case bin_search_inv(SepStart, Cont1, Sep) of
+                {nomatch, Cont} ->
+                    {Head, Tail} = take_t(Cont, 0, Sep),
+                    Used = cp_prefix(Cont0, Cont),
+                    case equal(Tail, Cont) of
+                        true ->
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, End/binary>> = Bin,
+                            {stack(Keep,Head), stack(stack(End,Used),Tail)};
+                        false ->
+                            {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_t([Bin|Cont], KeepSz, Sep)
+            end
+    end;
+take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str),
+                    case lists:member(GC, GCs) of
+                        true ->
+                            {Head, Tail} = take_t(Cs1, 0, Sep),
+                            case equal(Tail, Cs1) of
+                                true -> {Head, append(GC,Tail)};
+                                false -> {append(GC,Head), Tail}
+                            end;
+                        false ->
+                            {Head, Tail} = take_t(Cs, 0, Sep),
+                            {append(CP,Head), Tail}
+                    end;
+                false ->
+                    {Head, Tail} = take_t(Cs, 0, Sep),
+                    {append(CP,Head), Tail}
+            end;
+        [] -> {[],[]}
+    end;
+take_t(Bin, N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search(Rest, Sep) of
+        {nomatch,_} -> {Bin, <<>>};
+        [SepStart] ->
+            case bin_search_inv(SepStart, [], Sep) of
+                {nomatch,_} ->
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Before:KeepSz/binary, End/binary>> = Bin,
+                    {Before, End};
+                [NonSep] ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_t(Bin, KeepSz, Sep)
+            end
+    end.
+
+take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search_inv(Rest, Cont0, Sep) of
+        {nomatch,Cont} ->
+            Used = cp_prefix(Cont0, Cont),
+            {Head, Tail} = take_tc(Cont, 0, Sep),
+            {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
+        [SepStart|Cont1] ->
+            case bin_search(SepStart, Cont1, Sep) of
+                {nomatch, Cont} ->
+                    {Head, Tail} = take_tc(Cont, 0, Sep),
+                    Used = cp_prefix(Cont0, Cont),
+                    case equal(Tail, Cont) of
+                        true ->
+                            KeepSz = byte_size(Bin) - byte_size(SepStart),
+                            <<Keep:KeepSz/binary, End/binary>> = Bin,
+                            {stack(Keep,Head), stack(stack(End,Used),Tail)};
+                        false ->
+                            {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
+                    end;
+                [NonSep|Cont] when is_binary(NonSep) ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_tc([Bin|Cont], KeepSz, Sep)
+            end
+    end;
+take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
+    case unicode_util:cp(Str) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs1] = unicode_util:gc(Str),
+                    case lists:member(GC, GCs) of
+                        false ->
+                            {Head, Tail} = take_tc(Cs1, 0, Sep),
+                            case equal(Tail, Cs1) of
+                                true -> {Head, append(GC,Tail)};
+                                false -> {append(GC,Head), Tail}
+                            end;
+                        true ->
+                            {Head, Tail} = take_tc(Cs1, 0, Sep),
+                            {append(GC,Head), Tail}
+                    end;
+                false ->
+                    {Head, Tail} = take_tc(Cs, 0, Sep),
+                    case equal(Tail, Cs) of
+                        true  -> {Head, append(CP,Tail)};
+                        false -> {append(CP,Head), Tail}
+                    end
+            end;
+        [] -> {[],[]}
+    end;
+take_tc(Bin, N, Sep) when is_binary(Bin) ->
+    <<_:N/binary, Rest/binary>> = Bin,
+    case bin_search_inv(Rest, [], Sep) of
+        {nomatch,_} -> {Bin, <<>>};
+        [SepStart] ->
+            case bin_search(SepStart, [], Sep) of
+                {nomatch,_} ->
+                    KeepSz = byte_size(Bin) - byte_size(SepStart),
+                    <<Before:KeepSz/binary, End/binary>> = Bin,
+                    {Before, End};
+                [NonSep] ->
+                    KeepSz = byte_size(Bin) - byte_size(NonSep),
+                    take_tc(Bin, KeepSz, Sep)
+            end
+    end.
+
+prefix_1(Cs, []) -> Cs;
+prefix_1(Cs, [_]=Pre) ->
+    prefix_2(unicode_util:gc(Cs), Pre);
+prefix_1(Cs, Pre) ->
+    prefix_2(unicode_util:cp(Cs), Pre).
+
+prefix_2([C|Cs], [C|Pre]) ->
+    prefix_1(Cs, Pre);
+prefix_2(_, _) ->
+    nomatch.
+
+split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
+  when is_binary(Bin) ->
+    case bin_search_str(Bin, Start, Cont0, Needle) of
+        {nomatch,Sz,Cont} ->
+            <<Keep:Sz/binary, _/binary>> = Bin,
+            split_1(Cont, Needle, 0, Where, [Keep|Curr0], Acc);
+        {Before, [Cs0|Cont], After} ->
+            Curr = add_non_empty(Before,Curr0),
+            case Where of
+                leading ->
+                    [rev(Curr),After];
+                trailing ->
+                    <<_/utf8, Cs/binary>> = Cs0,
+                    Next = byte_size(Bin) - byte_size(Cs),
+                    split_1([Bin|Cont], Needle, Next, Where,
+                            Curr0, [rev(Curr),After]);
+                all ->
+                    split_1(After, Needle, 0, Where, [], [rev(Curr)|Acc])
+            end
+    end;
+split_1(Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] ->
+            case prefix_1(Cs0, Needle) of
+                nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+                Rest when Where =:= leading ->
+                    [rev(Curr), Rest];
+                Rest when Where =:= trailing ->
+                    split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+                Rest when Where =:= all ->
+                    split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+            end;
+        [Other|Cs] ->
+            split_1(Cs, Needle, 0, Where, append(Other,Curr), Acc);
+        [] ->
+            {rev(Curr), Acc}
+    end;
+split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
+    case bin_search_str(Bin, Start, [], Needle) of
+        {nomatch,_,_} ->
+            <<_:Start/binary, Keep/binary>> = Bin,
+            {rev([Keep|Curr0]), Acc};
+        {Before, [Cs0], After} ->
+            case Where of
+                leading ->
+                    [rev([Before|Curr0]),After];
+                trailing ->
+                    <<_/utf8, Cs/binary>> = Cs0,
+                    Next = byte_size(Bin) - byte_size(Cs),
+                    split_1(Bin, Needle, Next, Where, Curr0,
+                            [btoken(Before,Curr0),After]);
+                all ->
+                    Next = byte_size(Bin) - byte_size(After),
+                    <<_:Start/binary, Keep/binary>> = Before,
+                    Curr = [Keep|Curr0],
+                    split_1(Bin, Needle, Next, Where, [], [rev(Curr)|Acc])
+            end
+    end.
+
+lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Seps) of
+        {nomatch,Cont} ->
+            lexemes_m(Cont, Seps, Ts);
+        Cs ->
+            {Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
+            lexemes_m(Rest, Seps, [Lexeme|Ts])
+    end;
+lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+    case unicode_util:gc(Cs0) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true  ->
+                    lexemes_m(Cs, Seps, Ts);
+                false ->
+                    {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+                    lexemes_m(Rest, Seps, [Lexeme|Ts])
+            end;
+        [] ->
+            lists:reverse(Ts)
+    end;
+lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Seps) of
+        {nomatch,_} ->
+            lists:reverse(Ts);
+        [Cs] ->
+            {Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
+            lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
+    end.
+
+lexeme_pick([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps, Tkn) when is_integer(CP) ->
+    case lists:member(CP, CPs) of
+        true  ->
+            [GC|Cs2] = unicode_util:gc(Cs0),
+            case lists:member(GC, GCs) of
+                true -> {rev(Tkn), Cs2};
+                false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
+            end;
+        false -> lexeme_pick(Cs1, Seps, [CP|Tkn])
+    end;
+lexeme_pick([Bin|Cont0], Seps, Tkn) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Seps) of
+        {nomatch,_} ->
+            lexeme_pick(Cont0, Seps, [Bin|Tkn]);
+        [Left|_Cont] = Cs ->
+            Bytes = byte_size(Bin) - byte_size(Left),
+            <<Lexeme:Bytes/binary, _/binary>> = Bin,
+            {btoken(Lexeme, Tkn), Cs}
+    end;
+lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs2] = unicode_util:gc(Cs0),
+                    case lists:member(GC, GCs) of
+                        true -> {rev(Tkn), Cs0};
+                        false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
+                    end;
+                false ->
+                    lexeme_pick(Cs, Seps, append(CP,Tkn))
+            end;
+        [] ->
+            {rev(Tkn), []}
+    end;
+lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
+    case bin_search(Bin, Seps) of
+        {nomatch,_} ->
+            {btoken(Bin,Tkn), []};
+        [Left] ->
+            Bytes = byte_size(Bin) - byte_size(Left),
+            <<Lexeme:Bytes/binary, _/binary>> = Bin,
+            {btoken(Lexeme, Tkn), Left}
+    end.
+
+nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) ->
+    case bin_search_inv(Bin, Cont0, Seps) of
+        {nomatch,Cont} ->
+            nth_lexeme_m(Cont, Seps, N);
+        Cs when N > 1 ->
+            Rest = lexeme_skip(Cs, Seps),
+            nth_lexeme_m(Rest, Seps, N-1);
+        Cs ->
+            {Lexeme,_} = lexeme_pick(Cs, Seps, []),
+            Lexeme
+    end;
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+    case unicode_util:gc(Cs0) of
+        [C|Cs] ->
+            case lists:member(C, GCs) of
+                true ->
+                    nth_lexeme_m(Cs, Seps, N);
+                false when N > 1 ->
+                    Cs1 = lexeme_skip(Cs, Seps),
+                    nth_lexeme_m(Cs1, Seps, N-1);
+                false ->
+                    {Lexeme,_} = lexeme_pick(Cs0, Seps, []),
+                    Lexeme
+            end;
+        [] ->
+            []
+    end;
+nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
+    case bin_search_inv(Bin, [], Seps) of
+        [Cs] when N > 1 ->
+            Cs1 = lexeme_skip(Cs, Seps),
+            nth_lexeme_m(Cs1, Seps, N-1);
+        [Cs] ->
+            {Lexeme,_} = lexeme_pick(Cs, Seps, []),
+            Lexeme;
+        {nomatch,_} ->
+            <<>>
+    end.
+
+lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
+    case lists:member(CP, CPs) of
+        true  ->
+            [GC|Cs2] = unicode_util:gc(Cs0),
+            case lists:member(GC, GCs) of
+                true -> Cs0;
+                false -> lexeme_skip(Cs2, Seps)
+            end;
+        false ->
+            lexeme_skip(Cs1, Seps)
+    end;
+lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+    case bin_search(Bin, Cont0, Seps) of
+        {nomatch,_} -> lexeme_skip(Cont0, Seps);
+        Cs -> Cs
+    end;
+lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [CP|Cs] ->
+            case lists:member(CP, CPs) of
+                true ->
+                    [GC|Cs2] = unicode_util:gc(Cs0),
+                    case lists:member(GC, GCs) of
+                        true -> Cs0;
+                        false -> lexeme_skip(Cs2, Seps)
+                    end;
+                false ->
+                    lexeme_skip(Cs, Seps)
+            end;
+        [] ->
+            []
+    end;
+lexeme_skip(Bin, Seps) when is_binary(Bin) ->
+    case bin_search(Bin, Seps) of
+        {nomatch,_} -> <<>>;
+        [Left] -> Left
+    end.
+
+find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
+    case bin_search_str(Bin, 0, Cont0, Needle) of
+        {nomatch, _, Cont} ->
+            find_l(Cont, Needle);
+        {_Before, Cs, _After} ->
+            Cs
+    end;
+find_l(Cs0, [C|_]=Needle) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] ->
+            case prefix_1(Cs0, Needle) of
+                nomatch -> find_l(Cs, Needle);
+                _ -> Cs0
+            end;
+        [_C|Cs] ->
+            find_l(Cs, Needle);
+        [] -> nomatch
+    end;
+find_l(Bin, Needle) ->
+    case bin_search_str(Bin, 0, [], Needle) of
+        {nomatch,_,_} -> nomatch;
+        {_Before, [Cs], _After} -> Cs
+    end.
+
+find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
+    case bin_search_str(Bin, 0, Cont0, Needle) of
+        {nomatch,_,Cont} ->
+            find_r(Cont, Needle, Res);
+        {_, Cs0, _} ->
+            [_|Cs] = unicode_util:gc(Cs0),
+            find_r(Cs, Needle, Cs0)
+    end;
+find_r(Cs0, [C|_]=Needle, Res) when is_list(Cs0) ->
+    case unicode_util:cp(Cs0) of
+        [C|Cs] ->
+            case prefix_1(Cs0, Needle) of
+                nomatch -> find_r(Cs, Needle, Res);
+                _ -> find_r(Cs, Needle, Cs0)
+            end;
+        [_C|Cs] ->
+            find_r(Cs, Needle, Res);
+        [] -> Res
+    end;
+find_r(Bin, Needle, Res) ->
+    case bin_search_str(Bin, 0, [], Needle) of
+        {nomatch,_,_} -> Res;
+        {_Before, [Cs0], _After} ->
+            <<_/utf8, Cs/binary>> = Cs0,
+            find_r(Cs, Needle, Cs0)
+    end.
+
+%% These are used to avoid creating lists around binaries
+%% might be unnecessary, is there a better solution?
+btoken(Token, []) -> Token;
+btoken(BinPart, [C]) when is_integer(C) -> <<C/utf8, BinPart/binary>>;
+btoken(<<>>, Tkn) -> lists:reverse(Tkn);
+btoken(BinPart, Cs) -> [lists:reverse(Cs),BinPart].
+
+rev([B]) when is_binary(B) -> B;
+rev(L) when is_list(L) -> lists:reverse(L);
+rev(C) when is_integer(C) -> C.
+
+append(Char, <<>>) when is_integer(Char) -> [Char];
+append(Char, <<>>) when is_list(Char) -> Char;
+append(Char, Bin) when is_binary(Bin) -> [Char,Bin];
+append(Char, Str) when is_integer(Char) -> [Char|Str];
+append(GC, Str) when is_list(GC) -> GC ++ Str.
+
+stack(Bin, []) -> Bin;
+stack(<<>>, St) -> St;
+stack([], St) -> St;
+stack(Bin, St) -> [Bin|St].
+
+add_non_empty(<<>>, L) -> L;
+add_non_empty(Token, L) -> [Token|L].
+
+cp_prefix(Orig, Cont) ->
+    case unicode_util:cp(Cont) of
+        [] -> Orig;
+        [Cp|Rest] -> cp_prefix_1(Orig, Cp, Rest)
+    end.
+
+cp_prefix_1(Orig, Until, Cont) ->
+    case unicode_util:cp(Orig) of
+        [Until|Rest] ->
+            case equal(Rest, Cont) of
+                true -> [];
+                false-> [Until|cp_prefix_1(Rest, Until, Cont)]
+            end;
+        [CP|Rest] -> [CP|cp_prefix_1(Rest, Until, Cont)]
+    end.
+
+
+%% Binary special
+bin_search(Bin, Seps) ->
+    bin_search(Bin, [], Seps).
+
+bin_search(_Bin, Cont, {[],_,_}) ->
+    {nomatch, Cont};
+bin_search(Bin, Cont, {Seps,_,BP}) ->
+    bin_search_loop(Bin, 0, BP, Cont, Seps).
+
+%% Need to work with [<<$a>>, <<778/utf8>>],
+%% i.e. å in nfd form  $a "COMBINING RING ABOVE"
+%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
+%% combined with other characters are currently ignored.
+search_pattern(Seps) ->
+    CPs = search_cp(Seps),
+    Bin = bin_pattern(CPs),
+    {Seps, CPs, Bin}.
+
+search_cp([CP|Seps]) when is_integer(CP) ->
+    [CP|search_cp(Seps)];
+search_cp([Pattern|Seps]) ->
+    [CP|_] = unicode_util:cp(Pattern),
+    [CP|search_cp(Seps)];
+search_cp([]) -> [].
+
+bin_pattern([CP|Seps]) ->
+    [<<CP/utf8>>|bin_pattern(Seps)];
+bin_pattern([]) -> [].
+
+bin_search_loop(Bin0, Start, _, Cont, _Seps)
+  when byte_size(Bin0) =< Start; Start < 0 ->
+    {nomatch, Cont};
+bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
+    <<_:Start/binary, Bin/binary>> = Bin0,
+    case binary:match(Bin, BinSeps) of
+        nomatch ->
+            {nomatch,Cont};
+        {Where, _CL} ->
+            <<_:Where/binary, Cont0/binary>> = Bin,
+            Cont1 = stack(Cont0, Cont),
+            [GC|Cont2] = unicode_util:gc(Cont1),
+            case lists:member(GC, Seps) of
+                false ->
+                    case Cont2 of
+                        [BinR|Cont] when is_binary(BinR) ->
+                            Next = byte_size(Bin0) - byte_size(BinR),
+                            bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+                        BinR when is_binary(BinR), Cont =:= [] ->
+                            Next = byte_size(Bin0) - byte_size(BinR),
+                            bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+                        _ ->
+                            {nomatch, Cont2}
+                    end;
+                true when is_list(Cont1) ->
+                    Cont1;
+                true ->
+                    [Cont1]
+            end
+    end.
+
+bin_search_inv(Bin, Cont, {[], _, _}) ->
+    [Bin|Cont];
+bin_search_inv(Bin, Cont, {[Sep], _, _}) ->
+    bin_search_inv_1([Bin|Cont], Sep);
+bin_search_inv(Bin, Cont, {Seps, _, _}) ->
+    bin_search_inv_n([Bin|Cont], Seps).
+
+bin_search_inv_1([<<>>|CPs], _) ->
+    {nomatch, CPs};
+bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) ->
+    case unicode_util:gc(CPs) of
+        [Sep|Bin] when is_binary(Bin), Cont =:= [] ->
+            bin_search_inv_1([Bin], Sep);
+        [Sep|[Bin|Cont]=Cs] when is_binary(Bin) ->
+            bin_search_inv_1(Cs, Sep);
+        [Sep|Cs] ->
+            {nomatch, Cs};
+        _ -> CPs
+    end.
+
+bin_search_inv_n([<<>>|CPs], _) ->
+    {nomatch, CPs};
+bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) ->
+    [C|Cs0] = unicode_util:gc(CPs),
+    case {lists:member(C, Seps), Cs0} of
+        {true, Cs} when is_binary(Cs), Cont =:= [] ->
+            bin_search_inv_n([Cs], Seps);
+        {true, [Bin|Cont]=Cs} when is_binary(Bin) ->
+            bin_search_inv_n(Cs, Seps);
+        {true, Cs} -> {nomatch, Cs};
+        {false, _} -> CPs
+    end.
+
+bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
+    <<_:Start/binary, Bin/binary>> = Bin0,
+    case binary:match(Bin, <<CP/utf8>>) of
+        nomatch -> {nomatch, byte_size(Bin0), Cont};
+        {Where0, _} ->
+            Where = Start+Where0,
+            <<Keep:Where/binary, Cs0/binary>> = Bin0,
+            [GC|Cs]=unicode_util:gc(Cs0),
+            case prefix_1(stack(Cs0,Cont), SearchCPs) of
+                nomatch when is_binary(Cs) ->
+                    KeepSz = byte_size(Bin0) - byte_size(Cs),
+                    bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+                nomatch ->
+                    {nomatch, Where, stack([GC|Cs],Cont)};
+                [] ->
+                    {Keep, [Cs0|Cont], <<>>};
+                Rest ->
+                    {Keep, [Cs0|Cont], Rest}
+            end
+    end.
+
+
+%%---------------------------------------------------------------------------
+%% OLD lists API kept for backwards compability
+%%---------------------------------------------------------------------------
+
 %% Robert's bit
 
 %% len(String)
@@ -68,12 +1347,12 @@ len(S) -> length(S).
 %% equal(String1, String2)
 %%  Test if 2 strings are equal.
 
--spec equal(String1, String2) -> boolean() when
-      String1 :: string(),
-      String2 :: string().
+%% -spec equal(String1, String2) -> boolean() when
+%%       String1 :: string(),
+%%       String2 :: string().
 
-equal(S, S) -> true;
-equal(_, _) -> false.
+%% equal(S, S) -> true;
+%% equal(_, _) -> false.
 
 %% concat(String1, String2)
 %%  Concatenate 2 strings.
@@ -127,7 +1406,7 @@ rchr([], _C, _I, L) -> L.
 str(S, Sub) when is_list(Sub) -> str(S, Sub, 1).
 
 str([C|S], [C|Sub], I) ->
-    case prefix(Sub, S) of
+    case l_prefix(Sub, S) of
 	true -> I;
 	false -> str(S, [C|Sub], I+1)
     end;
@@ -142,16 +1421,16 @@ str([], _Sub, _I) -> 0.
 rstr(S, Sub) when is_list(Sub) -> rstr(S, Sub, 1, 0).
 
 rstr([C|S], [C|Sub], I, L) ->
-    case prefix(Sub, S) of
+    case l_prefix(Sub, S) of
 	true -> rstr(S, [C|Sub], I+1, I);
 	false -> rstr(S, [C|Sub], I+1, L)
     end;
 rstr([_|S], Sub, I, L) -> rstr(S, Sub, I+1, L);
 rstr([], _Sub, _I, L) -> L.
 
-prefix([C|Pre], [C|String]) -> prefix(Pre, String);
-prefix([], String) when is_list(String) -> true;
-prefix(Pre, String) when is_list(Pre), is_list(String) -> false.
+l_prefix([C|Pre], [C|String]) -> l_prefix(Pre, String);
+l_prefix([], String) when is_list(String) -> true;
+l_prefix(Pre, String) when is_list(Pre), is_list(String) -> false.
 
 %% span(String, Chars) -> Length.
 %% cspan(String, Chars) -> Length.
@@ -229,9 +1508,9 @@ tokens(S, Seps) ->
 		[_|_] -> [S]
 	    end;
 	[C] ->
-	    tokens_single_1(reverse(S), C, []);
+	    tokens_single_1(lists:reverse(S), C, []);
 	[_|_] ->
-	    tokens_multiple_1(reverse(S), Seps, [])
+	    tokens_multiple_1(lists:reverse(S), Seps, [])
     end.
 
 tokens_single_1([Sep|S], Sep, Toks) ->
@@ -342,8 +1621,8 @@ sub_word(String, Index, Char) when is_integer(Index), is_integer(Char) ->
 	    s_word(strip(String, left, Char), Index, Char, 1, [])
     end.
 
-s_word([], _, _, _,Res) -> reverse(Res);
-s_word([Char|_],Index,Char,Index,Res) -> reverse(Res);
+s_word([], _, _, _,Res) -> lists:reverse(Res);
+s_word([Char|_],Index,Char,Index,Res) -> lists:reverse(Res);
 s_word([H|T],Index,Char,Index,Res) -> s_word(T,Index,Char,Index,[H|Res]);
 s_word([Char|T],Stop,Char,Index,Res) when Index < Stop -> 
     s_word(strip(T,left,Char),Stop,Char,Index+1,Res);
@@ -359,7 +1638,7 @@ strip(String) -> strip(String, both).
 -spec strip(String, Direction) -> Stripped when
       String :: string(),
       Stripped :: string(),
-      Direction :: left | right | both.
+      Direction :: 'left' | 'right' | 'both'.
 
 strip(String, left) -> strip_left(String, $\s);
 strip(String, right) -> strip_right(String, $\s);
@@ -369,7 +1648,7 @@ strip(String, both) ->
 -spec strip(String, Direction, Character) -> Stripped when
       String :: string(),
       Stripped :: string(),
-      Direction :: left | right | both,
+      Direction :: 'left' | 'right' | 'both',
       Character :: char().
 
 strip(String, right, Char) -> strip_right(String, Char);
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 617da11ba8..59499021cb 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -1,8 +1,8 @@
 %%
 %% %CopyrightBegin%
-%% 
+%%
 %% Copyright Ericsson AB 2008-2016. All Rights Reserved.
-%% 
+%%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
 %% You may obtain a copy of the License at
@@ -14,7 +14,7 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 -module(unicode).
@@ -22,7 +22,12 @@
 -export([characters_to_list/1, characters_to_list_int/2,
 	 characters_to_binary/1, characters_to_binary_int/2,
 	 characters_to_binary/3,
-	 bom_to_encoding/1, encoding_to_bom/1]).
+	 bom_to_encoding/1, encoding_to_bom/1,
+         characters_to_nfd_list/1, characters_to_nfd_binary/1,
+         characters_to_nfc_list/1, characters_to_nfc_binary/1,
+         characters_to_nfkd_list/1, characters_to_nfkd_binary/1,
+         characters_to_nfkc_list/1, characters_to_nfkc_binary/1
+        ]).
 
 -export_type([chardata/0, charlist/0, encoding/0, external_chardata/0,
               external_charlist/0, latin1_char/0, latin1_chardata/0,
@@ -102,35 +107,6 @@ characters_to_list(_, _) ->
 characters_to_list(ML) ->
     unicode:characters_to_list(ML,unicode).
 
-characters_to_list_int(ML, Encoding) ->
-    try
-	do_characters_to_list(ML,Encoding)
-    catch
-	error:AnyError ->
-	    TheError = case AnyError of
-			   system_limit ->
-			       system_limit;
-			   _ ->
-			       badarg
-		       end,
-	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
-		(catch erlang:error(new_stacktrace,
-				    [ML,Encoding])),
-	    erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest])
-    end.
-
-% XXX: Optimize me!
-do_characters_to_list(ML, Encoding) -> 
-    case unicode:characters_to_binary(ML,Encoding) of
-	Bin when is_binary(Bin) ->
-	    unicode:characters_to_list(Bin,utf8); 
-	{error,Encoded,Rest} ->
-	    {error,unicode:characters_to_list(Encoded,utf8),Rest};
-	{incomplete, Encoded2, Rest2} ->
-	    {incomplete,unicode:characters_to_list(Encoded2,utf8),Rest2}
-    end.
-
-
 -spec characters_to_binary(Data) -> Result when
       Data :: latin1_chardata() | chardata() | external_chardata(),
       Result :: binary()
@@ -154,24 +130,6 @@ characters_to_binary(ML) ->
 				    [ML])),
 	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
     end.
-	
-
-characters_to_binary_int(ML,InEncoding) ->
-    try
-	characters_to_binary_int(ML,InEncoding,unicode)
-    catch
-	error:AnyError ->
-	    TheError = case AnyError of
-			   system_limit ->
-			       system_limit;
-			   _ ->
-			       badarg
-		       end,
-	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
-		(catch erlang:error(new_stacktrace,
-				    [ML,InEncoding])),
-	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
-    end.
 
 -spec characters_to_binary(Data, InEncoding, OutEncoding) -> Result when
       Data :: latin1_chardata() | chardata() | external_chardata(),
@@ -192,7 +150,7 @@ characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or
 	        try
 		    characters_to_binary_int(ML,latin1,utf8)
 		catch
-		    error:AnyError ->	    
+		    error:AnyError ->
 			TheError = case AnyError of
 				       system_limit ->
 					   system_limit;
@@ -228,7 +186,7 @@ characters_to_binary(ML,Uni,latin1) when is_binary(ML) and ((Uni =:= utf8) or
 				     [{Mod,characters_to_binary,L}|Rest])
 		end
     end;
-    
+
 characters_to_binary(ML, InEncoding, OutEncoding) ->
     try
 	characters_to_binary_int(ML,InEncoding,OutEncoding)
@@ -246,53 +204,6 @@ characters_to_binary(ML, InEncoding, OutEncoding) ->
 	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
     end.
 
-characters_to_binary_int(ML, InEncoding, OutEncoding) when 
-    InEncoding =:= latin1, OutEncoding =:= unicode; 
-    InEncoding =:= latin1, OutEncoding =:= utf8;
-    InEncoding =:= unicode, OutEncoding =:= unicode; 
-    InEncoding =:= unicode, OutEncoding =:= utf8; 
-    InEncoding =:= utf8, OutEncoding =:= unicode; 
-    InEncoding =:= utf8, OutEncoding =:= utf8 -> 
-    unicode:characters_to_binary(ML,InEncoding);
-
-characters_to_binary_int(ML, InEncoding, OutEncoding) ->
-    {InTrans,Limit} = case OutEncoding of
-		  latin1 -> {i_trans_chk(InEncoding),255};
-		  _ -> {i_trans(InEncoding),case InEncoding of latin1 -> 255; _ -> 16#10FFFF end}
-	      end,
-    OutTrans = o_trans(OutEncoding),
-    Res = 
-	ml_map(ML,
-	       fun(Part,Accum) when is_binary(Part) ->
-		       case InTrans(Part) of
-			   List when is_list(List) ->
-			       Tail = OutTrans(List),
-			       <<Accum/binary, Tail/binary>>;
-			   {error, Translated, Rest} -> 
-			       Tail = OutTrans(Translated),
-			       {error, <<Accum/binary,Tail/binary>>, Rest};
-			   {incomplete, Translated, Rest, Missing}  ->
-			       Tail = OutTrans(Translated),
-			       {incomplete, <<Accum/binary,Tail/binary>>, Rest,
-				Missing}
-		       end;
-		  (Part, Accum) when is_integer(Part), Part =< Limit ->
-		       case OutTrans([Part]) of
-			   Binary when is_binary(Binary) ->
-			       <<Accum/binary, Binary/binary>>;
-			   {error, _, [Part]} ->
-			       {error,Accum,[Part]}
-		       end;
-		  (Part, Accum) ->
-		       {error, Accum, [Part]}
-	       end,<<>>),
-    case Res of
-	{incomplete,A,B,_} ->
-	    {incomplete,A,B};
-	_ ->
-	    Res
-    end.
-
 -spec bom_to_encoding(Bin) -> {Encoding, Length} when
       Bin :: binary(),
       Encoding ::  'latin1' | 'utf8'
@@ -335,11 +246,194 @@ encoding_to_bom({utf32,little}) ->
     <<255,254,0,0>>;
 encoding_to_bom(latin1) ->
     <<>>.
-	    
 
-cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> 
+-define(GC_N, 200). %% arbitrary number
+
+%% Canonical decompose string to list of chars
+-spec characters_to_nfd_list(chardata()) -> [char()].
+characters_to_nfd_list(CD) ->
+    case unicode_util:nfd(CD) of
+        [GC|Str] when is_list(GC) -> GC++characters_to_nfd_list(Str);
+        [CP|Str] -> [CP|characters_to_nfd_list(Str)];
+        [] -> []
+    end.
+
+-spec characters_to_nfd_binary(chardata()) -> unicode_binary().
+characters_to_nfd_binary(CD) ->
+    list_to_binary(characters_to_nfd_binary(CD, ?GC_N, [])).
+
+characters_to_nfd_binary(CD, N, Row) when N > 0 ->
+    case unicode_util:nfd(CD) of
+        [GC|Str] -> characters_to_nfd_binary(Str, N-1, [GC|Row]);
+        [] -> [characters_to_binary(lists:reverse(Row))]
+    end;
+characters_to_nfd_binary(CD, _, Row) ->
+    [characters_to_binary(lists:reverse(Row))|characters_to_nfd_binary(CD,?GC_N,[])].
+
+%% Compability Canonical decompose string to list of chars.
+-spec characters_to_nfkd_list(chardata()) -> [char()].
+characters_to_nfkd_list(CD) ->
+    case unicode_util:nfkd(CD) of
+        [GC|Str] when is_list(GC) -> GC++characters_to_nfkd_list(Str);
+        [CP|Str] -> [CP|characters_to_nfkd_list(Str)];
+        [] -> []
+    end.
+
+-spec characters_to_nfkd_binary(chardata()) -> unicode_binary().
+characters_to_nfkd_binary(CD) ->
+    list_to_binary(characters_to_nfkd_binary(CD, ?GC_N, [])).
+
+characters_to_nfkd_binary(CD, N, Row) when N > 0 ->
+    case unicode_util:nfkd(CD) of
+        [GC|Str] -> characters_to_nfkd_binary(Str, N-1, [GC|Row]);
+        [] -> [characters_to_binary(lists:reverse(Row))]
+    end;
+characters_to_nfkd_binary(CD, _, Row) ->
+    [characters_to_binary(lists:reverse(Row))|characters_to_nfkd_binary(CD,?GC_N,[])].
+
+
+%% Canonical compose string to list of chars
+-spec characters_to_nfc_list(chardata()) -> [char()].
+characters_to_nfc_list(CD) ->
+    case unicode_util:nfc(CD) of
+        [CPs|Str] when is_list(CPs) -> CPs ++ characters_to_nfc_list(Str);
+        [CP|Str] -> [CP|characters_to_nfc_list(Str)];
+        [] -> []
+    end.
+
+-spec characters_to_nfc_binary(chardata()) -> unicode_binary().
+characters_to_nfc_binary(CD) ->
+    list_to_binary(characters_to_nfc_binary(CD, ?GC_N, [])).
+
+characters_to_nfc_binary(CD, N, Row) when N > 0 ->
+    case unicode_util:nfc(CD) of
+        [GC|Str] -> characters_to_nfc_binary(Str, N-1, [GC|Row]);
+        [] -> [characters_to_binary(lists:reverse(Row))]
+    end;
+characters_to_nfc_binary(CD, _, Row) ->
+    [characters_to_binary(lists:reverse(Row))|characters_to_nfc_binary(CD,?GC_N,[])].
+
+%% Compability Canonical compose string to list of chars
+-spec characters_to_nfkc_list(chardata()) -> [char()].
+characters_to_nfkc_list(CD) ->
+    case unicode_util:nfkc(CD) of
+        [CPs|Str] when is_list(CPs) -> CPs ++ characters_to_nfkc_list(Str);
+        [CP|Str] -> [CP|characters_to_nfkc_list(Str)];
+        [] -> []
+    end.
+
+-spec characters_to_nfkc_binary(chardata()) -> unicode_binary().
+characters_to_nfkc_binary(CD) ->
+    list_to_binary(characters_to_nfkc_binary(CD, ?GC_N, [])).
+
+characters_to_nfkc_binary(CD, N, Row) when N > 0 ->
+    case unicode_util:nfkc(CD) of
+        [GC|Str] -> characters_to_nfkc_binary(Str, N-1, [GC|Row]);
+        [] -> [characters_to_binary(lists:reverse(Row))]
+    end;
+characters_to_nfkc_binary(CD, _, Row) ->
+    [characters_to_binary(lists:reverse(Row))|characters_to_nfkc_binary(CD,?GC_N,[])].
+
+%% internals
+
+characters_to_list_int(ML, Encoding) ->
+    try
+	do_characters_to_list(ML,Encoding)
+    catch
+	error:AnyError ->
+	    TheError = case AnyError of
+			   system_limit ->
+			       system_limit;
+			   _ ->
+			       badarg
+		       end,
+	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
+		(catch erlang:error(new_stacktrace,
+				    [ML,Encoding])),
+	    erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest])
+    end.
+
+                                                % XXX: Optimize me!
+do_characters_to_list(ML, Encoding) ->
+    case unicode:characters_to_binary(ML,Encoding) of
+	Bin when is_binary(Bin) ->
+	    unicode:characters_to_list(Bin,utf8);
+	{error,Encoded,Rest} ->
+	    {error,unicode:characters_to_list(Encoded,utf8),Rest};
+	{incomplete, Encoded2, Rest2} ->
+	    {incomplete,unicode:characters_to_list(Encoded2,utf8),Rest2}
+    end.
+
+
+characters_to_binary_int(ML,InEncoding) ->
+    try
+	characters_to_binary_int(ML,InEncoding,unicode)
+    catch
+	error:AnyError ->
+	    TheError = case AnyError of
+			   system_limit ->
+			       system_limit;
+			   _ ->
+			       badarg
+		       end,
+	    {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} =
+		(catch erlang:error(new_stacktrace,
+				    [ML,InEncoding])),
+	    erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
+    end.
+
+
+characters_to_binary_int(ML, InEncoding, OutEncoding) when
+      InEncoding =:= latin1, OutEncoding =:= unicode;
+      InEncoding =:= latin1, OutEncoding =:= utf8;
+      InEncoding =:= unicode, OutEncoding =:= unicode;
+      InEncoding =:= unicode, OutEncoding =:= utf8;
+      InEncoding =:= utf8, OutEncoding =:= unicode;
+      InEncoding =:= utf8, OutEncoding =:= utf8 ->
+    unicode:characters_to_binary(ML,InEncoding);
+
+characters_to_binary_int(ML, InEncoding, OutEncoding) ->
+    {InTrans,Limit} = case OutEncoding of
+                          latin1 -> {i_trans_chk(InEncoding),255};
+                          _ -> {i_trans(InEncoding),case InEncoding of latin1 -> 255; _ -> 16#10FFFF end}
+                      end,
+    OutTrans = o_trans(OutEncoding),
+    Res =
+	ml_map(ML,
+	       fun(Part,Accum) when is_binary(Part) ->
+		       case InTrans(Part) of
+			   List when is_list(List) ->
+			       Tail = OutTrans(List),
+			       <<Accum/binary, Tail/binary>>;
+			   {error, Translated, Rest} ->
+			       Tail = OutTrans(Translated),
+			       {error, <<Accum/binary,Tail/binary>>, Rest};
+			   {incomplete, Translated, Rest, Missing}  ->
+			       Tail = OutTrans(Translated),
+			       {incomplete, <<Accum/binary,Tail/binary>>, Rest,
+				Missing}
+		       end;
+		  (Part, Accum) when is_integer(Part), Part =< Limit ->
+		       case OutTrans([Part]) of
+			   Binary when is_binary(Binary) ->
+			       <<Accum/binary, Binary/binary>>;
+			   {error, _, [Part]} ->
+			       {error,Accum,[Part]}
+		       end;
+		  (Part, Accum) ->
+		       {error, Accum, [Part]}
+	       end,<<>>),
+    case Res of
+	{incomplete,A,B,_} ->
+	    {incomplete,A,B};
+	_ ->
+	    Res
+    end.
+
+
+cbv(utf8,<<1:1,1:1,0:1,_:5>>) ->
     1;
-cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> 
+cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) ->
     case R of
 	<<>> ->
 	    2;
@@ -386,18 +480,18 @@ cbv({utf32,big}, <<0:8>>) ->
     3;
 cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 ->
     2;
-cbv({utf32,big}, <<0:8,X:8,Y:8>>) 
+cbv({utf32,big}, <<0:8,X:8,Y:8>>)
   when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
     1;
 cbv({utf32,big},_) ->
     false;
 cbv({utf32,little},<<_:8>>) ->
     3;
-cbv({utf32,little},<<_:8,_:8>>) -> 
+cbv({utf32,little},<<_:8,_:8>>) ->
     2;
 cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 ->
     false;
-cbv({utf32,little},<<_:8,Y:8,X:8>>) 
+cbv({utf32,little},<<_:8,Y:8,X:8>>)
   when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) ->
     1;
 cbv({utf32,little},_) ->
@@ -417,8 +511,8 @@ ml_map([Part|T],Fun,Accum) when is_integer(Part) ->
 		Bin2 when is_binary(Bin2) ->
 		    Bin2;
 		{error, Converted, Rest} ->
-		    {error, Converted, Rest}; 
-		{incomplete, Converted, Rest,X} -> 
+		    {error, Converted, Rest};
+		{incomplete, Converted, Rest,X} ->
 		    {incomplete, Converted, Rest,X}
 	    end;
 	% Can not be incomplete - it's an integer
@@ -471,7 +565,7 @@ ml_map(Part,Fun,Accum) when is_binary(Part), byte_size(Part) > 8192 ->
 ml_map(Bin,Fun,Accum) when is_binary(Bin) ->
     Fun(Bin,Accum).
 
- 
+
 
 
 
@@ -523,7 +617,7 @@ o_trans(utf8) ->
 				<<One/utf8>>
 			end, L)
     end;
-    
+
 o_trans(utf16) ->
     fun(L) ->
 	    do_o_binary(fun(One) ->
@@ -577,9 +671,9 @@ do_o_binary2(F,[H|T]) ->
 		    [Bin|Bin3]
 	    end
     end.
- 
+
 %% Specific functions only allowing codepoints in latin1 range
-	
+
 do_i_utf8_chk(<<>>) ->
     [];
 do_i_utf8_chk(<<U/utf8,R/binary>>) when U =< 255 ->