aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/Makefile1
-rw-r--r--lib/stdlib/src/base64.erl108
-rw-r--r--lib/stdlib/src/c.erl18
-rw-r--r--lib/stdlib/src/epp.erl5
-rw-r--r--lib/stdlib/src/erl_lint.erl7
-rw-r--r--lib/stdlib/src/erl_pp.erl40
-rw-r--r--lib/stdlib/src/erl_scan.erl6
-rw-r--r--lib/stdlib/src/escript.erl12
-rw-r--r--lib/stdlib/src/ets.erl4
-rw-r--r--lib/stdlib/src/filelib.erl42
-rw-r--r--lib/stdlib/src/filename.erl106
-rw-r--r--lib/stdlib/src/gen.erl1
-rw-r--r--lib/stdlib/src/gen_server.erl31
-rw-r--r--lib/stdlib/src/gen_statem.erl4
-rw-r--r--lib/stdlib/src/io_lib.erl13
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
-rw-r--r--lib/stdlib/src/io_lib_pretty.erl12
-rw-r--r--lib/stdlib/src/lib.erl2
-rw-r--r--lib/stdlib/src/maps.erl121
-rw-r--r--lib/stdlib/src/otp_internal.erl46
-rw-r--r--lib/stdlib/src/pool.erl6
-rw-r--r--lib/stdlib/src/rand.erl261
-rw-r--r--lib/stdlib/src/slave.erl2
-rw-r--r--lib/stdlib/src/stdlib.app.src3
-rw-r--r--lib/stdlib/src/stdlib.appup.src6
-rw-r--r--lib/stdlib/src/string.erl757
-rw-r--r--lib/stdlib/src/supervisor.erl946
-rw-r--r--lib/stdlib/src/uri_string.erl2069
28 files changed, 3780 insertions, 859 deletions
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index bf836203ec..8b156929d7 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -121,6 +121,7 @@ MODULES= \
timer \
unicode \
unicode_util \
+ uri_string \
win32reg \
zip
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index 5885745fb1..c8cf6fdffe 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -113,9 +113,9 @@ encode_binary(Bin) ->
Data :: ascii_binary().
decode(Bin) when is_binary(Bin) ->
- decode_binary(<<>>, Bin);
+ decode_binary(Bin, <<>>);
decode(List) when is_list(List) ->
- list_to_binary(decode_l(List)).
+ decode_list(List, <<>>).
-spec mime_decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -186,31 +186,41 @@ mime_decode_to_string(List) when is_list(List) ->
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
-decode_binary(Result0, <<C:8,T0/bits>>) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- erlang:error({badarg,C});
- ws ->
- decode_binary(Result0, T0);
- eq ->
- case strip_ws(T0) of
- <<$=:8,T/binary>> ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- T ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
- end;
- Bits ->
- decode_binary(<<Result0/bits,Bits:6>>, T0)
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
end;
-decode_binary(Result, <<>>) ->
- true = is_binary(Result),
- Result.
+decode_binary(<<>>, A) ->
+ A.
+
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
+
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws_binary(<<>>, A) ->
+ A;
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws_binary(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
@@ -262,6 +272,42 @@ mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
Result
end.
+decode_list([C1 | Cs], A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A);
+ B1 -> decode_list(Cs, A, B1)
+ end;
+decode_list([], A) ->
+ A.
+
+decode_list([C2 | Cs], A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1);
+ B2 -> decode_list(Cs, A, B1, B2)
+ end.
+
+decode_list([C3 | Cs], A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2);
+ B3 -> decode_list(Cs, A, B1, B2, B3)
+ end.
+
+decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws([], A) ->
+ A;
+only_ws([C | Cs], A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
+
decode([], A) -> A;
decode([$=,$=,C2,C1|Cs], A) ->
Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
@@ -292,16 +338,6 @@ strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
-strip_ws(<<$\t,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\n,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\r,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\s,T/binary>>) ->
- strip_ws(T);
-strip_ws(T) -> T.
-
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
%% characters. See section 3.3 of RFC4648
diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl
index c04a201ce1..9a447af5b7 100644
--- a/lib/stdlib/src/c.erl
+++ b/lib/stdlib/src/c.erl
@@ -668,19 +668,23 @@ lm() ->
[l(M) || M <- mm()].
%% erlangrc(Home)
-%% Try to run a ".erlang" file, first in the current directory
-%% else in home directory.
+%% Try to run a ".erlang" file in home directory.
+
+-spec erlangrc() -> {ok, file:filename()} | {error, term()}.
erlangrc() ->
case init:get_argument(home) of
{ok,[[Home]]} ->
erlangrc([Home]);
_ ->
- f_p_e(["."], ".erlang")
+ {error, enoent}
end.
-erlangrc([Home]) ->
- f_p_e([".",Home], ".erlang").
+-spec erlangrc(PathList) -> {ok, file:filename()} | {error, term()}
+ when PathList :: [Dir :: file:name()].
+
+erlangrc([Home|_]=Paths) when is_list(Home) ->
+ f_p_e(Paths, ".erlang").
error(Fmt, Args) ->
error_logger:error_msg(Fmt, Args).
@@ -692,11 +696,11 @@ f_p_e(P, F) ->
{error, E={Line, _Mod, _Term}} ->
error("file:path_eval(~tp,~tp): error on line ~p: ~ts~n",
[P, F, Line, file:format_error(E)]),
- ok;
+ {error, E};
{error, E} ->
error("file:path_eval(~tp,~tp): ~ts~n",
[P, F, file:format_error(E)]),
- ok;
+ {error, E};
Other ->
Other
end.
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index 31d0d499e3..00e6a10d8a 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -479,7 +479,7 @@ com_enc(_B, _Fun, _N, L, Ps) ->
com_enc_end([L | Ps]).
com_enc_end(Ps0) ->
- Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]),
+ Ps = lists:reverse([lists:reverse(lowercase(P)) || P <- Ps0]),
com_encoding(Ps).
com_encoding(["latin","1"|_]) ->
@@ -489,6 +489,9 @@ com_encoding(["utf","8"|_]) ->
com_encoding(_) ->
throw(no). % Don't try any further
+lowercase(S) ->
+ unicode:characters_to_list(string:lowercase(S)).
+
normalize_typed_record_fields([]) ->
{typed, []};
normalize_typed_record_fields(Fields) ->
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 9cd4727dc3..f58cb35cea 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3910,10 +3910,9 @@ check_format_string(Fmt) ->
extract_sequences(Fmt, []).
extract_sequences(Fmt, Need0) ->
- case string:chr(Fmt, $~) of
- 0 -> {ok,lists:reverse(Need0)}; %That's it
- Pos ->
- Fmt1 = string:substr(Fmt, Pos+1), %Skip ~
+ case string:find(Fmt, [$~]) of
+ nomatch -> {ok,lists:reverse(Need0)}; %That's it
+ [$~|Fmt1] ->
case extract_sequence(1, Fmt1, Need0) of
{ok,Need1,Rest} -> extract_sequences(Rest, Need1);
Error -> Error
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index ee5e7a11bf..367dbefb82 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -237,13 +237,20 @@ lform({attribute,Line,Name,Arg}, Opts) ->
lform({function,Line,Name,Arity,Clauses}, Opts) ->
lfunction({function,Line,Name,Arity,Clauses}, Opts);
%% These are specials to make it easier for the compiler.
-lform({error,E}, _Opts) ->
- leaf(format("~p\n", [{error,E}]));
-lform({warning,W}, _Opts) ->
- leaf(format("~p\n", [{warning,W}]));
+lform({error,_}=E, Opts) ->
+ message(E, Opts);
+lform({warning,_}=W, Opts) ->
+ message(W, Opts);
lform({eof,_Line}, _Opts) ->
$\n.
+message(M, #options{encoding = Encoding}) ->
+ F = case Encoding of
+ latin1 -> "~p\n";
+ unicode -> "~tp\n"
+ end,
+ leaf(format(F, [M])).
+
lattribute({attribute,_Line,type,Type}, Opts) ->
[typeattr(type, Type, Opts),leaf(".\n")];
lattribute({attribute,_Line,opaque,Type}, Opts) ->
@@ -598,8 +605,6 @@ lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Opts) ->
lexpr({named_fun,_,Name,Cs,Extra}, _Prec, Opts) ->
{force_nl,fun_info(Extra),
{list,[{first,['fun', " "],fun_clauses(Cs, Opts, {named, Name})},'end']}};
-lexpr({'query',_,Lc}, _Prec, Opts) ->
- {list,[{step,leaf("query"),lexpr(Lc, 0, Opts)},'end']};
lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Opts) ->
case erl_internal:bif(M, F, length(Args)) of
true ->
@@ -904,7 +909,7 @@ maybe_paren(_P, _Prec, Expr) ->
Expr.
leaf(S) ->
- {leaf,chars_size(S),S}.
+ {leaf,string:length(S),S}.
%%% Do the formatting. Currently nothing fancy. Could probably have
%%% done it in one single pass.
@@ -964,7 +969,7 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) ->
Sizes = BSizeL ++ SizeL,
NSepChars = if
is_list(Sep), Sep =/= [] ->
- erlang:max(0, length(CharsL)-1);
+ erlang:max(0, length(CharsL)-1); % not string:length
true ->
0
end,
@@ -1120,7 +1125,7 @@ incr(I, Incr) ->
I+Incr.
indentation(E, I) when I < 0 ->
- chars_size(E);
+ string:length(E);
indentation(E, I0) ->
I = io_lib_format:indentation(E, I0),
case has_nl(E) of
@@ -1157,19 +1162,19 @@ write_a_string(S, I, PP) ->
write_a_string([], _N, _Len, _PP) ->
[];
write_a_string(S, N, Len, PP) ->
- SS = string:sub_string(S, 1, N),
+ SS = string:slice(S, 0, N),
Sl = write_string(SS, PP),
- case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of
+ case (string:length(Sl) > Len) and (N > ?MIN_SUBSTRING) of
true ->
write_a_string(S, N-1, Len, PP);
false ->
[flat_leaf(Sl) |
- write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)]
+ write_a_string(string:slice(S, string:length(SS)), Len, Len, PP)]
end.
flat_leaf(S) ->
L = lists:flatten(S),
- {leaf,length(L),L}.
+ {leaf,string:length(L),L}.
write_value(V, PP) ->
(PP#pp.value_fun)(V).
@@ -1190,15 +1195,6 @@ write_char(C, PP) ->
a0() ->
erl_anno:new(0).
-chars_size([C | Es]) when is_integer(C) ->
- 1 + chars_size(Es);
-chars_size([E | Es]) ->
- chars_size(E) + chars_size(Es);
-chars_size([]) ->
- 0;
-chars_size(B) when is_binary(B) ->
- byte_size(B).
-
-define(N_SPACES, 30).
spacetab() ->
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 47223b129c..4774c4bf19 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2015. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -752,7 +752,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %"
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
Anno = anno(Line0, Col0, St, Nstr),
@@ -767,7 +767,7 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
case catch list_to_atom(Nwcs) of
diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl
index 2b9d8ff65b..132f8efbbe 100644
--- a/lib/stdlib/src/escript.erl
+++ b/lib/stdlib/src/escript.erl
@@ -224,8 +224,8 @@ return_sections(S, Bin) ->
normalize_section(Name, undefined) ->
{Name, undefined};
normalize_section(shebang, "#!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
if
Stripped =:= ?SHEBANG ->
{shebang, default};
@@ -233,8 +233,8 @@ normalize_section(shebang, "#!" ++ Chars) ->
{shebang, Stripped}
end;
normalize_section(comment, Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(string:strip(Chopped, left, $%), both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(string:trim(Chopped, leading, "$%"), both),
if
Stripped =:= ?COMMENT ->
{comment, default};
@@ -242,8 +242,8 @@ normalize_section(comment, Chars) ->
{comment, Stripped}
end;
normalize_section(emu_args, "%%!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
{emu_args, Stripped};
normalize_section(Name, Chars) ->
{Name, Chars}.
diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl
index 1db004c91e..b6548626f3 100644
--- a/lib/stdlib/src/ets.erl
+++ b/lib/stdlib/src/ets.erl
@@ -1719,7 +1719,7 @@ get_line(P, Default) ->
line_string(Binary) when is_binary(Binary) -> unicode:characters_to_list(Binary);
line_string(Other) -> Other.
-nonl(S) -> string:strip(S, right, $\n).
+nonl(S) -> string:trim(S, trailing, "$\n").
print_number(Tab, Key, Num) ->
Os = ets:lookup(Tab, Key),
@@ -1748,7 +1748,7 @@ do_display_item(_Height, Width, I, Opos) ->
L = to_string(I),
L2 = if
length(L) > Width - 8 ->
- string:substr(L, 1, Width-13) ++ " ...";
+ string:slice(L, 0, Width-13) ++ " ...";
true ->
L
end,
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d7c313f214..0f90b3fc33 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod).
%%% Compiling a wildcard.
+
+%% Define characters used for escaping a \.
+-define(ESCAPE_PREFIX, $@).
+-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]).
+-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]).
+
%% Only for debugging.
compile_wildcard(Pattern) when is_list(Pattern) ->
{compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}.
-compile_wildcard(Pattern, Cwd0) ->
+compile_wildcard(Pattern0, Cwd0) ->
+ Pattern = convert_escapes(Pattern0),
[Root|Rest] = filename:split(Pattern),
case filename:pathtype(Root) of
relative ->
@@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) ->
compile_join({root,PrefixLen,Root}, File) ->
{root,PrefixLen,filename:join(Root, File)}.
-compile_part(Part) ->
+compile_part(Part0) ->
+ Part = wrap_escapes(Part0),
compile_part(Part, false, []).
compile_part_to_sep(Part) ->
@@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) ->
error ->
compile_part(Rest, Upto, [${|Result])
end;
+compile_part([{escaped,X}|Rest], Upto, Result) ->
+ compile_part(Rest, Upto, [X|Result]);
compile_part([X|Rest], Upto, Result) ->
compile_part(Rest, Upto, [X|Result]);
compile_part([], _Upto, Result) ->
@@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper ->
compile_charset1(Rest, compile_range(Lower, Upper, Ordset));
compile_charset1([$]|Rest], Ordset) ->
{ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest};
+compile_charset1([{escaped,X}|Rest], Ordset) ->
+ compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([X|Rest], Ordset) ->
compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([], _Ordset) ->
@@ -486,6 +498,32 @@ compile_alt(Pattern, Result) ->
error
end.
+%% Convert backslashes to an illegal Unicode character to
+%% protect in from filename:split/1.
+
+convert_escapes([?ESCAPE_PREFIX|T]) ->
+ ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T);
+convert_escapes([$\\|T]) ->
+ ?ESCAPE_CHARACTER ++ convert_escapes(T);
+convert_escapes([H|T]) ->
+ [H|convert_escapes(T)];
+convert_escapes([]) ->
+ [].
+
+%% Wrap each escape in a tuple to remove the special meaning for
+%% the character that follows.
+
+wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) ->
+ [?ESCAPE_PREFIX|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) ->
+ [{escaped,C}|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER) ->
+ [];
+wrap_escapes([H|T]) ->
+ [H|wrap_escapes(T)];
+wrap_escapes([]) ->
+ [].
+
badpattern(Reason) ->
error({badpattern,Reason}).
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index ee807dfd09..a322bd002d 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -34,6 +34,38 @@
%% we flatten the arguments immediately on function entry as that makes
%% it easier to ensure that the code works.
+%%
+%% *** Requirements on Raw Filename Format ***
+%%
+%% These requirements are due to the 'filename' module
+%% in stdlib. This since it is documented that it
+%% should be able to operate on raw filenames as well
+%% as ordinary filenames.
+%%
+%% A raw filename *must* be a byte sequence where:
+%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded
+%% as a byte with the corresponding value. That is,
+%% the most significant bit in the byte encoding the
+%% codepoint is never set.
+%% 2. Codepoints greater than 127 *must* be encoded
+%% with the most significant bit set in *every* byte
+%% encoding it.
+%%
+%% Latin1 and UTF-8 meet these requirements while
+%% UTF-16 and UTF-32 don't.
+%%
+%% On Windows filenames are natively stored as malformed
+%% UTF-16LE (lonely surrogates may appear). A more correct
+%% description than UTF-16 would be an array of 16-bit
+%% words... In order to meet the requirements of the
+%% raw file format we convert the malformed UTF-16LE to
+%% malformed UTF-8 which meet the requirements.
+%%
+%% Note that these requirements are today only OTP
+%% internal (erts-stdlib internal) requirements that
+%% could be changed.
+%%
+
-export([absname/1, absname/2, absname_join/2,
basename/1, basename/2, dirname/1,
extension/1, join/1, join/2, pathtype/1,
@@ -41,6 +73,7 @@
safe_relative_path/1]).
-export([find_src/1, find_src/2]). % deprecated
-export([basedir/2, basedir/3]).
+-export([validate/1]).
%% Undocumented and unsupported exports.
-export([append/2]).
@@ -1053,10 +1086,10 @@ basedir_linux(Type) ->
user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true);
site_data ->
Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false),
- string:tokens(Base,":");
+ string:lexemes(Base, ":");
site_config ->
Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false),
- string:tokens(Base,":")
+ string:lexemes(Base, ":")
end.
-define(basedir_darwin_user_data, "Library/Application Support").
@@ -1152,3 +1185,72 @@ basedir_os_type() ->
{win32,_} -> windows;
_ -> linux
end.
+
+%%
+%% validate/1
+%%
+
+-spec validate(FileName) -> boolean() when
+ FileName :: file:name_all().
+
+validate(FileName) when is_binary(FileName) ->
+ %% Raw filename...
+ validate_bin(FileName);
+validate(FileName) when is_list(FileName);
+ is_atom(FileName) ->
+ validate_list(FileName,
+ file:native_name_encoding(),
+ os:type()).
+
+validate_list(FileName, Enc, Os) ->
+ try
+ true = validate_list(FileName, Enc, Os, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_list([], _Enc, _Os, Chars) ->
+ Chars;
+validate_list(C, Enc, Os, Chars) when is_integer(C) ->
+ validate_char(C, Enc, Os),
+ Chars+1;
+validate_list(A, Enc, Os, Chars) when is_atom(A) ->
+ validate_list(atom_to_list(A), Enc, Os, Chars);
+validate_list([H|T], Enc, Os, Chars) ->
+ NewChars = validate_list(H, Enc, Os, Chars),
+ validate_list(T, Enc, Os, NewChars).
+
+%% C is always an integer...
+% validate_char(C, _, _) when not is_integer(C) ->
+% throw(invalid);
+validate_char(C, _, _) when C < 1 ->
+ throw(invalid); %% No negative or null characters...
+validate_char(C, latin1, _) when C > 255 ->
+ throw(invalid);
+validate_char(C, utf8, _) when C >= 16#110000 ->
+ throw(invalid);
+validate_char(C, utf8, {win32, _}) when C > 16#ffff ->
+ throw(invalid); %% invalid win wchar...
+validate_char(_C, utf8, {win32, _}) ->
+ ok; %% Range below is accepted on windows...
+validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF ->
+ throw(invalid); %% invalid unicode range...
+validate_char(_, _, _) ->
+ ok.
+
+validate_bin(Bin) ->
+ %% Raw filename. That is, we do not interpret
+ %% the encoding, but we still do not accept
+ %% null characters...
+ try
+ true = validate_bin(Bin, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_bin(<<>>, Bs) ->
+ Bs;
+validate_bin(<<0, _Rest/binary>>, _Bs) ->
+ throw(invalid); %% No null characters allowed...
+validate_bin(<<_B, Rest/binary>>, Bs) ->
+ validate_bin(Rest, Bs+1).
diff --git a/lib/stdlib/src/gen.erl b/lib/stdlib/src/gen.erl
index 33af0aed8f..4b1d448487 100644
--- a/lib/stdlib/src/gen.erl
+++ b/lib/stdlib/src/gen.erl
@@ -49,6 +49,7 @@
| {'logfile', string()}.
-type option() :: {'timeout', timeout()}
| {'debug', [debug_flag()]}
+ | {'hibernate_after', timeout()}
| {'spawn_opt', [proc_lib:spawn_option()]}.
-type options() :: [option()].
diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl
index 7daa7a9fe4..ac172325b5 100644
--- a/lib/stdlib/src/gen_server.erl
+++ b/lib/stdlib/src/gen_server.erl
@@ -116,23 +116,27 @@
%%%=========================================================================
-callback init(Args :: term()) ->
- {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate} |
+ {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term()} | ignore.
-callback handle_call(Request :: term(), From :: {pid(), Tag :: term()},
State :: term()) ->
{reply, Reply :: term(), NewState :: term()} |
- {reply, Reply :: term(), NewState :: term(), timeout() | hibernate} |
+ {reply, Reply :: term(), NewState :: term(), timeout() | hibernate | {continue, term()}} |
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), Reply :: term(), NewState :: term()} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_cast(Request :: term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_info(Info :: timeout | term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
+ {stop, Reason :: term(), NewState :: term()}.
+-callback handle_continue(Info :: term(), State :: term()) ->
+ {noreply, NewState :: term()} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback terminate(Reason :: (normal | shutdown | {shutdown, term()} |
term()),
@@ -149,7 +153,7 @@
Status :: term().
-optional_callbacks(
- [handle_info/2, terminate/2, code_change/3, format_status/2]).
+ [handle_info/2, handle_continue/2, terminate/2, code_change/3, format_status/2]).
%%% -----------------------------------------------------------------
%%% Starts a generic server.
@@ -309,7 +313,7 @@ enter_loop(Mod, Options, State, ServerName, Timeout) ->
Name = gen:get_proc_name(ServerName),
Parent = gen:get_parent(),
Debug = gen:debug_options(Name, Options),
- HibernateAfterTimeout = gen:hibernate_after(Options),
+ HibernateAfterTimeout = gen:hibernate_after(Options),
loop(Parent, Name, State, Mod, Timeout, HibernateAfterTimeout, Debug).
%%%========================================================================
@@ -374,6 +378,19 @@ init_it(Mod, Args) ->
%%% ---------------------------------------------------
%%% The MAIN loop.
%%% ---------------------------------------------------
+
+loop(Parent, Name, State, Mod, {continue, Continue} = Msg, HibernateAfterTimeout, Debug) ->
+ Reply = try_dispatch(Mod, handle_continue, Continue, State),
+ case Debug of
+ [] ->
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State);
+ _ ->
+ Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, Msg),
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State, Debug1)
+ end;
+
loop(Parent, Name, State, Mod, hibernate, HibernateAfterTimeout, Debug) ->
proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, HibernateAfterTimeout, Debug]);
diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl
index 1110d18af6..cd6312855d 100644
--- a/lib/stdlib/src/gen_statem.erl
+++ b/lib/stdlib/src/gen_statem.erl
@@ -296,7 +296,7 @@
(Reason :: term()).
%% Format the callback module state in some sensible that is
-%% often condensed way. For StatusOption =:= 'normal' the perferred
+%% often condensed way. For StatusOption =:= 'normal' the preferred
%% return term is [{data,[{"State",FormattedState}]}], and for
%% StatusOption =:= 'terminate' it is just FormattedState.
-callback format_status(
@@ -510,8 +510,6 @@ call(ServerRef, Request, Timeout) ->
parse_timeout(Timeout) ->
case Timeout of
- {clean_timeout,infinity} ->
- {dirty_timeout,infinity};
{clean_timeout,_} ->
Timeout;
{dirty_timeout,_} ->
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 9d447418f8..3c8430b820 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -963,7 +963,18 @@ limit_tail(Other, D) ->
%% maps:from_list() creates a map with the same internal ordering of
%% the selected associations as in Map.
limit_map(Map, D) ->
- maps:from_list(erts_internal:maps_to_list(Map, D)).
+ limit_map(maps:iterator(Map), D, []).
+
+limit_map(_I, 0, Acc) ->
+ maps:from_list(Acc);
+limit_map(I, D, Acc) ->
+ case maps:next(I) of
+ {K, V, NextI} ->
+ limit_map(NextI, D-1, [{K,V} | Acc]);
+ none ->
+ maps:from_list(Acc)
+ end.
+
%% maps:from_list(limit_map_body(erts_internal:maps_to_list(Map, D), D)).
%% limit_map_body(_, 0) -> [{'...', '...'}];
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 4b2d15c8b3..e345810ca0 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -380,7 +380,7 @@ float_e(_Fl, {Ds,E}, P) ->
{Fs,false} -> [Fs|float_exp(E-1)]
end.
-%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}.
+%% float_man([Digit], Icount, Dcount) -> {[Char],CarryFlag}.
%% Generate the characters in the mantissa from the digits with Icount
%% characters before the '.' and Dcount decimals. Handle carry and let
%% caller decide what to do at top.
@@ -395,7 +395,7 @@ float_man([D|Ds], I, Dc) ->
{Cs,false} -> {[D|Cs],false}
end;
float_man([], I, Dc) -> %Pad with 0's
- {string:chars($0, I, [$.|string:chars($0, Dc)]),false}.
+ {lists:duplicate(I, $0) ++ [$.|lists:duplicate(Dc, $0)],false}.
float_man([D|_], 0) when D >= $5 -> {[],true};
float_man([_|_], 0) -> {[],false};
@@ -405,7 +405,7 @@ float_man([D|Ds], Dc) ->
{Cs,true} -> {[D+1|Cs],false};
{Cs,false} -> {[D|Cs],false}
end;
-float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's
+float_man([], Dc) -> {lists:duplicate(Dc, $0),false}. %Pad with 0's
%% float_exp(Exponent) -> [Char].
%% Generate the exponent of a floating point number. Always include sign.
@@ -429,7 +429,7 @@ fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 ->
float_f(Fl, Fd, P) when Fl < 0.0 ->
[$-|float_f(-Fl, Fd, P)];
float_f(Fl, {Ds,E}, P) when E =< 0 ->
- float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's
+ float_f(Fl, {lists:duplicate(-E+1, $0)++Ds,1}, P); %Prepend enough 0's
float_f(_Fl, {Ds,E}, P) ->
case float_man(Ds, E, P) of
{Fs,true} -> "1" ++ Fs; %Handle carry
@@ -751,7 +751,7 @@ adjust(Data, Pad, right) -> [Pad|Data].
flat_trunc(List, N) when is_integer(N), N >= 0 ->
string:slice(List, 0, N).
-%% A deep version of string:chars/2,3
+%% A deep version of lists:duplicate/2
chars(_C, 0) ->
[];
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index 505613b80e..89e1931d2d 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -478,9 +478,19 @@ print_length(Term, _D, _RF, _Enc, _Str) ->
print_length_map(_Map, 1, _RF, _Enc, _Str) ->
{"#{...}", 6};
print_length_map(Map, D, RF, Enc, Str) when is_map(Map) ->
- Pairs = print_length_map_pairs(erts_internal:maps_to_list(Map, D), D, RF, Enc, Str),
+ Pairs = print_length_map_pairs(limit_map(maps:iterator(Map), D, []), D, RF, Enc, Str),
{{map, Pairs}, list_length(Pairs, 3)}.
+limit_map(_I, 0, Acc) ->
+ Acc;
+limit_map(I, D, Acc) ->
+ case maps:next(I) of
+ {K, V, NextI} ->
+ limit_map(NextI, D-1, [{K,V} | Acc]);
+ none ->
+ Acc
+ end.
+
print_length_map_pairs([], _D, _RF, _Enc, _Str) ->
[];
print_length_map_pairs(_Pairs, 1, _RF, _Enc, _Str) ->
diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl
index c6eb0d7915..a7980cc294 100644
--- a/lib/stdlib/src/lib.erl
+++ b/lib/stdlib/src/lib.erl
@@ -646,7 +646,7 @@ pp_arguments(PF, As, I, Enc) ->
Ll = length(L),
A = list_to_atom(lists:duplicate(Ll, $a)),
S0 = unicode:characters_to_list(PF([A | T], I+1), Enc),
- brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc);
+ brackets_to_parens([$[,L,string:slice(S0, 1+Ll)], Enc);
_ ->
brackets_to_parens(PF(As, I+1), Enc)
end.
diff --git a/lib/stdlib/src/maps.erl b/lib/stdlib/src/maps.erl
index 5dafdb282a..a13f340709 100644
--- a/lib/stdlib/src/maps.erl
+++ b/lib/stdlib/src/maps.erl
@@ -23,7 +23,8 @@
-export([get/3, filter/2,fold/3,
map/2, size/1,
update_with/3, update_with/4,
- without/2, with/2]).
+ without/2, with/2,
+ iterator/1, next/1]).
%% BIFs
-export([get/2, find/2, from_list/1,
@@ -31,6 +32,15 @@
new/0, put/3, remove/2, take/2,
to_list/1, update/3, values/1]).
+-opaque iterator() :: {term(), term(), iterator()}
+ | none | nonempty_improper_list(integer(),map()).
+
+-export_type([iterator/0]).
+
+-dialyzer({no_improper_lists, iterator/1}).
+
+-define(IS_ITERATOR(I), is_tuple(I) andalso tuple_size(I) == 3; I == none; is_integer(hd(I)) andalso is_map(tl(I))).
+
%% Shadowed by erl_bif_types: maps:get/2
-spec get(Key,Map) -> Value when
Key :: term(),
@@ -39,7 +49,6 @@
get(_,_) -> erlang:nif_error(undef).
-
-spec find(Key,Map) -> {ok, Value} | error when
Key :: term(),
Map :: map(),
@@ -114,14 +123,20 @@ remove(_,_) -> erlang:nif_error(undef).
take(_,_) -> erlang:nif_error(undef).
-%% Shadowed by erl_bif_types: maps:to_list/1
-spec to_list(Map) -> [{Key,Value}] when
Map :: map(),
Key :: term(),
Value :: term().
-to_list(_) -> erlang:nif_error(undef).
+to_list(Map) when is_map(Map) ->
+ to_list_internal(erts_internal:map_next(0, Map, []));
+to_list(Map) ->
+ erlang:error({badmap,Map},[Map]).
+to_list_internal([Iter, Map | Acc]) when is_integer(Iter) ->
+ to_list_internal(erts_internal:map_next(Iter, Map, Acc));
+to_list_internal(Acc) ->
+ Acc.
%% Shadowed by erl_bif_types: maps:update/3
-spec update(Key,Value,Map1) -> Map2 when
@@ -192,47 +207,80 @@ get(Key,Map,Default) ->
erlang:error({badmap,Map},[Key,Map,Default]).
--spec filter(Pred,Map1) -> Map2 when
+-spec filter(Pred,MapOrIter) -> Map when
Pred :: fun((Key, Value) -> boolean()),
Key :: term(),
Value :: term(),
- Map1 :: map(),
- Map2 :: map().
+ MapOrIter :: map() | iterator(),
+ Map :: map().
filter(Pred,Map) when is_function(Pred,2), is_map(Map) ->
- maps:from_list([{K,V}||{K,V}<-maps:to_list(Map),Pred(K,V)]);
+ maps:from_list(filter_1(Pred, iterator(Map)));
+filter(Pred,Iterator) when is_function(Pred,2), ?IS_ITERATOR(Iterator) ->
+ maps:from_list(filter_1(Pred, Iterator));
filter(Pred,Map) ->
erlang:error(error_type(Map),[Pred,Map]).
-
--spec fold(Fun,Init,Map) -> Acc when
+filter_1(Pred, Iter) ->
+ case next(Iter) of
+ {K, V, NextIter} ->
+ case Pred(K,V) of
+ true ->
+ [{K,V} | filter_1(Pred, NextIter)];
+ false ->
+ filter_1(Pred, NextIter)
+ end;
+ none ->
+ []
+ end.
+
+-spec fold(Fun,Init,MapOrIter) -> Acc when
Fun :: fun((K, V, AccIn) -> AccOut),
Init :: term(),
Acc :: term(),
AccIn :: term(),
AccOut :: term(),
- Map :: map(),
+ MapOrIter :: map() | iterator(),
K :: term(),
V :: term().
fold(Fun,Init,Map) when is_function(Fun,3), is_map(Map) ->
- lists:foldl(fun({K,V},A) -> Fun(K,V,A) end,Init,maps:to_list(Map));
+ fold_1(Fun,Init,iterator(Map));
+fold(Fun,Init,Iterator) when is_function(Fun,3), ?IS_ITERATOR(Iterator) ->
+ fold_1(Fun,Init,Iterator);
fold(Fun,Init,Map) ->
erlang:error(error_type(Map),[Fun,Init,Map]).
--spec map(Fun,Map1) -> Map2 when
+fold_1(Fun, Acc, Iter) ->
+ case next(Iter) of
+ {K, V, NextIter} ->
+ fold_1(Fun, Fun(K,V,Acc), NextIter);
+ none ->
+ Acc
+ end.
+
+-spec map(Fun,MapOrIter) -> Map when
Fun :: fun((K, V1) -> V2),
- Map1 :: map(),
- Map2 :: map(),
+ MapOrIter :: map() | iterator(),
+ Map :: map(),
K :: term(),
V1 :: term(),
V2 :: term().
map(Fun,Map) when is_function(Fun, 2), is_map(Map) ->
- maps:from_list([{K,Fun(K,V)}||{K,V}<-maps:to_list(Map)]);
+ maps:from_list(map_1(Fun, iterator(Map)));
+map(Fun,Iterator) when is_function(Fun, 2), ?IS_ITERATOR(Iterator) ->
+ maps:from_list(map_1(Fun, Iterator));
map(Fun,Map) ->
erlang:error(error_type(Map),[Fun,Map]).
+map_1(Fun, Iter) ->
+ case next(Iter) of
+ {K, V, NextIter} ->
+ [{K, Fun(K, V)} | map_1(Fun, NextIter)];
+ none ->
+ []
+ end.
-spec size(Map) -> non_neg_integer() when
Map :: map().
@@ -242,6 +290,26 @@ size(Map) when is_map(Map) ->
size(Val) ->
erlang:error({badmap,Val},[Val]).
+-spec iterator(Map) -> Iterator when
+ Map :: map(),
+ Iterator :: iterator().
+
+iterator(M) when is_map(M) -> [0 | M];
+iterator(M) -> erlang:error({badmap, M}, [M]).
+
+-spec next(Iterator) -> {Key, Value, NextIterator} | 'none' when
+ Iterator :: iterator(),
+ Key :: term(),
+ Value :: term(),
+ NextIterator :: iterator().
+next({K, V, I}) ->
+ {K, V, I};
+next([Path | Map]) when is_integer(Path), is_map(Map) ->
+ erts_internal:map_next(Path, Map, iterator);
+next(none) ->
+ none;
+next(Iter) ->
+ erlang:error(badarg, [Iter]).
-spec without(Ks,Map1) -> Map2 when
Ks :: [K],
@@ -250,11 +318,10 @@ size(Val) ->
K :: term().
without(Ks,M) when is_list(Ks), is_map(M) ->
- lists:foldl(fun(K, M1) -> ?MODULE:remove(K, M1) end, M, Ks);
+ lists:foldl(fun(K, M1) -> maps:remove(K, M1) end, M, Ks);
without(Ks,M) ->
erlang:error(error_type(M),[Ks,M]).
-
-spec with(Ks, Map1) -> Map2 when
Ks :: [K],
Map1 :: map(),
@@ -263,17 +330,17 @@ without(Ks,M) ->
with(Ks,Map1) when is_list(Ks), is_map(Map1) ->
Fun = fun(K, List) ->
- case ?MODULE:find(K, Map1) of
- {ok, V} ->
- [{K, V} | List];
- error ->
- List
- end
- end,
- ?MODULE:from_list(lists:foldl(Fun, [], Ks));
+ case maps:find(K, Map1) of
+ {ok, V} ->
+ [{K, V} | List];
+ error ->
+ List
+ end
+ end,
+ maps:from_list(lists:foldl(Fun, [], Ks));
with(Ks,M) ->
erlang:error(error_type(M),[Ks,M]).
-error_type(M) when is_map(M) -> badarg;
+error_type(M) when is_map(M); ?IS_ITERATOR(M) -> badarg;
error_type(V) -> {badmap, V}.
diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl
index 5b488cc677..122b476ddb 100644
--- a/lib/stdlib/src/otp_internal.erl
+++ b/lib/stdlib/src/otp_internal.erl
@@ -609,6 +609,52 @@ obsolete_1(filename, find_src, 2) ->
obsolete_1(erlang, hash, 2) ->
{removed, {erlang, phash2, 2}, "20.0"};
+%% Added in OTP-21
+obsolete_1(string, len, 1) ->
+ {deprecated, "deprecated; use string:length/3 instead"};
+obsolete_1(string, concat, 2) ->
+ {deprecated, "deprecated; use [Str1,Str2] instead"};
+obsolete_1(string, str, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rstr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, chr, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rchr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, span, 2) ->
+ {deprecated, "deprecated; use string:take/2 instead"};
+obsolete_1(string, cspan, 2) ->
+ {deprecated, "deprecated; use string:take/3 instead"};
+obsolete_1(string, substr, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, tokens, 2) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, chars, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, copies, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, words, _) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, strip, _) ->
+ {deprecated, "deprecated; use string:trim/3 instead"};
+obsolete_1(string, sub_word, _) ->
+ {deprecated, "deprecated; use string:nth_lexeme/3 instead"};
+obsolete_1(string, sub_string, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, left, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, right, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, centre, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, join, _) ->
+ {deprecated, "deprecated; use lists:join/2 instead"};
+obsolete_1(string, to_upper, _) ->
+ {deprecated, "deprecated; use string:uppercase/1 or string:titlecase/1 instead"};
+obsolete_1(string, to_lower, _) ->
+ {deprecated, "deprecated; use string:lowercase/1 or string:casefold/1 instead"};
+
%% not obsolete
obsolete_1(_, _, _) ->
diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl
index 05950a1d7c..b12ff205b1 100644
--- a/lib/stdlib/src/pool.erl
+++ b/lib/stdlib/src/pool.erl
@@ -25,7 +25,7 @@
%% with the least load !!!!
%% This function is callable from any node including the master
%% That is part of the pool
-%% nodes are scheduled on a per usgae basis and per load basis,
+%% nodes are scheduled on a per usage basis and per load basis,
%% Whenever we use a node, we put at the end of the queue, and whenever
%% a node report a change in load, we insert it accordingly
@@ -197,7 +197,7 @@ pure_insert({Load,Node},[{L,N}|Tail]) when Load < L ->
pure_insert(L,[H|T]) -> [H|pure_insert(L,T)].
%% Really should not measure the contributions from
-%% the back ground processes here .... which we do :-(
+%% the background processes here .... which we do :-(
%% We don't have to monitor the master, since we're slaves anyway
statistic_collector() ->
@@ -213,7 +213,7 @@ statistic_collector(I) ->
stat_loop(M, 999999)
end.
-%% Do not tell the master about our load if it has not changed
+%% Do not tell the master about our load if it has not changed
stat_loop(M, Old) ->
sleep(2000),
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 7a8a5e6d4a..362e98006e 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -21,8 +21,8 @@
%% Multiple PRNG module for Erlang/OTP
%% Copyright (c) 2015-2016 Kenji Rikitake
%%
-%% exrop (xoroshiro116+) added and statistical distribution
-%% improvements by the Erlang/OTP team 2017
+%% exrop (xoroshiro116+) added, statistical distribution
+%% improvements and uniform_real added by the Erlang/OTP team 2017
%% =====================================================================
-module(rand).
@@ -30,10 +30,14 @@
-export([seed_s/1, seed_s/2, seed/1, seed/2,
export_seed/0, export_seed_s/1,
uniform/0, uniform/1, uniform_s/1, uniform_s/2,
+ uniform_real/0, uniform_real_s/1,
jump/0, jump/1,
normal/0, normal/2, normal_s/1, normal_s/3
]).
+%% Debug
+-export([make_float/3, float2str/1, bc64/1]).
+
-compile({inline, [exs64_next/1, exsplus_next/1,
exs1024_next/1, exs1024_calc/2,
exrop_next/1, exrop_next_s/2,
@@ -60,6 +64,10 @@
%% N i evaluated 3 times
(?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))).
+-define(
+ BC(V, N),
+ bc((V), ?BIT((N) - 1), N)).
+
%%-define(TWO_POW_MINUS53, (math:pow(2, -53))).
-define(TWO_POW_MINUS53, 1.11022302462515657e-16).
@@ -84,14 +92,21 @@
%% The 'bits' field indicates how many bits the integer
%% returned from 'next' has got, i.e 'next' shall return
%% an random integer in the range 0..(2^Bits - 1).
-%% At least 53 bits is required for the floating point
-%% producing fallbacks. This field is only used when
-%% the 'uniform' or 'uniform_n' fields are not defined.
+%% At least 55 bits is required for the floating point
+%% producing fallbacks, but 56 bits would be more future proof.
%%
%% The fields 'next', 'uniform' and 'uniform_n'
-%% implement the algorithm. If 'uniform' or 'uinform_n'
+%% implement the algorithm. If 'uniform' or 'uniform_n'
%% is not present there is a fallback using 'next' and either
-%% 'bits' or the deprecated 'max'.
+%% 'bits' or the deprecated 'max'. The 'next' function
+%% must generate a word with at least 56 good random bits.
+%%
+%% The 'weak_low_bits' field indicate how many bits are of
+%% lesser quality and they will not be used by the floating point
+%% producing functions, nor by the range producing functions
+%% when more bits are needed, to avoid weak bits in the middle
+%% of the generated bits. The lowest bits from the range
+%% functions still have the generator's quality.
%%
-type alg_handler() ::
#{type := alg(),
@@ -148,11 +163,7 @@
%% For ranges larger than the algorithm bit size
uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) ->
- WeakLowBits =
- case Alg of
- #{weak_low_bits:=WLB} -> WLB;
- #{} -> 0
- end,
+ WeakLowBits = maps:get(weak_low_bits, Alg, 0),
%% Maybe waste the lowest bit(s) when shifting in new bits
Shift = Bits - WeakLowBits,
ShiftMask = bnot ?MASK(WeakLowBits),
@@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
{(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}};
uniform_s({#{max:=Max, next:=Next} = Alg, R0}) ->
{V, R1} = Next(R0),
- %% Old broken algorithm with non-uniform density
+ %% Old algorithm with non-uniform density
{V / (Max + 1), {Alg, R1}}.
@@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0})
?uniform_range(N, Alg, R1, V, MaxMinusN, I);
uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
when is_integer(N), 1 =< N ->
- %% Old broken algorithm with skewed probability
+ %% Old algorithm with skewed probability
%% and gap in ranges > Max
{V, R1} = Next(R0),
if
@@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
{trunc(F * N) + 1, {Alg, R1}}
end.
+%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0,
+%% updating the state in the process dictionary.
+
+-spec uniform_real() -> X :: float().
+uniform_real() ->
+ {X, Seed} = uniform_real_s(seed_get()),
+ _ = seed_put(Seed),
+ X.
+
+%% uniform_real_s/1: given a state, uniform_s/1
+%% returns a random float X where 0.0 < X =< 1.0,
+%% and a new state.
+%%
+%% This function does not use the same form of uniformity
+%% as the uniform_s/1 function.
+%%
+%% Instead, this function does not generate numbers with equal
+%% distance in the interval, but rather tries to keep all mantissa
+%% bits random also for small numbers, meaning that the distance
+%% between possible numbers decreases when the numbers
+%% approaches 0.0, as does the possibility for a particular
+%% number. Hence uniformity is preserved.
+%%
+%% To generate 56 bits at the time instead of 53 is actually
+%% a speed optimization since the probability to have to
+%% generate a second word decreases by 1/2 for every extra bit.
+%%
+%% This function generates normalized numbers, so the smallest number
+%% that can be generated is 2^-1022 with the distance 2^-1074
+%% to the next to smallest number, compared to 2^-53 for uniform_s/1.
+%%
+%% This concept of uniformity should work better for applications
+%% where you need to calculate 1.0/X or math:log(X) since those
+%% operations benefits from larger precision approaching 0.0,
+%% and that this function does not return 0.0 nor denormalized
+%% numbers very close to 0.0. The log() operation in The Box-Muller
+%% transformation for normal distribution is an example of this.
+%%
+%%-define(TWO_POW_MINUS55, (math:pow(2, -55))).
+%%-define(TWO_POW_MINUS110, (math:pow(2, -110))).
+%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17).
+%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34).
+%%
+-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}.
+uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number without using the weak low bits.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits)
+ end;
+uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number.
+ %% Ignore the weak low bits for these old algorithms,
+ %% just produce something reasonable.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = ?MASK(56, V1),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, 56)
+ end.
+
+uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56
+ %% This is a very theoretical bottom case.
+ %% The odds of getting here is about 2^-1008,
+ %% through a white box test case, or thanks to
+ %% a malfunctioning PRNG producing 18 56-bit zeros in a row.
+ %%
+ %% Fill up to 53 bits, we have at most 52
+ B0 = (53 - ?BC(M0, 52)), % Missing bits
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)),
+ {Alg, R1}};
+uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) ->
+ if
+ %% Optimize the most probable.
+ %% Fill up to 53 bits.
+ ?BIT(51) =< M0 ->
+ %% We have 52 bits in M0 - need 1
+ {(((M0 bsl 1) bor (V1 bsr (Bits - 1)))
+ * math:pow(2.0, BitNo - 1)),
+ {Alg, R1}};
+ ?BIT(50) =< M0 ->
+ %% We have 51 bits in M0 - need 2
+ {(((M0 bsl 2) bor (V1 bsr (Bits - 2)))
+ * math:pow(2.0, BitNo - 2)),
+ {Alg, R1}};
+ ?BIT(49) =< M0 ->
+ %% We have 50 bits in M0 - need 3
+ {(((M0 bsl 3) bor (V1 bsr (Bits - 3)))
+ * math:pow(2.0, BitNo - 3)),
+ {Alg, R1}};
+ M0 == 0 ->
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}};
+ BitNo =:= -1008 ->
+ %% Endgame
+ %% For the last round we can not have 14 zeros or more
+ %% at the top of M1 because then we will underflow,
+ %% so we need at least 43 bits
+ if
+ ?BIT(42) =< M1 ->
+ %% We have 43 bits - get the last bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1);
+ true ->
+ %% Would underflow 2^-1022 - start all over
+ %%
+ %% We could just crash here since the odds for
+ %% the PRNG being broken is much higher than
+ %% for a good PRNG generating this many zeros
+ %% in a row. Maybe we should write an error
+ %% report or call this a system limit...?
+ uniform_real_s({Alg, R1})
+ end;
+ true ->
+ %% Need more bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1)
+ end;
+ true ->
+ %% Fill up to 53 bits
+ B0 = 53 - ?BC(M0, 49), % Number of bits we need to append
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0)))
+ * math:pow(2.0, BitNo - B0)),
+ {Alg, R1}}
+ end.
+%%
+uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits);
+uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56).
+
%% jump/1: given a state, jump/1
%% returns a new state which is equivalent to that
%% after a large number of call defined for each algorithm.
@@ -1025,3 +1219,42 @@ normal_fi(Indx) ->
1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03,
5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03,
1.2602859304985975e-03}).
+
+%%%bitcount64(0) -> 0;
+%%%bitcount64(V) -> 1 + bitcount(V, 64).
+%%%
+%%%-define(
+%%% BITCOUNT(V, N),
+%%% bitcount(V, N) ->
+%%% if
+%%% (1 bsl ((N) bsr 1)) =< (V) ->
+%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1));
+%%% true ->
+%%% bitcount((V), ((N) bsr 1))
+%%% end).
+%%%?BITCOUNT(V, 64);
+%%%?BITCOUNT(V, 32);
+%%%?BITCOUNT(V, 16);
+%%%?BITCOUNT(V, 8);
+%%%?BITCOUNT(V, 4);
+%%%?BITCOUNT(V, 2);
+%%%bitcount(_, 1) -> 0.
+
+bc64(V) -> ?BC(V, 64).
+
+%% Linear from high bit - higher probability first gives faster execution
+bc(V, B, N) when B =< V -> N;
+bc(V, B, N) -> bc(V, B bsr 1, N - 1).
+
+make_float(S, E, M) ->
+ <<F/float>> = <<S:1, E:11, M:52>>,
+ F.
+
+float2str(N) ->
+ <<S:1, E:11, M:52>> = <<(float(N))/float>>,
+ lists:flatten(
+ io_lib:format(
+ "~c~c.~13.16.0bE~b",
+ [case S of 1 -> $-; 0 -> $+ end,
+ case E of 0 -> $0; _ -> $1 end,
+ M, E - 16#3ff])).
diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl
index d7cf6386f5..b3f3206d67 100644
--- a/lib/stdlib/src/slave.erl
+++ b/lib/stdlib/src/slave.erl
@@ -320,7 +320,7 @@ mk_cmd(Host, Name, Args, Waiter, Prog0) ->
%% emulator and flags as the test node. The return from lib:progname()
%% could then typically be '/<full_path_to>/cerl -gcov').
quote_progname(Progname) ->
- do_quote_progname(string:tokens(to_list(Progname)," ")).
+ do_quote_progname(string:lexemes(to_list(Progname)," ")).
do_quote_progname([Prog]) ->
"\""++Prog++"\"";
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 3c449d3cb9..5fb48acfab 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -101,13 +101,14 @@
timer,
unicode,
unicode_util,
+ uri_string,
win32reg,
zip]},
{registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/src/stdlib.appup.src b/lib/stdlib/src/stdlib.appup.src
index 800c2c61f3..e4e3fb83e9 100644
--- a/lib/stdlib/src/stdlib.appup.src
+++ b/lib/stdlib/src/stdlib.appup.src
@@ -18,9 +18,7 @@
%% %CopyrightEnd%
{"%VSN%",
%% Up from - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
%% Down to - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
}.
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 4972da297d..e01bb7d85e 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -74,19 +74,30 @@
-export([to_upper/1, to_lower/1]).
%%
-import(lists,[member/2]).
-
-compile({no_auto_import,[length/1]}).
+-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}).
+-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r).
-export_type([grapheme_cluster/0]).
-type grapheme_cluster() :: char() | [char()].
-type direction() :: 'leading' | 'trailing'.
--dialyzer({no_improper_lists, stack/2}).
+-dialyzer({no_improper_lists, [stack/2, length_b/3]}).
%%% BIFs internal (not documented) should not to be used outside of this module
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
+-deprecated([{len,1},{concat,2},
+ {str,2},{chr,2},{rchr,2},{rstr,2},
+ {span,2},{cspan,2},{substr,'_'},{tokens,2},
+ {chars,'_'},
+ {copies,2},{words,'_'},{strip,'_'},
+ {sub_word,'_'},{left,'_'},{right,'_'},
+ {sub_string,'_'},{centre,'_'},{join,2},
+ {to_upper,1}, {to_lower,1}
+ ]).
+
%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: string(),
@@ -117,8 +128,10 @@ is_empty(_) -> false.
%% Count the number of grapheme clusters in chardata
-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(<<CP1/utf8, Bin/binary>>) ->
+ length_b(Bin, CP1, 0);
length(CD) ->
- length_1(unicode_util:gc(CD), 0).
+ length_1(CD, 0).
%% Convert a string to a list of grapheme clusters
-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
@@ -166,6 +179,8 @@ equal(A, B, true, Norm) ->
%% Reverse grapheme clusters
-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(<<CP1/utf8, Rest/binary>>) ->
+ reverse_b(Rest, CP1, []);
reverse(CD) ->
reverse_1(CD, []).
@@ -176,7 +191,10 @@ reverse(CD) ->
Start :: non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N) when is_integer(N), N >= 0 ->
- slice_l(CD, N, is_binary(CD)).
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end.
-spec slice(String, Start, Length) -> Slice when
String::unicode:chardata(),
@@ -185,9 +203,15 @@ slice(CD, N) when is_integer(N), N >= 0 ->
Slice :: unicode:chardata().
slice(CD, N, Length)
when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
- slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ L -> slice_trail(L, Length)
+ end;
slice(CD, N, infinity) ->
- slice_l(CD, N, is_binary(CD));
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end;
slice(CD, _, 0) ->
case is_binary(CD) of
true -> <<>>;
@@ -246,18 +270,22 @@ trim(Str, Dir) ->
Dir :: direction() | 'both',
Characters :: [grapheme_cluster()].
trim(Str, _, []) -> Str;
+trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ls(Str, Sep);
trim(Str, leading, Sep) when is_list(Sep) ->
- trim_l(Str, search_pattern(Sep));
-trim(Str, trailing, Sep) when is_list(Sep) ->
- trim_t(Str, 0, search_pattern(Sep));
-trim(Str, both, Sep0) when is_list(Sep0) ->
- Sep = search_pattern(Sep0),
- trim_t(trim_l(Str,Sep), 0, Sep).
+ trim_l(Str, Sep);
+trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ts(Str, Sep);
+trim(Str, trailing, Seps0) when is_list(Seps0) ->
+ Seps = search_pattern(Seps0),
+ trim_t(Str, 0, Seps);
+trim(Str, both, Sep) when is_list(Sep) ->
+ trim(trim(Str,leading,Sep), trailing, Sep).
%% Delete trailing newlines or \r\n
-spec chomp(String::unicode:chardata()) -> unicode:chardata().
chomp(Str) ->
- trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+ trim(Str, trailing, [[$\r,$\n],$\n]).
%% Split String into two parts where the leading part consists of Characters
-spec take(String, Characters) -> {Leading, Trailing} when
@@ -290,8 +318,7 @@ take(Str, [], Complement, Dir) ->
{true, leading} -> {Str, Empty};
{true, trailing} -> {Empty, Str}
end;
-take(Str, Sep0, false, leading) ->
- Sep = search_pattern(Sep0),
+take(Str, Sep, false, leading) ->
take_l(Str, Sep, []);
take(Str, Sep0, true, leading) ->
Sep = search_pattern(Sep0),
@@ -451,6 +478,7 @@ replace(String, SearchPattern, Replacement, Where) ->
SeparatorList::[grapheme_cluster()]) ->
[unicode:chardata()].
lexemes([], _) -> [];
+lexemes(Str, []) -> [Str];
lexemes(Str, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
lexemes_m(Str, Seps, []).
@@ -484,13 +512,13 @@ find(String, SearchPattern, leading) ->
find(String, SearchPattern, trailing) ->
find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
-%% Fetch first codepoint and return rest in tail
+%% Fetch first grapheme cluster and return rest in tail
-spec next_grapheme(String::unicode:chardata()) ->
maybe_improper_list(grapheme_cluster(),unicode:chardata()) |
{error,unicode:chardata()}.
next_grapheme(CD) -> unicode_util:gc(CD).
-%% Fetch first grapheme cluster and return rest in tail
+%% Fetch first codepoint and return rest in tail
-spec next_codepoint(String::unicode:chardata()) ->
maybe_improper_list(char(),unicode:chardata()) |
{error,unicode:chardata()}.
@@ -498,10 +526,23 @@ next_codepoint(CD) -> unicode_util:cp(CD).
%% Internals
-length_1([_|Rest], N) ->
- length_1(unicode_util:gc(Rest), N+1);
-length_1([], N) ->
- N.
+length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
+ length_1(Cont, N+1);
+length_1(Str, N) ->
+ case unicode_util:gc(Str) of
+ [] -> N;
+ [_|Rest] -> length_1(Rest, N+1)
+ end.
+
+length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
+ when ?ASCII_LIST(CP1,CP2) ->
+ length_b(Rest, CP2, N+1);
+length_b(Bin0, CP1, N) ->
+ [_|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> N+1;
+ [CP3|Bin] -> length_b(Bin, CP3, N+1)
+ end.
equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
A =:= B andalso equal_1(AR, BR);
@@ -540,29 +581,66 @@ equal_norm_nocase(A0, B0, Norm) ->
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
+reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
+ reverse_1(Cont, [CP1|Acc]);
reverse_1(CD, Acc) ->
case unicode_util:gc(CD) of
[GC|Rest] -> reverse_1(Rest, [GC|Acc]);
[] -> Acc
end.
-slice_l(CD, N, Binary) when N > 0 ->
+reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ reverse_b(Rest, CP2, [CP1|Acc]);
+reverse_b(Bin0, CP1, Acc) ->
+ [GC|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> [GC|Acc];
+ [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
+ end.
+
+slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
+ slice_lb(Bin, CP1, N);
+slice_l0(L, N) ->
+ slice_l(L, N).
+
+slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ slice_l(Cont, N-1);
+slice_l(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
- [_|Cont] -> slice_l(Cont, N-1, Binary);
- [] when Binary -> <<>>;
+ [_|Cont] -> slice_l(Cont, N-1);
[] -> []
end;
-slice_l(Cont, 0, Binary) ->
- case is_empty(Cont) of
- true when Binary -> <<>>;
- _ -> Cont
+slice_l(Cont, 0) ->
+ Cont.
+
+slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 ->
+ slice_lb(Bin, CP2, N-1);
+slice_lb(Bin, CP1, N) ->
+ [_|Rest] = unicode_util:gc([CP1|Bin]),
+ if N > 1 ->
+ case unicode_util:cp(Rest) of
+ [CP2|Cont] -> slice_lb(Cont, CP2, N-1);
+ [] -> <<>>
+ end;
+ N =:= 1 ->
+ Rest
end.
+slice_trail(Orig, N) when is_binary(Orig) ->
+ case Orig of
+ <<CP1/utf8, Bin/binary>> when N > 0 ->
+ Length = slice_bin(Bin, CP1, N),
+ Sz = byte_size(Orig) - Length,
+ <<Keep:Sz/binary, _/binary>> = Orig,
+ Keep;
+ _ -> <<>>
+ end;
slice_trail(CD, N) when is_list(CD) ->
- slice_list(CD, N);
-slice_trail(CD, N) when is_binary(CD) ->
- slice_bin(CD, N, CD).
+ slice_list(CD, N).
+slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ [CP1|slice_list(Cont, N-1)];
slice_list(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[GC|Cont] -> append(GC, slice_list(Cont, N-1));
@@ -571,17 +649,16 @@ slice_list(CD, N) when N > 0 ->
slice_list(_, 0) ->
[].
-slice_bin(CD, N, Orig) when N > 0 ->
- case unicode_util:gc(CD) of
- [_|Cont] -> slice_bin(Cont, N-1, Orig);
- [] -> Orig
+slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 ->
+ slice_bin(Bin, CP2, N-1);
+slice_bin(CD, CP1, N) when N > 0 ->
+ [_|Bin] = unicode_util:gc([CP1|CD]),
+ case unicode_util:cp(Bin) of
+ [CP2|Cont] -> slice_bin(Cont, CP2, N-1);
+ [] -> 0
end;
-slice_bin([], 0, Orig) ->
- Orig;
-slice_bin(CD, 0, Orig) ->
- Sz = byte_size(Orig) - byte_size(CD),
- <<Keep:Sz/binary, _/binary>> = Orig,
- Keep.
+slice_bin(CD, CP1, 0) ->
+ byte_size(CD)+byte_size(<<CP1/utf8>>).
uppercase_list(CPs0) ->
case unicode_util:uppercase(CPs0) of
@@ -631,16 +708,31 @@ casefold_bin(CPs0, Acc) ->
[] -> Acc
end.
-
+%% Fast path for ascii searching for one character in lists
+trim_ls([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case Sep of
+ CP1 -> trim_ls(Cont, Sep);
+ _ -> Str
+ end;
+trim_ls(Str, Sep) ->
+ trim_l(Str, [Sep]).
+
+trim_l([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Sep) of
+ true -> trim_l(Cont, Sep);
+ false -> Str
+ end;
trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Sep) of
{nomatch, Cont} -> trim_l(Cont, Sep);
Keep -> Keep
end;
-trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+trim_l(Str, Sep) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
+ case lists:member(C, Sep) of
true -> trim_l(Cs, Sep);
false -> Str
end;
@@ -652,15 +744,51 @@ trim_l(Bin, Sep) when is_binary(Bin) ->
[Keep] -> Keep
end.
-trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+%% Fast path for ascii searching for one character in lists
+trim_ts([Sep|Cs1]=Str, Sep) ->
+ case Cs1 of
+ [] -> [];
+ [CP2|_] when ?ASCII_LIST(Sep,CP2) ->
+ Tail = trim_ts(Cs1, Sep),
+ case is_empty(Tail) of
+ true -> [];
+ false -> [Sep|Tail]
+ end;
+ _ ->
+ trim_t(Str, 0, search_pattern([Sep]))
+ end;
+trim_ts([CP|Cont],Sep) when is_integer(CP) ->
+ [CP|trim_ts(Cont, Sep)];
+trim_ts(Str, Sep) ->
+ trim_t(Str, 0, search_pattern([Sep])).
+
+trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Cs1] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
+ end;
+ false ->
+ append(GC,trim_t(Cs1, 0, Seps))
+ end;
+ false ->
+ [CP1|trim_t(Cont, 0, Seps)]
+ end;
+trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,_} ->
- stack(Bin, trim_t(Cont0, 0, Sep));
+ stack(Bin, trim_t(Cont0, 0, Seps));
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- Tail = trim_t(Cont, 0, Sep),
+ Tail = trim_t(Cont, 0, Seps),
case is_empty(Tail) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
@@ -672,67 +800,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t([Bin|Cont], KeepSz, Sep)
+ trim_t([Bin|Cont], KeepSz, Seps)
end
end;
-trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- Tail = trim_t(Cs1, 0, Sep),
- case is_empty(Tail) of
- true -> [];
- false -> append(GC,Tail)
- end;
- false ->
- append(GC,trim_t(Cs1, 0, Sep))
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
end;
false ->
- append(CP,trim_t(Cs, 0, Sep))
+ append(GC,trim_t(Cs1, 0, Seps))
end;
[] -> []
end;
-trim_t(Bin, N, Sep) when is_binary(Bin) ->
+trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> Bin;
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t(Bin, KeepSz, Sep)
+ trim_t(Bin, KeepSz, Seps)
end
end.
-take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Sep) of
+
+take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Seps) of
+ true -> take_l(Cont, Seps, [CP1|Acc]);
+ false -> {rev(Acc), Str}
+ end;
+take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_l(Str, Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
- true -> take_l(Cs, Sep, append(rev(C),Acc));
+ case lists:member(C, Seps) of
+ true -> take_l(Cs, Seps, append(rev(C),Acc));
false -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_l(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Sep) of
+take_l(Bin, Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -741,27 +871,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, Cont0, Sep) of
+
+take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ false -> take_lc(Str, Seps, append(rev(GC),Acc));
+ true -> {rev(Acc), Str0}
+ end;
+ false ->
+ take_lc(Cont, Seps, append(CP1,Acc))
+ end;
+take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, GCs) of
- false -> take_lc(Cs, Sep, append(rev(C),Acc));
+ false -> take_lc(Cs, Seps, append(rev(C),Acc));
true -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, [], Sep) of
+take_lc(Bin, Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -770,148 +914,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+
+take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ true ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Cont, 0, Seps),
+ {[CP1|Head], Tail}
+ end;
+take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t([Bin|Cont], KeepSz, Sep)
+ take_t([Bin|Cont], KeepSz, Seps)
end
end;
-take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- {Head, Tail} = take_t(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
end;
false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_t(Bin, N, Sep) when is_binary(Bin) ->
+take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t(Bin, KeepSz, Sep)
+ take_t(Bin, KeepSz, Seps)
end
end.
-take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, GCs) of
+ false ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(CP1,Tail)};
+ false -> {append(CP1,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ {append(CP1,Head), Tail}
+ end;
+take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, Cont0, Sep) of
+ case bin_search_inv(Rest, Cont0, GCs) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps0),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search(SepStart, Cont1, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, Cont1, Seps) of
{nomatch, Cont} ->
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc([Bin|Cont], KeepSz, Sep)
+ take_tc([Bin|Cont], KeepSz, Seps)
end
end;
-take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
- true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- false ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- true ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- {append(GC,Head), Tail}
- end;
+take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
false ->
- {Head, Tail} = take_tc(Cs, 0, Sep),
- case equal(Tail, Cs) of
- true -> {Head, append(CP,Tail)};
- false -> {append(CP,Head), Tail}
- end
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_tc(Bin, N, Sep) when is_binary(Bin) ->
+take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, [], Sep) of
+ case bin_search_inv(Rest, [], GCs) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search(SepStart, [], Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, [], Seps) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc(Bin, KeepSz, Sep)
+ take_tc(Bin, KeepSz, Seps)
end
end.
-prefix_1(Cs, []) -> Cs;
-prefix_1(Cs, [_]=Pre) ->
- prefix_2(unicode_util:gc(Cs), Pre);
-prefix_1(Cs, Pre) ->
- prefix_2(unicode_util:cp(Cs), Pre).
-
-prefix_2([C|Cs], [C|Pre]) ->
- prefix_1(Cs, Pre);
-prefix_2(_, _) ->
- nomatch.
+prefix_1(Cs0, [GC]) ->
+ case unicode_util:gc(Cs0) of
+ [GC|Cs] -> Cs;
+ _ -> nomatch
+ end;
+prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(Cs0, [Pre|PreR]) ->
+ case unicode_util:cp(Cs0) of
+ [Pre|Cs] -> prefix_1(Cs,PreR);
+ _ -> nomatch
+ end.
+split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) ->
+ case CP1=:=C of
+ true ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+ Rest when Where =:= leading ->
+ [rev(Curr), Rest];
+ Rest when Where =:= trailing ->
+ split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+ Rest when Where =:= all ->
+ split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+ end;
+ false ->
+ split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc)
+ end;
split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
when is_binary(Bin) ->
case bin_search_str(Bin, Start, Cont0, Needle) of
@@ -971,32 +1159,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
end
end.
-lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) ->
+ case lists:member(CP, CPs) of
+ true ->
+ [GC|Cs2] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ lexemes_m(Cs2, Seps, Ts);
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- lexemes_m(Cont, Seps, Ts);
+ lexemes_m(Cont, Seps0, Ts);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
-lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- lexemes_m(Cs, Seps, Ts);
+ lexemes_m(Cs, Seps0, Ts);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
[] ->
lists:reverse(Ts)
end;
-lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], GCs) of
{nomatch,_} ->
lists:reverse(Ts);
[Cs] ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
end.
@@ -1027,7 +1233,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> {rev(Tkn), Cs0};
+ true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false ->
@@ -1037,7 +1243,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
{rev(Tkn), []}
end;
lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin,Tkn), []};
[Left] ->
@@ -1046,35 +1252,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
{btoken(Lexeme, Tkn), Left}
end.
-nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- nth_lexeme_m(Cont, Seps, N);
+ nth_lexeme_m(Cont, Seps0, N);
Cs when N > 1 ->
- Rest = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Rest, Seps, N-1);
+ Rest = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Rest, Seps0, N-1);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme
end;
-nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- nth_lexeme_m(Cs, Seps, N);
+ nth_lexeme_m(Cs, Seps0, N);
false when N > 1 ->
- Cs1 = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Cs1, Seps, N-1);
+ Cs1 = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Cs1, Seps0, N-1);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs0, Seps, []),
Lexeme
end;
[] ->
[]
end;
-nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search_inv(Bin, [], GCs) of
[Cs] when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps),
nth_lexeme_m(Cs1, Seps, N-1);
@@ -1090,16 +1299,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs1, Seps)
end;
-lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} -> lexeme_skip(Cont0, Seps);
- Cs -> Cs
+ Cs -> tl(unicode_util:gc(Cs))
end;
lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
@@ -1108,7 +1318,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
@@ -1117,12 +1327,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
[] ->
[]
end;
-lexeme_skip(Bin, Seps) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+lexeme_skip(Bin, Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} -> <<>>;
- [Left] -> Left
+ [Left] -> tl(unicode_util:gc(Left))
end.
+find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) ->
+ case C1 of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_l(Cs, Needle);
+ _ -> Cs0
+ end;
+ _ ->
+ find_l(Cs, Needle)
+ end;
find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch, _, Cont} ->
@@ -1147,6 +1368,16 @@ find_l(Bin, Needle) ->
{_Before, [Cs], _After} -> Cs
end.
+find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) ->
+ case Cp of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_r(Cs, Needle, Res);
+ _ -> find_r(Cs, Needle, Cs0)
+ end;
+ _ ->
+ find_r(Cs, Needle, Res)
+ end;
find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch,_,Cont} ->
@@ -1217,11 +1448,6 @@ cp_prefix_1(Orig, Until, Cont) ->
%% Binary special
-bin_search(Bin, Seps) ->
- bin_search(Bin, [], Seps).
-
-bin_search(_Bin, Cont, {[],_,_}) ->
- {nomatch, Cont};
bin_search(Bin, Cont, {Seps,_,BP}) ->
bin_search_loop(Bin, 0, BP, Cont, Seps).
@@ -1229,10 +1455,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) ->
%% i.e. å in nfd form $a "COMBINING RING ABOVE"
%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
%% combined with other characters are currently ignored.
+search_pattern({_,_,_}=P) -> P;
search_pattern(Seps) ->
CPs = search_cp(Seps),
- Bin = bin_pattern(CPs),
- {Seps, CPs, Bin}.
+ {Seps, CPs, undefined}.
+
+search_compile({Sep, CPs, undefined}) ->
+ {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))};
+search_compile({_,_,_}=Compiled) -> Compiled.
search_cp([CP|Seps]) when is_integer(CP) ->
[CP|search_cp(Seps)];
@@ -1253,9 +1483,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
case binary:match(Bin, BinSeps) of
nomatch ->
{nomatch,Cont};
+ {Where, _CL} when Cont =:= [] ->
+ <<_:Where/binary, Cont1/binary>> = Bin,
+ [GC|Cont2] = unicode_util:gc(Cont1),
+ case lists:member(GC, Seps) of
+ false when Cont2 =:= [] ->
+ {nomatch, []};
+ false ->
+ Next = byte_size(Bin0) - byte_size(Cont2),
+ bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+ true ->
+ [Cont1]
+ end;
{Where, _CL} ->
<<_:Where/binary, Cont0/binary>> = Bin,
- Cont1 = stack(Cont0, Cont),
+ Cont1 = [Cont0|Cont],
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false ->
@@ -1263,55 +1505,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
[BinR|Cont] when is_binary(BinR) ->
Next = byte_size(Bin0) - byte_size(BinR),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
- BinR when is_binary(BinR), Cont =:= [] ->
- Next = byte_size(Bin0) - byte_size(BinR),
- bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
_ ->
{nomatch, Cont2}
end;
- true when is_list(Cont1) ->
- Cont1;
true ->
- [Cont1]
+ Cont1
end
end.
-bin_search_inv(Bin, Cont, {[], _, _}) ->
- [Bin|Cont];
-bin_search_inv(Bin, Cont, {[Sep], _, _}) ->
- bin_search_inv_1([Bin|Cont], Sep);
-bin_search_inv(Bin, Cont, {Seps, _, _}) ->
- bin_search_inv_n([Bin|Cont], Seps).
-
-bin_search_inv_1([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) ->
- case unicode_util:gc(CPs) of
- [Sep|Bin] when is_binary(Bin), Cont =:= [] ->
- bin_search_inv_1([Bin], Sep);
- [Sep|[Bin|Cont]=Cs] when is_binary(Bin) ->
- bin_search_inv_1(Cs, Sep);
- [Sep|Cs] ->
- {nomatch, Cs};
- _ -> CPs
- end.
+bin_search_inv(<<>>, Cont, _) ->
+ {nomatch, Cont};
+bin_search_inv(Bin, Cont, [Sep]) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+bin_search_inv(Bin, Cont, Seps) ->
+ bin_search_inv_n(Bin, Cont, Seps).
+
+bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case CP1 of
+ Sep -> bin_search_inv_1(BinRest, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ case unicode_util:gc(Bin0) of
+ [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ ->
+ case unicode_util:gc([Bin0|Cont]) of
+ [Sep|[Bin|Cont]] when is_binary(Bin) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+ [Sep|Cs] ->
+ {nomatch, Cs};
+ _ -> [Bin0|Cont]
+ end
+ end;
+bin_search_inv_1(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_1([], Cont, _Sep) ->
+ {nomatch, Cont}.
-bin_search_inv_n([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) ->
- [C|Cs0] = unicode_util:gc(CPs),
- case {lists:member(C, Seps), Cs0} of
- {true, Cs} when is_binary(Cs), Cont =:= [] ->
- bin_search_inv_n([Cs], Seps);
- {true, [Bin|Cont]=Cs} when is_binary(Bin) ->
- bin_search_inv_n(Cs, Seps);
- {true, Cs} -> {nomatch, Cs};
- {false, _} -> CPs
- end.
+bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case lists:member(CP1,Seps) of
+ true -> bin_search_inv_n(BinRest, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ [GC|Bin] = unicode_util:gc(Bin0),
+ case lists:member(GC, Seps) of
+ true -> bin_search_inv_n(Bin, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ ->
+ [GC|Cs0] = unicode_util:gc([Bin0|Cont]),
+ case lists:member(GC, Seps) of
+ false -> [Bin0|Cont];
+ true ->
+ case Cs0 of
+ [Bin|Cont] when is_binary(Bin) ->
+ bin_search_inv_n(Bin, Cont, Seps);
+ _ ->
+ {nomatch, Cs0}
+ end
+ end
+ end;
+bin_search_inv_n(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_n([], Cont, _Sep) ->
+ {nomatch, Cont}.
+
+bin_search_str(Bin0, Start, [], SearchCPs) ->
+ Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
+ bin_search_str_1(Bin0, Start, Compiled, SearchCPs);
bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
+ First = binary:compile_pattern(<<CP/utf8>>),
+ bin_search_str_2(Bin0, Start, Cont, First, SearchCPs).
+
+bin_search_str_1(Bin0, Start, First, SearchCPs) ->
+ <<_:Start/binary, Bin/binary>> = Bin0,
+ case binary:match(Bin, First) of
+ nomatch -> {nomatch, byte_size(Bin0), []};
+ {Where0, _} ->
+ Where = Start+Where0,
+ <<Keep:Where/binary, Cs0/binary>> = Bin0,
+ case prefix_1(Cs0, SearchCPs) of
+ nomatch ->
+ <<_/utf8, Cs/binary>> = Cs0,
+ KeepSz = byte_size(Bin0) - byte_size(Cs),
+ bin_search_str_1(Bin0, KeepSz, First, SearchCPs);
+ [] ->
+ {Keep, [Cs0], <<>>};
+ Rest ->
+ {Keep, [Cs0], Rest}
+ end
+ end.
+
+bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
- case binary:match(Bin, <<CP/utf8>>) of
+ case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), Cont};
{Where0, _} ->
Where = Start+Where0,
@@ -1320,7 +1615,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
case prefix_1(stack(Cs0,Cont), SearchCPs) of
nomatch when is_binary(Cs) ->
KeepSz = byte_size(Bin0) - byte_size(Cs),
- bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+ bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs);
nomatch ->
{nomatch, Where, stack([GC|Cs],Cont)};
[] ->
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index 7920e55930..e56415650f 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -31,7 +31,6 @@
%% Internal exports
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3, format_status/2]).
--export([try_again_restart/2]).
%% For release_handler only
-export([get_callback_module/1]).
@@ -79,6 +78,7 @@
| {RestartStrategy :: strategy(),
Intensity :: non_neg_integer(),
Period :: pos_integer()}.
+-type children() :: {Ids :: [child_id()], Db :: #{child_id() => child_rec()}}.
%%--------------------------------------------------------------------------
%% Defaults
@@ -96,7 +96,7 @@
pid = undefined :: child()
| {restarting, pid() | undefined}
| [pid()],
- name :: child_id(),
+ id :: child_id(),
mfargs :: mfargs(),
restart_type :: restart(),
shutdown :: shutdown(),
@@ -104,16 +104,11 @@
modules = [] :: modules()}).
-type child_rec() :: #child{}.
--define(DICTS, dict).
--define(DICT, dict:dict).
--define(SETS, sets).
--define(SET, sets:set).
-
-record(state, {name,
strategy :: strategy() | 'undefined',
- children = [] :: [child_rec()],
- dynamics :: {'dict', ?DICT(pid(), list())}
- | {'set', ?SET(pid())}
+ children = {[],#{}} :: children(), % Ids in start order
+ dynamics :: {'maps', #{pid() => list()}}
+ | {'sets', sets:set(pid())}
| 'undefined',
intensity :: non_neg_integer() | 'undefined',
period :: pos_integer() | 'undefined',
@@ -124,6 +119,9 @@
-type state() :: #state{}.
-define(is_simple(State), State#state.strategy =:= simple_one_for_one).
+-define(is_temporary(_Child_), _Child_#child.restart_type=:=temporary).
+-define(is_transient(_Child_), _Child_#child.restart_type=:=transient).
+-define(is_permanent(_Child_), _Child_#child.restart_type=:=permanent).
-callback init(Args :: term()) ->
{ok, {SupFlags :: sup_flags(), [ChildSpec :: child_spec()]}}
@@ -179,16 +177,16 @@ start_child(Supervisor, ChildSpec) ->
| {'error', Error},
Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one' |
term().
-restart_child(Supervisor, Name) ->
- call(Supervisor, {restart_child, Name}).
+restart_child(Supervisor, Id) ->
+ call(Supervisor, {restart_child, Id}).
-spec delete_child(SupRef, Id) -> Result when
SupRef :: sup_ref(),
Id :: child_id(),
Result :: 'ok' | {'error', Error},
Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one'.
-delete_child(Supervisor, Name) ->
- call(Supervisor, {delete_child, Name}).
+delete_child(Supervisor, Id) ->
+ call(Supervisor, {delete_child, Id}).
%%-----------------------------------------------------------------
%% Func: terminate_child/2
@@ -202,16 +200,16 @@ delete_child(Supervisor, Name) ->
Id :: pid() | child_id(),
Result :: 'ok' | {'error', Error},
Error :: 'not_found' | 'simple_one_for_one'.
-terminate_child(Supervisor, Name) ->
- call(Supervisor, {terminate_child, Name}).
+terminate_child(Supervisor, Id) ->
+ call(Supervisor, {terminate_child, Id}).
-spec get_childspec(SupRef, Id) -> Result when
SupRef :: sup_ref(),
Id :: pid() | child_id(),
Result :: {'ok', child_spec()} | {'error', Error},
Error :: 'not_found'.
-get_childspec(Supervisor, Name) ->
- call(Supervisor, {get_childspec, Name}).
+get_childspec(Supervisor, Id) ->
+ call(Supervisor, {get_childspec, Id}).
-spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when
SupRef :: sup_ref(),
@@ -246,17 +244,6 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) ->
check_childspecs(X) -> {error, {badarg, X}}.
%%%-----------------------------------------------------------------
-%%% Called by restart/2
--spec try_again_restart(SupRef, Child) -> ok when
- SupRef :: sup_ref(),
- Child :: child_id() | pid().
-try_again_restart(Supervisor, Child) ->
- cast(Supervisor, {try_again_restart, Child}).
-
-cast(Supervisor, Req) ->
- gen_server:cast(Supervisor, Req).
-
-%%%-----------------------------------------------------------------
%%% Called by release_handler during upgrade
-spec get_callback_module(Pid) -> Module when
Pid :: pid(),
@@ -325,7 +312,7 @@ init_children(State, StartSpec) ->
init_dynamic(State, [StartSpec]) ->
case check_startspec([StartSpec]) of
{ok, Children} ->
- {ok, State#state{children = Children}};
+ {ok, dyn_init(State#state{children = Children})};
Error ->
{stop, {start_spec, Error}}
end;
@@ -334,35 +321,34 @@ init_dynamic(_State, StartSpec) ->
%%-----------------------------------------------------------------
%% Func: start_children/2
-%% Args: Children = [child_rec()] in start order
+%% Args: Children = children() % Ids in start order
%% SupName = {local, atom()} | {global, atom()} | {pid(), Mod}
-%% Purpose: Start all children. The new list contains #child's
+%% Purpose: Start all children. The new map contains #child's
%% with pids.
%% Returns: {ok, NChildren} | {error, NChildren, Reason}
-%% NChildren = [child_rec()] in termination order (reversed
-%% start order)
+%% NChildren = children() % Ids in termination order
+%% (reversed start order)
%%-----------------------------------------------------------------
-start_children(Children, SupName) -> start_children(Children, [], SupName).
-
-start_children([Child|Chs], NChildren, SupName) ->
- case do_start_child(SupName, Child) of
- {ok, undefined} when Child#child.restart_type =:= temporary ->
- start_children(Chs, NChildren, SupName);
- {ok, Pid} ->
- start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName);
- {ok, Pid, _Extra} ->
- start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName);
- {error, Reason} ->
- report_error(start_error, Reason, Child, SupName),
- {error, lists:reverse(Chs) ++ [Child | NChildren],
- {failed_to_start_child,Child#child.name,Reason}}
- end;
-start_children([], NChildren, _SupName) ->
- {ok, NChildren}.
+start_children(Children, SupName) ->
+ Start =
+ fun(Id,Child) ->
+ case do_start_child(SupName, Child) of
+ {ok, undefined} when ?is_temporary(Child) ->
+ remove;
+ {ok, Pid} ->
+ {update,Child#child{pid = Pid}};
+ {ok, Pid, _Extra} ->
+ {update,Child#child{pid = Pid}};
+ {error, Reason} ->
+ report_error(start_error, Reason, Child, SupName),
+ {abort,{failed_to_start_child,Id,Reason}}
+ end
+ end,
+ children_map(Start,Children).
do_start_child(SupName, Child) ->
#child{mfargs = {M, F, Args}} = Child,
- case catch apply(M, F, Args) of
+ case do_start_child_i(M, F, Args) of
{ok, Pid} when is_pid(Pid) ->
NChild = Child#child{pid = Pid},
report_progress(NChild, SupName),
@@ -371,10 +357,8 @@ do_start_child(SupName, Child) ->
NChild = Child#child{pid = Pid},
report_progress(NChild, SupName),
{ok, Pid, Extra};
- ignore ->
- {ok, undefined};
- {error, What} -> {error, What};
- What -> {error, What}
+ Other ->
+ Other
end.
do_start_child_i(M, F, A) ->
@@ -400,17 +384,17 @@ do_start_child_i(M, F, A) ->
-spec handle_call(call(), term(), state()) -> {'reply', term(), state()}.
handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) ->
- Child = hd(State#state.children),
+ Child = get_dynamic_child(State),
#child{mfargs = {M, F, A}} = Child,
Args = A ++ EArgs,
case do_start_child_i(M, F, Args) of
{ok, undefined} ->
{reply, {ok, undefined}, State};
{ok, Pid} ->
- NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State),
+ NState = dyn_store(Pid, Args, State),
{reply, {ok, Pid}, NState};
{ok, Pid, Extra} ->
- NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State),
+ NState = dyn_store(Pid, Args, State),
{reply, {ok, Pid, Extra}, NState};
What ->
{reply, What, State}
@@ -426,121 +410,94 @@ handle_call({start_child, ChildSpec}, _From, State) ->
end;
%% terminate_child for simple_one_for_one can only be done with pid
-handle_call({terminate_child, Name}, _From, State) when not is_pid(Name),
- ?is_simple(State) ->
+handle_call({terminate_child, Id}, _From, State) when not is_pid(Id),
+ ?is_simple(State) ->
{reply, {error, simple_one_for_one}, State};
-handle_call({terminate_child, Name}, _From, State) ->
- case get_child(Name, State, ?is_simple(State)) of
- {value, Child} ->
- case do_terminate(Child, State#state.name) of
- #child{restart_type=RT} when RT=:=temporary; ?is_simple(State) ->
- {reply, ok, state_del_child(Child, State)};
- NChild ->
- {reply, ok, replace_child(NChild, State)}
- end;
- false ->
+handle_call({terminate_child, Id}, _From, State) ->
+ case find_child(Id, State) of
+ {ok, Child} ->
+ do_terminate(Child, State#state.name),
+ {reply, ok, del_child(Child, State)};
+ error ->
{reply, {error, not_found}, State}
end;
%% restart_child request is invalid for simple_one_for_one supervisors
-handle_call({restart_child, _Name}, _From, State) when ?is_simple(State) ->
+handle_call({restart_child, _Id}, _From, State) when ?is_simple(State) ->
{reply, {error, simple_one_for_one}, State};
-handle_call({restart_child, Name}, _From, State) ->
- case get_child(Name, State) of
- {value, Child} when Child#child.pid =:= undefined ->
+handle_call({restart_child, Id}, _From, State) ->
+ case find_child(Id, State) of
+ {ok, Child} when Child#child.pid =:= undefined ->
case do_start_child(State#state.name, Child) of
{ok, Pid} ->
- NState = replace_child(Child#child{pid = Pid}, State),
+ NState = set_pid(Pid, Id, State),
{reply, {ok, Pid}, NState};
{ok, Pid, Extra} ->
- NState = replace_child(Child#child{pid = Pid}, State),
+ NState = set_pid(Pid, Id, State),
{reply, {ok, Pid, Extra}, NState};
Error ->
{reply, Error, State}
end;
- {value, #child{pid=?restarting(_)}} ->
+ {ok, #child{pid=?restarting(_)}} ->
{reply, {error, restarting}, State};
- {value, _} ->
+ {ok, _} ->
{reply, {error, running}, State};
_ ->
{reply, {error, not_found}, State}
end;
%% delete_child request is invalid for simple_one_for_one supervisors
-handle_call({delete_child, _Name}, _From, State) when ?is_simple(State) ->
+handle_call({delete_child, _Id}, _From, State) when ?is_simple(State) ->
{reply, {error, simple_one_for_one}, State};
-handle_call({delete_child, Name}, _From, State) ->
- case get_child(Name, State) of
- {value, Child} when Child#child.pid =:= undefined ->
- NState = remove_child(Child, State),
+handle_call({delete_child, Id}, _From, State) ->
+ case find_child(Id, State) of
+ {ok, Child} when Child#child.pid =:= undefined ->
+ NState = remove_child(Id, State),
{reply, ok, NState};
- {value, #child{pid=?restarting(_)}} ->
+ {ok, #child{pid=?restarting(_)}} ->
{reply, {error, restarting}, State};
- {value, _} ->
+ {ok, _} ->
{reply, {error, running}, State};
_ ->
{reply, {error, not_found}, State}
end;
-handle_call({get_childspec, Name}, _From, State) ->
- case get_child(Name, State, ?is_simple(State)) of
- {value, Child} ->
+handle_call({get_childspec, Id}, _From, State) ->
+ case find_child(Id, State) of
+ {ok, Child} ->
{reply, {ok, child_to_spec(Child)}, State};
- false ->
+ error ->
{reply, {error, not_found}, State}
end;
-handle_call(which_children, _From, #state{children = [#child{restart_type = temporary,
- child_type = CT,
- modules = Mods}]} =
- State) when ?is_simple(State) ->
- Reply = lists:map(fun(Pid) -> {undefined, Pid, CT, Mods} end,
- ?SETS:to_list(dynamics_db(temporary, State#state.dynamics))),
- {reply, Reply, State};
-
-handle_call(which_children, _From, #state{children = [#child{restart_type = RType,
- child_type = CT,
- modules = Mods}]} =
- State) when ?is_simple(State) ->
- Reply = lists:map(fun({?restarting(_),_}) -> {undefined,restarting,CT,Mods};
- ({Pid, _}) -> {undefined, Pid, CT, Mods} end,
- ?DICTS:to_list(dynamics_db(RType, State#state.dynamics))),
+handle_call(which_children, _From, State) when ?is_simple(State) ->
+ #child{child_type = CT,modules = Mods} = get_dynamic_child(State),
+ Reply = dyn_map(fun(?restarting(_)) -> {undefined, restarting, CT, Mods};
+ (Pid) -> {undefined, Pid, CT, Mods}
+ end, State),
{reply, Reply, State};
handle_call(which_children, _From, State) ->
Resp =
- lists:map(fun(#child{pid = ?restarting(_), name = Name,
- child_type = ChildType, modules = Mods}) ->
- {Name, restarting, ChildType, Mods};
- (#child{pid = Pid, name = Name,
- child_type = ChildType, modules = Mods}) ->
- {Name, Pid, ChildType, Mods}
- end,
- State#state.children),
+ children_to_list(
+ fun(Id,#child{pid = ?restarting(_),
+ child_type = ChildType, modules = Mods}) ->
+ {Id, restarting, ChildType, Mods};
+ (Id,#child{pid = Pid,
+ child_type = ChildType, modules = Mods}) ->
+ {Id, Pid, ChildType, Mods}
+ end,
+ State#state.children),
{reply, Resp, State};
-
-handle_call(count_children, _From, #state{children = [#child{restart_type = temporary,
- child_type = CT}]} = State)
- when ?is_simple(State) ->
- Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)),
- Reply = case CT of
- supervisor -> [{specs, 1}, {active, Sz},
- {supervisors, Sz}, {workers, 0}];
- worker -> [{specs, 1}, {active, Sz},
- {supervisors, 0}, {workers, Sz}]
- end,
- {reply, Reply, State};
-
-handle_call(count_children, _From, #state{dynamic_restarts = Restarts,
- children = [#child{restart_type = RType,
- child_type = CT}]} = State)
+handle_call(count_children, _From, #state{dynamic_restarts = Restarts} = State)
when ?is_simple(State) ->
- Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)),
- Active = Sz - Restarts,
+ #child{child_type = CT} = get_dynamic_child(State),
+ Sz = dyn_size(State),
+ Active = Sz - Restarts, % Restarts is always 0 for temporary children
Reply = case CT of
supervisor -> [{specs, 1}, {active, Active},
{supervisors, Sz}, {workers, 0}];
@@ -552,16 +509,15 @@ handle_call(count_children, _From, #state{dynamic_restarts = Restarts,
handle_call(count_children, _From, State) ->
%% Specs and children are together on the children list...
{Specs, Active, Supers, Workers} =
- lists:foldl(fun(Child, Counts) ->
- count_child(Child, Counts)
- end, {0,0,0,0}, State#state.children),
+ children_fold(fun(_Id, Child, Counts) ->
+ count_child(Child, Counts)
+ end, {0,0,0,0}, State#state.children),
%% Reformat counts to a property list.
Reply = [{specs, Specs}, {active, Active},
{supervisors, Supers}, {workers, Workers}],
{reply, Reply, State}.
-
count_child(#child{pid = Pid, child_type = worker},
{Specs, Active, Supers, Workers}) ->
case is_pid(Pid) andalso is_process_alive(Pid) of
@@ -575,34 +531,15 @@ count_child(#child{pid = Pid, child_type = supervisor},
false -> {Specs+1, Active, Supers+1, Workers}
end.
-
%%% If a restart attempt failed, this message is cast
%%% from restart/2 in order to give gen_server the chance to
%%% check it's inbox before trying again.
--spec handle_cast({try_again_restart, child_id() | pid()}, state()) ->
+-spec handle_cast({try_again_restart, child_id() | {'restarting',pid()}}, state()) ->
{'noreply', state()} | {stop, shutdown, state()}.
-handle_cast({try_again_restart,Pid}, #state{children=[Child]}=State)
- when ?is_simple(State) ->
- RT = Child#child.restart_type,
- RPid = restarting(Pid),
- case dynamic_child_args(RPid, RT, State#state.dynamics) of
- {ok, Args} ->
- {M, F, _} = Child#child.mfargs,
- NChild = Child#child{pid = RPid, mfargs = {M, F, Args}},
- case restart(NChild,State) of
- {ok, State1} ->
- {noreply, State1};
- {shutdown, State1} ->
- {stop, shutdown, State1}
- end;
- error ->
- {noreply, State}
- end;
-
-handle_cast({try_again_restart,Name}, State) ->
- case lists:keyfind(Name,#child.name,State#state.children) of
- Child = #child{pid=?restarting(_)} ->
+handle_cast({try_again_restart,TryAgainId}, State) ->
+ case find_child_and_args(TryAgainId, State) of
+ {ok, Child = #child{pid=?restarting(_)}} ->
case restart(Child,State) of
{ok, State1} ->
{noreply, State1};
@@ -637,10 +574,8 @@ handle_info(Msg, State) ->
%%
-spec terminate(term(), state()) -> 'ok'.
-terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) ->
- terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type,
- State#state.dynamics),
- State#state.name);
+terminate(_Reason, State) when ?is_simple(State) ->
+ terminate_dynamic_children(State);
terminate(_Reason, State) ->
terminate_children(State#state.children, State#state.name).
@@ -675,8 +610,8 @@ code_change(_, State, _) ->
update_childspec(State, StartSpec) when ?is_simple(State) ->
case check_startspec(StartSpec) of
- {ok, [Child]} ->
- {ok, State#state{children = [Child]}};
+ {ok, {[_],_}=Children} ->
+ {ok, State#state{children = Children}};
Error ->
{error, Error}
end;
@@ -690,39 +625,36 @@ update_childspec(State, StartSpec) ->
{error, Error}
end.
-update_childspec1([Child|OldC], Children, KeepOld) ->
- case update_chsp(Child, Children) of
- {ok,NewChildren} ->
- update_childspec1(OldC, NewChildren, KeepOld);
+update_childspec1({[Id|OldIds], OldDb}, {Ids,Db}, KeepOld) ->
+ case update_chsp(maps:get(Id,OldDb), Db) of
+ {ok,NewDb} ->
+ update_childspec1({OldIds,OldDb}, {Ids,NewDb}, KeepOld);
false ->
- update_childspec1(OldC, Children, [Child|KeepOld])
+ update_childspec1({OldIds,OldDb}, {Ids,Db}, [Id|KeepOld])
end;
-update_childspec1([], Children, KeepOld) ->
+update_childspec1({[],OldDb}, {Ids,Db}, KeepOld) ->
+ KeepOldDb = maps:with(KeepOld,OldDb),
%% Return them in (kept) reverse start order.
- lists:reverse(Children ++ KeepOld).
-
-update_chsp(OldCh, Children) ->
- case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name ->
- Ch#child{pid = OldCh#child.pid};
- (Ch) ->
- Ch
- end,
- Children) of
- Children ->
- false; % OldCh not found in new spec.
- NewC ->
- {ok, NewC}
+ {lists:reverse(Ids ++ KeepOld),maps:merge(KeepOldDb,Db)}.
+
+update_chsp(#child{id=Id}=OldChild, NewDb) ->
+ case maps:find(Id, NewDb) of
+ {ok,Child} ->
+ {ok,NewDb#{Id => Child#child{pid = OldChild#child.pid}}};
+ error -> % Id not found in new spec.
+ false
end.
+
%%% ---------------------------------------------------
%%% Start a new child.
%%% ---------------------------------------------------
handle_start_child(Child, State) ->
- case get_child(Child#child.name, State) of
- false ->
+ case find_child(Child#child.id, State) of
+ error ->
case do_start_child(State#state.name, Child) of
- {ok, undefined} when Child#child.restart_type =:= temporary ->
+ {ok, undefined} when ?is_temporary(Child) ->
{{ok, undefined}, State};
{ok, Pid} ->
{{ok, Pid}, save_child(Child#child{pid = Pid}, State)};
@@ -731,9 +663,9 @@ handle_start_child(Child, State) ->
{error, What} ->
{{error, {What, Child}}, State}
end;
- {value, OldChild} when is_pid(OldChild#child.pid) ->
+ {ok, OldChild} when is_pid(OldChild#child.pid) ->
{{error, {already_started, OldChild#child.pid}}, State};
- {value, _OldChild} ->
+ {ok, _OldChild} ->
{{error, already_present}, State}
end.
@@ -742,63 +674,45 @@ handle_start_child(Child, State) ->
%%% Returns: {ok, state()} | {shutdown, state()}
%%% ---------------------------------------------------
-restart_child(Pid, Reason, #state{children = [Child]} = State) when ?is_simple(State) ->
- RestartType = Child#child.restart_type,
- case dynamic_child_args(Pid, RestartType, State#state.dynamics) of
- {ok, Args} ->
- {M, F, _} = Child#child.mfargs,
- NChild = Child#child{pid = Pid, mfargs = {M, F, Args}},
- do_restart(RestartType, Reason, NChild, State);
- error ->
- {ok, State}
- end;
-
restart_child(Pid, Reason, State) ->
- Children = State#state.children,
- case lists:keyfind(Pid, #child.pid, Children) of
- #child{restart_type = RestartType} = Child ->
- do_restart(RestartType, Reason, Child, State);
- false ->
+ case find_child_and_args(Pid, State) of
+ {ok, Child} ->
+ do_restart(Reason, Child, State);
+ error ->
{ok, State}
end.
-do_restart(permanent, Reason, Child, State) ->
+do_restart(Reason, Child, State) when ?is_permanent(Child) ->
report_error(child_terminated, Reason, Child, State#state.name),
restart(Child, State);
-do_restart(_, normal, Child, State) ->
- NState = state_del_child(Child, State),
+do_restart(normal, Child, State) ->
+ NState = del_child(Child, State),
{ok, NState};
-do_restart(_, shutdown, Child, State) ->
- NState = state_del_child(Child, State),
+do_restart(shutdown, Child, State) ->
+ NState = del_child(Child, State),
{ok, NState};
-do_restart(_, {shutdown, _Term}, Child, State) ->
- NState = state_del_child(Child, State),
+do_restart({shutdown, _Term}, Child, State) ->
+ NState = del_child(Child, State),
{ok, NState};
-do_restart(transient, Reason, Child, State) ->
+do_restart(Reason, Child, State) when ?is_transient(Child) ->
report_error(child_terminated, Reason, Child, State#state.name),
restart(Child, State);
-do_restart(temporary, Reason, Child, State) ->
+do_restart(Reason, Child, State) when ?is_temporary(Child) ->
report_error(child_terminated, Reason, Child, State#state.name),
- NState = state_del_child(Child, State),
+ NState = del_child(Child, State),
{ok, NState}.
restart(Child, State) ->
case add_restart(State) of
{ok, NState} ->
case restart(NState#state.strategy, Child, NState) of
- {try_again,NState2} ->
+ {{try_again, TryAgainId}, NState2} ->
%% Leaving control back to gen_server before
%% trying again. This way other incoming requsts
%% for the supervisor can be handled - e.g. a
%% shutdown request for the supervisor or the
%% child.
- Id = if ?is_simple(State) -> Child#child.pid;
- true -> Child#child.name
- end,
- ok = try_again_restart(self(), Id),
- {ok,NState2};
- {try_again, NState2, #child{name=ChName}} ->
- ok = try_again_restart(self(), ChName),
+ try_again_restart(TryAgainId),
{ok,NState2};
Other ->
Other
@@ -806,124 +720,111 @@ restart(Child, State) ->
{terminate, NState} ->
report_error(shutdown, reached_max_restart_intensity,
Child, State#state.name),
- {shutdown, remove_child(Child, NState)}
+ {shutdown, del_child(Child, NState)}
end.
restart(simple_one_for_one, Child, State0) ->
#child{pid = OldPid, mfargs = {M, F, A}} = Child,
- State = case OldPid of
+ State1 = case OldPid of
?restarting(_) ->
NRes = State0#state.dynamic_restarts - 1,
State0#state{dynamic_restarts = NRes};
_ ->
State0
end,
- Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type,
- State#state.dynamics)),
+ State2 = dyn_erase(OldPid, State1),
case do_start_child_i(M, F, A) of
{ok, Pid} ->
- DynamicsDb = {dict, ?DICTS:store(Pid, A, Dynamics)},
- NState = State#state{dynamics = DynamicsDb},
+ NState = dyn_store(Pid, A, State2),
{ok, NState};
{ok, Pid, _Extra} ->
- DynamicsDb = {dict, ?DICTS:store(Pid, A, Dynamics)},
- NState = State#state{dynamics = DynamicsDb},
+ NState = dyn_store(Pid, A, State2),
{ok, NState};
{error, Error} ->
- NRestarts = State#state.dynamic_restarts + 1,
- DynamicsDb = {dict, ?DICTS:store(restarting(OldPid), A, Dynamics)},
- NState = State#state{dynamic_restarts = NRestarts,
- dynamics = DynamicsDb},
- report_error(start_error, Error, Child, State#state.name),
- {try_again, NState}
+ ROldPid = restarting(OldPid),
+ NRestarts = State2#state.dynamic_restarts + 1,
+ State3 = State2#state{dynamic_restarts = NRestarts},
+ NState = dyn_store(ROldPid, A, State3),
+ report_error(start_error, Error, Child, NState#state.name),
+ {{try_again, ROldPid}, NState}
end;
-restart(one_for_one, Child, State) ->
+restart(one_for_one, #child{id=Id} = Child, State) ->
OldPid = Child#child.pid,
case do_start_child(State#state.name, Child) of
{ok, Pid} ->
- NState = replace_child(Child#child{pid = Pid}, State),
+ NState = set_pid(Pid, Id, State),
{ok, NState};
{ok, Pid, _Extra} ->
- NState = replace_child(Child#child{pid = Pid}, State),
+ NState = set_pid(Pid, Id, State),
{ok, NState};
{error, Reason} ->
- NState = replace_child(Child#child{pid = restarting(OldPid)}, State),
+ NState = set_pid(restarting(OldPid), Id, State),
report_error(start_error, Reason, Child, State#state.name),
- {try_again, NState}
- end;
-restart(rest_for_one, Child, State) ->
- {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children),
- ChAfter2 = terminate_children(ChAfter, State#state.name),
- case start_children(ChAfter2, State#state.name) of
- {ok, ChAfter3} ->
- {ok, State#state{children = ChAfter3 ++ ChBefore}};
- {error, ChAfter3, {failed_to_start_child, ChName, _Reason}}
- when ChName =:= Child#child.name ->
- NChild = Child#child{pid=restarting(Child#child.pid)},
- NState = State#state{children = ChAfter3 ++ ChBefore},
- {try_again, replace_child(NChild,NState)};
- {error, ChAfter3, {failed_to_start_child, ChName, _Reason}} ->
- NChild = lists:keyfind(ChName, #child.name, ChAfter3),
- NChild2 = NChild#child{pid=?restarting(undefined)},
- NState = State#state{children = ChAfter3 ++ ChBefore},
- {try_again, replace_child(NChild2,NState), NChild2}
+ {{try_again,Id}, NState}
end;
-restart(one_for_all, Child, State) ->
- Children1 = del_child(Child#child.pid, State#state.children),
- Children2 = terminate_children(Children1, State#state.name),
- case start_children(Children2, State#state.name) of
- {ok, NChs} ->
- {ok, State#state{children = NChs}};
- {error, NChs, {failed_to_start_child, ChName, _Reason}}
- when ChName =:= Child#child.name ->
- NChild = Child#child{pid=restarting(Child#child.pid)},
- NState = State#state{children = NChs},
- {try_again, replace_child(NChild,NState)};
- {error, NChs, {failed_to_start_child, ChName, _Reason}} ->
- NChild = lists:keyfind(ChName, #child.name, NChs),
- NChild2 = NChild#child{pid=?restarting(undefined)},
- NState = State#state{children = NChs},
- {try_again, replace_child(NChild2,NState), NChild2}
+restart(rest_for_one, #child{id=Id} = Child, #state{name=SupName} = State) ->
+ {ChAfter, ChBefore} = split_child(Id, State#state.children),
+ {Return, ChAfter2} = restart_multiple_children(Child, ChAfter, SupName),
+ {Return, State#state{children = append(ChAfter2,ChBefore)}};
+restart(one_for_all, Child, #state{name=SupName} = State) ->
+ Children1 = del_child(Child#child.id, State#state.children),
+ {Return, NChildren} = restart_multiple_children(Child, Children1, SupName),
+ {Return, State#state{children = NChildren}}.
+
+restart_multiple_children(Child, Children, SupName) ->
+ Children1 = terminate_children(Children, SupName),
+ case start_children(Children1, SupName) of
+ {ok, NChildren} ->
+ {ok, NChildren};
+ {error, NChildren, {failed_to_start_child, FailedId, _Reason}} ->
+ NewPid = if FailedId =:= Child#child.id ->
+ restarting(Child#child.pid);
+ true ->
+ ?restarting(undefined)
+ end,
+ {{try_again, FailedId}, set_pid(NewPid,FailedId,NChildren)}
end.
restarting(Pid) when is_pid(Pid) -> ?restarting(Pid);
restarting(RPid) -> RPid.
+-spec try_again_restart(child_id() | {'restarting',pid()}) -> 'ok'.
+try_again_restart(TryAgainId) ->
+ gen_server:cast(self(), {try_again_restart, TryAgainId}).
+
%%-----------------------------------------------------------------
%% Func: terminate_children/2
-%% Args: Children = [child_rec()] in termination order
+%% Args: Children = children() % Ids in termination order
%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
-%% Returns: NChildren = [child_rec()] in
-%% startup order (reversed termination order)
+%% Returns: NChildren = children() % Ids in startup order
+%% % (reversed termination order)
%%-----------------------------------------------------------------
terminate_children(Children, SupName) ->
- terminate_children(Children, SupName, []).
-
-%% Temporary children should not be restarted and thus should
-%% be skipped when building the list of terminated children, although
-%% we do want them to be shut down as many functions from this module
-%% use this function to just clear everything.
-terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) ->
- _ = do_terminate(Child, SupName),
- terminate_children(Children, SupName, Res);
-terminate_children([Child | Children], SupName, Res) ->
- NChild = do_terminate(Child, SupName),
- terminate_children(Children, SupName, [NChild | Res]);
-terminate_children([], _SupName, Res) ->
- Res.
+ Terminate =
+ fun(_Id,Child) when ?is_temporary(Child) ->
+ %% Temporary children should not be restarted and thus should
+ %% be skipped when building the list of terminated children.
+ do_terminate(Child, SupName),
+ remove;
+ (_Id,Child) ->
+ do_terminate(Child, SupName),
+ {update,Child#child{pid=undefined}}
+ end,
+ {ok,NChildren} = children_map(Terminate, Children),
+ NChildren.
do_terminate(Child, SupName) when is_pid(Child#child.pid) ->
case shutdown(Child#child.pid, Child#child.shutdown) of
ok ->
ok;
- {error, normal} when Child#child.restart_type =/= permanent ->
+ {error, normal} when not (?is_permanent(Child)) ->
ok;
{error, OtherReason} ->
report_error(shutdown_error, OtherReason, Child, SupName)
end,
- Child#child{pid = undefined};
-do_terminate(Child, _SupName) ->
- Child#child{pid = undefined}.
+ ok;
+do_terminate(_Child, _SupName) ->
+ ok.
%%-----------------------------------------------------------------
%% Shutdowns a child. We must check the EXIT value
@@ -996,66 +897,50 @@ monitor_child(Pid) ->
ok
end.
-
%%-----------------------------------------------------------------
-%% Func: terminate_dynamic_children/3
-%% Args: Child = child_rec()
-%% Dynamics = ?DICT() | ?SET()
-%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
+%% Func: terminate_dynamic_children/1
+%% Args: State
%% Returns: ok
%%
-%%
%% Shutdown all dynamic children. This happens when the supervisor is
%% stopped. Because the supervisor can have millions of dynamic children, we
-%% can have an significative overhead here.
+%% can have a significative overhead here.
%%-----------------------------------------------------------------
-terminate_dynamic_children(Child, Dynamics, SupName) ->
- {Pids, EStack0} = monitor_dynamic_children(Child, Dynamics),
- Sz = ?SETS:size(Pids),
+terminate_dynamic_children(State) ->
+ Child = get_dynamic_child(State),
+ {Pids, EStack0} = monitor_dynamic_children(Child,State),
+ Sz = sets:size(Pids),
EStack = case Child#child.shutdown of
brutal_kill ->
- ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ sets:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
infinity ->
- ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ sets:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
Time ->
- ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
+ sets:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids),
TRef = erlang:start_timer(Time, self(), kill),
wait_dynamic_children(Child, Pids, Sz, TRef, EStack0)
end,
%% Unroll stacked errors and report them
- ?DICTS:fold(fun(Reason, Ls, _) ->
- report_error(shutdown_error, Reason,
- Child#child{pid=Ls}, SupName)
- end, ok, EStack).
-
-
-monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) ->
- ?SETS:fold(fun(P, {Pids, EStack}) ->
- case monitor_child(P) of
- ok ->
- {?SETS:add_element(P, Pids), EStack};
- {error, normal} ->
- {Pids, EStack};
- {error, Reason} ->
- {Pids, ?DICTS:append(Reason, P, EStack)}
- end
- end, {?SETS:new(), ?DICTS:new()}, Dynamics);
-monitor_dynamic_children(#child{restart_type=RType}, Dynamics) ->
- ?DICTS:fold(fun(P, _, {Pids, EStack}) when is_pid(P) ->
- case monitor_child(P) of
- ok ->
- {?SETS:add_element(P, Pids), EStack};
- {error, normal} when RType =/= permanent ->
- {Pids, EStack};
- {error, Reason} ->
- {Pids, ?DICTS:append(Reason, P, EStack)}
- end;
- (?restarting(_), _, {Pids, EStack}) ->
- {Pids, EStack}
- end, {?SETS:new(), ?DICTS:new()}, Dynamics).
-
+ dict:fold(fun(Reason, Ls, _) ->
+ report_error(shutdown_error, Reason,
+ Child#child{pid=Ls}, State#state.name)
+ end, ok, EStack).
+
+monitor_dynamic_children(Child,State) ->
+ dyn_fold(fun(P,{Pids, EStack}) when is_pid(P) ->
+ case monitor_child(P) of
+ ok ->
+ {sets:add_element(P, Pids), EStack};
+ {error, normal} when not (?is_permanent(Child)) ->
+ {Pids, EStack};
+ {error, Reason} ->
+ {Pids, dict:append(Reason, P, EStack)}
+ end;
+ (?restarting(_), {Pids, EStack}) ->
+ {Pids, EStack}
+ end, {sets:new(), dict:new()}, State).
wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) ->
EStack;
@@ -1073,39 +958,38 @@ wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz,
TRef, EStack) ->
receive
{'DOWN', _MRef, process, Pid, killed} ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
TRef, EStack);
{'DOWN', _MRef, process, Pid, Reason} ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
- TRef, ?DICTS:append(Reason, Pid, EStack))
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
+ TRef, dict:append(Reason, Pid, EStack))
end;
-wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz,
- TRef, EStack) ->
+wait_dynamic_children(Child, Pids, Sz, TRef, EStack) ->
receive
{'DOWN', _MRef, process, Pid, shutdown} ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
TRef, EStack);
{'DOWN', _MRef, process, Pid, {shutdown, _}} ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
TRef, EStack);
- {'DOWN', _MRef, process, Pid, normal} when RType =/= permanent ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
+ {'DOWN', _MRef, process, Pid, normal} when not (?is_permanent(Child)) ->
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
TRef, EStack);
{'DOWN', _MRef, process, Pid, Reason} ->
- wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1,
- TRef, ?DICTS:append(Reason, Pid, EStack));
+ wait_dynamic_children(Child, sets:del_element(Pid, Pids), Sz-1,
+ TRef, dict:append(Reason, Pid, EStack));
{timeout, TRef, kill} ->
- ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
+ sets:fold(fun(P, _) -> exit(P, kill) end, ok, Pids),
wait_dynamic_children(Child, Pids, Sz, undefined, EStack)
end.
%%-----------------------------------------------------------------
-%% Child/State manipulating functions.
+%% Access #state.children
%%-----------------------------------------------------------------
%% Note we do not want to save the parameter list for temporary processes as
@@ -1113,114 +997,184 @@ wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz,
%% Especially for dynamic children to simple_one_for_one supervisors
%% it could become very costly as it is not uncommon to spawn
%% very many such processes.
-save_child(#child{restart_type = temporary,
- mfargs = {M, F, _}} = Child, #state{children = Children} = State) ->
- State#state{children = [Child#child{mfargs = {M, F, undefined}} |Children]};
-save_child(Child, #state{children = Children} = State) ->
- State#state{children = [Child |Children]}.
-
-save_dynamic_child(temporary, Pid, _, #state{dynamics = Dynamics} = State) ->
- DynamicsDb = dynamics_db(temporary, Dynamics),
- State#state{dynamics = {set, ?SETS:add_element(Pid, DynamicsDb)}};
-save_dynamic_child(RestartType, Pid, Args, #state{dynamics = Dynamics} = State) ->
- DynamicsDb = dynamics_db(RestartType, Dynamics),
- State#state{dynamics = {dict, ?DICTS:store(Pid, Args, DynamicsDb)}}.
-
-dynamics_db(temporary, undefined) ->
- ?SETS:new();
-dynamics_db(_, undefined) ->
- ?DICTS:new();
-dynamics_db(_, {_Tag, DynamicsDb}) ->
- DynamicsDb.
-
-dynamic_child_args(_Pid, temporary, _DynamicsDb) ->
- {ok, undefined};
-dynamic_child_args(Pid, _RT, {dict, DynamicsDb}) ->
- ?DICTS:find(Pid, DynamicsDb);
-dynamic_child_args(_Pid, _RT, undefined) ->
- error.
-
-state_del_child(#child{pid = Pid, restart_type = temporary}, State) when ?is_simple(State) ->
- NDynamics = ?SETS:del_element(Pid, dynamics_db(temporary, State#state.dynamics)),
- State#state{dynamics = {set, NDynamics}};
-state_del_child(#child{pid = Pid, restart_type = RType}, State) when ?is_simple(State) ->
- NDynamics = ?DICTS:erase(Pid, dynamics_db(RType, State#state.dynamics)),
- State#state{dynamics = {dict, NDynamics}};
-state_del_child(Child, State) ->
- NChildren = del_child(Child#child.name, State#state.children),
- State#state{children = NChildren}.
-
-del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name, Ch#child.restart_type =:= temporary ->
- Chs;
-del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name ->
- [Ch#child{pid = undefined} | Chs];
-del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid, Ch#child.restart_type =:= temporary ->
- Chs;
-del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid ->
- [Ch#child{pid = undefined} | Chs];
-del_child(Name, [Ch|Chs]) ->
- [Ch|del_child(Name, Chs)];
-del_child(_, []) ->
- [].
+-spec save_child(child_rec(), state()) -> state().
+save_child(#child{mfargs = {M, F, _}} = Child, State) when ?is_temporary(Child) ->
+ do_save_child(Child#child{mfargs = {M, F, undefined}}, State);
+save_child(Child, State) ->
+ do_save_child(Child, State).
+
+-spec do_save_child(child_rec(), state()) -> state().
+do_save_child(#child{id = Id} = Child, #state{children = {Ids,Db}} = State) ->
+ State#state{children = {[Id|Ids],Db#{Id => Child}}}.
+
+-spec del_child(child_rec(), state()) -> state();
+ (child_id(), children()) -> children().
+del_child(#child{pid = Pid}, State) when ?is_simple(State) ->
+ dyn_erase(Pid,State);
+del_child(Child, State) when is_record(Child,child), is_record(State,state) ->
+ NChildren = del_child(Child#child.id, State#state.children),
+ State#state{children = NChildren};
+del_child(Id, {Ids,Db}) ->
+ case maps:get(Id, Db) of
+ Child when Child#child.restart_type =:= temporary ->
+ {lists:delete(Id, Ids), maps:remove(Id, Db)};
+ Child ->
+ {Ids, Db#{Id=>Child#child{pid=undefined}}}
+ end.
-%% Chs = [S4, S3, Ch, S1, S0]
-%% Ret: {[S4, S3, Ch], [S1, S0]}
-split_child(Name, Chs) ->
- split_child(Name, Chs, []).
-
-split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name ->
- {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
-split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid ->
- {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
-split_child(Name, [Ch|Chs], After) ->
- split_child(Name, Chs, [Ch | After]);
-split_child(_, [], After) ->
- {lists:reverse(After), []}.
-
-get_child(Name, State) ->
- get_child(Name, State, false).
-
-get_child(Pid, State, AllowPid) when AllowPid, is_pid(Pid) ->
- get_dynamic_child(Pid, State);
-get_child(Name, State, _) ->
- lists:keysearch(Name, #child.name, State#state.children).
-
-get_dynamic_child(Pid, #state{children=[Child], dynamics=Dynamics}) ->
- case is_dynamic_pid(Pid, Dynamics) of
- true ->
- {value, Child#child{pid=Pid}};
- false ->
- RPid = restarting(Pid),
- case is_dynamic_pid(RPid, Dynamics) of
- true ->
- {value, Child#child{pid=RPid}};
- false ->
+%% In: {[S4, S3, Ch, S1, S0],Db}
+%% Ret: {{[S4, S3, Ch],Db1}, {[S1, S0],Db2}}
+%% Db1 and Db2 contain the keys in the lists they are associated with.
+-spec split_child(child_id(), children()) -> {children(), children()}.
+split_child(Id, {Ids,Db}) ->
+ {IdsAfter,IdsBefore} = split_ids(Id, Ids, []),
+ DbBefore = maps:with(IdsBefore,Db),
+ #{Id:=Ch} = DbAfter = maps:with(IdsAfter,Db),
+ {{IdsAfter,DbAfter#{Id=>Ch#child{pid=undefined}}},{IdsBefore,DbBefore}}.
+
+split_ids(Id, [Id|Ids], After) ->
+ {lists:reverse([Id|After]), Ids};
+split_ids(Id, [Other|Ids], After) ->
+ split_ids(Id, Ids, [Other | After]).
+
+%% Find the child record for a given Pid (dynamic child) or Id
+%% (non-dynamic child). This is called from the API functions.
+-spec find_child(pid() | child_id(), state()) -> {ok,child_rec()} | error.
+find_child(Pid, State) when is_pid(Pid), ?is_simple(State) ->
+ case find_dynamic_child(Pid, State) of
+ error ->
+ case find_dynamic_child(restarting(Pid), State) of
+ error ->
case erlang:is_process_alive(Pid) of
- true -> false;
- false -> {value, Child}
- end
- end
+ true -> error;
+ false -> {ok, get_dynamic_child(State)}
+ end;
+ Other ->
+ Other
+ end;
+ Other ->
+ Other
+ end;
+find_child(Id, #state{children = {_Ids,Db}}) ->
+ maps:find(Id, Db).
+
+%% Get the child record - either by child id or by pid. If
+%% simple_one_for_one, then insert the pid and args into the returned
+%% child record. This is called when trying to restart the child.
+-spec find_child_and_args(IdOrPid, state()) -> {ok, child_rec()} | error when
+ IdOrPid :: pid() | {restarting,pid()} | child_id().
+find_child_and_args(Pid, State) when ?is_simple(State) ->
+ case find_dynamic_child(Pid, State) of
+ {ok,#child{mfargs={M,F,_}} = Child} ->
+ {ok, Args} = dyn_args(Pid, State),
+ {ok, Child#child{mfargs = {M, F, Args}}};
+ error ->
+ error
+ end;
+find_child_and_args(Pid, State) when is_pid(Pid) ->
+ find_child_by_pid(Pid, State);
+find_child_and_args(Id, #state{children={_Ids,Db}}) ->
+ maps:find(Id, Db).
+
+%% Given the pid, find the child record for a dynamic child, and
+%% include the pid in the returned record.
+-spec find_dynamic_child(IdOrPid, state()) -> {ok, child_rec()} | error when
+ IdOrPid :: pid() | {restarting,pid()} | child_id().
+find_dynamic_child(Pid, State) ->
+ case dyn_exists(Pid, State) of
+ true ->
+ Child = get_dynamic_child(State),
+ {ok, Child#child{pid=Pid}};
+ false ->
+ error
end.
-is_dynamic_pid(Pid, {dict, Dynamics}) ->
- ?DICTS:is_key(Pid, Dynamics);
-is_dynamic_pid(Pid, {set, Dynamics}) ->
- ?SETS:is_element(Pid, Dynamics);
-is_dynamic_pid(_Pid, undefined) ->
- false.
-
-replace_child(Child, State) ->
- Chs = do_replace_child(Child, State#state.children),
- State#state{children = Chs}.
-
-do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name ->
- [Child | Chs];
-do_replace_child(Child, [Ch|Chs]) ->
- [Ch|do_replace_child(Child, Chs)].
+%% Given the pid, find the child record for a non-dyanamic child.
+-spec find_child_by_pid(IdOrPid, state()) -> {ok,child_rec()} | error when
+ IdOrPid :: pid() | {restarting,pid()}.
+find_child_by_pid(Pid,#state{children={_Ids,Db}}) ->
+ Fun = fun(_Id,#child{pid=P}=Ch,_) when P =:= Pid ->
+ throw(Ch);
+ (_,_,error) ->
+ error
+ end,
+ try maps:fold(Fun,error,Db)
+ catch throw:Child -> {ok,Child}
+ end.
-remove_child(Child, State) ->
- Chs = lists:keydelete(Child#child.name, #child.name, State#state.children),
- State#state{children = Chs}.
+%% Get the child record from a simple_one_for_one supervisor - no pid
+%% It is assumed that the child can always be found
+-spec get_dynamic_child(state()) -> child_rec().
+get_dynamic_child(#state{children={[Id],Db}}) ->
+ #{Id := Child} = Db,
+ Child.
+
+%% Update pid in the given child record and store it in the process state
+-spec set_pid(term(), child_id(), state()) -> state();
+ (term(), child_id(), children()) -> children().
+set_pid(Pid, Id, #state{children=Children} = State) ->
+ State#state{children = set_pid(Pid, Id, Children)};
+set_pid(Pid, Id, {Ids, Db}) ->
+ NewDb = maps:update_with(Id, fun(Child) -> Child#child{pid=Pid} end, Db),
+ {Ids,NewDb}.
+
+%% Remove the Id and the child record from the process state
+-spec remove_child(child_id(), state()) -> state().
+remove_child(Id, #state{children={Ids,Db}} = State) ->
+ NewIds = lists:delete(Id,Ids),
+ NewDb = maps:remove(Id,Db),
+ State#state{children = {NewIds,NewDb}}.
+
+%% In the order of Ids, traverse the children and update each child
+%% according to the return value of the Fun.
+%% On error, abort and return the merge of the old and the updated map.
+%% NOTE: The returned list of Ids is reverted compared to the input.
+-spec children_map(Fun, children()) -> {ok, children()} |
+ {error,children(),Reason} when
+ Fun :: fun((child_id(),child_rec()) -> {update,child_rec()} |
+ remove |
+ {abort, Reason}),
+ Reason :: term().
+children_map(Fun,{Ids,Db}) ->
+ children_map(Fun, Ids, Db, []).
+
+children_map(Fun,[Id|Ids],Db,Acc) ->
+ case Fun(Id,maps:get(Id,Db)) of
+ {update,Child} ->
+ children_map(Fun,Ids,Db#{Id => Child},[Id|Acc]);
+ remove ->
+ children_map(Fun,Ids,maps:remove(Id,Db),Acc);
+ {abort,Reason} ->
+ {error,{lists:reverse(Ids)++[Id|Acc],Db},Reason}
+ end;
+children_map(_Fun,[],Db,Acc) ->
+ {ok,{Acc,Db}}.
+
+%% In the order of Ids, map over all children and return the list
+-spec children_to_list(Fun, children()) -> List when
+ Fun :: fun((child_id(), child_rec()) -> Elem),
+ List :: list(Elem),
+ Elem :: term().
+children_to_list(Fun,{Ids,Db}) ->
+ children_to_list(Fun, Ids, Db, []).
+children_to_list(Fun,[Id|Ids],Db,Acc) ->
+ children_to_list(Fun,Ids,Db,[Fun(Id,maps:get(Id,Db))|Acc]);
+children_to_list(_Fun,[],_Db,Acc) ->
+ lists:reverse(Acc).
+
+%% The order is not important - so ignore Ids
+-spec children_fold(Fun, Acc0, children()) -> Acc1 when
+ Fun :: fun((child_id(), child_rec(), AccIn) -> AccOut),
+ Acc0 :: term(),
+ Acc1 :: term(),
+ AccIn :: term(),
+ AccOut :: term().
+children_fold(Fun,Init,{_Ids,Db}) ->
+ maps:fold(Fun, Init, Db).
+
+-spec append(children(), children()) -> children().
+append({Ids1,Db1},{Ids2,Db2}) ->
+ {Ids1++Ids2,maps:merge(Db1,Db2)}.
%%-----------------------------------------------------------------
%% Func: init_state/4
@@ -1290,27 +1244,27 @@ supname(N, _) -> N.
%%% Returns: {ok, [child_rec()]} | Error
%%% ------------------------------------------------------
-check_startspec(Children) -> check_startspec(Children, []).
+check_startspec(Children) -> check_startspec(Children, [], #{}).
-check_startspec([ChildSpec|T], Res) ->
+check_startspec([ChildSpec|T], Ids, Db) ->
case check_childspec(ChildSpec) of
- {ok, Child} ->
- case lists:keymember(Child#child.name, #child.name, Res) of
+ {ok, #child{id=Id}=Child} ->
+ case maps:is_key(Id, Db) of
%% The error message duplicate_child_name is kept for
%% backwards compatibility, although
%% duplicate_child_id would be more correct.
- true -> {duplicate_child_name, Child#child.name};
- false -> check_startspec(T, [Child | Res])
+ true -> {duplicate_child_name, Id};
+ false -> check_startspec(T, [Id | Ids], Db#{Id=>Child})
end;
Error -> Error
end;
-check_startspec([], Res) ->
- {ok, lists:reverse(Res)}.
+check_startspec([], Ids, Db) ->
+ {ok, {lists:reverse(Ids),Db}}.
check_childspec(ChildSpec) when is_map(ChildSpec) ->
catch do_check_childspec(maps:merge(?default_child_spec,ChildSpec));
-check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) ->
- check_childspec(#{id => Name,
+check_childspec({Id, Func, RestartType, Shutdown, ChildType, Mods}) ->
+ check_childspec(#{id => Id,
start => Func,
restart => RestartType,
shutdown => Shutdown,
@@ -1320,15 +1274,15 @@ check_childspec(X) -> {invalid_child_spec, X}.
do_check_childspec(#{restart := RestartType,
type := ChildType} = ChildSpec)->
- Name = case ChildSpec of
- #{id := N} -> N;
+ Id = case ChildSpec of
+ #{id := I} -> I;
_ -> throw(missing_id)
end,
Func = case ChildSpec of
#{start := F} -> F;
_ -> throw(missing_start)
end,
- validName(Name),
+ validId(Id),
validFunc(Func),
validRestartType(RestartType),
validChildType(ChildType),
@@ -1343,14 +1297,14 @@ do_check_childspec(#{restart := RestartType,
_ -> {M,_,_} = Func, [M]
end,
validMods(Mods),
- {ok, #child{name = Name, mfargs = Func, restart_type = RestartType,
+ {ok, #child{id = Id, mfargs = Func, restart_type = RestartType,
shutdown = Shutdown, child_type = ChildType, modules = Mods}}.
validChildType(supervisor) -> true;
validChildType(worker) -> true;
validChildType(What) -> throw({invalid_child_type, What}).
-validName(_Name) -> true.
+validId(_Id) -> true.
validFunc({M, F, A}) when is_atom(M),
is_atom(F),
@@ -1379,13 +1333,13 @@ validMods(Mods) when is_list(Mods) ->
Mods);
validMods(Mods) -> throw({invalid_modules, Mods}).
-child_to_spec(#child{name = Name,
+child_to_spec(#child{id = Id,
mfargs = Func,
restart_type = RestartType,
shutdown = Shutdown,
child_type = ChildType,
modules = Mods}) ->
- #{id => Name,
+ #{id => Id,
start => Func,
restart => RestartType,
shutdown => Shutdown,
@@ -1439,17 +1393,16 @@ report_error(Error, Reason, Child, SupName) ->
{offender, extract_child(Child)}],
error_logger:error_report(supervisor_report, ErrorMsg).
-
extract_child(Child) when is_list(Child#child.pid) ->
[{nb_children, length(Child#child.pid)},
- {id, Child#child.name},
+ {id, Child#child.id},
{mfargs, Child#child.mfargs},
{restart_type, Child#child.restart_type},
{shutdown, Child#child.shutdown},
{child_type, Child#child.child_type}];
extract_child(Child) ->
[{pid, Child#child.pid},
- {id, Child#child.name},
+ {id, Child#child.id},
{mfargs, Child#child.mfargs},
{restart_type, Child#child.restart_type},
{shutdown, Child#child.shutdown},
@@ -1465,3 +1418,46 @@ format_status(terminate, [_PDict, State]) ->
format_status(_, [_PDict, State]) ->
[{data, [{"State", State}]},
{supervisor, [{"Callback", State#state.module}]}].
+
+%%%-----------------------------------------------------------------
+%%% Dynamics database access
+dyn_size(#state{dynamics = {Mod,Db}}) ->
+ Mod:size(Db).
+
+dyn_erase(Pid,#state{dynamics={sets,Db}}=State) ->
+ State#state{dynamics={sets,sets:del_element(Pid,Db)}};
+dyn_erase(Pid,#state{dynamics={maps,Db}}=State) ->
+ State#state{dynamics={maps,maps:remove(Pid,Db)}}.
+
+dyn_store(Pid,_,#state{dynamics={sets,Db}}=State) ->
+ State#state{dynamics={sets,sets:add_element(Pid,Db)}};
+dyn_store(Pid,Args,#state{dynamics={maps,Db}}=State) ->
+ State#state{dynamics={maps,Db#{Pid => Args}}}.
+
+dyn_fold(Fun,Init,#state{dynamics={sets,Db}}) ->
+ sets:fold(Fun,Init,Db);
+dyn_fold(Fun,Init,#state{dynamics={maps,Db}}) ->
+ maps:fold(fun(Pid,_,Acc) -> Fun(Pid,Acc) end, Init, Db).
+
+dyn_map(Fun, #state{dynamics={sets,Db}}) ->
+ lists:map(Fun, sets:to_list(Db));
+dyn_map(Fun, #state{dynamics={maps,Db}}) ->
+ lists:map(Fun, maps:keys(Db)).
+
+dyn_exists(Pid, #state{dynamics={sets, Db}}) ->
+ sets:is_element(Pid, Db);
+dyn_exists(Pid, #state{dynamics={maps, Db}}) ->
+ maps:is_key(Pid, Db).
+
+dyn_args(_Pid, #state{dynamics={sets, _Db}}) ->
+ {ok,undefined};
+dyn_args(Pid, #state{dynamics={maps, Db}}) ->
+ maps:find(Pid, Db).
+
+dyn_init(State) ->
+ dyn_init(get_dynamic_child(State),State).
+
+dyn_init(Child,State) when ?is_temporary(Child) ->
+ State#state{dynamics={sets,sets:new()}};
+dyn_init(_Child,State) ->
+ State#state{dynamics={maps,maps:new()}}.
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
new file mode 100644
index 0000000000..a84679c595
--- /dev/null
+++ b/lib/stdlib/src/uri_string.erl
@@ -0,0 +1,2069 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%
+%% [RFC 3986, Chapter 3. Syntax Components]
+%%
+%% The generic URI syntax consists of a hierarchical sequence of
+%% components referred to as the scheme, authority, path, query, and
+%% fragment.
+%%
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%%
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%
+%% The scheme and path components are required, though the path may be
+%% empty (no characters). When authority is present, the path must
+%% either be empty or begin with a slash ("/") character. When
+%% authority is not present, the path cannot begin with two slash
+%% characters ("//"). These restrictions result in five different ABNF
+%% rules for a path (Section 3.3), only one of which will match any
+%% given URI reference.
+%%
+%% The following are two example URIs and their component parts:
+%%
+%% foo://example.com:8042/over/there?name=ferret#nose
+%% \_/ \______________/\_________/ \_________/ \__/
+%% | | | | |
+%% scheme authority path query fragment
+%% | _____________________|__
+%% / \ / \
+%% urn:example:animal:ferret:nose
+%%
+%%
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%
+%%
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%
+%%
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%
+%%
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%
+%%
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%
+%%
+%% [RFC 3986, Chapter 4.3. Absolute URI]
+%%
+%% Some protocol elements allow only the absolute form of a URI without
+%% a fragment identifier. For example, defining a base URI for later
+%% use by relative references calls for an absolute-URI syntax rule that
+%% does not allow a fragment.
+%%
+%% absolute-URI = scheme ":" hier-part [ "?" query ]
+%%
+-module(uri_string).
+
+%%-------------------------------------------------------------------------
+%% External API
+%%-------------------------------------------------------------------------
+-export([compose_query/1, compose_query/2,
+ dissect_query/1, normalize/1, parse/1,
+ recompose/1, transcode/2]).
+-export_type([error/0, uri_map/0, uri_string/0]).
+
+
+%%-------------------------------------------------------------------------
+%% Internal API
+%%-------------------------------------------------------------------------
+-export([is_host/1, is_path/1]). % suppress warnings
+
+
+%%-------------------------------------------------------------------------
+%% Macros
+%%-------------------------------------------------------------------------
+-define(CHAR(Char), <<Char/utf8>>).
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/binary>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(DEC2HEX(X),
+ if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
+ ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
+ end).
+
+-define(HEX2DEC(X),
+ if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
+ ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
+ ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
+ end).
+
+
+%%%=========================================================================
+%%% API
+%%%=========================================================================
+
+%%-------------------------------------------------------------------------
+%% URI compliant with RFC 3986
+%% ASCII %x21 - %x7A ("!" - "z") except
+%% %x34 " double quote
+%% %x60 < less than
+%% %x62 > greater than
+%% %x92 \ backslash
+%% %x94 ^ caret / circumflex
+%% %x96 ` grave / accent
+%%-------------------------------------------------------------------------
+-type uri_string() :: iodata().
+-type error() :: {error, atom(), term()}.
+
+
+%%-------------------------------------------------------------------------
+%% RFC 3986, Chapter 3. Syntax Components
+%%-------------------------------------------------------------------------
+-type uri_map() ::
+ #{fragment => unicode:chardata(),
+ host => unicode:chardata(),
+ path => unicode:chardata(),
+ port => non_neg_integer() | undefined,
+ query => unicode:chardata(),
+ scheme => unicode:chardata(),
+ userinfo => unicode:chardata()} | #{}.
+
+
+%%-------------------------------------------------------------------------
+%% Normalize URIs
+%%-------------------------------------------------------------------------
+-spec normalize(URIString) -> NormalizedURI when
+ URIString :: uri_string(),
+ NormalizedURI :: uri_string().
+normalize(URIString) ->
+ %% Percent-encoding normalization and case normalization for
+ %% percent-encoded triplets are achieved by running parse and
+ %% recompose on the input URI string.
+ recompose(
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(
+ parse(URIString))))).
+
+
+%%-------------------------------------------------------------------------
+%% Parse URIs
+%%-------------------------------------------------------------------------
+-spec parse(URIString) -> URIMap when
+ URIString :: uri_string(),
+ URIMap :: uri_map()
+ | error().
+parse(URIString) when is_binary(URIString) ->
+ try parse_uri_reference(URIString, #{})
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+parse(URIString) when is_list(URIString) ->
+ try
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Recompose URIs
+%%-------------------------------------------------------------------------
+-spec recompose(URIMap) -> URIString when
+ URIMap :: uri_map(),
+ URIString :: uri_string()
+ | error().
+recompose(Map) ->
+ case is_valid_map(Map) of
+ false ->
+ {error, invalid_map, Map};
+ true ->
+ try
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Transcode URIs
+%%-------------------------------------------------------------------------
+-spec transcode(URIString, Options) -> Result when
+ URIString :: uri_string(),
+ Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
+ Result :: uri_string()
+ | error().
+transcode(URIString, Options) when is_binary(URIString) ->
+ try
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ List = convert_to_list(URIString, InEnc),
+ Output = transcode(List, [], InEnc, OutEnc),
+ convert_to_binary(Output, utf8, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+transcode(URIString, Options) when is_list(URIString) ->
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ Flattened = flatten_list(URIString, InEnc),
+ try transcode(Flattened, [], InEnc, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Functions for working with the query part of a URI as a list
+%% of key/value pairs.
+%% HTML5 - 4.10.22.6 URL-encoded form data
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Compose urlencoded query string from a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded encoding algorithm)
+%%-------------------------------------------------------------------------
+-spec compose_query(QueryList) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ QueryString :: uri_string()
+ | error().
+compose_query(List) ->
+ compose_query(List, [{encoding, utf8}]).
+
+
+-spec compose_query(QueryList, Options) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ Options :: [{encoding, atom()}],
+ QueryString :: uri_string()
+ | error().
+compose_query([],_Options) ->
+ [];
+compose_query(List, Options) ->
+ try compose_query(List, Options, false, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+%%
+compose_query([{Key,Value}|Rest], Options, IsList, Acc) ->
+ Separator = get_separator(Rest),
+ K = form_urlencode(Key, Options),
+ V = form_urlencode(Value, Options),
+ IsListNew = IsList orelse is_list(Key) orelse is_list(Value),
+ compose_query(Rest, Options, IsListNew, <<Acc/binary,K/binary,"=",V/binary,Separator/binary>>);
+compose_query([], _Options, IsList, Acc) ->
+ case IsList of
+ true -> convert_to_list(Acc, utf8);
+ false -> Acc
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Dissect a query string into a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded decoding algorithm)
+%%-------------------------------------------------------------------------
+-spec dissect_query(QueryString) -> QueryList when
+ QueryString :: uri_string(),
+ QueryList :: [{uri_string(), uri_string()}]
+ | error().
+dissect_query(<<>>) ->
+ [];
+dissect_query([]) ->
+ [];
+dissect_query(QueryString) when is_list(QueryString) ->
+ try
+ B = convert_to_binary(QueryString, utf8, utf8),
+ dissect_query_key(B, true, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+dissect_query(QueryString) ->
+ try dissect_query_key(QueryString, false, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
+%%-------------------------------------------------------------------------
+%% Converts Map fields to lists
+%%-------------------------------------------------------------------------
+convert_mapfields_to_list(Map) ->
+ Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V);
+ (_, V) -> V end,
+ maps:map(Fun, Map).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%-------------------------------------------------------------------------
+-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
+parse_uri_reference(URIString, URI) ->
+ try parse_scheme_start(URIString, URI)
+ catch
+ throw:{_,_,_} ->
+ parse_relative_part(URIString, URI)
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_relative_part(binary(), uri_map()) -> uri_map().
+parse_relative_part(?STRING_REST("//", Rest), URI) ->
+ %% Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
+ end;
+parse_relative_part(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST($/, Path))};
+parse_relative_part(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(Query)};
+parse_relative_part(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
+parse_relative_part(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true ->
+ {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST(Char, Path))};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% path-abempty
+%%-------------------------------------------------------------------------
+-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment(?STRING_REST(Char, Rest), URI) ->
+ case is_pchar(Char) of
+ true -> parse_segment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% path-noscheme
+%%-------------------------------------------------------------------------
+-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true -> parse_segment_nz_nc(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment_nz_nc(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is pchar.
+-spec is_pchar(char()) -> boolean().
+is_pchar($%) -> true; % pct-encoded
+is_pchar($:) -> true;
+is_pchar($@) -> true;
+is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is segment_nz_nc.
+-spec is_segment_nz_nc(char()) -> boolean().
+is_segment_nz_nc($%) -> true; % pct-encoded
+is_segment_nz_nc($@) -> true;
+is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%-------------------------------------------------------------------------
+-spec parse_scheme_start(binary(), uri_map()) -> uri_map().
+parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
+ case is_alpha(Char) of
+ true -> {T, URI1} = parse_scheme(Rest, URI),
+ Scheme = calculate_parsed_scheme(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+%% Add path component if it missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
+
+-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_scheme(?STRING_REST($:, Rest), URI) ->
+ {_, URI1} = parse_hier(Rest, URI),
+ {Rest, URI1};
+parse_scheme(?STRING_REST(Char, Rest), URI) ->
+ case is_scheme(Char) of
+ true -> parse_scheme(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_scheme(?STRING_EMPTY, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in scheme
+-spec is_scheme(char()) -> boolean().
+is_scheme($+) -> true;
+is_scheme($-) -> true;
+is_scheme($.) -> true;
+is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+
+%%-------------------------------------------------------------------------
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_hier(?STRING_REST("//", Rest), URI) ->
+ % Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
+ end;
+parse_hier(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_hier(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_hier(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
+ case is_pchar(Char) of
+ true -> % segment_nz
+ {T, URI1} = parse_segment(Rest, URI),
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_hier(?STRING_EMPTY, URI) ->
+ {<<>>, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% The authority component is preceded by a double slash ("//") and is
+%% terminated by the next slash ("/"), question mark ("?"), or number
+%% sign ("#") character, or by the end of the URI.
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%-------------------------------------------------------------------------
+-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
+parse_userinfo(?STRING_REST($@, Rest), URI) ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
+parse_userinfo(?STRING_REST(Char, Rest), URI) ->
+ case is_userinfo(Char) of
+ true -> parse_userinfo(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_userinfo(?STRING_EMPTY, _URI) ->
+ %% URI cannot end in userinfo state
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in userinfo
+-spec is_userinfo(char()) -> boolean().
+is_userinfo($%) -> true; % pct-encoded
+is_userinfo($:) -> true;
+is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%-------------------------------------------------------------------------
+-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_host(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_host(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_host(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_host(?STRING_REST($[, Rest), URI) ->
+ parse_ipv6_bin(Rest, [], URI);
+parse_host(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_host(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_ipv4_bin(Rest, [Char], URI);
+ false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
+ end;
+parse_host(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_reg_name(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_reg_name(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_reg_name(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_reg_name(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_reg_name(?STRING_REST(Char, Rest), URI) ->
+ case is_reg_name(Char) of
+ true -> parse_reg_name(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_reg_name(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+%% Check if char is allowed in reg-name
+-spec is_reg_name(char()) -> boolean().
+is_reg_name($%) -> true;
+is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv4(Char) of
+ true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in IPv4 addresses
+-spec is_ipv4(char()) -> boolean().
+is_ipv4($.) -> true;
+is_ipv4(Char) -> is_digit(Char).
+
+-spec validate_ipv4_address(list()) -> list().
+validate_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
+ _ = validate_ipv6_address(lists:reverse(Acc)),
+ parse_ipv6_bin_end(Rest, URI);
+parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+%% Check if char is allowed in IPv6 addresses
+-spec is_ipv6(char()) -> boolean().
+is_ipv6($:) -> true;
+is_ipv6($.) -> true;
+is_ipv6(Char) -> is_hex_digit(Char).
+
+
+-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin_end(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+-spec validate_ipv6_address(list()) -> list().
+validate_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%-------------------------------------------------------------------------
+-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_port(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_port(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_port(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_port(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_port(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_port(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_query(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_query(?STRING_REST(Char, Rest), URI) ->
+ case is_query(Char) of
+ true -> parse_query(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_query(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in query
+-spec is_query(char()) -> boolean().
+is_query($/) -> true;
+is_query($?) -> true;
+is_query(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_fragment(?STRING_REST(Char, Rest), URI) ->
+ case is_fragment(Char) of
+ true -> parse_fragment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_fragment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in fragment
+-spec is_fragment(char()) -> boolean().
+is_fragment($/) -> true;
+is_fragment($?) -> true;
+is_fragment(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%-------------------------------------------------------------------------
+
+%% Check if char is sub-delim.
+-spec is_sub_delim(char()) -> boolean().
+is_sub_delim($!) -> true;
+is_sub_delim($$) -> true;
+is_sub_delim($&) -> true;
+is_sub_delim($') -> true;
+is_sub_delim($() -> true;
+is_sub_delim($)) -> true;
+
+is_sub_delim($*) -> true;
+is_sub_delim($+) -> true;
+is_sub_delim($,) -> true;
+is_sub_delim($;) -> true;
+is_sub_delim($=) -> true;
+is_sub_delim(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%-------------------------------------------------------------------------
+-spec is_unreserved(char()) -> boolean().
+is_unreserved($-) -> true;
+is_unreserved($.) -> true;
+is_unreserved($_) -> true;
+is_unreserved($~) -> true;
+is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+-spec is_alpha(char()) -> boolean().
+is_alpha(C)
+ when $A =< C, C =< $Z;
+ $a =< C, C =< $z -> true;
+is_alpha(_) -> false.
+
+-spec is_digit(char()) -> boolean().
+is_digit(C)
+ when $0 =< C, C =< $9 -> true;
+is_digit(_) -> false.
+
+-spec is_hex_digit(char()) -> boolean().
+is_hex_digit(C)
+ when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
+is_hex_digit(_) -> false.
+
+
+%% Remove enclosing brackets from binary
+-spec remove_brackets(binary()) -> binary().
+remove_brackets(<<$[/utf8, Rest/binary>>) ->
+ {H,T} = split_binary(Rest, byte_size(Rest) - 1),
+ case T =:= <<$]/utf8>> of
+ true -> H;
+ false -> Rest
+ end;
+remove_brackets(Addr) -> Addr.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, <<>>) ->
+ strip_last_char(Input, [$:]);
+calculate_parsed_scheme(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#]);
+calculate_parsed_part(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#,$@]);
+calculate_parsed_userinfo(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_host_port(binary(), binary()) -> binary().
+calculate_parsed_host_port(Input, <<>>) ->
+ strip_last_char(Input, [$:,$?,$#,$/]);
+calculate_parsed_host_port(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+calculate_parsed_query_fragment(Input, <<>>) ->
+ strip_last_char(Input, [$#]);
+calculate_parsed_query_fragment(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+get_port(<<>>) ->
+ undefined;
+get_port(B) ->
+ try binary_to_integer(B)
+ catch
+ error:badarg ->
+ throw({error, invalid_uri, B})
+ end.
+
+
+%% Strip last char if it is in list
+%%
+%% This function is optimized for speed: parse/1 is about 10% faster than
+%% with an alternative implementation based on lists and sets.
+strip_last_char(<<>>, _) -> <<>>;
+strip_last_char(Input, [C0]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2,C3]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ C3 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end.
+
+
+%% Get parsed binary
+get_parsed_binary(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.1. Percent-Encoding]
+%%
+%% A percent-encoding mechanism is used to represent a data octet in a
+%% component when that octet's corresponding character is outside the
+%% allowed set or is being used as a delimiter of, or within, the
+%% component. A percent-encoded octet is encoded as a character
+%% triplet, consisting of the percent character "%" followed by the two
+%% hexadecimal digits representing that octet's numeric value. For
+%% example, "%20" is the percent-encoding for the binary octet
+%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+%% character (SP). Section 2.4 describes when percent-encoding and
+%% decoding is applied.
+%%
+%% pct-encoded = "%" HEXDIG HEXDIG
+%%-------------------------------------------------------------------------
+-spec decode_userinfo(binary()) -> binary().
+decode_userinfo(Cs) ->
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
+
+-spec decode_host(binary()) -> binary().
+decode_host(Cs) ->
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
+
+-spec decode_path(binary()) -> binary().
+decode_path(Cs) ->
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
+
+-spec decode_query(binary()) -> binary().
+decode_query(Cs) ->
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
+
+-spec decode_fragment(binary()) -> binary().
+decode_fragment(Cs) ->
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+
+
+%% Returns Cs if it is utf8 encoded.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ {error,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ _ -> Cs
+ end.
+
+%%-------------------------------------------------------------------------
+%% Percent-encode
+%%-------------------------------------------------------------------------
+
+%% Only validates as scheme cannot have percent-encoded characters
+-spec encode_scheme(list()|binary()) -> list() | binary().
+encode_scheme([]) ->
+ throw({error,invalid_scheme,""});
+encode_scheme(<<>>) ->
+ throw({error,invalid_scheme,<<>>});
+encode_scheme(Scheme) ->
+ case validate_scheme(Scheme) of
+ true -> Scheme;
+ false -> throw({error,invalid_scheme,Scheme})
+ end.
+
+-spec encode_userinfo(list()|binary()) -> list() | binary().
+encode_userinfo(Cs) ->
+ encode(Cs, fun is_userinfo/1).
+
+-spec encode_host(list()|binary()) -> list() | binary().
+encode_host(Cs) ->
+ case classify_host(Cs) of
+ regname -> Cs;
+ ipv4 -> Cs;
+ ipv6 -> bracket_ipv6(Cs);
+ other -> encode(Cs, fun is_reg_name/1)
+ end.
+
+-spec encode_path(list()|binary()) -> list() | binary().
+encode_path(Cs) ->
+ encode(Cs, fun is_path/1).
+
+-spec encode_query(list()|binary()) -> list() | binary().
+encode_query(Cs) ->
+ encode(Cs, fun is_query/1).
+
+-spec encode_fragment(list()|binary()) -> list() | binary().
+encode_fragment(Cs) ->
+ encode(Cs, fun is_fragment/1).
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for percent-decode
+%%-------------------------------------------------------------------------
+decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
+ end;
+decode(<<C,Cs/binary>>, Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
+ end;
+decode(<<>>, _Fun, Acc) ->
+ Acc.
+
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+is_path(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for percent-encode
+%%-------------------------------------------------------------------------
+-spec encode(list()|binary(), fun()) -> list() | binary().
+encode(Component, Fun) when is_list(Component) ->
+ B = unicode:characters_to_binary(Component),
+ unicode:characters_to_list(encode(B, Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) ->
+ encode(Component, Fun, <<>>).
+%%
+encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
+ C = encode_codepoint_binary(Char, Fun),
+ encode(Rest, Fun, <<Acc/binary,C/binary>>);
+encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
+ throw({error,invalid_input,<<Char,Rest/binary>>});
+encode(<<>>, _Fun, Acc) ->
+ Acc.
+
+
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
+encode_codepoint_binary(C, Fun) ->
+ case Fun(C) of
+ false -> percent_encode_binary(C);
+ true -> <<C>>
+ end.
+
+
+-spec percent_encode_binary(integer()) -> binary().
+percent_encode_binary(Code) ->
+ percent_encode_binary(<<Code/utf8>>, <<>>).
+
+
+percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
+ percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
+
+
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+ case is_scheme(H) of
+ true -> validate_scheme(T);
+ false -> false
+ end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+ case is_scheme(H) of
+ true -> validate_scheme(Rest);
+ false -> false
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% encolsed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+classify_host([]) -> other;
+classify_host(Addr) when is_binary(Addr) ->
+ A = unicode:characters_to_list(Addr),
+ classify_host_ipv6(A);
+classify_host(Addr) ->
+ classify_host_ipv6(Addr).
+
+classify_host_ipv6(Addr) ->
+ case is_ipv6_address(Addr) of
+ true -> ipv6;
+ false -> classify_host_ipv4(Addr)
+ end.
+
+classify_host_ipv4(Addr) ->
+ case is_ipv4_address(Addr) of
+ true -> ipv4;
+ false -> classify_host_regname(Addr)
+ end.
+
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(T);
+ false -> other
+ end.
+
+is_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+is_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+bracket_ipv6(Addr) when is_binary(Addr) ->
+ concat(<<$[,Addr/binary>>,<<$]>>);
+bracket_ipv6(Addr) when is_list(Addr) ->
+ [$[|Addr] ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%%
+%% The implementation is based on a decision tree that fulfills the
+%% following rules:
+%% - 'path' shall always be present in the input map
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%% - 'host' shall be present in the input map when 'path' starts with
+%% two slashes ("//")
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%% path-abempty = *( "/" segment )
+%% segment = *pchar
+%% - 'host' shall be present if userinfo or port is present in input map
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%% - All fields shall be valid (scheme, userinfo, host, port, path, query
+%% or fragment).
+%%-------------------------------------------------------------------------
+is_valid_map(#{path := Path} = Map) ->
+ ((starts_with_two_slash(Path) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(port, Map) andalso is_valid_map_host(Map))
+ orelse
+ all_fields_valid(Map));
+is_valid_map(#{}) ->
+ false.
+
+
+is_valid_map_host(Map) ->
+ maps:is_key(host, Map) andalso all_fields_valid(Map).
+
+
+all_fields_valid(Map) ->
+ Fun = fun(scheme, _, Acc) -> Acc;
+ (userinfo, _, Acc) -> Acc;
+ (host, _, Acc) -> Acc;
+ (port, _, Acc) -> Acc;
+ (path, _, Acc) -> Acc;
+ (query, _, Acc) -> Acc;
+ (fragment, _, Acc) -> Acc;
+ (_, _, _) -> false
+ end,
+ maps:fold(Fun, true, Map).
+
+
+starts_with_two_slash([$/,$/|_]) ->
+ true;
+starts_with_two_slash(?STRING_REST("//", _)) ->
+ true;
+starts_with_two_slash(_) -> false.
+
+
+update_scheme(#{scheme := Scheme}, _) ->
+ add_colon_postfix(encode_scheme(Scheme));
+update_scheme(#{}, _) ->
+ empty.
+
+
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+ add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+ concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
+update_userinfo(#{}, empty) ->
+ empty;
+update_userinfo(#{}, URI) ->
+ URI.
+
+
+update_host(#{host := Host}, empty) ->
+ add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+ concat(URI,add_host_prefix(Map, encode_host(Host)));
+update_host(#{}, empty) ->
+ empty;
+update_host(#{}, URI) ->
+ URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+update_port(#{port := undefined}, URI) ->
+ concat(URI, <<":">>);
+update_port(#{port := Port}, URI) ->
+ concat(URI,add_colon(encode_port(Port)));
+update_port(#{}, URI) ->
+ URI.
+
+
+update_path(#{path := Path}, empty) ->
+ encode_path(Path);
+update_path(#{path := Path}, URI) ->
+ concat(URI,encode_path(Path));
+update_path(#{}, empty) ->
+ empty;
+update_path(#{}, URI) ->
+ URI.
+
+
+update_query(#{query := Query}, empty) ->
+ encode_query(Query);
+update_query(#{query := Query}, URI) ->
+ concat(URI,add_question_mark(encode_query(Query)));
+update_query(#{}, empty) ->
+ empty;
+update_query(#{}, URI) ->
+ URI.
+
+
+update_fragment(#{fragment := Fragment}, empty) ->
+ add_hashmark(encode_fragment(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+ concat(URI,add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+ "";
+update_fragment(#{}, URI) ->
+ URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its argument is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+concat(A, B) when is_binary(A), is_binary(B) ->
+ <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+ unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+ A ++ maybe_to_list(B).
+
+add_hashmark(Comp) when is_binary(Comp) ->
+ <<$#, Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+ [$#|Comp].
+
+add_question_mark(Comp) when is_binary(Comp) ->
+ <<$?, Comp/binary>>;
+add_question_mark(Comp) when is_list(Comp) ->
+ [$?|Comp].
+
+add_colon(Comp) when is_binary(Comp) ->
+ <<$:, Comp/binary>>.
+
+add_colon_postfix(Comp) when is_binary(Comp) ->
+ <<Comp/binary,$:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+ Comp ++ ":".
+
+add_auth_prefix(Comp) when is_binary(Comp) ->
+ <<"//", Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+ [$/,$/|Comp].
+
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+ <<$@,Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+ <<"//",Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+ [$@|Host];
+add_host_prefix(#{}, Host) when is_list(Host) ->
+ [$/,$/|Host].
+
+maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
+maybe_to_list(Comp) -> Comp.
+
+encode_port(Port) ->
+ integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and append the accumulator
+%% "6B%3C%" + "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode(L, Acc, [], InEnc, OutEnc).
+%%
+transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
+ transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
+transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
+ transcode(Rest, Acc, [C|List], InEncoding, OutEncoding);
+transcode([], Acc, List, _InEncoding, _OutEncoding) ->
+ lists:reverse(List ++ Acc).
+
+
+%% Transcode percent-encoded segment
+transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
+ false -> throw({error, invalid_percent_encoding,L})
+ end;
+transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)),
+ transcode(L, Out ++ Acc, [], InEncoding, OutEncoding);
+transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = convert_to_list(PctEncUtf8, utf8),
+ lists:reverse(Acc) ++ Out.
+
+
+%% Convert to binary
+convert_to_binary(Binary, InEncoding, OutEncoding) ->
+ case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Convert to list
+convert_to_list(Binary, InEncoding) ->
+ case unicode:characters_to_list(Binary, InEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Flatten input list
+flatten_list([], _) ->
+ [];
+flatten_list(L, InEnc) ->
+ flatten_list(L, InEnc, []).
+%%
+flatten_list([H|T], InEnc, Acc) when is_binary(H) ->
+ L = convert_to_list(H, InEnc),
+ flatten_list(T, InEnc, lists:reverse(L) ++ Acc);
+flatten_list([H|T], InEnc, Acc) when is_list(H) ->
+ flatten_list(H ++ T, InEnc, Acc);
+flatten_list([H|T], InEnc, Acc) ->
+ flatten_list(T, InEnc, [H|Acc]);
+flatten_list([], _InEnc, Acc) ->
+ lists:reverse(Acc);
+flatten_list(Arg, _, _) ->
+ throw({error, invalid_input, Arg}).
+
+
+percent_encode_segment(Segment) ->
+ percent_encode_binary(Segment, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for compose_query
+%%-------------------------------------------------------------------------
+
+%% Returns separator to be used between key-value pairs
+get_separator(L) when length(L) =:= 0 ->
+ <<>>;
+get_separator(_L) ->
+ <<"&">>.
+
+
+%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ html5_byte_encode(base10_encode(B));
+form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) ->
+ html5_byte_encode(base10_encode(Cs));
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_list(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ B = convert_to_binary(Cs, utf8, Encoding),
+ html5_byte_encode(B);
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_binary(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ html5_byte_encode(Cs);
+form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) ->
+ throw({error,invalid_encoding, Encoding});
+form_urlencode(Cs, _) ->
+ throw({error,invalid_input, Cs}).
+
+
+%% For each character in the entry's name and value that cannot be expressed using
+%% the selected character encoding, replace the character by a string consisting of
+%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII
+%% digits representing the Unicode code point of the character in base ten, and
+%% finally a ";" (U+003B) character.
+base10_encode(Cs) ->
+ base10_encode(Cs, <<>>).
+%%
+base10_encode(<<>>, Acc) ->
+ Acc;
+base10_encode(<<H/utf8,T/binary>>, Acc) when H > 255 ->
+ Base10 = convert_to_binary(integer_to_list(H,10), utf8, utf8),
+ base10_encode(T, <<Acc/binary,"&#",Base10/binary,$;>>);
+base10_encode(<<H/utf8,T/binary>>, Acc) ->
+ base10_encode(T, <<Acc/binary,H>>).
+
+
+html5_byte_encode(B) ->
+ html5_byte_encode(B, <<>>).
+%%
+html5_byte_encode(<<>>, Acc) ->
+ Acc;
+html5_byte_encode(<<$ ,T/binary>>, Acc) ->
+ html5_byte_encode(T, <<Acc/binary,$+>>);
+html5_byte_encode(<<H,T/binary>>, Acc) ->
+ case is_url_char(H) of
+ true ->
+ html5_byte_encode(T, <<Acc/binary,H>>);
+ false ->
+ <<A:4,B:4>> = <<H>>,
+ html5_byte_encode(T, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>)
+ end;
+html5_byte_encode(H, _Acc) ->
+ throw({error,invalid_input, H}).
+
+
+%% Return true if input char can appear in form-urlencoded string
+%% Allowed chararacters:
+%% 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A,
+%% 0x5F, 0x61 to 0x7A
+is_url_char(C)
+ when C =:= 16#2A; C =:= 16#2D;
+ C =:= 16#2E; C =:= 16#5F;
+ 16#30 =< C, C =< 16#39;
+ 16#41 =< C, C =< 16#5A;
+ 16#61 =< C, C =< 16#7A -> true;
+is_url_char(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for dissect_query
+%%-------------------------------------------------------------------------
+dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, Value);
+dissect_query_key(<<"&#",T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,"&#">>, Value);
+dissect_query_key(<<$&,_T/binary>>, _IsList, _Acc, _Key, _Value) ->
+ throw({error, missing_value, "&"});
+dissect_query_key(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,H>>, Value);
+dissect_query_key(B, _, _, _, _) ->
+ throw({error, missing_value, B}).
+
+
+dissect_query_value(<<$&,T/binary>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ dissect_query_key(T, IsList, [{K,V}|Acc], <<>>, <<>>);
+dissect_query_value(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, <<Value/binary,H>>);
+dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ lists:reverse([{K,V}|Acc]).
+
+
+%% Form-urldecode input based on RFC 1866 [8.2.1]
+form_urldecode(true, B) ->
+ Result = base10_decode(form_urldecode(B, <<>>)),
+ convert_to_list(Result, utf8);
+form_urldecode(false, B) ->
+ base10_decode(form_urldecode(B, <<>>));
+form_urldecode(<<>>, Acc) ->
+ Acc;
+form_urldecode(<<$+,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,$ >>);
+form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ V = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ form_urldecode(T, <<Acc/binary, V>>);
+ false ->
+ L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8),
+ throw({error, invalid_percent_encoding, L})
+ end;
+form_urldecode(<<H/utf8,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,H/utf8>>);
+form_urldecode(<<H,_/binary>>, _Acc) ->
+ throw({error, invalid_character, [H]}).
+
+base10_decode(Cs) ->
+ base10_decode(Cs, <<>>).
+%
+base10_decode(<<>>, Acc) ->
+ Acc;
+base10_decode(<<"&#",T/binary>>, Acc) ->
+ base10_decode_unicode(T, Acc);
+base10_decode(<<H/utf8,T/binary>>, Acc) ->
+ base10_decode(T,<<Acc/binary,H/utf8>>);
+base10_decode(<<H,_/binary>>, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+base10_decode_unicode(B, Acc) ->
+ base10_decode_unicode(B, 0, Acc).
+%%
+base10_decode_unicode(<<H/utf8,T/binary>>, Codepoint, Acc) when $0 =< H, H =< $9 ->
+ Res = Codepoint * 10 + (H - $0),
+ base10_decode_unicode(T, Res, Acc);
+base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc) ->
+ base10_decode(T, <<Acc/binary,Codepoint/utf8>>);
+base10_decode_unicode(<<H,_/binary>>, _, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for normalize
+%%-------------------------------------------------------------------------
+
+%% 6.2.2.1. Case Normalization
+normalize_case(#{scheme := Scheme, host := Host} = Map) ->
+ Map#{scheme => to_lower(Scheme),
+ host => to_lower(Host)};
+normalize_case(#{host := Host} = Map) ->
+ Map#{host => to_lower(Host)};
+normalize_case(#{scheme := Scheme} = Map) ->
+ Map#{scheme => to_lower(Scheme)};
+normalize_case(#{} = Map) ->
+ Map.
+
+
+to_lower(Cs) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ convert_to_list(to_lower(B), utf8);
+to_lower(Cs) when is_binary(Cs) ->
+ to_lower(Cs, <<>>).
+%%
+to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z ->
+ to_lower(Cs, <<Acc/binary,(C + 32)>>);
+to_lower(<<C,Cs/binary>>, Acc) ->
+ to_lower(Cs, <<Acc/binary,C>>);
+to_lower(<<>>, Acc) ->
+ Acc.
+
+
+%% 6.2.2.3. Path Segment Normalization
+%% 5.2.4. Remove Dot Segments
+normalize_path_segment(Map) ->
+ Path = maps:get(path, Map, undefined),
+ Map#{path => remove_dot_segments(Path)}.
+
+
+remove_dot_segments(Path) when is_binary(Path) ->
+ remove_dot_segments(Path, <<>>);
+remove_dot_segments(Path) when is_list(Path) ->
+ B = convert_to_binary(Path, utf8, utf8),
+ B1 = remove_dot_segments(B, <<>>),
+ convert_to_list(B1, utf8).
+%%
+remove_dot_segments(<<>>, Output) ->
+ Output;
+remove_dot_segments(<<"../",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"./",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"/./",T/binary>>, Output) ->
+ remove_dot_segments(<<$/,T/binary>>, Output);
+remove_dot_segments(<<"/.">>, Output) ->
+ remove_dot_segments(<<$/>>, Output);
+remove_dot_segments(<<"/../",T/binary>>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/,T/binary>>, Out1);
+remove_dot_segments(<<"/..">>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/>>, Out1);
+remove_dot_segments(<<$.>>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(<<"..">>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(Input, Output) ->
+ {First, Rest} = first_path_segment(Input),
+ remove_dot_segments(Rest, <<Output/binary,First/binary>>).
+
+
+first_path_segment(Input) ->
+ F = first_path_segment(Input, <<>>),
+ split_binary(Input, byte_size(F)).
+%%
+first_path_segment(<<$/,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
+first_path_segment(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+first_path_segment_end(<<>>, Acc) ->
+ Acc;
+first_path_segment_end(<<$/,_/binary>>, Acc) ->
+ Acc;
+first_path_segment_end(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+remove_last_segment(<<>>) ->
+ <<>>;
+remove_last_segment(B) ->
+ {Init, Last} = split_binary(B, byte_size(B) - 1),
+ case Last of
+ <<$/>> ->
+ Init;
+ _Char ->
+ remove_last_segment(Init)
+ end.
+
+
+%% RFC 3986, 6.2.3. Scheme-Based Normalization
+normalize_scheme_based(Map) ->
+ Scheme = maps:get(scheme, Map, undefined),
+ Port = maps:get(port, Map, undefined),
+ Path= maps:get(path, Map, undefined),
+ normalize_scheme_based(Map, Scheme, Port, Path).
+%%
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "http"; Scheme =:= <<"http">> ->
+ normalize_http(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "https"; Scheme =:= <<"https">> ->
+ normalize_https(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ftp"; Scheme =:= <<"ftp">> ->
+ normalize_ftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ssh"; Scheme =:= <<"ssh">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "sftp"; Scheme =:= <<"sftp">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "tftp"; Scheme =:= <<"tftp">> ->
+ normalize_tftp(Map, Port);
+normalize_scheme_based(Map, _, _, _) ->
+ Map.
+
+
+normalize_http(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 80),
+ normalize_http_path(M1, Path).
+
+
+normalize_https(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 443),
+ normalize_http_path(M1, Path).
+
+
+normalize_ftp(Map, Port) ->
+ normalize_port(Map, Port, 21).
+
+
+normalize_ssh_sftp(Map, Port) ->
+ normalize_port(Map, Port, 22).
+
+
+normalize_tftp(Map, Port) ->
+ normalize_port(Map, Port, 69).
+
+
+normalize_port(Map, Port, Default) ->
+ case Port of
+ Default ->
+ maps:remove(port, Map);
+ _Else ->
+ Map
+ end.
+
+
+normalize_http_path(Map, Path) ->
+ case Path of
+ "" ->
+ Map#{path => "/"};
+ <<>> ->
+ Map#{path => <<"/">>};
+ _Else ->
+ Map
+ end.