aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/Makefile1
-rw-r--r--lib/stdlib/src/base64.erl108
-rw-r--r--lib/stdlib/src/c.erl18
-rw-r--r--lib/stdlib/src/epp.erl5
-rw-r--r--lib/stdlib/src/erl_lint.erl7
-rw-r--r--lib/stdlib/src/erl_pp.erl40
-rw-r--r--lib/stdlib/src/erl_scan.erl6
-rw-r--r--lib/stdlib/src/escript.erl12
-rw-r--r--lib/stdlib/src/ets.erl4
-rw-r--r--lib/stdlib/src/filelib.erl42
-rw-r--r--lib/stdlib/src/filename.erl106
-rw-r--r--lib/stdlib/src/gen_server.erl31
-rw-r--r--lib/stdlib/src/gen_statem.erl4
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
-rw-r--r--lib/stdlib/src/lib.erl2
-rw-r--r--lib/stdlib/src/otp_internal.erl46
-rw-r--r--lib/stdlib/src/pool.erl6
-rw-r--r--lib/stdlib/src/rand.erl261
-rw-r--r--lib/stdlib/src/slave.erl2
-rw-r--r--lib/stdlib/src/stdlib.app.src3
-rw-r--r--lib/stdlib/src/stdlib.appup.src6
-rw-r--r--lib/stdlib/src/string.erl10
-rw-r--r--lib/stdlib/src/uri_string.erl1842
23 files changed, 2448 insertions, 124 deletions
diff --git a/lib/stdlib/src/Makefile b/lib/stdlib/src/Makefile
index bf836203ec..8b156929d7 100644
--- a/lib/stdlib/src/Makefile
+++ b/lib/stdlib/src/Makefile
@@ -121,6 +121,7 @@ MODULES= \
timer \
unicode \
unicode_util \
+ uri_string \
win32reg \
zip
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index 5885745fb1..c8cf6fdffe 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -113,9 +113,9 @@ encode_binary(Bin) ->
Data :: ascii_binary().
decode(Bin) when is_binary(Bin) ->
- decode_binary(<<>>, Bin);
+ decode_binary(Bin, <<>>);
decode(List) when is_list(List) ->
- list_to_binary(decode_l(List)).
+ decode_list(List, <<>>).
-spec mime_decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -186,31 +186,41 @@ mime_decode_to_string(List) when is_list(List) ->
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
-decode_binary(Result0, <<C:8,T0/bits>>) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- erlang:error({badarg,C});
- ws ->
- decode_binary(Result0, T0);
- eq ->
- case strip_ws(T0) of
- <<$=:8,T/binary>> ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- T ->
- <<>> = strip_ws(T),
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
- end;
- Bits ->
- decode_binary(<<Result0/bits,Bits:6>>, T0)
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
end;
-decode_binary(Result, <<>>) ->
- true = is_binary(Result),
- Result.
+decode_binary(<<>>, A) ->
+ A.
+
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
+
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws_binary(<<>>, A) ->
+ A;
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws_binary(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
@@ -262,6 +272,42 @@ mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
Result
end.
+decode_list([C1 | Cs], A) ->
+ case element(C1, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A);
+ B1 -> decode_list(Cs, A, B1)
+ end;
+decode_list([], A) ->
+ A.
+
+decode_list([C2 | Cs], A, B1) ->
+ case element(C2, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1);
+ B2 -> decode_list(Cs, A, B1, B2)
+ end.
+
+decode_list([C3 | Cs], A, B1, B2) ->
+ case element(C3, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2);
+ B3 -> decode_list(Cs, A, B1, B2, B3)
+ end.
+
+decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case element(C4, ?DECODE_MAP) of
+ ws -> decode_list(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+only_ws([], A) ->
+ A;
+only_ws([C | Cs], A) ->
+ case element(C, ?DECODE_MAP) of
+ ws -> only_ws(Cs, A);
+ _ -> erlang:error(function_clause)
+ end.
+
decode([], A) -> A;
decode([$=,$=,C2,C1|Cs], A) ->
Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
@@ -292,16 +338,6 @@ strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
-strip_ws(<<$\t,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\n,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\r,T/binary>>) ->
- strip_ws(T);
-strip_ws(<<$\s,T/binary>>) ->
- strip_ws(T);
-strip_ws(T) -> T.
-
%% Skipping pad character if not at end of string. Also liberal about
%% excess padding and skipping of other illegal (non-base64 alphabet)
%% characters. See section 3.3 of RFC4648
diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl
index c04a201ce1..9a447af5b7 100644
--- a/lib/stdlib/src/c.erl
+++ b/lib/stdlib/src/c.erl
@@ -668,19 +668,23 @@ lm() ->
[l(M) || M <- mm()].
%% erlangrc(Home)
-%% Try to run a ".erlang" file, first in the current directory
-%% else in home directory.
+%% Try to run a ".erlang" file in home directory.
+
+-spec erlangrc() -> {ok, file:filename()} | {error, term()}.
erlangrc() ->
case init:get_argument(home) of
{ok,[[Home]]} ->
erlangrc([Home]);
_ ->
- f_p_e(["."], ".erlang")
+ {error, enoent}
end.
-erlangrc([Home]) ->
- f_p_e([".",Home], ".erlang").
+-spec erlangrc(PathList) -> {ok, file:filename()} | {error, term()}
+ when PathList :: [Dir :: file:name()].
+
+erlangrc([Home|_]=Paths) when is_list(Home) ->
+ f_p_e(Paths, ".erlang").
error(Fmt, Args) ->
error_logger:error_msg(Fmt, Args).
@@ -692,11 +696,11 @@ f_p_e(P, F) ->
{error, E={Line, _Mod, _Term}} ->
error("file:path_eval(~tp,~tp): error on line ~p: ~ts~n",
[P, F, Line, file:format_error(E)]),
- ok;
+ {error, E};
{error, E} ->
error("file:path_eval(~tp,~tp): ~ts~n",
[P, F, file:format_error(E)]),
- ok;
+ {error, E};
Other ->
Other
end.
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index 31d0d499e3..00e6a10d8a 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -479,7 +479,7 @@ com_enc(_B, _Fun, _N, L, Ps) ->
com_enc_end([L | Ps]).
com_enc_end(Ps0) ->
- Ps = lists:reverse([lists:reverse(string:to_lower(P)) || P <- Ps0]),
+ Ps = lists:reverse([lists:reverse(lowercase(P)) || P <- Ps0]),
com_encoding(Ps).
com_encoding(["latin","1"|_]) ->
@@ -489,6 +489,9 @@ com_encoding(["utf","8"|_]) ->
com_encoding(_) ->
throw(no). % Don't try any further
+lowercase(S) ->
+ unicode:characters_to_list(string:lowercase(S)).
+
normalize_typed_record_fields([]) ->
{typed, []};
normalize_typed_record_fields(Fields) ->
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 9cd4727dc3..f58cb35cea 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3910,10 +3910,9 @@ check_format_string(Fmt) ->
extract_sequences(Fmt, []).
extract_sequences(Fmt, Need0) ->
- case string:chr(Fmt, $~) of
- 0 -> {ok,lists:reverse(Need0)}; %That's it
- Pos ->
- Fmt1 = string:substr(Fmt, Pos+1), %Skip ~
+ case string:find(Fmt, [$~]) of
+ nomatch -> {ok,lists:reverse(Need0)}; %That's it
+ [$~|Fmt1] ->
case extract_sequence(1, Fmt1, Need0) of
{ok,Need1,Rest} -> extract_sequences(Rest, Need1);
Error -> Error
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index ee5e7a11bf..367dbefb82 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -237,13 +237,20 @@ lform({attribute,Line,Name,Arg}, Opts) ->
lform({function,Line,Name,Arity,Clauses}, Opts) ->
lfunction({function,Line,Name,Arity,Clauses}, Opts);
%% These are specials to make it easier for the compiler.
-lform({error,E}, _Opts) ->
- leaf(format("~p\n", [{error,E}]));
-lform({warning,W}, _Opts) ->
- leaf(format("~p\n", [{warning,W}]));
+lform({error,_}=E, Opts) ->
+ message(E, Opts);
+lform({warning,_}=W, Opts) ->
+ message(W, Opts);
lform({eof,_Line}, _Opts) ->
$\n.
+message(M, #options{encoding = Encoding}) ->
+ F = case Encoding of
+ latin1 -> "~p\n";
+ unicode -> "~tp\n"
+ end,
+ leaf(format(F, [M])).
+
lattribute({attribute,_Line,type,Type}, Opts) ->
[typeattr(type, Type, Opts),leaf(".\n")];
lattribute({attribute,_Line,opaque,Type}, Opts) ->
@@ -598,8 +605,6 @@ lexpr({'fun',_,{clauses,Cs},Extra}, _Prec, Opts) ->
lexpr({named_fun,_,Name,Cs,Extra}, _Prec, Opts) ->
{force_nl,fun_info(Extra),
{list,[{first,['fun', " "],fun_clauses(Cs, Opts, {named, Name})},'end']}};
-lexpr({'query',_,Lc}, _Prec, Opts) ->
- {list,[{step,leaf("query"),lexpr(Lc, 0, Opts)},'end']};
lexpr({call,_,{remote,_,{atom,_,M},{atom,_,F}=N}=Name,Args}, Prec, Opts) ->
case erl_internal:bif(M, F, length(Args)) of
true ->
@@ -904,7 +909,7 @@ maybe_paren(_P, _Prec, Expr) ->
Expr.
leaf(S) ->
- {leaf,chars_size(S),S}.
+ {leaf,string:length(S),S}.
%%% Do the formatting. Currently nothing fancy. Could probably have
%%% done it in one single pass.
@@ -964,7 +969,7 @@ f({seq,Before,After,Sep,LItems}, I0, ST, WT, PP) ->
Sizes = BSizeL ++ SizeL,
NSepChars = if
is_list(Sep), Sep =/= [] ->
- erlang:max(0, length(CharsL)-1);
+ erlang:max(0, length(CharsL)-1); % not string:length
true ->
0
end,
@@ -1120,7 +1125,7 @@ incr(I, Incr) ->
I+Incr.
indentation(E, I) when I < 0 ->
- chars_size(E);
+ string:length(E);
indentation(E, I0) ->
I = io_lib_format:indentation(E, I0),
case has_nl(E) of
@@ -1157,19 +1162,19 @@ write_a_string(S, I, PP) ->
write_a_string([], _N, _Len, _PP) ->
[];
write_a_string(S, N, Len, PP) ->
- SS = string:sub_string(S, 1, N),
+ SS = string:slice(S, 0, N),
Sl = write_string(SS, PP),
- case (chars_size(Sl) > Len) and (N > ?MIN_SUBSTRING) of
+ case (string:length(Sl) > Len) and (N > ?MIN_SUBSTRING) of
true ->
write_a_string(S, N-1, Len, PP);
false ->
[flat_leaf(Sl) |
- write_a_string(lists:nthtail(length(SS), S), Len, Len, PP)]
+ write_a_string(string:slice(S, string:length(SS)), Len, Len, PP)]
end.
flat_leaf(S) ->
L = lists:flatten(S),
- {leaf,length(L),L}.
+ {leaf,string:length(L),L}.
write_value(V, PP) ->
(PP#pp.value_fun)(V).
@@ -1190,15 +1195,6 @@ write_char(C, PP) ->
a0() ->
erl_anno:new(0).
-chars_size([C | Es]) when is_integer(C) ->
- 1 + chars_size(Es);
-chars_size([E | Es]) ->
- chars_size(E) + chars_size(Es);
-chars_size([]) ->
- 0;
-chars_size(B) when is_binary(B) ->
- byte_size(B).
-
-define(N_SPACES, 30).
spacetab() ->
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 47223b129c..4774c4bf19 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2015. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -752,7 +752,7 @@ scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %"
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
Anno = anno(Line0, Col0, St, Nstr),
@@ -767,7 +767,7 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
- Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
+ Estr = string:slice(Nwcs, 0, 16), % Expanded escape chars.
scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
{Ncs,Nline,Ncol,Nstr,Nwcs} ->
case catch list_to_atom(Nwcs) of
diff --git a/lib/stdlib/src/escript.erl b/lib/stdlib/src/escript.erl
index 2b9d8ff65b..132f8efbbe 100644
--- a/lib/stdlib/src/escript.erl
+++ b/lib/stdlib/src/escript.erl
@@ -224,8 +224,8 @@ return_sections(S, Bin) ->
normalize_section(Name, undefined) ->
{Name, undefined};
normalize_section(shebang, "#!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
if
Stripped =:= ?SHEBANG ->
{shebang, default};
@@ -233,8 +233,8 @@ normalize_section(shebang, "#!" ++ Chars) ->
{shebang, Stripped}
end;
normalize_section(comment, Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(string:strip(Chopped, left, $%), both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(string:trim(Chopped, leading, "$%"), both),
if
Stripped =:= ?COMMENT ->
{comment, default};
@@ -242,8 +242,8 @@ normalize_section(comment, Chars) ->
{comment, Stripped}
end;
normalize_section(emu_args, "%%!" ++ Chars) ->
- Chopped = string:strip(Chars, right, $\n),
- Stripped = string:strip(Chopped, both),
+ Chopped = string:trim(Chars, trailing, "$\n"),
+ Stripped = string:trim(Chopped, both),
{emu_args, Stripped};
normalize_section(Name, Chars) ->
{Name, Chars}.
diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl
index 1db004c91e..b6548626f3 100644
--- a/lib/stdlib/src/ets.erl
+++ b/lib/stdlib/src/ets.erl
@@ -1719,7 +1719,7 @@ get_line(P, Default) ->
line_string(Binary) when is_binary(Binary) -> unicode:characters_to_list(Binary);
line_string(Other) -> Other.
-nonl(S) -> string:strip(S, right, $\n).
+nonl(S) -> string:trim(S, trailing, "$\n").
print_number(Tab, Key, Num) ->
Os = ets:lookup(Tab, Key),
@@ -1748,7 +1748,7 @@ do_display_item(_Height, Width, I, Opos) ->
L = to_string(I),
L2 = if
length(L) > Width - 8 ->
- string:substr(L, 1, Width-13) ++ " ...";
+ string:slice(L, 0, Width-13) ++ " ...";
true ->
L
end,
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d7c313f214..0f90b3fc33 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod).
%%% Compiling a wildcard.
+
+%% Define characters used for escaping a \.
+-define(ESCAPE_PREFIX, $@).
+-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]).
+-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]).
+
%% Only for debugging.
compile_wildcard(Pattern) when is_list(Pattern) ->
{compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}.
-compile_wildcard(Pattern, Cwd0) ->
+compile_wildcard(Pattern0, Cwd0) ->
+ Pattern = convert_escapes(Pattern0),
[Root|Rest] = filename:split(Pattern),
case filename:pathtype(Root) of
relative ->
@@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) ->
compile_join({root,PrefixLen,Root}, File) ->
{root,PrefixLen,filename:join(Root, File)}.
-compile_part(Part) ->
+compile_part(Part0) ->
+ Part = wrap_escapes(Part0),
compile_part(Part, false, []).
compile_part_to_sep(Part) ->
@@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) ->
error ->
compile_part(Rest, Upto, [${|Result])
end;
+compile_part([{escaped,X}|Rest], Upto, Result) ->
+ compile_part(Rest, Upto, [X|Result]);
compile_part([X|Rest], Upto, Result) ->
compile_part(Rest, Upto, [X|Result]);
compile_part([], _Upto, Result) ->
@@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper ->
compile_charset1(Rest, compile_range(Lower, Upper, Ordset));
compile_charset1([$]|Rest], Ordset) ->
{ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest};
+compile_charset1([{escaped,X}|Rest], Ordset) ->
+ compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([X|Rest], Ordset) ->
compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([], _Ordset) ->
@@ -486,6 +498,32 @@ compile_alt(Pattern, Result) ->
error
end.
+%% Convert backslashes to an illegal Unicode character to
+%% protect in from filename:split/1.
+
+convert_escapes([?ESCAPE_PREFIX|T]) ->
+ ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T);
+convert_escapes([$\\|T]) ->
+ ?ESCAPE_CHARACTER ++ convert_escapes(T);
+convert_escapes([H|T]) ->
+ [H|convert_escapes(T)];
+convert_escapes([]) ->
+ [].
+
+%% Wrap each escape in a tuple to remove the special meaning for
+%% the character that follows.
+
+wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) ->
+ [?ESCAPE_PREFIX|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) ->
+ [{escaped,C}|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER) ->
+ [];
+wrap_escapes([H|T]) ->
+ [H|wrap_escapes(T)];
+wrap_escapes([]) ->
+ [].
+
badpattern(Reason) ->
error({badpattern,Reason}).
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index ee807dfd09..a322bd002d 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -34,6 +34,38 @@
%% we flatten the arguments immediately on function entry as that makes
%% it easier to ensure that the code works.
+%%
+%% *** Requirements on Raw Filename Format ***
+%%
+%% These requirements are due to the 'filename' module
+%% in stdlib. This since it is documented that it
+%% should be able to operate on raw filenames as well
+%% as ordinary filenames.
+%%
+%% A raw filename *must* be a byte sequence where:
+%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded
+%% as a byte with the corresponding value. That is,
+%% the most significant bit in the byte encoding the
+%% codepoint is never set.
+%% 2. Codepoints greater than 127 *must* be encoded
+%% with the most significant bit set in *every* byte
+%% encoding it.
+%%
+%% Latin1 and UTF-8 meet these requirements while
+%% UTF-16 and UTF-32 don't.
+%%
+%% On Windows filenames are natively stored as malformed
+%% UTF-16LE (lonely surrogates may appear). A more correct
+%% description than UTF-16 would be an array of 16-bit
+%% words... In order to meet the requirements of the
+%% raw file format we convert the malformed UTF-16LE to
+%% malformed UTF-8 which meet the requirements.
+%%
+%% Note that these requirements are today only OTP
+%% internal (erts-stdlib internal) requirements that
+%% could be changed.
+%%
+
-export([absname/1, absname/2, absname_join/2,
basename/1, basename/2, dirname/1,
extension/1, join/1, join/2, pathtype/1,
@@ -41,6 +73,7 @@
safe_relative_path/1]).
-export([find_src/1, find_src/2]). % deprecated
-export([basedir/2, basedir/3]).
+-export([validate/1]).
%% Undocumented and unsupported exports.
-export([append/2]).
@@ -1053,10 +1086,10 @@ basedir_linux(Type) ->
user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true);
site_data ->
Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false),
- string:tokens(Base,":");
+ string:lexemes(Base, ":");
site_config ->
Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false),
- string:tokens(Base,":")
+ string:lexemes(Base, ":")
end.
-define(basedir_darwin_user_data, "Library/Application Support").
@@ -1152,3 +1185,72 @@ basedir_os_type() ->
{win32,_} -> windows;
_ -> linux
end.
+
+%%
+%% validate/1
+%%
+
+-spec validate(FileName) -> boolean() when
+ FileName :: file:name_all().
+
+validate(FileName) when is_binary(FileName) ->
+ %% Raw filename...
+ validate_bin(FileName);
+validate(FileName) when is_list(FileName);
+ is_atom(FileName) ->
+ validate_list(FileName,
+ file:native_name_encoding(),
+ os:type()).
+
+validate_list(FileName, Enc, Os) ->
+ try
+ true = validate_list(FileName, Enc, Os, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_list([], _Enc, _Os, Chars) ->
+ Chars;
+validate_list(C, Enc, Os, Chars) when is_integer(C) ->
+ validate_char(C, Enc, Os),
+ Chars+1;
+validate_list(A, Enc, Os, Chars) when is_atom(A) ->
+ validate_list(atom_to_list(A), Enc, Os, Chars);
+validate_list([H|T], Enc, Os, Chars) ->
+ NewChars = validate_list(H, Enc, Os, Chars),
+ validate_list(T, Enc, Os, NewChars).
+
+%% C is always an integer...
+% validate_char(C, _, _) when not is_integer(C) ->
+% throw(invalid);
+validate_char(C, _, _) when C < 1 ->
+ throw(invalid); %% No negative or null characters...
+validate_char(C, latin1, _) when C > 255 ->
+ throw(invalid);
+validate_char(C, utf8, _) when C >= 16#110000 ->
+ throw(invalid);
+validate_char(C, utf8, {win32, _}) when C > 16#ffff ->
+ throw(invalid); %% invalid win wchar...
+validate_char(_C, utf8, {win32, _}) ->
+ ok; %% Range below is accepted on windows...
+validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF ->
+ throw(invalid); %% invalid unicode range...
+validate_char(_, _, _) ->
+ ok.
+
+validate_bin(Bin) ->
+ %% Raw filename. That is, we do not interpret
+ %% the encoding, but we still do not accept
+ %% null characters...
+ try
+ true = validate_bin(Bin, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_bin(<<>>, Bs) ->
+ Bs;
+validate_bin(<<0, _Rest/binary>>, _Bs) ->
+ throw(invalid); %% No null characters allowed...
+validate_bin(<<_B, Rest/binary>>, Bs) ->
+ validate_bin(Rest, Bs+1).
diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl
index 7daa7a9fe4..ac172325b5 100644
--- a/lib/stdlib/src/gen_server.erl
+++ b/lib/stdlib/src/gen_server.erl
@@ -116,23 +116,27 @@
%%%=========================================================================
-callback init(Args :: term()) ->
- {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate} |
+ {ok, State :: term()} | {ok, State :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term()} | ignore.
-callback handle_call(Request :: term(), From :: {pid(), Tag :: term()},
State :: term()) ->
{reply, Reply :: term(), NewState :: term()} |
- {reply, Reply :: term(), NewState :: term(), timeout() | hibernate} |
+ {reply, Reply :: term(), NewState :: term(), timeout() | hibernate | {continue, term()}} |
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), Reply :: term(), NewState :: term()} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_cast(Request :: term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback handle_info(Info :: timeout | term(), State :: term()) ->
{noreply, NewState :: term()} |
- {noreply, NewState :: term(), timeout() | hibernate} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
+ {stop, Reason :: term(), NewState :: term()}.
+-callback handle_continue(Info :: term(), State :: term()) ->
+ {noreply, NewState :: term()} |
+ {noreply, NewState :: term(), timeout() | hibernate | {continue, term()}} |
{stop, Reason :: term(), NewState :: term()}.
-callback terminate(Reason :: (normal | shutdown | {shutdown, term()} |
term()),
@@ -149,7 +153,7 @@
Status :: term().
-optional_callbacks(
- [handle_info/2, terminate/2, code_change/3, format_status/2]).
+ [handle_info/2, handle_continue/2, terminate/2, code_change/3, format_status/2]).
%%% -----------------------------------------------------------------
%%% Starts a generic server.
@@ -309,7 +313,7 @@ enter_loop(Mod, Options, State, ServerName, Timeout) ->
Name = gen:get_proc_name(ServerName),
Parent = gen:get_parent(),
Debug = gen:debug_options(Name, Options),
- HibernateAfterTimeout = gen:hibernate_after(Options),
+ HibernateAfterTimeout = gen:hibernate_after(Options),
loop(Parent, Name, State, Mod, Timeout, HibernateAfterTimeout, Debug).
%%%========================================================================
@@ -374,6 +378,19 @@ init_it(Mod, Args) ->
%%% ---------------------------------------------------
%%% The MAIN loop.
%%% ---------------------------------------------------
+
+loop(Parent, Name, State, Mod, {continue, Continue} = Msg, HibernateAfterTimeout, Debug) ->
+ Reply = try_dispatch(Mod, handle_continue, Continue, State),
+ case Debug of
+ [] ->
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State);
+ _ ->
+ Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, Msg),
+ handle_common_reply(Reply, Parent, Name, undefined, Msg, Mod,
+ HibernateAfterTimeout, State, Debug1)
+ end;
+
loop(Parent, Name, State, Mod, hibernate, HibernateAfterTimeout, Debug) ->
proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, HibernateAfterTimeout, Debug]);
diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl
index 1110d18af6..cd6312855d 100644
--- a/lib/stdlib/src/gen_statem.erl
+++ b/lib/stdlib/src/gen_statem.erl
@@ -296,7 +296,7 @@
(Reason :: term()).
%% Format the callback module state in some sensible that is
-%% often condensed way. For StatusOption =:= 'normal' the perferred
+%% often condensed way. For StatusOption =:= 'normal' the preferred
%% return term is [{data,[{"State",FormattedState}]}], and for
%% StatusOption =:= 'terminate' it is just FormattedState.
-callback format_status(
@@ -510,8 +510,6 @@ call(ServerRef, Request, Timeout) ->
parse_timeout(Timeout) ->
case Timeout of
- {clean_timeout,infinity} ->
- {dirty_timeout,infinity};
{clean_timeout,_} ->
Timeout;
{dirty_timeout,_} ->
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 4b2d15c8b3..e345810ca0 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -380,7 +380,7 @@ float_e(_Fl, {Ds,E}, P) ->
{Fs,false} -> [Fs|float_exp(E-1)]
end.
-%% float_man([Digit], Icount, Dcount) -> {[Chars],CarryFlag}.
+%% float_man([Digit], Icount, Dcount) -> {[Char],CarryFlag}.
%% Generate the characters in the mantissa from the digits with Icount
%% characters before the '.' and Dcount decimals. Handle carry and let
%% caller decide what to do at top.
@@ -395,7 +395,7 @@ float_man([D|Ds], I, Dc) ->
{Cs,false} -> {[D|Cs],false}
end;
float_man([], I, Dc) -> %Pad with 0's
- {string:chars($0, I, [$.|string:chars($0, Dc)]),false}.
+ {lists:duplicate(I, $0) ++ [$.|lists:duplicate(Dc, $0)],false}.
float_man([D|_], 0) when D >= $5 -> {[],true};
float_man([_|_], 0) -> {[],false};
@@ -405,7 +405,7 @@ float_man([D|Ds], Dc) ->
{Cs,true} -> {[D+1|Cs],false};
{Cs,false} -> {[D|Cs],false}
end;
-float_man([], Dc) -> {string:chars($0, Dc),false}. %Pad with 0's
+float_man([], Dc) -> {lists:duplicate(Dc, $0),false}. %Pad with 0's
%% float_exp(Exponent) -> [Char].
%% Generate the exponent of a floating point number. Always include sign.
@@ -429,7 +429,7 @@ fwrite_f(Fl, F, Adj, P, Pad) when P >= 1 ->
float_f(Fl, Fd, P) when Fl < 0.0 ->
[$-|float_f(-Fl, Fd, P)];
float_f(Fl, {Ds,E}, P) when E =< 0 ->
- float_f(Fl, {string:chars($0, -E+1, Ds),1}, P); %Prepend enough 0's
+ float_f(Fl, {lists:duplicate(-E+1, $0)++Ds,1}, P); %Prepend enough 0's
float_f(_Fl, {Ds,E}, P) ->
case float_man(Ds, E, P) of
{Fs,true} -> "1" ++ Fs; %Handle carry
@@ -751,7 +751,7 @@ adjust(Data, Pad, right) -> [Pad|Data].
flat_trunc(List, N) when is_integer(N), N >= 0 ->
string:slice(List, 0, N).
-%% A deep version of string:chars/2,3
+%% A deep version of lists:duplicate/2
chars(_C, 0) ->
[];
diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl
index c6eb0d7915..a7980cc294 100644
--- a/lib/stdlib/src/lib.erl
+++ b/lib/stdlib/src/lib.erl
@@ -646,7 +646,7 @@ pp_arguments(PF, As, I, Enc) ->
Ll = length(L),
A = list_to_atom(lists:duplicate(Ll, $a)),
S0 = unicode:characters_to_list(PF([A | T], I+1), Enc),
- brackets_to_parens([$[,L,string:sub_string(S0, 2+Ll)], Enc);
+ brackets_to_parens([$[,L,string:slice(S0, 1+Ll)], Enc);
_ ->
brackets_to_parens(PF(As, I+1), Enc)
end.
diff --git a/lib/stdlib/src/otp_internal.erl b/lib/stdlib/src/otp_internal.erl
index 5b488cc677..122b476ddb 100644
--- a/lib/stdlib/src/otp_internal.erl
+++ b/lib/stdlib/src/otp_internal.erl
@@ -609,6 +609,52 @@ obsolete_1(filename, find_src, 2) ->
obsolete_1(erlang, hash, 2) ->
{removed, {erlang, phash2, 2}, "20.0"};
+%% Added in OTP-21
+obsolete_1(string, len, 1) ->
+ {deprecated, "deprecated; use string:length/3 instead"};
+obsolete_1(string, concat, 2) ->
+ {deprecated, "deprecated; use [Str1,Str2] instead"};
+obsolete_1(string, str, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rstr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, chr, 2) ->
+ {deprecated, "deprecated; use string:find/2 instead"};
+obsolete_1(string, rchr, 2) ->
+ {deprecated, "deprecated; use string:find/3 instead"};
+obsolete_1(string, span, 2) ->
+ {deprecated, "deprecated; use string:take/2 instead"};
+obsolete_1(string, cspan, 2) ->
+ {deprecated, "deprecated; use string:take/3 instead"};
+obsolete_1(string, substr, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, tokens, 2) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, chars, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, copies, _) ->
+ {deprecated, "deprecated; use lists:duplicate/2 instead"};
+obsolete_1(string, words, _) ->
+ {deprecated, "deprecated; use string:lexemes/2 instead"};
+obsolete_1(string, strip, _) ->
+ {deprecated, "deprecated; use string:trim/3 instead"};
+obsolete_1(string, sub_word, _) ->
+ {deprecated, "deprecated; use string:nth_lexeme/3 instead"};
+obsolete_1(string, sub_string, _) ->
+ {deprecated, "deprecated; use string:slice/3 instead"};
+obsolete_1(string, left, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, right, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, centre, _) ->
+ {deprecated, "deprecated; use string:pad/3 instead"};
+obsolete_1(string, join, _) ->
+ {deprecated, "deprecated; use lists:join/2 instead"};
+obsolete_1(string, to_upper, _) ->
+ {deprecated, "deprecated; use string:uppercase/1 or string:titlecase/1 instead"};
+obsolete_1(string, to_lower, _) ->
+ {deprecated, "deprecated; use string:lowercase/1 or string:casefold/1 instead"};
+
%% not obsolete
obsolete_1(_, _, _) ->
diff --git a/lib/stdlib/src/pool.erl b/lib/stdlib/src/pool.erl
index 05950a1d7c..b12ff205b1 100644
--- a/lib/stdlib/src/pool.erl
+++ b/lib/stdlib/src/pool.erl
@@ -25,7 +25,7 @@
%% with the least load !!!!
%% This function is callable from any node including the master
%% That is part of the pool
-%% nodes are scheduled on a per usgae basis and per load basis,
+%% nodes are scheduled on a per usage basis and per load basis,
%% Whenever we use a node, we put at the end of the queue, and whenever
%% a node report a change in load, we insert it accordingly
@@ -197,7 +197,7 @@ pure_insert({Load,Node},[{L,N}|Tail]) when Load < L ->
pure_insert(L,[H|T]) -> [H|pure_insert(L,T)].
%% Really should not measure the contributions from
-%% the back ground processes here .... which we do :-(
+%% the background processes here .... which we do :-(
%% We don't have to monitor the master, since we're slaves anyway
statistic_collector() ->
@@ -213,7 +213,7 @@ statistic_collector(I) ->
stat_loop(M, 999999)
end.
-%% Do not tell the master about our load if it has not changed
+%% Do not tell the master about our load if it has not changed
stat_loop(M, Old) ->
sleep(2000),
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 7a8a5e6d4a..362e98006e 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -21,8 +21,8 @@
%% Multiple PRNG module for Erlang/OTP
%% Copyright (c) 2015-2016 Kenji Rikitake
%%
-%% exrop (xoroshiro116+) added and statistical distribution
-%% improvements by the Erlang/OTP team 2017
+%% exrop (xoroshiro116+) added, statistical distribution
+%% improvements and uniform_real added by the Erlang/OTP team 2017
%% =====================================================================
-module(rand).
@@ -30,10 +30,14 @@
-export([seed_s/1, seed_s/2, seed/1, seed/2,
export_seed/0, export_seed_s/1,
uniform/0, uniform/1, uniform_s/1, uniform_s/2,
+ uniform_real/0, uniform_real_s/1,
jump/0, jump/1,
normal/0, normal/2, normal_s/1, normal_s/3
]).
+%% Debug
+-export([make_float/3, float2str/1, bc64/1]).
+
-compile({inline, [exs64_next/1, exsplus_next/1,
exs1024_next/1, exs1024_calc/2,
exrop_next/1, exrop_next_s/2,
@@ -60,6 +64,10 @@
%% N i evaluated 3 times
(?BSL((Bits), (X), (N)) bor ((X) bsr ((Bits)-(N))))).
+-define(
+ BC(V, N),
+ bc((V), ?BIT((N) - 1), N)).
+
%%-define(TWO_POW_MINUS53, (math:pow(2, -53))).
-define(TWO_POW_MINUS53, 1.11022302462515657e-16).
@@ -84,14 +92,21 @@
%% The 'bits' field indicates how many bits the integer
%% returned from 'next' has got, i.e 'next' shall return
%% an random integer in the range 0..(2^Bits - 1).
-%% At least 53 bits is required for the floating point
-%% producing fallbacks. This field is only used when
-%% the 'uniform' or 'uniform_n' fields are not defined.
+%% At least 55 bits is required for the floating point
+%% producing fallbacks, but 56 bits would be more future proof.
%%
%% The fields 'next', 'uniform' and 'uniform_n'
-%% implement the algorithm. If 'uniform' or 'uinform_n'
+%% implement the algorithm. If 'uniform' or 'uniform_n'
%% is not present there is a fallback using 'next' and either
-%% 'bits' or the deprecated 'max'.
+%% 'bits' or the deprecated 'max'. The 'next' function
+%% must generate a word with at least 56 good random bits.
+%%
+%% The 'weak_low_bits' field indicate how many bits are of
+%% lesser quality and they will not be used by the floating point
+%% producing functions, nor by the range producing functions
+%% when more bits are needed, to avoid weak bits in the middle
+%% of the generated bits. The lowest bits from the range
+%% functions still have the generator's quality.
%%
-type alg_handler() ::
#{type := alg(),
@@ -148,11 +163,7 @@
%% For ranges larger than the algorithm bit size
uniform_range(Range, #{next:=Next, bits:=Bits} = Alg, R, V) ->
- WeakLowBits =
- case Alg of
- #{weak_low_bits:=WLB} -> WLB;
- #{} -> 0
- end,
+ WeakLowBits = maps:get(weak_low_bits, Alg, 0),
%% Maybe waste the lowest bit(s) when shifting in new bits
Shift = Bits - WeakLowBits,
ShiftMask = bnot ?MASK(WeakLowBits),
@@ -297,7 +308,7 @@ uniform_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
{(V bsr (Bits - 53)) * ?TWO_POW_MINUS53, {Alg, R1}};
uniform_s({#{max:=Max, next:=Next} = Alg, R0}) ->
{V, R1} = Next(R0),
- %% Old broken algorithm with non-uniform density
+ %% Old algorithm with non-uniform density
{V / (Max + 1), {Alg, R1}}.
@@ -317,7 +328,7 @@ uniform_s(N, {#{bits:=Bits, next:=Next} = Alg, R0})
?uniform_range(N, Alg, R1, V, MaxMinusN, I);
uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
when is_integer(N), 1 =< N ->
- %% Old broken algorithm with skewed probability
+ %% Old algorithm with skewed probability
%% and gap in ranges > Max
{V, R1} = Next(R0),
if
@@ -328,6 +339,189 @@ uniform_s(N, {#{max:=Max, next:=Next} = Alg, R0})
{trunc(F * N) + 1, {Alg, R1}}
end.
+%% uniform_real/0: returns a random float X where 0.0 < X =< 1.0,
+%% updating the state in the process dictionary.
+
+-spec uniform_real() -> X :: float().
+uniform_real() ->
+ {X, Seed} = uniform_real_s(seed_get()),
+ _ = seed_put(Seed),
+ X.
+
+%% uniform_real_s/1: given a state, uniform_s/1
+%% returns a random float X where 0.0 < X =< 1.0,
+%% and a new state.
+%%
+%% This function does not use the same form of uniformity
+%% as the uniform_s/1 function.
+%%
+%% Instead, this function does not generate numbers with equal
+%% distance in the interval, but rather tries to keep all mantissa
+%% bits random also for small numbers, meaning that the distance
+%% between possible numbers decreases when the numbers
+%% approaches 0.0, as does the possibility for a particular
+%% number. Hence uniformity is preserved.
+%%
+%% To generate 56 bits at the time instead of 53 is actually
+%% a speed optimization since the probability to have to
+%% generate a second word decreases by 1/2 for every extra bit.
+%%
+%% This function generates normalized numbers, so the smallest number
+%% that can be generated is 2^-1022 with the distance 2^-1074
+%% to the next to smallest number, compared to 2^-53 for uniform_s/1.
+%%
+%% This concept of uniformity should work better for applications
+%% where you need to calculate 1.0/X or math:log(X) since those
+%% operations benefits from larger precision approaching 0.0,
+%% and that this function does not return 0.0 nor denormalized
+%% numbers very close to 0.0. The log() operation in The Box-Muller
+%% transformation for normal distribution is an example of this.
+%%
+%%-define(TWO_POW_MINUS55, (math:pow(2, -55))).
+%%-define(TWO_POW_MINUS110, (math:pow(2, -110))).
+%%-define(TWO_POW_MINUS55, 2.7755575615628914e-17).
+%%-define(TWO_POW_MINUS110, 7.7037197775489436e-34).
+%%
+-spec uniform_real_s(State :: state()) -> {X :: float(), NewState :: state()}.
+uniform_real_s({#{bits:=Bits, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number without using the weak low bits.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, Bits)
+ end;
+uniform_real_s({#{max:=_, next:=Next} = Alg, R0}) ->
+ %% Generate a 56 bit number.
+ %% Ignore the weak low bits for these old algorithms,
+ %% just produce something reasonable.
+ %%
+ %% Be sure to use only 53 bits when multiplying with
+ %% math:pow(2.0, -N) to avoid rounding which would make
+ %% "even" floats more probable than "odd".
+ %%
+ {V1, R1} = Next(R0),
+ M1 = ?MASK(56, V1),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, -53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, -54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, -55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, -56), {Alg, R1}};
+ true ->
+ %% Need more bits
+ {V2, R2} = Next(R1),
+ uniform_real_s(Alg, Next, M1, -56, R2, V2, 56)
+ end.
+
+uniform_real_s(Alg, _Next, M0, -1064, R1, V1, Bits) -> % 19*56
+ %% This is a very theoretical bottom case.
+ %% The odds of getting here is about 2^-1008,
+ %% through a white box test case, or thanks to
+ %% a malfunctioning PRNG producing 18 56-bit zeros in a row.
+ %%
+ %% Fill up to 53 bits, we have at most 52
+ B0 = (53 - ?BC(M0, 52)), % Missing bits
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0))) * math:pow(2.0, -1064 - B0)),
+ {Alg, R1}};
+uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits) ->
+ if
+ %% Optimize the most probable.
+ %% Fill up to 53 bits.
+ ?BIT(51) =< M0 ->
+ %% We have 52 bits in M0 - need 1
+ {(((M0 bsl 1) bor (V1 bsr (Bits - 1)))
+ * math:pow(2.0, BitNo - 1)),
+ {Alg, R1}};
+ ?BIT(50) =< M0 ->
+ %% We have 51 bits in M0 - need 2
+ {(((M0 bsl 2) bor (V1 bsr (Bits - 2)))
+ * math:pow(2.0, BitNo - 2)),
+ {Alg, R1}};
+ ?BIT(49) =< M0 ->
+ %% We have 50 bits in M0 - need 3
+ {(((M0 bsl 3) bor (V1 bsr (Bits - 3)))
+ * math:pow(2.0, BitNo - 3)),
+ {Alg, R1}};
+ M0 == 0 ->
+ M1 = V1 bsr (Bits - 56),
+ if
+ ?BIT(55) =< M1 ->
+ %% We have 56 bits - waste 3
+ {(M1 bsr 3) * math:pow(2.0, BitNo - 53), {Alg, R1}};
+ ?BIT(54) =< M1 ->
+ %% We have 55 bits - waste 2
+ {(M1 bsr 2) * math:pow(2.0, BitNo - 54), {Alg, R1}};
+ ?BIT(53) =< M1 ->
+ %% We have 54 bits - waste 1
+ {(M1 bsr 1) * math:pow(2.0, BitNo - 55), {Alg, R1}};
+ ?BIT(52) =< M1 ->
+ %% We have 53 bits - use all
+ {M1 * math:pow(2.0, BitNo - 56), {Alg, R1}};
+ BitNo =:= -1008 ->
+ %% Endgame
+ %% For the last round we can not have 14 zeros or more
+ %% at the top of M1 because then we will underflow,
+ %% so we need at least 43 bits
+ if
+ ?BIT(42) =< M1 ->
+ %% We have 43 bits - get the last bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1);
+ true ->
+ %% Would underflow 2^-1022 - start all over
+ %%
+ %% We could just crash here since the odds for
+ %% the PRNG being broken is much higher than
+ %% for a good PRNG generating this many zeros
+ %% in a row. Maybe we should write an error
+ %% report or call this a system limit...?
+ uniform_real_s({Alg, R1})
+ end;
+ true ->
+ %% Need more bits
+ uniform_real_s(Alg, Next, M1, BitNo - 56, R1)
+ end;
+ true ->
+ %% Fill up to 53 bits
+ B0 = 53 - ?BC(M0, 49), % Number of bits we need to append
+ {(((M0 bsl B0) bor (V1 bsr (Bits - B0)))
+ * math:pow(2.0, BitNo - B0)),
+ {Alg, R1}}
+ end.
+%%
+uniform_real_s(#{bits:=Bits} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, V1, Bits);
+uniform_real_s(#{max:=_} = Alg, Next, M0, BitNo, R0) ->
+ {V1, R1} = Next(R0),
+ uniform_real_s(Alg, Next, M0, BitNo, R1, ?MASK(56, V1), 56).
+
%% jump/1: given a state, jump/1
%% returns a new state which is equivalent to that
%% after a large number of call defined for each algorithm.
@@ -1025,3 +1219,42 @@ normal_fi(Indx) ->
1.0214971439701471e-02,8.6165827693987316e-03,7.0508754713732268e-03,
5.5224032992509968e-03,4.0379725933630305e-03,2.6090727461021627e-03,
1.2602859304985975e-03}).
+
+%%%bitcount64(0) -> 0;
+%%%bitcount64(V) -> 1 + bitcount(V, 64).
+%%%
+%%%-define(
+%%% BITCOUNT(V, N),
+%%% bitcount(V, N) ->
+%%% if
+%%% (1 bsl ((N) bsr 1)) =< (V) ->
+%%% ((N) bsr 1) + bitcount((V) bsr ((N) bsr 1), ((N) bsr 1));
+%%% true ->
+%%% bitcount((V), ((N) bsr 1))
+%%% end).
+%%%?BITCOUNT(V, 64);
+%%%?BITCOUNT(V, 32);
+%%%?BITCOUNT(V, 16);
+%%%?BITCOUNT(V, 8);
+%%%?BITCOUNT(V, 4);
+%%%?BITCOUNT(V, 2);
+%%%bitcount(_, 1) -> 0.
+
+bc64(V) -> ?BC(V, 64).
+
+%% Linear from high bit - higher probability first gives faster execution
+bc(V, B, N) when B =< V -> N;
+bc(V, B, N) -> bc(V, B bsr 1, N - 1).
+
+make_float(S, E, M) ->
+ <<F/float>> = <<S:1, E:11, M:52>>,
+ F.
+
+float2str(N) ->
+ <<S:1, E:11, M:52>> = <<(float(N))/float>>,
+ lists:flatten(
+ io_lib:format(
+ "~c~c.~13.16.0bE~b",
+ [case S of 1 -> $-; 0 -> $+ end,
+ case E of 0 -> $0; _ -> $1 end,
+ M, E - 16#3ff])).
diff --git a/lib/stdlib/src/slave.erl b/lib/stdlib/src/slave.erl
index d7cf6386f5..b3f3206d67 100644
--- a/lib/stdlib/src/slave.erl
+++ b/lib/stdlib/src/slave.erl
@@ -320,7 +320,7 @@ mk_cmd(Host, Name, Args, Waiter, Prog0) ->
%% emulator and flags as the test node. The return from lib:progname()
%% could then typically be '/<full_path_to>/cerl -gcov').
quote_progname(Progname) ->
- do_quote_progname(string:tokens(to_list(Progname)," ")).
+ do_quote_progname(string:lexemes(to_list(Progname)," ")).
do_quote_progname([Prog]) ->
"\""++Prog++"\"";
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 3c449d3cb9..5fb48acfab 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -101,13 +101,14 @@
timer,
unicode,
unicode_util,
+ uri_string,
win32reg,
zip]},
{registered,[timer_server,rsh_starter,take_over_monitor,pool_master,
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/src/stdlib.appup.src b/lib/stdlib/src/stdlib.appup.src
index 800c2c61f3..e4e3fb83e9 100644
--- a/lib/stdlib/src/stdlib.appup.src
+++ b/lib/stdlib/src/stdlib.appup.src
@@ -18,9 +18,7 @@
%% %CopyrightEnd%
{"%VSN%",
%% Up from - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}], % OTP-20.*
%% Down to - max one major revision back
- [{<<"3\\.[0-3](\\.[0-9]+)*">>,[restart_new_emulator]}, % OTP-19.*
- {<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
+ [{<<"3\\.4(\\.[0-9]+)*">>,[restart_new_emulator]}] % OTP-20.*
}.
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 4972da297d..5a4d2df2a6 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -87,6 +87,16 @@
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
+-deprecated([{len,1},{concat,2},
+ {str,2},{chr,2},{rchr,2},{rstr,2},
+ {span,2},{cspan,2},{substr,'_'},{tokens,2},
+ {chars,'_'},
+ {copies,2},{words,'_'},{strip,'_'},
+ {sub_word,'_'},{left,'_'},{right,'_'},
+ {sub_string,'_'},{centre,'_'},{join,2},
+ {to_upper,1}, {to_lower,1}
+ ]).
+
%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: string(),
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
new file mode 100644
index 0000000000..22212da222
--- /dev/null
+++ b/lib/stdlib/src/uri_string.erl
@@ -0,0 +1,1842 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2017. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%
+%% [RFC 3986, Chapter 3. Syntax Components]
+%%
+%% The generic URI syntax consists of a hierarchical sequence of
+%% components referred to as the scheme, authority, path, query, and
+%% fragment.
+%%
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%%
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%
+%% The scheme and path components are required, though the path may be
+%% empty (no characters). When authority is present, the path must
+%% either be empty or begin with a slash ("/") character. When
+%% authority is not present, the path cannot begin with two slash
+%% characters ("//"). These restrictions result in five different ABNF
+%% rules for a path (Section 3.3), only one of which will match any
+%% given URI reference.
+%%
+%% The following are two example URIs and their component parts:
+%%
+%% foo://example.com:8042/over/there?name=ferret#nose
+%% \_/ \______________/\_________/ \_________/ \__/
+%% | | | | |
+%% scheme authority path query fragment
+%% | _____________________|__
+%% / \ / \
+%% urn:example:animal:ferret:nose
+%%
+%%
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%
+%%
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%
+%%
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%
+%%
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%
+%%
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%
+%%
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%
+%%
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%
+%%
+%% [RFC 3986, Chapter 4.3. Absolute URI]
+%%
+%% Some protocol elements allow only the absolute form of a URI without
+%% a fragment identifier. For example, defining a base URI for later
+%% use by relative references calls for an absolute-URI syntax rule that
+%% does not allow a fragment.
+%%
+%% absolute-URI = scheme ":" hier-part [ "?" query ]
+%%
+-module(uri_string).
+
+%%-------------------------------------------------------------------------
+%% External API
+%%-------------------------------------------------------------------------
+-export([normalize/1, parse/1,
+ recompose/1, transcode/2]).
+-export_type([error/0, uri_map/0, uri_string/0]).
+
+
+%%-------------------------------------------------------------------------
+%% Internal API
+%%-------------------------------------------------------------------------
+-export([is_host/1, is_path/1]). % suppress warnings
+
+
+%%-------------------------------------------------------------------------
+%% Macros
+%%-------------------------------------------------------------------------
+-define(CHAR(Char), <<Char/utf8>>).
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/binary>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+
+-define(DEC2HEX(X),
+ if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0;
+ ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10
+ end).
+
+-define(HEX2DEC(X),
+ if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0;
+ ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10;
+ ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10
+ end).
+
+
+%%%=========================================================================
+%%% API
+%%%=========================================================================
+
+%%-------------------------------------------------------------------------
+%% URI compliant with RFC 3986
+%% ASCII %x21 - %x7A ("!" - "z") except
+%% %x34 " double quote
+%% %x60 < less than
+%% %x62 > greater than
+%% %x92 \ backslash
+%% %x94 ^ caret / circumflex
+%% %x96 ` grave / accent
+%%-------------------------------------------------------------------------
+-type uri_string() :: iodata().
+-type error() :: {error, atom(), term()}.
+
+
+%%-------------------------------------------------------------------------
+%% RFC 3986, Chapter 3. Syntax Components
+%%-------------------------------------------------------------------------
+-type uri_map() ::
+ #{fragment => unicode:chardata(),
+ host => unicode:chardata(),
+ path => unicode:chardata(),
+ port => non_neg_integer() | undefined,
+ query => unicode:chardata(),
+ scheme => unicode:chardata(),
+ userinfo => unicode:chardata()} | #{}.
+
+
+%%-------------------------------------------------------------------------
+%% Normalize URIs
+%%-------------------------------------------------------------------------
+-spec normalize(URIString) -> NormalizedURI when
+ URIString :: uri_string(),
+ NormalizedURI :: uri_string().
+normalize(URIString) ->
+ %% Percent-encoding normalization and case normalization for
+ %% percent-encoded triplets are achieved by running parse and
+ %% recompose on the input URI string.
+ recompose(
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(
+ parse(URIString))))).
+
+
+%%-------------------------------------------------------------------------
+%% Parse URIs
+%%-------------------------------------------------------------------------
+-spec parse(URIString) -> URIMap when
+ URIString :: uri_string(),
+ URIMap :: uri_map()
+ | error().
+parse(URIString) when is_binary(URIString) ->
+ try parse_uri_reference(URIString, #{})
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+parse(URIString) when is_list(URIString) ->
+ try
+ Binary = unicode:characters_to_binary(URIString),
+ Map = parse_uri_reference(Binary, #{}),
+ convert_mapfields_to_list(Map)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Recompose URIs
+%%-------------------------------------------------------------------------
+-spec recompose(URIMap) -> URIString when
+ URIMap :: uri_map(),
+ URIString :: uri_string()
+ | error().
+recompose(Map) ->
+ case is_valid_map(Map) of
+ false ->
+ {error, invalid_map, Map};
+ true ->
+ try
+ T0 = update_scheme(Map, empty),
+ T1 = update_userinfo(Map, T0),
+ T2 = update_host(Map, T1),
+ T3 = update_port(Map, T2),
+ T4 = update_path(Map, T3),
+ T5 = update_query(Map, T4),
+ update_fragment(Map, T5)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Transcode URIs
+%%-------------------------------------------------------------------------
+-spec transcode(URIString, Options) -> Result when
+ URIString :: uri_string(),
+ Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}],
+ Result :: uri_string()
+ | error().
+transcode(URIString, Options) when is_binary(URIString) ->
+ try
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ List = convert_to_list(URIString, InEnc),
+ Output = transcode(List, [], InEnc, OutEnc),
+ convert_to_binary(Output, utf8, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+transcode(URIString, Options) when is_list(URIString) ->
+ InEnc = proplists:get_value(in_encoding, Options, utf8),
+ OutEnc = proplists:get_value(out_encoding, Options, utf8),
+ Flattened = flatten_list(URIString, InEnc),
+ try transcode(Flattened, [], InEnc, OutEnc)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
+%%-------------------------------------------------------------------------
+%% Converts Map fields to lists
+%%-------------------------------------------------------------------------
+convert_mapfields_to_list(Map) ->
+ Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V);
+ (_, V) -> V end,
+ maps:map(Fun, Map).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.1. URI Reference]
+%%
+%% URI-reference is used to denote the most common usage of a resource
+%% identifier.
+%%
+%% URI-reference = URI / relative-ref
+%%-------------------------------------------------------------------------
+-spec parse_uri_reference(binary(), uri_map()) -> uri_map().
+parse_uri_reference(<<>>, _) -> #{path => <<>>};
+parse_uri_reference(URIString, URI) ->
+ try parse_scheme_start(URIString, URI)
+ catch
+ throw:{_,_,_} ->
+ parse_relative_part(URIString, URI)
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 4.2. Relative Reference]
+%%
+%% A relative reference takes advantage of the hierarchical syntax
+%% (Section 1.2.3) to express a URI reference relative to the name space
+%% of another hierarchical URI.
+%%
+%% relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+%%
+%% relative-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-noscheme
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_relative_part(binary(), uri_map()) -> uri_map().
+parse_relative_part(?STRING_REST("//", Rest), URI) ->
+ %% Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{userinfo => decode_userinfo(Userinfo)}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{host => decode_host(remove_brackets(Host))}
+ end;
+parse_relative_part(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST($/, Path))};
+parse_relative_part(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{query => decode_query(Query)};
+parse_relative_part(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{fragment => decode_fragment(Fragment)};
+parse_relative_part(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true ->
+ {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme
+ Path = calculate_parsed_part(Rest, T),
+ URI1#{path => decode_path(?STRING_REST(Char, Path))};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.3. Path]
+%%
+%% The path component contains data, usually organized in hierarchical
+%% form, that, along with data in the non-hierarchical query component
+%% (Section 3.4), serves to identify a resource within the scope of the
+%% URI's scheme and naming authority (if any). The path is terminated
+%% by the first question mark ("?") or number sign ("#") character, or
+%% by the end of the URI.
+%%
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%%
+%% path-abempty = *( "/" segment )
+%% path-absolute = "/" [ segment-nz *( "/" segment ) ]
+%% path-noscheme = segment-nz-nc *( "/" segment )
+%% path-rootless = segment-nz *( "/" segment )
+%% path-empty = 0<pchar>
+%% segment = *pchar
+%% segment-nz = 1*pchar
+%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+%% ; non-zero-length segment without any colon ":"
+%%
+%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% path-abempty
+%%-------------------------------------------------------------------------
+-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment(?STRING_REST(Char, Rest), URI) ->
+ case is_pchar(Char) of
+ true -> parse_segment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% path-noscheme
+%%-------------------------------------------------------------------------
+-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_segment_nz_nc(?STRING_REST($/, Rest), URI) ->
+ parse_segment(Rest, URI); % segment
+parse_segment_nz_nc(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_segment_nz_nc(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) ->
+ case is_segment_nz_nc(Char) of
+ true -> parse_segment_nz_nc(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_segment_nz_nc(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is pchar.
+-spec is_pchar(char()) -> boolean().
+is_pchar($%) -> true; % pct-encoded
+is_pchar($:) -> true;
+is_pchar($@) -> true;
+is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is segment_nz_nc.
+-spec is_segment_nz_nc(char()) -> boolean().
+is_segment_nz_nc($%) -> true; % pct-encoded
+is_segment_nz_nc($@) -> true;
+is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.1. Scheme]
+%%
+%% Each URI begins with a scheme name that refers to a specification for
+%% assigning identifiers within that scheme.
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%%-------------------------------------------------------------------------
+-spec parse_scheme_start(binary(), uri_map()) -> uri_map().
+parse_scheme_start(?STRING_REST(Char, Rest), URI) ->
+ case is_alpha(Char) of
+ true -> {T, URI1} = parse_scheme(Rest, URI),
+ Scheme = calculate_parsed_scheme(Rest, T),
+ URI2 = maybe_add_path(URI1),
+ URI2#{scheme => ?STRING_REST(Char, Scheme)};
+ false -> throw({error,invalid_uri,[Char]})
+ end.
+
+%% Add path component if it missing after parsing the URI.
+%% According to the URI specification there is always a
+%% path component in every URI-reference and it can be
+%% empty.
+maybe_add_path(Map) ->
+ case maps:is_key(path, Map) of
+ false ->
+ Map#{path => <<>>};
+ _Else ->
+ Map
+ end.
+
+
+-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_scheme(?STRING_REST($:, Rest), URI) ->
+ {_, URI1} = parse_hier(Rest, URI),
+ {Rest, URI1};
+parse_scheme(?STRING_REST(Char, Rest), URI) ->
+ case is_scheme(Char) of
+ true -> parse_scheme(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_scheme(?STRING_EMPTY, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in scheme
+-spec is_scheme(char()) -> boolean().
+is_scheme($+) -> true;
+is_scheme($-) -> true;
+is_scheme($.) -> true;
+is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+
+%%-------------------------------------------------------------------------
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%-------------------------------------------------------------------------
+-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_hier(?STRING_REST("//", Rest), URI) ->
+ % Parse userinfo - "//" is NOT part of authority
+ try parse_userinfo(Rest, URI) of
+ {T, URI1} ->
+ Userinfo = calculate_parsed_userinfo(Rest, T),
+ {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}}
+ catch
+ throw:{_,_,_} ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}}
+ end;
+parse_hier(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-absolute
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_hier(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_hier(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless
+ case is_pchar(Char) of
+ true -> % segment_nz
+ {T, URI1} = parse_segment(Rest, URI),
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}};
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_hier(?STRING_EMPTY, URI) ->
+ {<<>>, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2. Authority]
+%%
+%% Many URI schemes include a hierarchical element for a naming
+%% authority so that governance of the name space defined by the
+%% remainder of the URI is delegated to that authority (which may, in
+%% turn, delegate it further).
+%%
+%% The authority component is preceded by a double slash ("//") and is
+%% terminated by the next slash ("/"), question mark ("?"), or number
+%% sign ("#") character, or by the end of the URI.
+%%
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%%
+%%
+%% [RFC 3986, Chapter 3.2.1. User Information]
+%%
+%% The userinfo subcomponent may consist of a user name and, optionally,
+%% scheme-specific information about how to gain authorization to access
+%% the resource. The user information, if present, is followed by a
+%% commercial at-sign ("@") that delimits it from the host.
+%%
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%-------------------------------------------------------------------------
+-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_userinfo(?CHAR($@), URI) ->
+ {?STRING_EMPTY, URI#{host => <<>>}};
+parse_userinfo(?STRING_REST($@, Rest), URI) ->
+ {T, URI1} = parse_host(Rest, URI),
+ Host = calculate_parsed_host_port(Rest, T),
+ {Rest, URI1#{host => decode_host(remove_brackets(Host))}};
+parse_userinfo(?STRING_REST(Char, Rest), URI) ->
+ case is_userinfo(Char) of
+ true -> parse_userinfo(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_userinfo(?STRING_EMPTY, _URI) ->
+ %% URI cannot end in userinfo state
+ throw({error,invalid_uri,<<>>}).
+
+
+%% Check if char is allowed in userinfo
+-spec is_userinfo(char()) -> boolean().
+is_userinfo($%) -> true; % pct-encoded
+is_userinfo($:) -> true;
+is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Host]
+%%
+%% The host subcomponent of authority is identified by an IP literal
+%% encapsulated within square brackets, an IPv4 address in dotted-
+%% decimal form, or a registered name.
+%%
+%% host = IP-literal / IPv4address / reg-name
+%%
+%% IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+%%
+%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+%%
+%% IPv6address = 6( h16 ":" ) ls32
+%% / "::" 5( h16 ":" ) ls32
+%% / [ h16 ] "::" 4( h16 ":" ) ls32
+%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+%% / [ *4( h16 ":" ) h16 ] "::" ls32
+%% / [ *5( h16 ":" ) h16 ] "::" h16
+%% / [ *6( h16 ":" ) h16 ] "::"
+%%
+%% ls32 = ( h16 ":" h16 ) / IPv4address
+%% ; least-significant 32 bits of address
+%%
+%% h16 = 1*4HEXDIG
+%% ; 16 bits of address represented in hexadecimal
+%%
+%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+%%
+%% dec-octet = DIGIT ; 0-9
+%% / %x31-39 DIGIT ; 10-99
+%% / "1" 2DIGIT ; 100-199
+%% / "2" %x30-34 DIGIT ; 200-249
+%% / "25" %x30-35 ; 250-255
+%%
+%% reg-name = *( unreserved / pct-encoded / sub-delims )
+%%-------------------------------------------------------------------------
+-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_host(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_host(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_host(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_host(?STRING_REST($[, Rest), URI) ->
+ parse_ipv6_bin(Rest, [], URI);
+parse_host(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_host(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_ipv4_bin(Rest, [Char], URI);
+ false -> parse_reg_name(?STRING_REST(Char, Rest), URI)
+ end;
+parse_host(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+-spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_reg_name(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_reg_name(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_reg_name(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_reg_name(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_reg_name(?STRING_REST(Char, Rest), URI) ->
+ case is_reg_name(Char) of
+ true -> parse_reg_name(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_reg_name(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+%% Check if char is allowed in reg-name
+-spec is_reg_name(char()) -> boolean().
+is_reg_name($%) -> true;
+is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+
+-spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv4(Char) of
+ true -> parse_ipv4_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv4_bin(?STRING_EMPTY, Acc, URI) ->
+ _ = validate_ipv4_address(lists:reverse(Acc)),
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in IPv4 addresses
+-spec is_ipv4(char()) -> boolean().
+is_ipv4($.) -> true;
+is_ipv4(Char) -> is_digit(Char).
+
+-spec validate_ipv4_address(list()) -> list().
+validate_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+-spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) ->
+ _ = validate_ipv6_address(lists:reverse(Acc)),
+ parse_ipv6_bin_end(Rest, URI);
+parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin(Rest, [Char|Acc], URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) ->
+ throw({error,invalid_uri,<<>>}).
+
+%% Check if char is allowed in IPv6 addresses
+-spec is_ipv6(char()) -> boolean().
+is_ipv6($:) -> true;
+is_ipv6($.) -> true;
+is_ipv6(Char) -> is_hex_digit(Char).
+
+
+-spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) ->
+ {T, URI1} = parse_port(Rest, URI),
+ H = calculate_parsed_host_port(Rest, T),
+ Port = get_port(H),
+ {Rest, URI1#{port => Port}};
+parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) ->
+ case is_ipv6(Char) of
+ true -> parse_ipv6_bin_end(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_ipv6_bin_end(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+-spec validate_ipv6_address(list()) -> list().
+validate_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> Addr;
+ {error, _} -> throw({error,invalid_uri,Addr})
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.2.2. Port]
+%%
+%% The port subcomponent of authority is designated by an optional port
+%% number in decimal following the host and delimited from it by a
+%% single colon (":") character.
+%%
+%% port = *DIGIT
+%%-------------------------------------------------------------------------
+-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_port(?STRING_REST($/, Rest), URI) ->
+ {T, URI1} = parse_segment(Rest, URI), % path-abempty
+ Path = calculate_parsed_part(Rest, T),
+ {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}};
+parse_port(?STRING_REST($?, Rest), URI) ->
+ {T, URI1} = parse_query(Rest, URI), % path-empty ?query
+ Query = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{query => decode_query(Query)}};
+parse_port(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI), % path-empty
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_port(?STRING_REST(Char, Rest), URI) ->
+ case is_digit(Char) of
+ true -> parse_port(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_port(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.4. Query]
+%%
+%% The query component contains non-hierarchical data that, along with
+%% data in the path component (Section 3.3), serves to identify a
+%% resource within the scope of the URI's scheme and naming authority
+%% (if any). The query component is indicated by the first question
+%% mark ("?") character and terminated by a number sign ("#") character
+%% or by the end of the URI.
+%%
+%% query = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_query(?STRING_REST($#, Rest), URI) ->
+ {T, URI1} = parse_fragment(Rest, URI),
+ Fragment = calculate_parsed_query_fragment(Rest, T),
+ {Rest, URI1#{fragment => decode_fragment(Fragment)}};
+parse_query(?STRING_REST(Char, Rest), URI) ->
+ case is_query(Char) of
+ true -> parse_query(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_query(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in query
+-spec is_query(char()) -> boolean().
+is_query($/) -> true;
+is_query($?) -> true;
+is_query(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 3.5. Fragment]
+%%
+%% The fragment identifier component of a URI allows indirect
+%% identification of a secondary resource by reference to a primary
+%% resource and additional identifying information.
+%%
+%% fragment = *( pchar / "/" / "?" )
+%%-------------------------------------------------------------------------
+-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}.
+parse_fragment(?STRING_REST(Char, Rest), URI) ->
+ case is_fragment(Char) of
+ true -> parse_fragment(Rest, URI);
+ false -> throw({error,invalid_uri,[Char]})
+ end;
+parse_fragment(?STRING_EMPTY, URI) ->
+ {?STRING_EMPTY, URI}.
+
+
+%% Check if char is allowed in fragment
+-spec is_fragment(char()) -> boolean().
+is_fragment($/) -> true;
+is_fragment($?) -> true;
+is_fragment(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.2. Reserved Characters]
+%%
+%% reserved = gen-delims / sub-delims
+%%
+%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%
+%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+%% / "*" / "+" / "," / ";" / "="
+%%
+%%-------------------------------------------------------------------------
+
+%% Check if char is sub-delim.
+-spec is_sub_delim(char()) -> boolean().
+is_sub_delim($!) -> true;
+is_sub_delim($$) -> true;
+is_sub_delim($&) -> true;
+is_sub_delim($') -> true;
+is_sub_delim($() -> true;
+is_sub_delim($)) -> true;
+
+is_sub_delim($*) -> true;
+is_sub_delim($+) -> true;
+is_sub_delim($,) -> true;
+is_sub_delim($;) -> true;
+is_sub_delim($=) -> true;
+is_sub_delim(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.3. Unreserved Characters]
+%%
+%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+%%
+%%-------------------------------------------------------------------------
+-spec is_unreserved(char()) -> boolean().
+is_unreserved($-) -> true;
+is_unreserved($.) -> true;
+is_unreserved($_) -> true;
+is_unreserved($~) -> true;
+is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char).
+
+-spec is_alpha(char()) -> boolean().
+is_alpha(C)
+ when $A =< C, C =< $Z;
+ $a =< C, C =< $z -> true;
+is_alpha(_) -> false.
+
+-spec is_digit(char()) -> boolean().
+is_digit(C)
+ when $0 =< C, C =< $9 -> true;
+is_digit(_) -> false.
+
+-spec is_hex_digit(char()) -> boolean().
+is_hex_digit(C)
+ when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true;
+is_hex_digit(_) -> false.
+
+
+%% Remove enclosing brackets from binary
+-spec remove_brackets(binary()) -> binary().
+remove_brackets(<<$[/utf8, Rest/binary>>) ->
+ {H,T} = split_binary(Rest, byte_size(Rest) - 1),
+ case T =:= <<$]/utf8>> of
+ true -> H;
+ false -> Rest
+ end;
+remove_brackets(Addr) -> Addr.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for calculating the parsed binary.
+%%-------------------------------------------------------------------------
+-spec calculate_parsed_scheme(binary(), binary()) -> binary().
+calculate_parsed_scheme(Input, <<>>) ->
+ strip_last_char(Input, [$:]);
+calculate_parsed_scheme(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_part(binary(), binary()) -> binary().
+calculate_parsed_part(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#]);
+calculate_parsed_part(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_userinfo(binary(), binary()) -> binary().
+calculate_parsed_userinfo(Input, <<>>) ->
+ strip_last_char(Input, [$?,$#,$@]);
+calculate_parsed_userinfo(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+-spec calculate_parsed_host_port(binary(), binary()) -> binary().
+calculate_parsed_host_port(Input, <<>>) ->
+ strip_last_char(Input, [$:,$?,$#,$/]);
+calculate_parsed_host_port(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+calculate_parsed_query_fragment(Input, <<>>) ->
+ strip_last_char(Input, [$#]);
+calculate_parsed_query_fragment(Input, Unparsed) ->
+ get_parsed_binary(Input, Unparsed).
+
+
+get_port(<<>>) ->
+ undefined;
+get_port(B) ->
+ try binary_to_integer(B)
+ catch
+ error:badarg ->
+ throw({error, invalid_uri, B})
+ end.
+
+
+%% Strip last char if it is in list
+%%
+%% This function is optimized for speed: parse/1 is about 10% faster than
+%% with an alternative implementation based on lists and sets.
+strip_last_char(<<>>, _) -> <<>>;
+strip_last_char(Input, [C0]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end;
+strip_last_char(Input, [C0,C1,C2,C3]) ->
+ case binary:last(Input) of
+ C0 ->
+ init_binary(Input);
+ C1 ->
+ init_binary(Input);
+ C2 ->
+ init_binary(Input);
+ C3 ->
+ init_binary(Input);
+ _Else ->
+ Input
+ end.
+
+
+%% Get parsed binary
+get_parsed_binary(Input, Unparsed) ->
+ {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)),
+ First.
+
+
+%% Return all bytes of the binary except the last one. The binary must be non-empty.
+init_binary(B) ->
+ {Init, _} =
+ split_binary(B, byte_size(B) - 1),
+ Init.
+
+
+%% Returns the size of a binary exluding the first element.
+%% Used in calls to split_binary().
+-spec byte_size_exl_head(binary()) -> number().
+byte_size_exl_head(<<>>) -> 0;
+byte_size_exl_head(Binary) -> byte_size(Binary) + 1.
+
+
+%%-------------------------------------------------------------------------
+%% [RFC 3986, Chapter 2.1. Percent-Encoding]
+%%
+%% A percent-encoding mechanism is used to represent a data octet in a
+%% component when that octet's corresponding character is outside the
+%% allowed set or is being used as a delimiter of, or within, the
+%% component. A percent-encoded octet is encoded as a character
+%% triplet, consisting of the percent character "%" followed by the two
+%% hexadecimal digits representing that octet's numeric value. For
+%% example, "%20" is the percent-encoding for the binary octet
+%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+%% character (SP). Section 2.4 describes when percent-encoding and
+%% decoding is applied.
+%%
+%% pct-encoded = "%" HEXDIG HEXDIG
+%%-------------------------------------------------------------------------
+-spec decode_userinfo(binary()) -> binary().
+decode_userinfo(Cs) ->
+ check_utf8(decode(Cs, fun is_userinfo/1, <<>>)).
+
+-spec decode_host(binary()) -> binary().
+decode_host(Cs) ->
+ check_utf8(decode(Cs, fun is_host/1, <<>>)).
+
+-spec decode_path(binary()) -> binary().
+decode_path(Cs) ->
+ check_utf8(decode(Cs, fun is_path/1, <<>>)).
+
+-spec decode_query(binary()) -> binary().
+decode_query(Cs) ->
+ check_utf8(decode(Cs, fun is_query/1, <<>>)).
+
+-spec decode_fragment(binary()) -> binary().
+decode_fragment(Cs) ->
+ check_utf8(decode(Cs, fun is_fragment/1, <<>>)).
+
+
+%% Returns Cs if it is utf8 encoded.
+check_utf8(Cs) ->
+ case unicode:characters_to_list(Cs) of
+ {incomplete,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ {error,_,_} ->
+ throw({error,invalid_utf8,Cs});
+ _ -> Cs
+ end.
+
+%%-------------------------------------------------------------------------
+%% Percent-encode
+%%-------------------------------------------------------------------------
+
+%% Only validates as scheme cannot have percent-encoded characters
+-spec encode_scheme(list()|binary()) -> list() | binary().
+encode_scheme([]) ->
+ throw({error,invalid_scheme,""});
+encode_scheme(<<>>) ->
+ throw({error,invalid_scheme,<<>>});
+encode_scheme(Scheme) ->
+ case validate_scheme(Scheme) of
+ true -> Scheme;
+ false -> throw({error,invalid_scheme,Scheme})
+ end.
+
+-spec encode_userinfo(list()|binary()) -> list() | binary().
+encode_userinfo(Cs) ->
+ encode(Cs, fun is_userinfo/1).
+
+-spec encode_host(list()|binary()) -> list() | binary().
+encode_host(Cs) ->
+ case classify_host(Cs) of
+ regname -> Cs;
+ ipv4 -> Cs;
+ ipv6 -> bracket_ipv6(Cs);
+ other -> encode(Cs, fun is_reg_name/1)
+ end.
+
+-spec encode_path(list()|binary()) -> list() | binary().
+encode_path(Cs) ->
+ encode(Cs, fun is_path/1).
+
+-spec encode_query(list()|binary()) -> list() | binary().
+encode_query(Cs) ->
+ encode(Cs, fun is_query/1).
+
+-spec encode_fragment(list()|binary()) -> list() | binary().
+encode_fragment(Cs) ->
+ encode(Cs, fun is_fragment/1).
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for percent-decode
+%%-------------------------------------------------------------------------
+decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ B = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ decode(Cs, Fun, <<Acc/binary, B>>);
+ false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>})
+ end;
+decode(<<C,Cs/binary>>, Fun, Acc) ->
+ case Fun(C) of
+ true -> decode(Cs, Fun, <<Acc/binary, C>>);
+ false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>})
+ end;
+decode(<<>>, _Fun, Acc) ->
+ Acc.
+
+%% Check if char is allowed in host
+-spec is_host(char()) -> boolean().
+is_host($:) -> true;
+is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char).
+
+%% Check if char is allowed in path
+-spec is_path(char()) -> boolean().
+is_path($/) -> true;
+is_path(Char) -> is_pchar(Char).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for percent-encode
+%%-------------------------------------------------------------------------
+-spec encode(list()|binary(), fun()) -> list() | binary().
+encode(Component, Fun) when is_list(Component) ->
+ B = unicode:characters_to_binary(Component),
+ unicode:characters_to_list(encode(B, Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) ->
+ encode(Component, Fun, <<>>).
+%%
+encode(<<Char/utf8, Rest/binary>>, Fun, Acc) ->
+ C = encode_codepoint_binary(Char, Fun),
+ encode(Rest, Fun, <<Acc/binary,C/binary>>);
+encode(<<Char, Rest/binary>>, _Fun, _Acc) ->
+ throw({error,invalid_input,<<Char,Rest/binary>>});
+encode(<<>>, _Fun, Acc) ->
+ Acc.
+
+
+-spec encode_codepoint_binary(integer(), fun()) -> binary().
+encode_codepoint_binary(C, Fun) ->
+ case Fun(C) of
+ false -> percent_encode_binary(C);
+ true -> <<C>>
+ end.
+
+
+-spec percent_encode_binary(integer()) -> binary().
+percent_encode_binary(Code) ->
+ percent_encode_binary(<<Code/utf8>>, <<>>).
+
+
+percent_encode_binary(<<A:4,B:4,Rest/binary>>, Acc) ->
+ percent_encode_binary(Rest, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>);
+percent_encode_binary(<<>>, Acc) ->
+ Acc.
+
+
+%%-------------------------------------------------------------------------
+%%-------------------------------------------------------------------------
+validate_scheme([]) -> true;
+validate_scheme([H|T]) ->
+ case is_scheme(H) of
+ true -> validate_scheme(T);
+ false -> false
+ end;
+validate_scheme(<<>>) -> true;
+validate_scheme(<<H, Rest/binary>>) ->
+ case is_scheme(H) of
+ true -> validate_scheme(Rest);
+ false -> false
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Classifies hostname into the following categories:
+%% regname, ipv4 - address does not contain reserved characters to be
+%% percent-encoded
+%% ipv6 - address does not contain reserved characters but it shall be
+%% encolsed in brackets
+%% other - address shall be percent-encoded
+%%-------------------------------------------------------------------------
+classify_host([]) -> other;
+classify_host(Addr) when is_binary(Addr) ->
+ A = unicode:characters_to_list(Addr),
+ classify_host_ipv6(A);
+classify_host(Addr) ->
+ classify_host_ipv6(Addr).
+
+classify_host_ipv6(Addr) ->
+ case is_ipv6_address(Addr) of
+ true -> ipv6;
+ false -> classify_host_ipv4(Addr)
+ end.
+
+classify_host_ipv4(Addr) ->
+ case is_ipv4_address(Addr) of
+ true -> ipv4;
+ false -> classify_host_regname(Addr)
+ end.
+
+classify_host_regname([]) -> regname;
+classify_host_regname([H|T]) ->
+ case is_reg_name(H) of
+ true -> classify_host_regname(T);
+ false -> other
+ end.
+
+is_ipv4_address(Addr) ->
+ case inet:parse_ipv4strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+is_ipv6_address(Addr) ->
+ case inet:parse_ipv6strict_address(Addr) of
+ {ok, _} -> true;
+ {error, _} -> false
+ end.
+
+bracket_ipv6(Addr) when is_binary(Addr) ->
+ concat(<<$[,Addr/binary>>,<<$]>>);
+bracket_ipv6(Addr) when is_list(Addr) ->
+ [$[|Addr] ++ "]".
+
+
+%%-------------------------------------------------------------------------
+%% Helper funtions for recompose
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Checks if input Map has valid combination of fields that can be
+%% recomposed into a URI.
+%%
+%% The implementation is based on a decision tree that fulfills the
+%% following rules:
+%% - 'path' shall always be present in the input map
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%% - 'host' shall be present in the input map when 'path' starts with
+%% two slashes ("//")
+%% path = path-abempty ; begins with "/" or is empty
+%% / path-absolute ; begins with "/" but not "//"
+%% / path-noscheme ; begins with a non-colon segment
+%% / path-rootless ; begins with a segment
+%% / path-empty ; zero characters
+%% path-abempty = *( "/" segment )
+%% segment = *pchar
+%% - 'host' shall be present if userinfo or port is present in input map
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%% - All fields shall be valid (scheme, userinfo, host, port, path, query
+%% or fragment).
+%%-------------------------------------------------------------------------
+is_valid_map(#{path := Path} = Map) ->
+ ((starts_with_two_slash(Path) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map))
+ orelse
+ (maps:is_key(port, Map) andalso is_valid_map_host(Map))
+ orelse
+ all_fields_valid(Map));
+is_valid_map(#{}) ->
+ false.
+
+
+is_valid_map_host(Map) ->
+ maps:is_key(host, Map) andalso all_fields_valid(Map).
+
+
+all_fields_valid(Map) ->
+ Fun = fun(scheme, _, Acc) -> Acc;
+ (userinfo, _, Acc) -> Acc;
+ (host, _, Acc) -> Acc;
+ (port, _, Acc) -> Acc;
+ (path, _, Acc) -> Acc;
+ (query, _, Acc) -> Acc;
+ (fragment, _, Acc) -> Acc;
+ (_, _, _) -> false
+ end,
+ maps:fold(Fun, true, Map).
+
+
+starts_with_two_slash([$/,$/|_]) ->
+ true;
+starts_with_two_slash(?STRING_REST("//", _)) ->
+ true;
+starts_with_two_slash(_) -> false.
+
+
+update_scheme(#{scheme := Scheme}, _) ->
+ add_colon_postfix(encode_scheme(Scheme));
+update_scheme(#{}, _) ->
+ empty.
+
+
+update_userinfo(#{userinfo := Userinfo}, empty) ->
+ add_auth_prefix(encode_userinfo(Userinfo));
+update_userinfo(#{userinfo := Userinfo}, URI) ->
+ concat(URI,add_auth_prefix(encode_userinfo(Userinfo)));
+update_userinfo(#{}, empty) ->
+ empty;
+update_userinfo(#{}, URI) ->
+ URI.
+
+
+update_host(#{host := Host}, empty) ->
+ add_auth_prefix(encode_host(Host));
+update_host(#{host := Host} = Map, URI) ->
+ concat(URI,add_host_prefix(Map, encode_host(Host)));
+update_host(#{}, empty) ->
+ empty;
+update_host(#{}, URI) ->
+ URI.
+
+
+%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI
+update_port(#{port := undefined}, URI) ->
+ concat(URI, <<":">>);
+update_port(#{port := Port}, URI) ->
+ concat(URI,add_colon(encode_port(Port)));
+update_port(#{}, URI) ->
+ URI.
+
+
+update_path(#{path := Path}, empty) ->
+ encode_path(Path);
+update_path(#{path := Path}, URI) ->
+ concat(URI,encode_path(Path));
+update_path(#{}, empty) ->
+ empty;
+update_path(#{}, URI) ->
+ URI.
+
+
+update_query(#{query := Query}, empty) ->
+ encode_query(Query);
+update_query(#{query := Query}, URI) ->
+ concat(URI,add_question_mark(encode_query(Query)));
+update_query(#{}, empty) ->
+ empty;
+update_query(#{}, URI) ->
+ URI.
+
+
+update_fragment(#{fragment := Fragment}, empty) ->
+ add_hashmark(encode_fragment(Fragment));
+update_fragment(#{fragment := Fragment}, URI) ->
+ concat(URI,add_hashmark(encode_fragment(Fragment)));
+update_fragment(#{}, empty) ->
+ "";
+update_fragment(#{}, URI) ->
+ URI.
+
+%%-------------------------------------------------------------------------
+%% Concatenates its arguments that can be lists and binaries.
+%% The result is a list if at least one of its argument is a list and
+%% binary otherwise.
+%%-------------------------------------------------------------------------
+concat(A, B) when is_binary(A), is_binary(B) ->
+ <<A/binary, B/binary>>;
+concat(A, B) when is_binary(A), is_list(B) ->
+ unicode:characters_to_list(A) ++ B;
+concat(A, B) when is_list(A) ->
+ A ++ maybe_to_list(B).
+
+add_hashmark(Comp) when is_binary(Comp) ->
+ <<$#, Comp/binary>>;
+add_hashmark(Comp) when is_list(Comp) ->
+ [$#|Comp].
+
+add_question_mark(Comp) when is_binary(Comp) ->
+ <<$?, Comp/binary>>;
+add_question_mark(Comp) when is_list(Comp) ->
+ [$?|Comp].
+
+add_colon(Comp) when is_binary(Comp) ->
+ <<$:, Comp/binary>>.
+
+add_colon_postfix(Comp) when is_binary(Comp) ->
+ <<Comp/binary,$:>>;
+add_colon_postfix(Comp) when is_list(Comp) ->
+ Comp ++ ":".
+
+add_auth_prefix(Comp) when is_binary(Comp) ->
+ <<"//", Comp/binary>>;
+add_auth_prefix(Comp) when is_list(Comp) ->
+ [$/,$/|Comp].
+
+add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) ->
+ <<$@,Host/binary>>;
+add_host_prefix(#{}, Host) when is_binary(Host) ->
+ <<"//",Host/binary>>;
+add_host_prefix(#{userinfo := _}, Host) when is_list(Host) ->
+ [$@|Host];
+add_host_prefix(#{}, Host) when is_list(Host) ->
+ [$/,$/|Host].
+
+maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp);
+maybe_to_list(Comp) -> Comp.
+
+encode_port(Port) ->
+ integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and append the accumulator
+%% "6B%3C%" + "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+ transcode(L, Acc, [], InEnc, OutEnc).
+%%
+transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) ->
+ transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding);
+transcode([C|Rest], Acc, List, InEncoding, OutEncoding) ->
+ transcode(Rest, Acc, [C|List], InEncoding, OutEncoding);
+transcode([], Acc, List, _InEncoding, _OutEncoding) ->
+ lists:reverse(List ++ Acc).
+
+
+%% Transcode percent-encoded segment
+transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding);
+ false -> throw({error, invalid_percent_encoding,L})
+ end;
+transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)),
+ transcode(L, Out ++ Acc, [], InEncoding, OutEncoding);
+transcode_pct([], Acc, B, InEncoding, OutEncoding) ->
+ OutBinary = convert_to_binary(B, InEncoding, OutEncoding),
+ PctEncUtf8 = percent_encode_segment(OutBinary),
+ Out = convert_to_list(PctEncUtf8, utf8),
+ lists:reverse(Acc) ++ Out.
+
+
+%% Convert to binary
+convert_to_binary(Binary, InEncoding, OutEncoding) ->
+ case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Convert to list
+convert_to_list(Binary, InEncoding) ->
+ case unicode:characters_to_list(Binary, InEncoding) of
+ {error, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ {incomplete, _List, RestData} ->
+ throw({error, invalid_input, RestData});
+ Result ->
+ Result
+ end.
+
+
+%% Flatten input list
+flatten_list([], _) ->
+ [];
+flatten_list(L, InEnc) ->
+ flatten_list(L, InEnc, []).
+%%
+flatten_list([H|T], InEnc, Acc) when is_binary(H) ->
+ L = convert_to_list(H, InEnc),
+ flatten_list(T, InEnc, lists:reverse(L) ++ Acc);
+flatten_list([H|T], InEnc, Acc) when is_list(H) ->
+ flatten_list(H ++ T, InEnc, Acc);
+flatten_list([H|T], InEnc, Acc) ->
+ flatten_list(T, InEnc, [H|Acc]);
+flatten_list([], _InEnc, Acc) ->
+ lists:reverse(Acc);
+flatten_list(Arg, _, _) ->
+ throw({error, invalid_input, Arg}).
+
+
+percent_encode_segment(Segment) ->
+ percent_encode_binary(Segment, <<>>).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for normalize
+%%-------------------------------------------------------------------------
+
+%% 6.2.2.1. Case Normalization
+normalize_case(#{scheme := Scheme, host := Host} = Map) ->
+ Map#{scheme => to_lower(Scheme),
+ host => to_lower(Host)};
+normalize_case(#{host := Host} = Map) ->
+ Map#{host => to_lower(Host)};
+normalize_case(#{scheme := Scheme} = Map) ->
+ Map#{scheme => to_lower(Scheme)};
+normalize_case(#{} = Map) ->
+ Map.
+
+
+to_lower(Cs) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ convert_to_list(to_lower(B), utf8);
+to_lower(Cs) when is_binary(Cs) ->
+ to_lower(Cs, <<>>).
+%%
+to_lower(<<C,Cs/binary>>, Acc) when $A =< C, C =< $Z ->
+ to_lower(Cs, <<Acc/binary,(C + 32)>>);
+to_lower(<<C,Cs/binary>>, Acc) ->
+ to_lower(Cs, <<Acc/binary,C>>);
+to_lower(<<>>, Acc) ->
+ Acc.
+
+
+%% 6.2.2.3. Path Segment Normalization
+%% 5.2.4. Remove Dot Segments
+normalize_path_segment(Map) ->
+ Path = maps:get(path, Map, undefined),
+ Map#{path => remove_dot_segments(Path)}.
+
+
+remove_dot_segments(Path) when is_binary(Path) ->
+ remove_dot_segments(Path, <<>>);
+remove_dot_segments(Path) when is_list(Path) ->
+ B = convert_to_binary(Path, utf8, utf8),
+ B1 = remove_dot_segments(B, <<>>),
+ convert_to_list(B1, utf8).
+%%
+remove_dot_segments(<<>>, Output) ->
+ Output;
+remove_dot_segments(<<"../",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"./",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"/./",T/binary>>, Output) ->
+ remove_dot_segments(<<$/,T/binary>>, Output);
+remove_dot_segments(<<"/.">>, Output) ->
+ remove_dot_segments(<<$/>>, Output);
+remove_dot_segments(<<"/../",T/binary>>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/,T/binary>>, Out1);
+remove_dot_segments(<<"/..">>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/>>, Out1);
+remove_dot_segments(<<$.>>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(<<"..">>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(Input, Output) ->
+ {First, Rest} = first_path_segment(Input),
+ remove_dot_segments(Rest, <<Output/binary,First/binary>>).
+
+
+first_path_segment(Input) ->
+ F = first_path_segment(Input, <<>>),
+ split_binary(Input, byte_size(F)).
+%%
+first_path_segment(<<$/,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
+first_path_segment(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+first_path_segment_end(<<>>, Acc) ->
+ Acc;
+first_path_segment_end(<<$/,_/binary>>, Acc) ->
+ Acc;
+first_path_segment_end(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+remove_last_segment(<<>>) ->
+ <<>>;
+remove_last_segment(B) ->
+ {Init, Last} = split_binary(B, byte_size(B) - 1),
+ case Last of
+ <<$/>> ->
+ Init;
+ _Char ->
+ remove_last_segment(Init)
+ end.
+
+
+%% RFC 3986, 6.2.3. Scheme-Based Normalization
+normalize_scheme_based(Map) ->
+ Scheme = maps:get(scheme, Map, undefined),
+ Port = maps:get(port, Map, undefined),
+ Path= maps:get(path, Map, undefined),
+ normalize_scheme_based(Map, Scheme, Port, Path).
+%%
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "http"; Scheme =:= <<"http">> ->
+ normalize_http(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, Path)
+ when Scheme =:= "https"; Scheme =:= <<"https">> ->
+ normalize_https(Map, Port, Path);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ftp"; Scheme =:= <<"ftp">> ->
+ normalize_ftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "ssh"; Scheme =:= <<"ssh">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "sftp"; Scheme =:= <<"sftp">> ->
+ normalize_ssh_sftp(Map, Port);
+normalize_scheme_based(Map, Scheme, Port, _Path)
+ when Scheme =:= "tftp"; Scheme =:= <<"tftp">> ->
+ normalize_tftp(Map, Port);
+normalize_scheme_based(Map, _, _, _) ->
+ Map.
+
+
+normalize_http(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 80),
+ normalize_http_path(M1, Path).
+
+
+normalize_https(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 443),
+ normalize_http_path(M1, Path).
+
+
+normalize_ftp(Map, Port) ->
+ normalize_port(Map, Port, 21).
+
+
+normalize_ssh_sftp(Map, Port) ->
+ normalize_port(Map, Port, 22).
+
+
+normalize_tftp(Map, Port) ->
+ normalize_port(Map, Port, 69).
+
+
+normalize_port(Map, Port, Default) ->
+ case Port of
+ Default ->
+ maps:remove(port, Map);
+ _Else ->
+ Map
+ end.
+
+
+normalize_http_path(Map, Path) ->
+ case Path of
+ "" ->
+ Map#{path => "/"};
+ <<>> ->
+ Map#{path => <<"/">>};
+ _Else ->
+ Map
+ end.