diff options
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/array.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/src/erl_eval.erl | 46 | ||||
-rw-r--r-- | lib/stdlib/src/gen_event.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/gen_fsm.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/gen_server.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/gen_statem.erl | 6 | ||||
-rw-r--r-- | lib/stdlib/src/io.erl | 11 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/src/maps.erl | 9 | ||||
-rw-r--r-- | lib/stdlib/src/proc_lib.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/src/string.erl | 12 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 6 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor_bridge.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 206 |
14 files changed, 214 insertions, 104 deletions
diff --git a/lib/stdlib/src/array.erl b/lib/stdlib/src/array.erl index a237eaa489..939b1fb488 100644 --- a/lib/stdlib/src/array.erl +++ b/lib/stdlib/src/array.erl @@ -290,7 +290,7 @@ new(Size, Fixed, Default) -> end, #array{size = Size, max = M, default = Default, elements = E}. --spec find_max(integer(), integer()) -> integer(). +-spec find_max(integer(), non_neg_integer()) -> non_neg_integer(). find_max(I, M) when I >= M -> find_max(I, ?extend(M)); diff --git a/lib/stdlib/src/erl_eval.erl b/lib/stdlib/src/erl_eval.erl index 0f6d48b9a3..31c0e60fe1 100644 --- a/lib/stdlib/src/erl_eval.erl +++ b/lib/stdlib/src/erl_eval.erl @@ -29,7 +29,7 @@ -export([new_bindings/0,bindings/1,binding/2,add_binding/3,del_binding/2]). -export([extended_parse_exprs/1, extended_parse_term/1, subst_values_for_vars/2]). --export([is_constant_expr/1, partial_eval/1]). +-export([is_constant_expr/1, partial_eval/1, eval_str/1]). %% Is used by standalone Erlang (escript). %% Also used by shell.erl. @@ -1557,6 +1557,50 @@ ev_expr({cons,_,H,T}) -> [ev_expr(H) | ev_expr(T)]. %% true = erl_internal:guard_bif(F, length(As)), %% apply(erlang, F, [ev_expr(X) || X <- As]); +%% eval_str(InStr) -> {ok, OutStr} | {error, ErrStr'} +%% InStr must represent a body +%% Note: If InStr is a binary it has to be a Latin-1 string. +%% If you have a UTF-8 encoded binary you have to call +%% unicode:characters_to_list/1 before the call to eval_str(). + +-define(result(F,D), lists:flatten(io_lib:format(F, D))). + +-spec eval_str(string() | unicode:latin1_binary()) -> + {'ok', string()} | {'error', string()}. + +eval_str(Str) when is_list(Str) -> + case erl_scan:tokens([], Str, 0) of + {more, _} -> + {error, "Incomplete form (missing .<cr>)??"}; + {done, {ok, Toks, _}, Rest} -> + case all_white(Rest) of + true -> + case erl_parse:parse_exprs(Toks) of + {ok, Exprs} -> + case catch erl_eval:exprs(Exprs, erl_eval:new_bindings()) of + {value, Val, _} -> + {ok, Val}; + Other -> + {error, ?result("*** eval: ~p", [Other])} + end; + {error, {_Line, Mod, Args}} -> + Msg = ?result("*** ~ts",[Mod:format_error(Args)]), + {error, Msg} + end; + false -> + {error, ?result("Non-white space found after " + "end-of-form :~ts", [Rest])} + end + end; +eval_str(Bin) when is_binary(Bin) -> + eval_str(binary_to_list(Bin)). + +all_white([$\s|T]) -> all_white(T); +all_white([$\n|T]) -> all_white(T); +all_white([$\t|T]) -> all_white(T); +all_white([]) -> true; +all_white(_) -> false. + ret_expr(_Old, New) -> %% io:format("~w: reduced ~s => ~s~n", %% [line(Old), erl_pp:expr(Old), erl_pp:expr(New)]), diff --git a/lib/stdlib/src/gen_event.erl b/lib/stdlib/src/gen_event.erl index 3ee2031d02..8213282867 100644 --- a/lib/stdlib/src/gen_event.erl +++ b/lib/stdlib/src/gen_event.erl @@ -589,7 +589,7 @@ server_update(Handler1, Func, Event, SName) -> ?LOG_WARNING(#{label=>{gen_event,no_handle_info}, module=>Mod1, message=>Event}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_event:format_log/1, error_logger=>#{tag=>warning_msg}}), % warningmap?? {ok, Handler1}; @@ -751,7 +751,7 @@ report_error(Handler, Reason, State, LastIn, SName) -> state=>format_status(terminate,Handler#handler.module, get(),State), reason=>Reason}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_event:format_log/1, error_logger=>#{tag=>error}}). diff --git a/lib/stdlib/src/gen_fsm.erl b/lib/stdlib/src/gen_fsm.erl index 1646186761..caaaf8fa2e 100644 --- a/lib/stdlib/src/gen_fsm.erl +++ b/lib/stdlib/src/gen_fsm.erl @@ -505,7 +505,7 @@ handle_msg(Msg, Parent, Name, StateName, StateData, Mod, _Time, HibernateAfterTi ?LOG_WARNING(#{label=>{gen_fsm,no_handle_info}, module=>Mod, message=>Msg}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_fsm:format_log/1, error_logger=>#{tag=>warning_msg}}), loop(Parent, Name, StateName, StateData, Mod, infinity, HibernateAfterTimeout, []); @@ -616,7 +616,7 @@ error_info(Reason, Name, Msg, StateName, StateData, Debug) -> state_name=>StateName, state_data=>StateData, reason=>Reason}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_fsm:format_log/1, error_logger=>#{tag=>error}}), sys:print_log(Debug), diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl index 09f77c0810..342cc2a8e3 100644 --- a/lib/stdlib/src/gen_server.erl +++ b/lib/stdlib/src/gen_server.erl @@ -645,7 +645,7 @@ try_dispatch(Mod, Func, Msg, State) -> #{label=>{gen_server,no_handle_info}, module=>Mod, message=>Msg}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_server:format_log/1, error_logger=>#{tag=>warning_msg}}), {ok, {noreply, State}}; @@ -891,7 +891,7 @@ error_info(Reason, Name, From, Msg, Mod, State, Debug) -> state=>format_status(terminate, Mod, get(), State), reason=>Reason, client_info=>client_stacktrace(From)}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_server:format_log/1, error_logger=>#{tag=>error}}), sys:print_log(Debug), diff --git a/lib/stdlib/src/gen_statem.erl b/lib/stdlib/src/gen_statem.erl index b36b8cd5a5..faa43fbc1e 100644 --- a/lib/stdlib/src/gen_statem.erl +++ b/lib/stdlib/src/gen_statem.erl @@ -1448,13 +1448,13 @@ loop_event_done( [?sys_debug( Debug_0, {S#state.name,State}, - {postpone,Event_0,State}), + {postpone,Event_0,NextState}), Event_0|P_0]; false -> [?sys_debug( Debug_0, {S#state.name,State}, - {consume,Event_0,State})|P_0] + {consume,Event_0,NextState})|P_0] end, {Events_2,P_2,Timers_2} = %% Move all postponed events to queue, @@ -1900,7 +1900,7 @@ error_info( state_enter=>StateEnter, state=>format_status(terminate, get(), S), reason=>{Class,Reason,Stacktrace}}, - #{domain=>[beam,erlang,otp], + #{domain=>[otp], report_cb=>fun gen_statem:format_log/1, error_logger=>#{tag=>error}}). diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index f510f61e9f..5d5773c80c 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -86,7 +86,16 @@ put_chars(Chars) -> CharData :: unicode:chardata(). put_chars(Io, Chars) -> - o_request(Io, {put_chars,unicode,Chars}, put_chars). + put_chars(Io, unicode, Chars). + +%% This function is here to make the erlang:raise in o_request actually raise to +%% a valid function. +-spec put_chars(IoDevice, Encoding, CharData) -> 'ok' when + IoDevice :: device(), + Encoding :: unicode, + CharData :: unicode:chardata(). +put_chars(Io, Encoding, Chars) -> + o_request(Io, {put_chars,Encoding,Chars}, put_chars). -spec nl() -> 'ok'. diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 3d5a979b3e..dca1b37ef3 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -131,6 +131,8 @@ print(Term, Col, Ll, D, M0, T, RecDefFun, Enc, Str) when is_tuple(Term); %% use Len as CHAR_MAX if M0 = -1 M = max_cs(M0, Len), if + Ll =:= 0 -> + write(If); Len < Ll - Col, Len =< M -> %% write the whole thing on a single line when there is room write(If); diff --git a/lib/stdlib/src/maps.erl b/lib/stdlib/src/maps.erl index a13f340709..a1634547f3 100644 --- a/lib/stdlib/src/maps.erl +++ b/lib/stdlib/src/maps.erl @@ -249,7 +249,7 @@ fold(Fun,Init,Map) when is_function(Fun,3), is_map(Map) -> fold(Fun,Init,Iterator) when is_function(Fun,3), ?IS_ITERATOR(Iterator) -> fold_1(Fun,Init,Iterator); fold(Fun,Init,Map) -> - erlang:error(error_type(Map),[Fun,Init,Map]). + erlang:error(error_type_iter(Map),[Fun,Init,Map]). fold_1(Fun, Acc, Iter) -> case next(Iter) of @@ -272,7 +272,7 @@ map(Fun,Map) when is_function(Fun, 2), is_map(Map) -> map(Fun,Iterator) when is_function(Fun, 2), ?IS_ITERATOR(Iterator) -> maps:from_list(map_1(Fun, Iterator)); map(Fun,Map) -> - erlang:error(error_type(Map),[Fun,Map]). + erlang:error(error_type_iter(Map),[Fun,Map]). map_1(Fun, Iter) -> case next(Iter) of @@ -342,5 +342,8 @@ with(Ks,M) -> erlang:error(error_type(M),[Ks,M]). -error_type(M) when is_map(M); ?IS_ITERATOR(M) -> badarg; +error_type(M) when is_map(M) -> badarg; error_type(V) -> {badmap, V}. + +error_type_iter(M) when is_map(M); ?IS_ITERATOR(M) -> badarg; +error_type_iter(V) -> {badmap, V}. diff --git a/lib/stdlib/src/proc_lib.erl b/lib/stdlib/src/proc_lib.erl index 5f14e78f91..cca1628aba 100644 --- a/lib/stdlib/src/proc_lib.erl +++ b/lib/stdlib/src/proc_lib.erl @@ -508,7 +508,7 @@ crash_report(Class, Reason, StartF, Stacktrace) -> ?LOG_ERROR(#{label=>{proc_lib,crash}, report=>[my_info(Class, Reason, StartF, Stacktrace), linked_info(self())]}, - #{domain=>[beam,erlang,otp,sasl], + #{domain=>[otp,sasl], report_cb=>fun proc_lib:report_cb/1, logger_formatter=>#{title=>"CRASH REPORT"}, error_logger=>#{tag=>error_report,type=>crash_report}}). diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index f5d271c06d..cf48b882e4 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -691,9 +691,9 @@ uppercase_list(CPs0, Changed) -> uppercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) when $a =< CP1, CP1 =< $z, CP2 < 256 -> [CP1-32|uppercase_bin(CP2, Bin, true)]; -uppercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) +uppercase_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed) when CP1 < 128, CP2 < 256 -> - [CP1|uppercase_bin(CP2, Bin, false)]; + [CP1|uppercase_bin(CP2, Bin, Changed)]; uppercase_bin(CP1, Bin, Changed) -> case unicode_util:uppercase([CP1|Bin]) of [CP1|CPs] -> @@ -732,9 +732,9 @@ lowercase_list(CPs0, Changed) -> lowercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|lowercase_bin(CP2, Bin, true)]; -lowercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) +lowercase_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed) when CP1 < 128, CP2 < 256 -> - [CP1|lowercase_bin(CP2, Bin, false)]; + [CP1|lowercase_bin(CP2, Bin, Changed)]; lowercase_bin(CP1, Bin, Changed) -> case unicode_util:lowercase([CP1|Bin]) of [CP1|CPs] -> @@ -773,9 +773,9 @@ casefold_list(CPs0, Changed) -> casefold_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|casefold_bin(CP2, Bin, true)]; -casefold_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed) +casefold_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed) when CP1 < 128, CP2 < 256 -> - [CP1|casefold_bin(CP2, Bin, false)]; + [CP1|casefold_bin(CP2, Bin, Changed)]; casefold_bin(CP1, Bin, Changed) -> case unicode_util:casefold([CP1|Bin]) of [CP1|CPs] -> diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index eb46ac611a..9b6c2a5f0b 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -43,7 +43,7 @@ {errorContext,Error}, {reason,Reason}, {offender,extract_child(Child)}]}, - #{domain=>[beam,erlang,otp,sasl], + #{domain=>[otp,sasl], report_cb=>fun logger:format_otp_report/1, logger_formatter=>#{title=>"SUPERVISOR REPORT"}, error_logger=>#{tag=>error_report, @@ -580,7 +580,7 @@ handle_info({'EXIT', Pid, Reason}, State) -> handle_info(Msg, State) -> ?LOG_ERROR("Supervisor received unexpected message: ~tp~n",[Msg], - #{domain=>[beam,erlang,otp], + #{domain=>[otp], error_logger=>#{tag=>error}}), {noreply, State}. @@ -1419,7 +1419,7 @@ report_progress(Child, SupName) -> ?LOG_INFO(#{label=>{supervisor,progress}, report=>[{supervisor,SupName}, {started,extract_child(Child)}]}, - #{domain=>[beam,erlang,otp,sasl], + #{domain=>[otp,sasl], report_cb=>fun logger:format_otp_report/1, logger_formatter=>#{title=>"PROGRESS REPORT"}, error_logger=>#{tag=>info_report,type=>progress}}). diff --git a/lib/stdlib/src/supervisor_bridge.erl b/lib/stdlib/src/supervisor_bridge.erl index 39372935fa..2db0a895d6 100644 --- a/lib/stdlib/src/supervisor_bridge.erl +++ b/lib/stdlib/src/supervisor_bridge.erl @@ -135,7 +135,7 @@ report_progress(Pid, Mod, StartArgs, SupName) -> report=>[{supervisor, SupName}, {started, [{pid, Pid}, {mfa, {Mod, init, [StartArgs]}}]}]}, - #{domain=>[beam,erlang,otp,sasl], + #{domain=>[otp,sasl], report_cb=>fun logger:format_otp_report/1, logger_formatter=>#{title=>"PROGRESS REPORT"}, error_logger=>#{tag=>info_report,type=>progress}}). @@ -146,7 +146,7 @@ report_error(Error, Reason, #state{name = Name, pid = Pid, mod = Mod}) -> {errorContext, Error}, {reason, Reason}, {offender, [{pid, Pid}, {mod, Mod}]}]}, - #{domain=>[beam,erlang,otp,sasl], + #{domain=>[otp,sasl], report_cb=>fun logger:format_otp_report/1, logger_formatter=>#{title=>"SUPERVISOR REPORT"}, error_logger=>#{tag=>error_report,type=>supervisor_report}}). diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 28d36ea229..48cce90d68 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -297,7 +297,10 @@ NormalizedURI :: uri_string() | error(). normalize(URIMap) -> - normalize(URIMap, []). + try normalize(URIMap, []) + catch + throw:{error, Atom, RestData} -> {error, Atom, RestData} + end. -spec normalize(URI, Options) -> NormalizedURI when @@ -523,34 +526,34 @@ parse_relative_part(?STRING_REST("//", Rest), URI) -> {T, URI1} -> Userinfo = calculate_parsed_userinfo(Rest, T), URI2 = maybe_add_path(URI1), - URI2#{userinfo => decode_userinfo(Userinfo)} + URI2#{userinfo => Userinfo} catch throw:{_,_,_} -> {T, URI1} = parse_host(Rest, URI), Host = calculate_parsed_host_port(Rest, T), URI2 = maybe_add_path(URI1), - URI2#{host => decode_host(remove_brackets(Host))} + URI2#{host => remove_brackets(Host)} end; parse_relative_part(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute Path = calculate_parsed_part(Rest, T), - URI1#{path => decode_path(?STRING_REST($/, Path))}; + URI1#{path => ?STRING_REST($/, Path)}; parse_relative_part(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), URI2 = maybe_add_path(URI1), - URI2#{query => decode_query(Query)}; + URI2#{query => Query}; parse_relative_part(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), URI2 = maybe_add_path(URI1), - URI2#{fragment => decode_fragment(Fragment)}; + URI2#{fragment => Fragment}; parse_relative_part(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of true -> {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme Path = calculate_parsed_part(Rest, T), - URI1#{path => decode_path(?STRING_REST(Char, Path))}; + URI1#{path => ?STRING_REST(Char, Path)}; false -> throw({error,invalid_uri,[Char]}) end. @@ -593,11 +596,11 @@ parse_segment(?STRING_REST($/, Rest), URI) -> parse_segment(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_segment(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_segment(?STRING_REST(Char, Rest), URI) -> case is_pchar(Char) of true -> parse_segment(Rest, URI); @@ -616,11 +619,11 @@ parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> case is_segment_nz_nc(Char) of true -> parse_segment_nz_nc(Rest, URI); @@ -709,31 +712,31 @@ parse_hier(?STRING_REST("//", Rest), URI) -> try parse_userinfo(Rest, URI) of {T, URI1} -> Userinfo = calculate_parsed_userinfo(Rest, T), - {Rest, URI1#{userinfo => decode_userinfo(Userinfo)}} + {Rest, URI1#{userinfo => Userinfo}} catch throw:{_,_,_} -> {T, URI1} = parse_host(Rest, URI), Host = calculate_parsed_host_port(Rest, T), - {Rest, URI1#{host => decode_host(remove_brackets(Host))}} + {Rest, URI1#{host => remove_brackets(Host)}} end; parse_hier(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_hier(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_hier(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless case is_pchar(Char) of true -> % segment_nz {T, URI1} = parse_segment(Rest, URI), Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST(Char, Path))}}; + {Rest, URI1#{path => ?STRING_REST(Char, Path)}}; false -> throw({error,invalid_uri,[Char]}) end; parse_hier(?STRING_EMPTY, URI) -> @@ -770,7 +773,7 @@ parse_userinfo(?CHAR($@), URI) -> parse_userinfo(?STRING_REST($@, Rest), URI) -> {T, URI1} = parse_host(Rest, URI), Host = calculate_parsed_host_port(Rest, T), - {Rest, URI1#{host => decode_host(remove_brackets(Host))}}; + {Rest, URI1#{host => remove_brackets(Host)}}; parse_userinfo(?STRING_REST(Char, Rest), URI) -> case is_userinfo(Char) of true -> parse_userinfo(Rest, URI); @@ -836,20 +839,25 @@ parse_host(?STRING_REST($:, Rest), URI) -> parse_host(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_host(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_host(?STRING_REST($[, Rest), URI) -> parse_ipv6_bin(Rest, [], URI); parse_host(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_host(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of - true -> parse_ipv4_bin(Rest, [Char], URI); + true -> + try parse_ipv4_bin(Rest, [Char], URI) + catch + throw:{_,_,_} -> + parse_reg_name(?STRING_REST(Char, Rest), URI) + end; false -> parse_reg_name(?STRING_REST(Char, Rest), URI) end; parse_host(?STRING_EMPTY, URI) -> @@ -865,15 +873,15 @@ parse_reg_name(?STRING_REST($:, Rest), URI) -> parse_reg_name(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_reg_name(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_reg_name(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_reg_name(?STRING_REST(Char, Rest), URI) -> case is_reg_name(Char) of true -> parse_reg_name(Rest, URI); @@ -899,17 +907,17 @@ parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_segment(Rest, URI), % path-abempty Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_ipv4_bin(?STRING_REST($#, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> case is_ipv4(Char) of true -> parse_ipv4_bin(Rest, [Char|Acc], URI); @@ -961,15 +969,15 @@ parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> case is_ipv6(Char) of true -> parse_ipv6_bin_end(Rest, URI); @@ -999,15 +1007,15 @@ validate_ipv6_address(Addr) -> parse_port(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => decode_path(?STRING_REST($/, Path))}}; + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; parse_port(?STRING_REST($?, Rest), URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => decode_query(Query)}}; + {Rest, URI1#{query => Query}}; parse_port(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_port(?STRING_REST(Char, Rest), URI) -> case is_digit(Char) of true -> parse_port(Rest, URI); @@ -1033,7 +1041,7 @@ parse_port(?STRING_EMPTY, URI) -> parse_query(?STRING_REST($#, Rest), URI) -> {T, URI1} = parse_fragment(Rest, URI), Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => decode_fragment(Fragment)}}; + {Rest, URI1#{fragment => Fragment}}; parse_query(?STRING_REST(Char, Rest), URI) -> case is_query(Char) of true -> parse_query(Rest, URI); @@ -1088,6 +1096,31 @@ is_fragment(Char) -> is_pchar(Char). %% %%------------------------------------------------------------------------- +%% Return true if input char is reserved. +-spec is_reserved(char()) -> boolean(). +is_reserved($:) -> true; +is_reserved($/) -> true; +is_reserved($?) -> true; +is_reserved($#) -> true; +is_reserved($[) -> true; +is_reserved($]) -> true; +is_reserved($@) -> true; + +is_reserved($!) -> true; +is_reserved($$) -> true; +is_reserved($&) -> true; +is_reserved($') -> true; +is_reserved($() -> true; +is_reserved($)) -> true; + +is_reserved($*) -> true; +is_reserved($+) -> true; +is_reserved($,) -> true; +is_reserved($;) -> true; +is_reserved($=) -> true; +is_reserved(_) -> false. + + %% Check if char is sub-delim. -spec is_sub_delim(char()) -> boolean(). is_sub_delim($!) -> true; @@ -1276,36 +1309,6 @@ byte_size_exl_head(Binary) -> byte_size(Binary) + 1. %% %% pct-encoded = "%" HEXDIG HEXDIG %%------------------------------------------------------------------------- --spec decode_userinfo(binary()) -> binary(). -decode_userinfo(Cs) -> - check_utf8(decode(Cs, fun is_userinfo/1, <<>>)). - --spec decode_host(binary()) -> binary(). -decode_host(Cs) -> - check_utf8(decode(Cs, fun is_host/1, <<>>)). - --spec decode_path(binary()) -> binary(). -decode_path(Cs) -> - check_utf8(decode(Cs, fun is_path/1, <<>>)). - --spec decode_query(binary()) -> binary(). -decode_query(Cs) -> - check_utf8(decode(Cs, fun is_query/1, <<>>)). - --spec decode_fragment(binary()) -> binary(). -decode_fragment(Cs) -> - check_utf8(decode(Cs, fun is_fragment/1, <<>>)). - - -%% Returns Cs if it is utf8 encoded. -check_utf8(Cs) -> - case unicode:characters_to_list(Cs) of - {incomplete,_,_} -> - throw({error,invalid_utf8,Cs}); - {error,_,_} -> - throw({error,invalid_utf8,Cs}); - _ -> Cs - end. %%------------------------------------------------------------------------- %% Percent-encode @@ -1351,20 +1354,56 @@ encode_fragment(Cs) -> %%------------------------------------------------------------------------- %% Helper funtions for percent-decode %%------------------------------------------------------------------------- -decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> + +-spec decode(list()|binary()) -> list() | binary(). +decode(Cs) -> + decode(Cs, <<>>). +%% +decode(L, Acc) when is_list(L) -> + B0 = unicode:characters_to_binary(L), + B1 = decode(B0, Acc), + unicode:characters_to_list(B1); +decode(<<$%,C0,C1,Cs/binary>>, Acc) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - decode(Cs, Fun, <<Acc/binary, B>>); + case is_reserved(B) of + true -> + %% [2.2] Characters in the reserved set are protected from + %% normalization. + %% [2.1] For consistency, URI producers and normalizers should + %% use uppercase hexadecimal digits for all percent- + %% encodings. + H0 = hex_to_upper(C0), + H1 = hex_to_upper(C1), + decode(Cs, <<Acc/binary,$%,H0,H1>>); + false -> + decode(Cs, <<Acc/binary, B>>) + end; false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) end; -decode(<<C,Cs/binary>>, Fun, Acc) -> - case Fun(C) of - true -> decode(Cs, Fun, <<Acc/binary, C>>); - false -> throw({error,invalid_percent_encoding,<<C,Cs/binary>>}) - end; -decode(<<>>, _Fun, Acc) -> - Acc. +decode(<<C,Cs/binary>>, Acc) -> + decode(Cs, <<Acc/binary, C>>); +decode(<<>>, Acc) -> + check_utf8(Acc). + +%% Returns Cs if it is utf8 encoded. +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {incomplete,_,_} -> + throw({error,invalid_utf8,Cs}); + {error,_,_} -> + throw({error,invalid_utf8,Cs}); + _ -> Cs + end. + +%% Convert hex digit to uppercase form +hex_to_upper(H) when $a =< H, H =< $f -> + H - 32; +hex_to_upper(H) when $0 =< H, H =< $9;$A =< H, H =< $F-> + H; +hex_to_upper(H) -> + throw({error,invalid_input, H}). %% Check if char is allowed in host -spec is_host(char()) -> boolean(). @@ -1925,9 +1964,10 @@ base10_decode_unicode(<<H,_/binary>>, _, _) -> %%------------------------------------------------------------------------- normalize_map(URIMap) -> - normalize_path_segment( - normalize_scheme_based( - normalize_case(URIMap))). + normalize_path_segment( + normalize_scheme_based( + normalize_percent_encoding( + normalize_case(URIMap)))). %% 6.2.2.1. Case Normalization @@ -1942,6 +1982,18 @@ normalize_case(#{} = Map) -> Map. +%% 6.2.2.2. Percent-Encoding Normalization +normalize_percent_encoding(Map) -> + Fun = fun (K,V) when K =:= userinfo; K =:= host; K =:= path; + K =:= query; K =:= fragment -> + decode(V); + %% Handle port and scheme + (_,V) -> + V + end, + maps:map(Fun, Map). + + to_lower(Cs) when is_list(Cs) -> B = convert_to_binary(Cs, utf8, utf8), convert_to_list(to_lower(B), utf8); |