diff options
author | Micael Karlberg <[email protected]> | 2012-03-15 11:59:40 +0100 |
---|---|---|
committer | Micael Karlberg <[email protected]> | 2012-03-15 11:59:40 +0100 |
commit | a366623c674e993667fedbe01ad52dc4fab5b4f0 (patch) | |
tree | f377605e6270ad279cfe48253b8ae84b5c99b951 /lib/inets/src/http_lib/http_uri.erl | |
parent | 0c11d7235ed1f0f5c595cf3d9a433adf9c61cc8c (diff) | |
download | otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.gz otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.bz2 otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.zip |
[inets] The module http_uri now officially supported
The module http_uri now officially supported.
Also, the http_uri:parse/1,2 function has been
extended with more scheme support and a way
to provide your own scheme info.
OTP-9983
Diffstat (limited to 'lib/inets/src/http_lib/http_uri.erl')
-rw-r--r-- | lib/inets/src/http_lib/http_uri.erl | 143 |
1 files changed, 97 insertions, 46 deletions
diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl index d0d1033cf6..5962001c3a 100644 --- a/lib/inets/src/http_lib/http_uri.erl +++ b/lib/inets/src/http_lib/http_uri.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2011. All Rights Reserved. +%% Copyright Ericsson AB 2006-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -17,39 +17,95 @@ %% %CopyrightEnd% %% %% -%% RFC 3986 +%% This is from chapter 3, Syntax Components, of RFC 3986: +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% authority = [ userinfo "@" ] host [ ":" port ] +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% %% -module(http_uri). -export([parse/1, parse/2, + scheme_defaults/0, encode/1, decode/1]). +-export_type([scheme/0, default_scheme_port_number/0]). + %%%========================================================================= %%% API %%%========================================================================= + +-type scheme() :: atom(). +-type default_scheme_port_number() :: pos_integer(). + +-spec scheme_defaults() -> + [{scheme(), default_scheme_port_number()}]. + +scheme_defaults() -> + [{http, 80}, + {https, 443}, + {ftp, 21}, + {ssh, 22}, + {sftp, 22}, + {tftp, 69}]. + parse(AbsURI) -> parse(AbsURI, []). parse(AbsURI, Opts) -> - case parse_scheme(AbsURI) of + case parse_scheme(AbsURI, Opts) of {error, Reason} -> {error, Reason}; - {Scheme, Rest} -> - case (catch parse_uri_rest(Scheme, Rest, Opts)) of - {UserInfo, Host, Port, Path, Query} -> + {Scheme, DefaultPort, Rest} -> + case (catch parse_uri_rest(Scheme, DefaultPort, Rest, Opts)) of + {ok, {UserInfo, Host, Port, Path, Query}} -> {ok, {Scheme, UserInfo, Host, Port, Path, Query}}; + {error, Reason} -> + {error, {Reason, Scheme, AbsURI}}; _ -> - {error, {malformed_url, AbsURI}} + {error, {malformed_url, Scheme, AbsURI}} end end. +reserved() -> + sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, + $#, $[, $], $<, $>, $\", ${, $}, $|, + $\\, $', $^, $%, $ ]). + encode(URI) -> - Reserved = sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, - $#, $[, $], $<, $>, $\", ${, $}, $|, - $\\, $', $^, $%, $ ]), - %% lists:append(lists:map(fun(Char) -> uri_encode(Char, Reserved) end, URI)). + Reserved = reserved(), lists:append([uri_encode(Char, Reserved) || Char <- URI]). decode(String) -> @@ -67,23 +123,31 @@ do_decode([]) -> %%% Internal functions %%%======================================================================== -parse_scheme(AbsURI) -> +which_scheme_defaults(Opts) -> + Key = scheme_defaults, + case lists:keysearch(Key, 1, Opts) of + {value, {Key, SchemeDefaults}} -> + SchemeDefaults; + false -> + scheme_defaults() + end. + +parse_scheme(AbsURI, Opts) -> case split_uri(AbsURI, ":", {error, no_scheme}, 1, 1) of {error, no_scheme} -> {error, no_scheme}; - {StrScheme, Rest} -> - %% case list_to_atom(http_util:to_lower(StrScheme)) of - %% Scheme when (Scheme =:= http) orelse (Scheme =:= https) -> - %% {Scheme, Rest}; - %% Scheme -> - %% {error, {not_supported_scheme, Scheme}} - %% end - - %% Allow all schemes even if they are unknown - {list_to_atom(http_util:to_lower(StrScheme)), Rest} + {SchemeStr, Rest} -> + Scheme = list_to_atom(http_util:to_lower(SchemeStr)), + SchemeDefaults = which_scheme_defaults(Opts), + case lists:keysearch(Scheme, 1, SchemeDefaults) of + {value, {Scheme, DefaultPort}} -> + {Scheme, DefaultPort, Rest}; + false -> + {Scheme, no_default_port, Rest} + end end. -parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> +parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> {Authority, PathQuery} = case split_uri(URIPart, "/", URIPart, 1, 0) of Split = {_, _} -> @@ -96,26 +160,25 @@ parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> {URIPart,""} end end, - {UserInfo, HostPort} = split_uri(Authority, "@", {"", Authority}, 1, 1), - {Host, Port} = parse_host_port(Scheme, HostPort, Opts), + {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts), {Path, Query} = parse_path_query(PathQuery), - {UserInfo, Host, Port, Path, Query}. + {ok, {UserInfo, Host, Port, Path, Query}}. parse_path_query(PathQuery) -> {Path, Query} = split_uri(PathQuery, "\\?", {PathQuery, ""}, 1, 0), {path(Path), Query}. -parse_host_port(Scheme,"[" ++ HostPort, Opts) -> %ipv6 - DefaultPort = default_port(Scheme), +%% In this version of the function, we no longer need +%% the Scheme argument, but just in case... +parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6 {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1), Host2 = maybe_ipv6_host_with_brackets(Host, Opts), {_, Port} = split_uri(ColonPort, ":", {"", DefaultPort}, 0, 1), {Host2, int_port(Port)}; -parse_host_port(Scheme, HostPort, _Opts) -> - DefaultPort = default_port(Scheme), +parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) -> {Host, Port} = split_uri(HostPort, ":", {HostPort, DefaultPort}, 1, 1), {Host, int_port(Port)}. @@ -136,26 +199,14 @@ maybe_ipv6_host_with_brackets(Host, Opts) -> Host end. -default_port(http) -> - 80; -default_port(https) -> - 443; - -%%% Added some additional default ports -%%% Other protocols would have to be handled by the calling function -default_port(ftp) -> 21; -default_port(ssh) -> 22; -default_port(sftp) -> 22; -default_port(tftp) -> 69; - -default_port(_) -> - undefined. - int_port(Port) when is_integer(Port) -> Port; int_port(Port) when is_list(Port) -> - list_to_integer(Port). + list_to_integer(Port); +%% This is the case where no port was found and there was no default port +int_port(no_default_port) -> + throw({error, no_default_port}). path("") -> "/"; |