diff options
author | Micael Karlberg <bmk@erlang.org> | 2012-03-15 11:59:40 +0100 |
---|---|---|
committer | Micael Karlberg <bmk@erlang.org> | 2012-03-15 11:59:40 +0100 |
commit | a366623c674e993667fedbe01ad52dc4fab5b4f0 (patch) | |
tree | f377605e6270ad279cfe48253b8ae84b5c99b951 | |
parent | 0c11d7235ed1f0f5c595cf3d9a433adf9c61cc8c (diff) | |
download | otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.gz otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.bz2 otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.zip |
[inets] The module http_uri now officially supported
The module http_uri now officially supported.
Also, the http_uri:parse/1,2 function has been
extended with more scheme support and a way
to provide your own scheme info.
OTP-9983
-rw-r--r-- | lib/inets/doc/src/Makefile | 1 | ||||
-rw-r--r-- | lib/inets/doc/src/http_uri.xml | 160 | ||||
-rw-r--r-- | lib/inets/doc/src/notes.xml | 12 | ||||
-rw-r--r-- | lib/inets/doc/src/ref_man.xml | 9 | ||||
-rw-r--r-- | lib/inets/src/http_client/httpc.erl | 26 | ||||
-rw-r--r-- | lib/inets/src/http_client/httpc_manager.erl | 15 | ||||
-rw-r--r-- | lib/inets/src/http_client/httpc_response.erl | 16 | ||||
-rw-r--r-- | lib/inets/src/http_lib/http_uri.erl | 143 | ||||
-rw-r--r-- | lib/inets/test/httpc_SUITE.erl | 6 |
9 files changed, 326 insertions, 62 deletions
diff --git a/lib/inets/doc/src/Makefile b/lib/inets/doc/src/Makefile index 53d505b102..c4152a1d72 100644 --- a/lib/inets/doc/src/Makefile +++ b/lib/inets/doc/src/Makefile @@ -48,6 +48,7 @@ XML_REF3_FILES = \ inets.xml \ ftp.xml \ tftp.xml \ + http_uri.xml\ httpc.xml\ httpd.xml \ httpd_conf.xml \ diff --git a/lib/inets/doc/src/http_uri.xml b/lib/inets/doc/src/http_uri.xml new file mode 100644 index 0000000000..bd31ae42d2 --- /dev/null +++ b/lib/inets/doc/src/http_uri.xml @@ -0,0 +1,160 @@ +<?xml version="1.0" encoding="iso-8859-1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2012</year><year>2012</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>http_uri</title> + <prepared></prepared> + <responsible></responsible> + <docno></docno> + <date></date> + <rev></rev> + </header> + + <module>http_uri</module> + <modulesummary>URI utility module</modulesummary> + + <description> + <p>This module provides utility functions for working with URIs, + according to RFC 3986. </p> + + </description> + + <section> + <title>COMMON DATA TYPES </title> + <p>Type definitions that are used more than once in + this module:</p> + <code type="none"><![CDATA[ +boolean() = true | false +string() = list of ASCII characters + ]]></code> + + </section> + + <section> + <title>URI DATA TYPES </title> + <p>Type definitions that are related to URI:</p> + <p>For more information about URI, see RFC 3986. </p> + + <code type="none"><![CDATA[ +uri() = string() - Syntax according to the URI definition in rfc 3986, ex: "http://www.erlang.org/" +user_info() = string() +scheme() = atom() - Example: http, https +host() = string() +port() = pos_integer() +path() = string() - Representing a file path or directory path +query() = string() + ]]></code> + + <marker id="scheme_defaults"></marker> + </section> + + <funcs> + <func> + <name>scheme_defaults() -> SchemeDefaults</name> + <fsummary>A list of scheme and their default ports</fsummary> + <type> + <v>SchemeDefaults = [{scheme(), default_scheme_port_number()}] </v> + <v>default_scheme_port_number() = pos_integer()</v> + </type> + <desc> + <p>This function provides a list of the scheme and their default + port numbers currently supported (by default) by this utility. </p> + + <marker id="parse"></marker> + </desc> + </func> + + <func> + <name>parse(URI) -> {ok, Result} | {error, Reason}</name> + <name>parse(URI, Options) -> {ok, Result} | {error, Reason}</name> + <fsummary>Parse an URI</fsummary> + <type> + <v>URI = uri() </v> + <v>Options = [Option] </v> + <v>Option = {ipv6_host_with_brackets, boolean()} | + {scheme_defaults, scheme_defaults()}]</v> + <v>Result = {Scheme, UserInfo, Host, Port, Path, Query}</v> + <v>UserInfo = user_info()</v> + <v>Host = host()</v> + <v>Port = pos_integer()</v> + <v>Path = path()</v> + <v>Query = query()</v> + <v>Reason = term() </v> + </type> + <desc> + <p>This function is used to parse an URI. If no scheme defaults + are provided, the value of + <seealso marker="#scheme_defaults">scheme_defaults</seealso> + function will be used. </p> + + <p>Note that when parsing an URI with an unknown scheme (that is, + a scheme not found in the scheme defaults) a port number must be + provided or else the parsing will fail. </p> + + <marker id="encode"></marker> + </desc> + </func> + + <func> + <name>encode(URI) -> HexEncodedURI</name> + + <fsummary>Hex encode an URI</fsummary> + <type> + <v>URI = uri()</v> + <v>HexEncodedURI = string() - Hex encoded uri</v> + </type> + + <desc> + <p>Hex encode an URI. </p> + + <marker id="decode"></marker> + </desc> + </func> + + <func> + <name>decode(HexEncodedURI) -> URI</name> + + <fsummary>Decode a hex encoded URI</fsummary> + <type> + <v>HexEncodedURI = string() - A possibly hex encoded uri</v> + <v>URI = uri()</v> + </type> + + <desc> + <p>Decode a possibly hex encoded URI. </p> + + </desc> + </func> + + </funcs> + +<!-- + <section> + <title>SEE ALSO</title> + <p>RFC 2616, <seealso marker="inets">inets(3)</seealso>, + <seealso marker="kernel:gen_tcp">gen_tcp(3)</seealso>, + <seealso marker="ssl:ssl">ssl(3)</seealso> + </p> + </section> +--> + +</erlref> diff --git a/lib/inets/doc/src/notes.xml b/lib/inets/doc/src/notes.xml index f2cd03b6a8..dfdeb4016c 100644 --- a/lib/inets/doc/src/notes.xml +++ b/lib/inets/doc/src/notes.xml @@ -70,6 +70,18 @@ <p>Own Id: OTP-9979</p> </item> + <item> + <p>Utility module + <seealso marker="http_uri">http_uri</seealso> + now officially supported. </p> + <p>Also, the + <seealso marker="http_uri#parse">parse</seealso> + function has been extended with more + scheme support and a way to provide your own scheme info. </p> + <p>Own Id: OTP-9983</p> + <p>Aux Id: Seq 12022</p> + </item> + </list> </section> diff --git a/lib/inets/doc/src/ref_man.xml b/lib/inets/doc/src/ref_man.xml index 45d5dfcd0e..e44829827c 100644 --- a/lib/inets/doc/src/ref_man.xml +++ b/lib/inets/doc/src/ref_man.xml @@ -1,10 +1,10 @@ -<?xml version="1.0" encoding="latin1" ?> +<?xml version="1.0" encoding="iso-8859-1" ?> <!DOCTYPE application SYSTEM "application.dtd"> <application xmlns:xi="http://www.w3.org/2001/XInclude"> <header> <copyright> - <year>1997</year><year>2010</year> + <year>1997</year><year>2012</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -30,8 +30,8 @@ </header> <description> <p>Inets is a container for Internet clients and - servers. Currently a FTP client, a HTTP client and server, and - a tftp client and server has been incorporated in Inets.</p> + servers. Currently a FTP client, a HTTP client and server, and + a tftp client and server has been incorporated in Inets.</p> </description> <xi:include href="inets.xml"/> <xi:include href="ftp.xml"/> @@ -45,6 +45,7 @@ <xi:include href="mod_auth.xml"/> <xi:include href="mod_esi.xml"/> <xi:include href="mod_security.xml"/> + <xi:include href="http_uri.xml"/> </application> diff --git a/lib/inets/src/http_client/httpc.erl b/lib/inets/src/http_client/httpc.erl index 2c51c2081c..0a30fe1e20 100644 --- a/lib/inets/src/http_client/httpc.erl +++ b/lib/inets/src/http_client/httpc.erl @@ -158,7 +158,7 @@ request(Method, {http_options, HTTPOptions}, {options, Options}, {profile, Profile}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {error, Reason} -> {error, Reason}; {ok, ParsedUrl} -> @@ -179,7 +179,7 @@ request(Method, {http_options, HTTPOptions}, {options, Options}, {profile, Profile}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {error, Reason} -> {error, Reason}; {ok, ParsedUrl} -> @@ -328,7 +328,7 @@ store_cookies(SetCookieHeaders, Url, Profile) %% Since the Address part is not actually used %% by the manager when storing cookies, we dont %% care about ipv6-host-with-brackets. - {ok, {_, _, Host, Port, Path, _}} = http_uri:parse(Url), + {ok, {_, _, Host, Port, Path, _}} = uri_parse(Url), Address = {Host, Port}, ProfileName = profile_name(Profile), Cookies = httpc_cookie:cookies(SetCookieHeaders, Path, Host), @@ -585,10 +585,6 @@ maybe_encode_uri(#http_options{url_encode = true}, URI) -> maybe_encode_uri(_, URI) -> URI. -uri_parse(AbsURI) -> - http_uri:parse(AbsURI, [{scheme_defaults, [{http, 80}, {https, 443}]}]). - - mk_chunkify_fun(ProcessBody) -> fun(eof_body) -> eof; @@ -1202,6 +1198,22 @@ validate_headers(RequestHeaders, _, _) -> RequestHeaders. +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()}]). + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). + + +%%-------------------------------------------------------------------------- + child_name2info(undefined) -> {error, no_such_service}; child_name2info(httpc_manager) -> diff --git a/lib/inets/src/http_client/httpc_manager.erl b/lib/inets/src/http_client/httpc_manager.erl index 33b5dfe046..b225b43214 100644 --- a/lib/inets/src/http_client/httpc_manager.erl +++ b/lib/inets/src/http_client/httpc_manager.erl @@ -446,7 +446,7 @@ handle_call(which_cookies, _, #state{cookie_db = CookieDb} = State) -> handle_call({which_cookies, Url, Options}, _, #state{cookie_db = CookieDb} = State) -> ?hcrv("which cookies", [{url, Url}, {options, Options}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {ok, {Scheme, _, Host, Port, Path, _}} -> CookieHeaders = httpc_cookie:header(CookieDb, Scheme, {Host, Port}, Path), @@ -894,6 +894,19 @@ make_db_name(ProfileName, Post) -> list_to_atom(atom_to_list(ProfileName) ++ Post). +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). + + +%%-------------------------------------------------------------------------- + call(ProfileName, Msg) -> Timeout = infinity, diff --git a/lib/inets/src/http_client/httpc_response.erl b/lib/inets/src/http_client/httpc_response.erl index 919115a23a..23924e355e 100644 --- a/lib/inets/src/http_client/httpc_response.erl +++ b/lib/inets/src/http_client/httpc_response.erl @@ -342,7 +342,7 @@ redirect(Response = {StatusLine, Headers, Body}, Request) -> RedirUrl -> UrlParseOpts = [{ipv6_host_with_brackets, Request#request.ipv6_host_with_brackets}], - case http_uri:parse(RedirUrl, UrlParseOpts) of + case uri_parse(RedirUrl, UrlParseOpts) of {error, no_scheme} when (Request#request.settings)#http_options.relaxed -> NewLocation = fix_relative_uri(Request, RedirUrl), @@ -437,3 +437,17 @@ format_response({StatusLine, Headers, Body}) -> end, {{StatusLine, http_response:header_list(Headers), NewBody}, Data}. +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). + + +%%-------------------------------------------------------------------------- + + diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl index d0d1033cf6..5962001c3a 100644 --- a/lib/inets/src/http_lib/http_uri.erl +++ b/lib/inets/src/http_lib/http_uri.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2011. All Rights Reserved. +%% Copyright Ericsson AB 2006-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -17,39 +17,95 @@ %% %CopyrightEnd% %% %% -%% RFC 3986 +%% This is from chapter 3, Syntax Components, of RFC 3986: +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% authority = [ userinfo "@" ] host [ ":" port ] +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% %% -module(http_uri). -export([parse/1, parse/2, + scheme_defaults/0, encode/1, decode/1]). +-export_type([scheme/0, default_scheme_port_number/0]). + %%%========================================================================= %%% API %%%========================================================================= + +-type scheme() :: atom(). +-type default_scheme_port_number() :: pos_integer(). + +-spec scheme_defaults() -> + [{scheme(), default_scheme_port_number()}]. + +scheme_defaults() -> + [{http, 80}, + {https, 443}, + {ftp, 21}, + {ssh, 22}, + {sftp, 22}, + {tftp, 69}]. + parse(AbsURI) -> parse(AbsURI, []). parse(AbsURI, Opts) -> - case parse_scheme(AbsURI) of + case parse_scheme(AbsURI, Opts) of {error, Reason} -> {error, Reason}; - {Scheme, Rest} -> - case (catch parse_uri_rest(Scheme, Rest, Opts)) of - {UserInfo, Host, Port, Path, Query} -> + {Scheme, DefaultPort, Rest} -> + case (catch parse_uri_rest(Scheme, DefaultPort, Rest, Opts)) of + {ok, {UserInfo, Host, Port, Path, Query}} -> {ok, {Scheme, UserInfo, Host, Port, Path, Query}}; + {error, Reason} -> + {error, {Reason, Scheme, AbsURI}}; _ -> - {error, {malformed_url, AbsURI}} + {error, {malformed_url, Scheme, AbsURI}} end end. +reserved() -> + sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, + $#, $[, $], $<, $>, $\", ${, $}, $|, + $\\, $', $^, $%, $ ]). + encode(URI) -> - Reserved = sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, - $#, $[, $], $<, $>, $\", ${, $}, $|, - $\\, $', $^, $%, $ ]), - %% lists:append(lists:map(fun(Char) -> uri_encode(Char, Reserved) end, URI)). + Reserved = reserved(), lists:append([uri_encode(Char, Reserved) || Char <- URI]). decode(String) -> @@ -67,23 +123,31 @@ do_decode([]) -> %%% Internal functions %%%======================================================================== -parse_scheme(AbsURI) -> +which_scheme_defaults(Opts) -> + Key = scheme_defaults, + case lists:keysearch(Key, 1, Opts) of + {value, {Key, SchemeDefaults}} -> + SchemeDefaults; + false -> + scheme_defaults() + end. + +parse_scheme(AbsURI, Opts) -> case split_uri(AbsURI, ":", {error, no_scheme}, 1, 1) of {error, no_scheme} -> {error, no_scheme}; - {StrScheme, Rest} -> - %% case list_to_atom(http_util:to_lower(StrScheme)) of - %% Scheme when (Scheme =:= http) orelse (Scheme =:= https) -> - %% {Scheme, Rest}; - %% Scheme -> - %% {error, {not_supported_scheme, Scheme}} - %% end - - %% Allow all schemes even if they are unknown - {list_to_atom(http_util:to_lower(StrScheme)), Rest} + {SchemeStr, Rest} -> + Scheme = list_to_atom(http_util:to_lower(SchemeStr)), + SchemeDefaults = which_scheme_defaults(Opts), + case lists:keysearch(Scheme, 1, SchemeDefaults) of + {value, {Scheme, DefaultPort}} -> + {Scheme, DefaultPort, Rest}; + false -> + {Scheme, no_default_port, Rest} + end end. -parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> +parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> {Authority, PathQuery} = case split_uri(URIPart, "/", URIPart, 1, 0) of Split = {_, _} -> @@ -96,26 +160,25 @@ parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> {URIPart,""} end end, - {UserInfo, HostPort} = split_uri(Authority, "@", {"", Authority}, 1, 1), - {Host, Port} = parse_host_port(Scheme, HostPort, Opts), + {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts), {Path, Query} = parse_path_query(PathQuery), - {UserInfo, Host, Port, Path, Query}. + {ok, {UserInfo, Host, Port, Path, Query}}. parse_path_query(PathQuery) -> {Path, Query} = split_uri(PathQuery, "\\?", {PathQuery, ""}, 1, 0), {path(Path), Query}. -parse_host_port(Scheme,"[" ++ HostPort, Opts) -> %ipv6 - DefaultPort = default_port(Scheme), +%% In this version of the function, we no longer need +%% the Scheme argument, but just in case... +parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6 {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1), Host2 = maybe_ipv6_host_with_brackets(Host, Opts), {_, Port} = split_uri(ColonPort, ":", {"", DefaultPort}, 0, 1), {Host2, int_port(Port)}; -parse_host_port(Scheme, HostPort, _Opts) -> - DefaultPort = default_port(Scheme), +parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) -> {Host, Port} = split_uri(HostPort, ":", {HostPort, DefaultPort}, 1, 1), {Host, int_port(Port)}. @@ -136,26 +199,14 @@ maybe_ipv6_host_with_brackets(Host, Opts) -> Host end. -default_port(http) -> - 80; -default_port(https) -> - 443; - -%%% Added some additional default ports -%%% Other protocols would have to be handled by the calling function -default_port(ftp) -> 21; -default_port(ssh) -> 22; -default_port(sftp) -> 22; -default_port(tftp) -> 69; - -default_port(_) -> - undefined. - int_port(Port) when is_integer(Port) -> Port; int_port(Port) when is_list(Port) -> - list_to_integer(Port). + list_to_integer(Port); +%% This is the case where no port was found and there was no default port +int_port(no_default_port) -> + throw({error, no_default_port}). path("") -> "/"; diff --git a/lib/inets/test/httpc_SUITE.erl b/lib/inets/test/httpc_SUITE.erl index 61bb5214f3..a116edef77 100644 --- a/lib/inets/test/httpc_SUITE.erl +++ b/lib/inets/test/httpc_SUITE.erl @@ -1974,7 +1974,7 @@ parse_url(Config) when is_list(Config) -> http_uri:parse("http://[2010:836B:4179::836B:4179]/foobar.html", [{foo, false}]), {error, - {malformed_url,"http://2010:836B:4179::836B:4179/foobar.html"}} = + {malformed_url, _, "http://2010:836B:4179::836B:4179/foobar.html"}} = http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html"), %% ipv4 @@ -1990,8 +1990,8 @@ parse_url(Config) when is_list(Config) -> http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html"), %% Scheme error - {error,no_scheme} = http_uri:parse("localhost/foobar.html"), - {error,{not_supported_scheme,localhost}} = + {error, no_scheme} = http_uri:parse("localhost/foobar.html"), + {error, {malformed_url, _, _}} = http_uri:parse("localhost:8888/foobar.html"), %% Query |