From a366623c674e993667fedbe01ad52dc4fab5b4f0 Mon Sep 17 00:00:00 2001 From: Micael Karlberg Date: Thu, 15 Mar 2012 11:59:40 +0100 Subject: [inets] The module http_uri now officially supported The module http_uri now officially supported. Also, the http_uri:parse/1,2 function has been extended with more scheme support and a way to provide your own scheme info. OTP-9983 --- lib/inets/doc/src/Makefile | 1 + lib/inets/doc/src/http_uri.xml | 160 +++++++++++++++++++++++++++ lib/inets/doc/src/notes.xml | 12 ++ lib/inets/doc/src/ref_man.xml | 9 +- lib/inets/src/http_client/httpc.erl | 26 +++-- lib/inets/src/http_client/httpc_manager.erl | 15 ++- lib/inets/src/http_client/httpc_response.erl | 16 ++- lib/inets/src/http_lib/http_uri.erl | 143 ++++++++++++++++-------- lib/inets/test/httpc_SUITE.erl | 6 +- 9 files changed, 326 insertions(+), 62 deletions(-) create mode 100644 lib/inets/doc/src/http_uri.xml (limited to 'lib/inets') diff --git a/lib/inets/doc/src/Makefile b/lib/inets/doc/src/Makefile index 53d505b102..c4152a1d72 100644 --- a/lib/inets/doc/src/Makefile +++ b/lib/inets/doc/src/Makefile @@ -48,6 +48,7 @@ XML_REF3_FILES = \ inets.xml \ ftp.xml \ tftp.xml \ + http_uri.xml\ httpc.xml\ httpd.xml \ httpd_conf.xml \ diff --git a/lib/inets/doc/src/http_uri.xml b/lib/inets/doc/src/http_uri.xml new file mode 100644 index 0000000000..bd31ae42d2 --- /dev/null +++ b/lib/inets/doc/src/http_uri.xml @@ -0,0 +1,160 @@ + + + + +
+ + 20122012 + Ericsson AB. All Rights Reserved. + + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + + + http_uri + + + + + +
+ + http_uri + URI utility module + + +

This module provides utility functions for working with URIs, + according to RFC 3986.

+ +
+ +
+ COMMON DATA TYPES +

Type definitions that are used more than once in + this module:

+ + +
+ +
+ URI DATA TYPES +

Type definitions that are related to URI:

+

For more information about URI, see RFC 3986.

+ + + + +
+ + + + scheme_defaults() -> SchemeDefaults + A list of scheme and their default ports + + SchemeDefaults = [{scheme(), default_scheme_port_number()}] + default_scheme_port_number() = pos_integer() + + +

This function provides a list of the schemes, and their default + port numbers, that are currently supported (by default) by this utility.

+ + +
+
+ + + parse(URI) -> {ok, Result} | {error, Reason} + parse(URI, Options) -> {ok, Result} | {error, Reason} + Parse an URI + + URI = uri() + Options = [Option] + Option = {ipv6_host_with_brackets, boolean()} | + {scheme_defaults, scheme_defaults()}] + Result = {Scheme, UserInfo, Host, Port, Path, Query} + UserInfo = user_info() + Host = host() + Port = pos_integer() + Path = path() + Query = query() + Reason = term() + + +

This function is used to parse a URI. If no scheme defaults + are provided, the value returned by the + scheme_defaults + function will be used.

+ +

Note that when parsing a URI with an unknown scheme (that is, + a scheme not found in the scheme defaults), a port number must be + provided or else the parsing will fail.

+ + +
+
+ + + encode(URI) -> HexEncodedURI + + Hex encode an URI + + URI = uri() + HexEncodedURI = string() - Hex encoded uri + + + +

Hex encode a URI.

+ + +
+
+ + + decode(HexEncodedURI) -> URI + + Decode a hex encoded URI + + HexEncodedURI = string() - A possibly hex encoded uri + URI = uri() + + + +

Decode a possibly hex encoded URI.

+ +
+
+ +
+ + + +
diff --git a/lib/inets/doc/src/notes.xml b/lib/inets/doc/src/notes.xml index f2cd03b6a8..dfdeb4016c 100644 --- a/lib/inets/doc/src/notes.xml +++ b/lib/inets/doc/src/notes.xml @@ -70,6 +70,18 @@

Own Id: OTP-9979

+ +

The utility module + http_uri + is now officially supported.

+

Also, the + parse + function has been extended with more + scheme support and a way to provide your own scheme info.

+

Own Id: OTP-9983

+

Aux Id: Seq 12022

+
+ diff --git a/lib/inets/doc/src/ref_man.xml b/lib/inets/doc/src/ref_man.xml index 45d5dfcd0e..e44829827c 100644 --- a/lib/inets/doc/src/ref_man.xml +++ b/lib/inets/doc/src/ref_man.xml @@ -1,10 +1,10 @@ - +
- 19972010 + 19972012 Ericsson AB. All Rights Reserved. @@ -30,8 +30,8 @@

Inets is a container for Internet clients and - servers. Currently a FTP client, a HTTP client and server, and - a tftp client and server has been incorporated in Inets.

+ servers. Currently a FTP client, a HTTP client and server, and + a tftp client and server has been incorporated in Inets.

@@ -45,6 +45,7 @@ +
diff --git a/lib/inets/src/http_client/httpc.erl b/lib/inets/src/http_client/httpc.erl index 2c51c2081c..0a30fe1e20 100644 --- a/lib/inets/src/http_client/httpc.erl +++ b/lib/inets/src/http_client/httpc.erl @@ -158,7 +158,7 @@ request(Method, {http_options, HTTPOptions}, {options, Options}, {profile, Profile}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {error, Reason} -> {error, Reason}; {ok, ParsedUrl} -> @@ -179,7 +179,7 @@ request(Method, {http_options, HTTPOptions}, {options, Options}, {profile, Profile}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {error, Reason} -> {error, Reason}; {ok, ParsedUrl} -> @@ -328,7 +328,7 @@ store_cookies(SetCookieHeaders, Url, Profile) %% Since the Address part is not actually used %% by the manager when storing cookies, we dont %% care about ipv6-host-with-brackets. - {ok, {_, _, Host, Port, Path, _}} = http_uri:parse(Url), + {ok, {_, _, Host, Port, Path, _}} = uri_parse(Url), Address = {Host, Port}, ProfileName = profile_name(Profile), Cookies = httpc_cookie:cookies(SetCookieHeaders, Path, Host), @@ -585,10 +585,6 @@ maybe_encode_uri(#http_options{url_encode = true}, URI) -> maybe_encode_uri(_, URI) -> URI. -uri_parse(AbsURI) -> - http_uri:parse(AbsURI, [{scheme_defaults, [{http, 80}, {https, 443}]}]). - - mk_chunkify_fun(ProcessBody) -> fun(eof_body) -> eof; @@ -1202,6 +1198,22 @@ validate_headers(RequestHeaders, _, _) -> RequestHeaders. +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()}]). + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). 
+ + +%%-------------------------------------------------------------------------- + child_name2info(undefined) -> {error, no_such_service}; child_name2info(httpc_manager) -> diff --git a/lib/inets/src/http_client/httpc_manager.erl b/lib/inets/src/http_client/httpc_manager.erl index 33b5dfe046..b225b43214 100644 --- a/lib/inets/src/http_client/httpc_manager.erl +++ b/lib/inets/src/http_client/httpc_manager.erl @@ -446,7 +446,7 @@ handle_call(which_cookies, _, #state{cookie_db = CookieDb} = State) -> handle_call({which_cookies, Url, Options}, _, #state{cookie_db = CookieDb} = State) -> ?hcrv("which cookies", [{url, Url}, {options, Options}]), - case http_uri:parse(Url, Options) of + case uri_parse(Url, Options) of {ok, {Scheme, _, Host, Port, Path, _}} -> CookieHeaders = httpc_cookie:header(CookieDb, Scheme, {Host, Port}, Path), @@ -894,6 +894,19 @@ make_db_name(ProfileName, Post) -> list_to_atom(atom_to_list(ProfileName) ++ Post). +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). 
+ + +%%-------------------------------------------------------------------------- + call(ProfileName, Msg) -> Timeout = infinity, diff --git a/lib/inets/src/http_client/httpc_response.erl b/lib/inets/src/http_client/httpc_response.erl index 919115a23a..23924e355e 100644 --- a/lib/inets/src/http_client/httpc_response.erl +++ b/lib/inets/src/http_client/httpc_response.erl @@ -342,7 +342,7 @@ redirect(Response = {StatusLine, Headers, Body}, Request) -> RedirUrl -> UrlParseOpts = [{ipv6_host_with_brackets, Request#request.ipv6_host_with_brackets}], - case http_uri:parse(RedirUrl, UrlParseOpts) of + case uri_parse(RedirUrl, UrlParseOpts) of {error, no_scheme} when (Request#request.settings)#http_options.relaxed -> NewLocation = fix_relative_uri(Request, RedirUrl), @@ -437,3 +437,17 @@ format_response({StatusLine, Headers, Body}) -> end, {{StatusLine, http_response:header_list(Headers), NewBody}, Data}. +%%-------------------------------------------------------------------------- +%% These functions is just simple wrappers to parse specifically HTTP URIs +%%-------------------------------------------------------------------------- + +scheme_defaults() -> + [{http, 80}, {https, 443}]. + +uri_parse(URI, Opts) -> + http_uri:parse(URI, [{scheme_defaults, scheme_defaults()} | Opts]). + + +%%-------------------------------------------------------------------------- + + diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl index d0d1033cf6..5962001c3a 100644 --- a/lib/inets/src/http_lib/http_uri.erl +++ b/lib/inets/src/http_lib/http_uri.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2011. All Rights Reserved. +%% Copyright Ericsson AB 2006-2012. All Rights Reserved. 
%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -17,39 +17,95 @@ %% %CopyrightEnd% %% %% -%% RFC 3986 +%% This is from chapter 3, Syntax Components, of RFC 3986: +%% +%% The generic URI syntax consists of a hierarchical sequence of +%% components referred to as the scheme, authority, path, query, and +%% fragment. +%% +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% +%% The scheme and path components are required, though the path may be +%% empty (no characters). When authority is present, the path must +%% either be empty or begin with a slash ("/") character. When +%% authority is not present, the path cannot begin with two slash +%% characters ("//"). These restrictions result in five different ABNF +%% rules for a path (Section 3.3), only one of which will match any +%% given URI reference. +%% +%% The following are two example URIs and their component parts: +%% +%% foo://example.com:8042/over/there?name=ferret#nose +%% \_/ \______________/\_________/ \_________/ \__/ +%% | | | | | +%% scheme authority path query fragment +%% | _____________________|__ +%% / \ / \ +%% urn:example:animal:ferret:nose +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%% authority = [ userinfo "@" ] host [ ":" port ] +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%% %% -module(http_uri). -export([parse/1, parse/2, + scheme_defaults/0, encode/1, decode/1]). +-export_type([scheme/0, default_scheme_port_number/0]). + %%%========================================================================= %%% API %%%========================================================================= + +-type scheme() :: atom(). +-type default_scheme_port_number() :: pos_integer(). + +-spec scheme_defaults() -> + [{scheme(), default_scheme_port_number()}]. 
+ +scheme_defaults() -> + [{http, 80}, + {https, 443}, + {ftp, 21}, + {ssh, 22}, + {sftp, 22}, + {tftp, 69}]. + parse(AbsURI) -> parse(AbsURI, []). parse(AbsURI, Opts) -> - case parse_scheme(AbsURI) of + case parse_scheme(AbsURI, Opts) of {error, Reason} -> {error, Reason}; - {Scheme, Rest} -> - case (catch parse_uri_rest(Scheme, Rest, Opts)) of - {UserInfo, Host, Port, Path, Query} -> + {Scheme, DefaultPort, Rest} -> + case (catch parse_uri_rest(Scheme, DefaultPort, Rest, Opts)) of + {ok, {UserInfo, Host, Port, Path, Query}} -> {ok, {Scheme, UserInfo, Host, Port, Path, Query}}; + {error, Reason} -> + {error, {Reason, Scheme, AbsURI}}; _ -> - {error, {malformed_url, AbsURI}} + {error, {malformed_url, Scheme, AbsURI}} end end. +reserved() -> + sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, + $#, $[, $], $<, $>, $\", ${, $}, $|, + $\\, $', $^, $%, $ ]). + encode(URI) -> - Reserved = sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, - $#, $[, $], $<, $>, $\", ${, $}, $|, - $\\, $', $^, $%, $ ]), - %% lists:append(lists:map(fun(Char) -> uri_encode(Char, Reserved) end, URI)). + Reserved = reserved(), lists:append([uri_encode(Char, Reserved) || Char <- URI]). decode(String) -> @@ -67,23 +123,31 @@ do_decode([]) -> %%% Internal functions %%%======================================================================== -parse_scheme(AbsURI) -> +which_scheme_defaults(Opts) -> + Key = scheme_defaults, + case lists:keysearch(Key, 1, Opts) of + {value, {Key, SchemeDefaults}} -> + SchemeDefaults; + false -> + scheme_defaults() + end. 
+ +parse_scheme(AbsURI, Opts) -> case split_uri(AbsURI, ":", {error, no_scheme}, 1, 1) of {error, no_scheme} -> {error, no_scheme}; - {StrScheme, Rest} -> - %% case list_to_atom(http_util:to_lower(StrScheme)) of - %% Scheme when (Scheme =:= http) orelse (Scheme =:= https) -> - %% {Scheme, Rest}; - %% Scheme -> - %% {error, {not_supported_scheme, Scheme}} - %% end - - %% Allow all schemes even if they are unknown - {list_to_atom(http_util:to_lower(StrScheme)), Rest} + {SchemeStr, Rest} -> + Scheme = list_to_atom(http_util:to_lower(SchemeStr)), + SchemeDefaults = which_scheme_defaults(Opts), + case lists:keysearch(Scheme, 1, SchemeDefaults) of + {value, {Scheme, DefaultPort}} -> + {Scheme, DefaultPort, Rest}; + false -> + {Scheme, no_default_port, Rest} + end end. -parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> +parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> {Authority, PathQuery} = case split_uri(URIPart, "/", URIPart, 1, 0) of Split = {_, _} -> @@ -96,26 +160,25 @@ parse_uri_rest(Scheme, "//" ++ URIPart, Opts) -> {URIPart,""} end end, - {UserInfo, HostPort} = split_uri(Authority, "@", {"", Authority}, 1, 1), - {Host, Port} = parse_host_port(Scheme, HostPort, Opts), + {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts), {Path, Query} = parse_path_query(PathQuery), - {UserInfo, Host, Port, Path, Query}. + {ok, {UserInfo, Host, Port, Path, Query}}. parse_path_query(PathQuery) -> {Path, Query} = split_uri(PathQuery, "\\?", {PathQuery, ""}, 1, 0), {path(Path), Query}. -parse_host_port(Scheme,"[" ++ HostPort, Opts) -> %ipv6 - DefaultPort = default_port(Scheme), +%% In this version of the function, we no longer need +%% the Scheme argument, but just in case... 
+parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6 {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1), Host2 = maybe_ipv6_host_with_brackets(Host, Opts), {_, Port} = split_uri(ColonPort, ":", {"", DefaultPort}, 0, 1), {Host2, int_port(Port)}; -parse_host_port(Scheme, HostPort, _Opts) -> - DefaultPort = default_port(Scheme), +parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) -> {Host, Port} = split_uri(HostPort, ":", {HostPort, DefaultPort}, 1, 1), {Host, int_port(Port)}. @@ -136,26 +199,14 @@ maybe_ipv6_host_with_brackets(Host, Opts) -> Host end. -default_port(http) -> - 80; -default_port(https) -> - 443; - -%%% Added some additional default ports -%%% Other protocols would have to be handled by the calling function -default_port(ftp) -> 21; -default_port(ssh) -> 22; -default_port(sftp) -> 22; -default_port(tftp) -> 69; - -default_port(_) -> - undefined. - int_port(Port) when is_integer(Port) -> Port; int_port(Port) when is_list(Port) -> - list_to_integer(Port). + list_to_integer(Port); +%% This is the case where no port was found and there was no default port +int_port(no_default_port) -> + throw({error, no_default_port}). 
path("") -> "/"; diff --git a/lib/inets/test/httpc_SUITE.erl b/lib/inets/test/httpc_SUITE.erl index 61bb5214f3..a116edef77 100644 --- a/lib/inets/test/httpc_SUITE.erl +++ b/lib/inets/test/httpc_SUITE.erl @@ -1974,7 +1974,7 @@ parse_url(Config) when is_list(Config) -> http_uri:parse("http://[2010:836B:4179::836B:4179]/foobar.html", [{foo, false}]), {error, - {malformed_url,"http://2010:836B:4179::836B:4179/foobar.html"}} = + {malformed_url, _, "http://2010:836B:4179::836B:4179/foobar.html"}} = http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html"), %% ipv4 @@ -1990,8 +1990,8 @@ parse_url(Config) when is_list(Config) -> http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html"), %% Scheme error - {error,no_scheme} = http_uri:parse("localhost/foobar.html"), - {error,{not_supported_scheme,localhost}} = + {error, no_scheme} = http_uri:parse("localhost/foobar.html"), + {error, {malformed_url, _, _}} = http_uri:parse("localhost:8888/foobar.html"), %% Query -- cgit v1.2.3