aboutsummaryrefslogtreecommitdiffstats
path: root/lib/inets/src/http_lib/http_uri.erl
diff options
context:
space:
mode:
authorMicael Karlberg <[email protected]>2012-03-15 11:59:40 +0100
committerMicael Karlberg <[email protected]>2012-03-15 11:59:40 +0100
commita366623c674e993667fedbe01ad52dc4fab5b4f0 (patch)
treef377605e6270ad279cfe48253b8ae84b5c99b951 /lib/inets/src/http_lib/http_uri.erl
parent0c11d7235ed1f0f5c595cf3d9a433adf9c61cc8c (diff)
downloadotp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.gz
otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.tar.bz2
otp-a366623c674e993667fedbe01ad52dc4fab5b4f0.zip
[inets] The module http_uri now officially supported
The module http_uri now officially supported. Also, the http_uri:parse/1,2 function has been extended with more scheme support and a way to provide your own scheme info. OTP-9983
Diffstat (limited to 'lib/inets/src/http_lib/http_uri.erl')
-rw-r--r--lib/inets/src/http_lib/http_uri.erl143
1 files changed, 97 insertions, 46 deletions
diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl
index d0d1033cf6..5962001c3a 100644
--- a/lib/inets/src/http_lib/http_uri.erl
+++ b/lib/inets/src/http_lib/http_uri.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2006-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2006-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -17,39 +17,95 @@
%% %CopyrightEnd%
%%
%%
-%% RFC 3986
+%% This is from chapter 3, Syntax Components, of RFC 3986:
+%%
+%% The generic URI syntax consists of a hierarchical sequence of
+%% components referred to as the scheme, authority, path, query, and
+%% fragment.
+%%
+%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+%%
+%% hier-part = "//" authority path-abempty
+%% / path-absolute
+%% / path-rootless
+%% / path-empty
+%%
+%% The scheme and path components are required, though the path may be
+%% empty (no characters). When authority is present, the path must
+%% either be empty or begin with a slash ("/") character. When
+%% authority is not present, the path cannot begin with two slash
+%% characters ("//"). These restrictions result in five different ABNF
+%% rules for a path (Section 3.3), only one of which will match any
+%% given URI reference.
+%%
+%% The following are two example URIs and their component parts:
+%%
+%% foo://example.com:8042/over/there?name=ferret#nose
+%% \_/ \______________/\_________/ \_________/ \__/
+%% | | | | |
+%% scheme authority path query fragment
+%% | _____________________|__
+%% / \ / \
+%% urn:example:animal:ferret:nose
+%%
+%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+%% authority = [ userinfo "@" ] host [ ":" port ]
+%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+%%
%%
-module(http_uri).
-export([parse/1, parse/2,
+ scheme_defaults/0,
encode/1, decode/1]).
+-export_type([scheme/0, default_scheme_port_number/0]).
+
%%%=========================================================================
%%% API
%%%=========================================================================
+
+-type scheme() :: atom().
+-type default_scheme_port_number() :: pos_integer().
+
+-spec scheme_defaults() ->
+ [{scheme(), default_scheme_port_number()}].
+
+scheme_defaults() ->
+ [{http, 80},
+ {https, 443},
+ {ftp, 21},
+ {ssh, 22},
+ {sftp, 22},
+ {tftp, 69}].
+
parse(AbsURI) ->
parse(AbsURI, []).
parse(AbsURI, Opts) ->
- case parse_scheme(AbsURI) of
+ case parse_scheme(AbsURI, Opts) of
{error, Reason} ->
{error, Reason};
- {Scheme, Rest} ->
- case (catch parse_uri_rest(Scheme, Rest, Opts)) of
- {UserInfo, Host, Port, Path, Query} ->
+ {Scheme, DefaultPort, Rest} ->
+ case (catch parse_uri_rest(Scheme, DefaultPort, Rest, Opts)) of
+ {ok, {UserInfo, Host, Port, Path, Query}} ->
{ok, {Scheme, UserInfo, Host, Port, Path, Query}};
+ {error, Reason} ->
+ {error, {Reason, Scheme, AbsURI}};
_ ->
- {error, {malformed_url, AbsURI}}
+ {error, {malformed_url, Scheme, AbsURI}}
end
end.
+reserved() ->
+ sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?,
+ $#, $[, $], $<, $>, $\", ${, $}, $|,
+ $\\, $', $^, $%, $ ]).
+
encode(URI) ->
- Reserved = sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?,
- $#, $[, $], $<, $>, $\", ${, $}, $|,
- $\\, $', $^, $%, $ ]),
- %% lists:append(lists:map(fun(Char) -> uri_encode(Char, Reserved) end, URI)).
+ Reserved = reserved(),
lists:append([uri_encode(Char, Reserved) || Char <- URI]).
decode(String) ->
@@ -67,23 +123,31 @@ do_decode([]) ->
%%% Internal functions
%%%========================================================================
-parse_scheme(AbsURI) ->
+which_scheme_defaults(Opts) ->
+ Key = scheme_defaults,
+ case lists:keysearch(Key, 1, Opts) of
+ {value, {Key, SchemeDefaults}} ->
+ SchemeDefaults;
+ false ->
+ scheme_defaults()
+ end.
+
+parse_scheme(AbsURI, Opts) ->
case split_uri(AbsURI, ":", {error, no_scheme}, 1, 1) of
{error, no_scheme} ->
{error, no_scheme};
- {StrScheme, Rest} ->
- %% case list_to_atom(http_util:to_lower(StrScheme)) of
- %% Scheme when (Scheme =:= http) orelse (Scheme =:= https) ->
- %% {Scheme, Rest};
- %% Scheme ->
- %% {error, {not_supported_scheme, Scheme}}
- %% end
-
- %% Allow all schemes even if they are unknown
- {list_to_atom(http_util:to_lower(StrScheme)), Rest}
+ {SchemeStr, Rest} ->
+ Scheme = list_to_atom(http_util:to_lower(SchemeStr)),
+ SchemeDefaults = which_scheme_defaults(Opts),
+ case lists:keysearch(Scheme, 1, SchemeDefaults) of
+ {value, {Scheme, DefaultPort}} ->
+ {Scheme, DefaultPort, Rest};
+ false ->
+ {Scheme, no_default_port, Rest}
+ end
end.
-parse_uri_rest(Scheme, "//" ++ URIPart, Opts) ->
+parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) ->
{Authority, PathQuery} =
case split_uri(URIPart, "/", URIPart, 1, 0) of
Split = {_, _} ->
@@ -96,26 +160,25 @@ parse_uri_rest(Scheme, "//" ++ URIPart, Opts) ->
{URIPart,""}
end
end,
-
{UserInfo, HostPort} = split_uri(Authority, "@", {"", Authority}, 1, 1),
- {Host, Port} = parse_host_port(Scheme, HostPort, Opts),
+ {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts),
{Path, Query} = parse_path_query(PathQuery),
- {UserInfo, Host, Port, Path, Query}.
+ {ok, {UserInfo, Host, Port, Path, Query}}.
parse_path_query(PathQuery) ->
{Path, Query} = split_uri(PathQuery, "\\?", {PathQuery, ""}, 1, 0),
{path(Path), Query}.
-parse_host_port(Scheme,"[" ++ HostPort, Opts) -> %ipv6
- DefaultPort = default_port(Scheme),
+%% In this version of the function, we no longer need
+%% the Scheme argument, but just in case...
+parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6
{Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1),
Host2 = maybe_ipv6_host_with_brackets(Host, Opts),
{_, Port} = split_uri(ColonPort, ":", {"", DefaultPort}, 0, 1),
{Host2, int_port(Port)};
-parse_host_port(Scheme, HostPort, _Opts) ->
- DefaultPort = default_port(Scheme),
+parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) ->
{Host, Port} = split_uri(HostPort, ":", {HostPort, DefaultPort}, 1, 1),
{Host, int_port(Port)}.
@@ -136,26 +199,14 @@ maybe_ipv6_host_with_brackets(Host, Opts) ->
Host
end.
-default_port(http) ->
- 80;
-default_port(https) ->
- 443;
-
-%%% Added some additional default ports
-%%% Other protocols would have to be handled by the calling function
-default_port(ftp) -> 21;
-default_port(ssh) -> 22;
-default_port(sftp) -> 22;
-default_port(tftp) -> 69;
-
-default_port(_) ->
- undefined.
-
int_port(Port) when is_integer(Port) ->
Port;
int_port(Port) when is_list(Port) ->
- list_to_integer(Port).
+ list_to_integer(Port);
+%% This is the case where no port was found and there was no default port
+int_port(no_default_port) ->
+ throw({error, no_default_port}).
path("") ->
"/";