%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2006-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% %% %% This is from chapter 3, Syntax Components, of RFC 3986: %% %% The generic URI syntax consists of a hierarchical sequence of %% components referred to as the scheme, authority, path, query, and %% fragment. %% %% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] %% %% hier-part = "//" authority path-abempty %% / path-absolute %% / path-rootless %% / path-empty %% %% The scheme and path components are required, though the path may be %% empty (no characters). When authority is present, the path must %% either be empty or begin with a slash ("/") character. When %% authority is not present, the path cannot begin with two slash %% characters ("//"). These restrictions result in five different ABNF %% rules for a path (Section 3.3), only one of which will match any %% given URI reference. %% %% The following are two example URIs and their component parts: %% %% foo://example.com:8042/over/there?name=ferret#nose %% \_/ \______________/\_________/ \_________/ \__/ %% | | | | | %% scheme authority path query fragment %% | _____________________|__ %% / \ / \ %% urn:example:animal:ferret:nose %% %% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) %% authority = [ userinfo "@" ] host [ ":" port ] %% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) %% %% -module(http_uri). -export([parse/1, parse/2, scheme_defaults/0, encode/1, decode/1]). -export_type([scheme/0, default_scheme_port_number/0]). %%%========================================================================= %%% API %%%========================================================================= -type scheme() :: atom(). -type default_scheme_port_number() :: pos_integer(). -spec scheme_defaults() -> [{scheme(), default_scheme_port_number()}]. scheme_defaults() -> [{http, 80}, {https, 443}, {ftp, 21}, {ssh, 22}, {sftp, 22}, {tftp, 69}]. parse(AbsURI) -> parse(AbsURI, []). parse(AbsURI, Opts) -> case parse_scheme(AbsURI, Opts) of {error, Reason} -> {error, Reason}; {Scheme, DefaultPort, Rest} -> case (catch parse_uri_rest(Scheme, DefaultPort, Rest, Opts)) of {ok, {UserInfo, Host, Port, Path, Query}} -> {ok, {Scheme, UserInfo, Host, Port, Path, Query}}; {error, Reason} -> {error, {Reason, Scheme, AbsURI}}; _ -> {error, {malformed_url, Scheme, AbsURI}} end end. reserved() -> sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, $#, $[, $], $<, $>, $\", ${, $}, $|, $\\, $', $^, $%, $ ]). encode(URI) -> Reserved = reserved(), lists:append([uri_encode(Char, Reserved) || Char <- URI]). decode(String) -> do_decode(String). do_decode([$%,Hex1,Hex2|Rest]) -> [hex2dec(Hex1)*16+hex2dec(Hex2)|do_decode(Rest)]; do_decode([First|Rest]) -> [First|do_decode(Rest)]; do_decode([]) -> []. %%%======================================================================== %%% Internal functions %%%======================================================================== which_scheme_defaults(Opts) -> Key = scheme_defaults, case lists:keysearch(Key, 1, Opts) of {value, {Key, SchemeDefaults}} -> SchemeDefaults; false -> scheme_defaults() end. parse_scheme(AbsURI, Opts) -> case split_uri(AbsURI, ":", {error, no_scheme}, 1, 1) of {error, no_scheme} -> {error, no_scheme}; {SchemeStr, Rest} -> Scheme = list_to_atom(http_util:to_lower(SchemeStr)), SchemeDefaults = which_scheme_defaults(Opts), case lists:keysearch(Scheme, 1, SchemeDefaults) of {value, {Scheme, DefaultPort}} -> {Scheme, DefaultPort, Rest}; false -> {Scheme, no_default_port, Rest} end end. parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> {Authority, PathQuery} = case split_uri(URIPart, "/", URIPart, 1, 0) of Split = {_, _} -> Split; URIPart -> case split_uri(URIPart, "\\?", URIPart, 1, 0) of Split = {_, _} -> Split; URIPart -> {URIPart,""} end end, {UserInfo, HostPort} = split_uri(Authority, "@", {"", Authority}, 1, 1), {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts), {Path, Query} = parse_path_query(PathQuery), {ok, {UserInfo, Host, Port, Path, Query}}. parse_path_query(PathQuery) -> {Path, Query} = split_uri(PathQuery, "\\?", {PathQuery, ""}, 1, 0), {path(Path), Query}. %% In this version of the function, we no longer need %% the Scheme argument, but just in case... parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6 {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1), Host2 = maybe_ipv6_host_with_brackets(Host, Opts), {_, Port} = split_uri(ColonPort, ":", {"", DefaultPort}, 0, 1), {Host2, int_port(Port)}; parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) -> {Host, Port} = split_uri(HostPort, ":", {HostPort, DefaultPort}, 1, 1), {Host, int_port(Port)}. split_uri(UriPart, SplitChar, NoMatchResult, SkipLeft, SkipRight) -> case inets_regexp:first_match(UriPart, SplitChar) of {match, Match, _} -> {string:substr(UriPart, 1, Match - SkipLeft), string:substr(UriPart, Match + SkipRight, length(UriPart))}; nomatch -> NoMatchResult end. maybe_ipv6_host_with_brackets(Host, Opts) -> case lists:keysearch(ipv6_host_with_brackets, 1, Opts) of {value, {ipv6_host_with_brackets, true}} -> "[" ++ Host ++ "]"; _ -> Host end. int_port(Port) when is_integer(Port) -> Port; int_port(Port) when is_list(Port) -> list_to_integer(Port); %% This is the case where no port was found and there was no default port int_port(no_default_port) -> throw({error, no_default_port}). path("") -> "/"; path(Path) -> Path. uri_encode(Char, Reserved) -> case sets:is_element(Char, Reserved) of true -> [ $% | http_util:integer_to_hexlist(Char)]; false -> [Char] end. hex2dec(X) when (X>=$0) andalso (X=<$9) -> X-$0; hex2dec(X) when (X>=$A) andalso (X=<$F) -> X-$A+10; hex2dec(X) when (X>=$a) andalso (X=<$f) -> X-$a+10.