From 485d58dfa91b91d98135dc95e5615f421715dae5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Thu, 21 Nov 2019 15:21:23 +0100 Subject: Add cow_link implementing RFC8288 (link header) --- ebin/cowlib.app | 2 +- src/cow_http_hd.erl | 8 +- src/cow_link.erl | 427 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 435 insertions(+), 2 deletions(-) create mode 100644 src/cow_link.erl diff --git a/ebin/cowlib.app b/ebin/cowlib.app index 960f5d2..5f04bb9 100644 --- a/ebin/cowlib.app +++ b/ebin/cowlib.app @@ -1,7 +1,7 @@ {application, 'cowlib', [ {description, "Support library for manipulating Web protocols."}, {vsn, "2.8.0"}, - {modules, ['cow_base64url','cow_cookie','cow_date','cow_hpack','cow_http','cow_http2','cow_http2_machine','cow_http_hd','cow_http_te','cow_iolists','cow_mimetypes','cow_multipart','cow_qs','cow_spdy','cow_sse','cow_uri','cow_ws']}, + {modules, ['cow_base64url','cow_cookie','cow_date','cow_hpack','cow_http','cow_http2','cow_http2_machine','cow_http_hd','cow_http_te','cow_iolists','cow_link','cow_mimetypes','cow_multipart','cow_qs','cow_spdy','cow_sse','cow_uri','cow_ws']}, {registered, []}, {applications, [kernel,stdlib,crypto]}, {env, []} diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl index 59bd3bf..cbc9f6b 100644 --- a/src/cow_http_hd.erl +++ b/src/cow_http_hd.erl @@ -65,7 +65,7 @@ -export([parse_if_unmodified_since/1]). % @todo -export([parse_last_event_id/1]). eventsource -export([parse_last_modified/1]). -% @todo -export([parse_link/1]). RFC5988 +-export([parse_link/1]). % @todo -export([parse_location/1]). RFC7231 -export([parse_max_forwards/1]). % @todo -export([parse_memento_datetime/1]). RFC7089 @@ -2178,6 +2178,12 @@ parse_last_modified_test_() -> [{V, fun() -> R = parse_last_modified(V) end} || {V, R} <- Tests]. -endif. +%% @doc Parse the Link header. + +-spec parse_link(binary()) -> [cow_link:link()]. +parse_link(Link) -> + cow_link:parse_link(Link). + %% @doc Parse the Max-Forwards header. -spec parse_max_forwards(binary()) -> non_neg_integer(). diff --git a/src/cow_link.erl b/src/cow_link.erl new file mode 100644 index 0000000..4cf24c2 --- /dev/null +++ b/src/cow_link.erl @@ -0,0 +1,427 @@ +%% Copyright (c) 2019, Loïc Hoguin +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(cow_link). +-compile({no_auto_import, [link/1]}). + +-export([parse_link/1]). +-export([resolve_link/2]). +-export([resolve_link/3]). +-export([link/1]). + +-include("cow_inline.hrl"). +-include("cow_parse.hrl"). + +-type link() :: #{ + target := binary(), + rel := binary(), + attributes := [{binary(), binary()}] +}. +-export_type([link/0]). + +-type resolve_opts() :: #{ + allow_anchor => boolean() +}. + +-type uri() :: uri_string:uri_map() | uri_string:uri_string() | undefined. + +%% Parse a link header. + +%% This function returns the URI target from the header directly. +%% Relative URIs must then be resolved as per RFC3986 5. In some +%% cases it might not be possible to resolve URIs, for example when +%% the link header is returned with a 404 status code. +-spec parse_link(binary()) -> [link()]. +parse_link(Link) -> + before_target(Link, []). + +before_target(<<>>, Acc) -> lists:reverse(Acc); +before_target(<<$<,R/bits>>, Acc) -> target(R, Acc, <<>>); +before_target(<>, Acc) when ?IS_WS(C) -> before_target(R, Acc). + +target(<<$>,R/bits>>, Acc, T) -> param_sep(R, Acc, T, []); +target(<>, Acc, T) -> target(R, Acc, <>). + +param_sep(<<>>, Acc, T, P) -> lists:reverse(acc_link(Acc, T, P)); +param_sep(<<$,,R/bits>>, Acc, T, P) -> before_target(R, acc_link(Acc, T, P)); +param_sep(<<$;,R/bits>>, Acc, T, P) -> before_param(R, Acc, T, P); +param_sep(<>, Acc, T, P) when ?IS_WS(C) -> param_sep(R, Acc, T, P). + +before_param(<>, Acc, T, P) when ?IS_WS(C) -> before_param(R, Acc, T, P); +before_param(<>, Acc, T, P) when ?IS_TOKEN(C) -> ?LOWER(param, R, Acc, T, P, <<>>). + +param(<<$=,$",R/bits>>, Acc, T, P, K) -> quoted(R, Acc, T, P, K, <<>>); +param(<<$=,C,R/bits>>, Acc, T, P, K) when ?IS_TOKEN(C) -> value(R, Acc, T, P, K, <>); +param(<>, Acc, T, P, K) when ?IS_TOKEN(C) -> ?LOWER(param, R, Acc, T, P, K). + +quoted(<<$",R/bits>>, Acc, T, P, K, V) -> param_sep(R, Acc, T, [{K, V}|P]); +quoted(<<$\\,C,R/bits>>, Acc, T, P, K, V) when ?IS_VCHAR_OBS(C) -> quoted(R, Acc, T, P, K, <>); +quoted(<>, Acc, T, P, K, V) when ?IS_VCHAR_OBS(C) -> quoted(R, Acc, T, P, K, <>). + +value(<>, Acc, T, P, K, V) when ?IS_TOKEN(C) -> value(R, Acc, T, P, K, <>); +value(R, Acc, T, P, K, V) -> param_sep(R, Acc, T, [{K, V}|P]). + +acc_link(Acc, Target, Params0) -> + Params1 = lists:reverse(Params0), + %% The rel parameter MUST be present. (RFC8288 3.3) + {value, {_, Rel}, Params2} = lists:keytake(<<"rel">>, 1, Params1), + %% Occurrences after the first MUST be ignored by parsers. + Params = filter_out_duplicates(Params2, #{}), + [#{ + target => Target, + rel => ?LOWER(Rel), + attributes => Params + }|Acc]. + +%% This function removes duplicates for attributes that don't allow them. +filter_out_duplicates([], _) -> + []; +%% The "rel" is mandatory and was already removed from params. +filter_out_duplicates([{<<"rel">>, _}|Tail], State) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([{<<"anchor">>, _}|Tail], State=#{anchor := true}) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([{<<"media">>, _}|Tail], State=#{media := true}) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([{<<"title">>, _}|Tail], State=#{title := true}) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([{<<"title*">>, _}|Tail], State=#{title_star := true}) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([{<<"type">>, _}|Tail], State=#{type := true}) -> + filter_out_duplicates(Tail, State); +filter_out_duplicates([Tuple={<<"anchor">>, _}|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State#{anchor => true})]; +filter_out_duplicates([Tuple={<<"media">>, _}|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State#{media => true})]; +filter_out_duplicates([Tuple={<<"title">>, _}|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State#{title => true})]; +filter_out_duplicates([Tuple={<<"title*">>, _}|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State#{title_star => true})]; +filter_out_duplicates([Tuple={<<"type">>, _}|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State#{type => true})]; +filter_out_duplicates([Tuple|Tail], State) -> + [Tuple|filter_out_duplicates(Tail, State)]. + +-ifdef(TEST). +parse_link_test_() -> + Tests = [ + {<<>>, []}, + {<<" ">>, []}, + %% Examples from the RFC. + {<<"; rel=\"previous\"; title=\"previous chapter\"">>, [ + #{ + target => <<"http://example.com/TheBook/chapter2">>, + rel => <<"previous">>, + attributes => [ + {<<"title">>, <<"previous chapter">>} + ] + } + ]}, + {<<"; rel=\"http://example.net/foo\"">>, [ + #{ + target => <<"/">>, + rel => <<"http://example.net/foo">>, + attributes => [] + } + ]}, + {<<"; rel=\"copyright\"; anchor=\"#foo\"">>, [ + #{ + target => <<"/terms">>, + rel => <<"copyright">>, + attributes => [ + {<<"anchor">>, <<"#foo">>} + ] + } + ]}, +% {<<"; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, " +% "; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [ +% %% @todo +% ]} + {<<"; rel=\"start http://example.net/relation/other\"">>, [ + #{ + target => <<"http://example.org/">>, + rel => <<"start http://example.net/relation/other">>, + attributes => [] + } + ]}, + {<<"; rel=\"start\", " + "; rel=\"index\"">>, [ + #{ + target => <<"https://example.org/">>, + rel => <<"start">>, + attributes => [] + }, + #{ + target => <<"https://example.org/index">>, + rel => <<"index">>, + attributes => [] + } + ]}, + %% Relation types are case insensitive. + {<<"; rel=\"SELF\"">>, [ + #{ + target => <<"/">>, + rel => <<"self">>, + attributes => [] + } + ]}, + {<<"; rel=\"HTTP://EXAMPLE.NET/FOO\"">>, [ + #{ + target => <<"/">>, + rel => <<"http://example.net/foo">>, + attributes => [] + } + ]}, + %% Attribute names are case insensitive. + {<<"; REL=\"copyright\"; ANCHOR=\"#foo\"">>, [ + #{ + target => <<"/terms">>, + rel => <<"copyright">>, + attributes => [ + {<<"anchor">>, <<"#foo">>} + ] + } + ]} + ], + [{V, fun() -> R = parse_link(V) end} || {V, R} <- Tests]. +-endif. + +%% Resolve a link based on the context URI and options. + +-spec resolve_link(Link, uri()) -> Link | false when Link::link(). +resolve_link(Link, ContextURI) -> + resolve_link(Link, ContextURI, #{}). + +-spec resolve_link(Link, uri(), resolve_opts()) -> Link | false when Link::link(). +%% When we do not have a context URI we only succeed when the target URI is absolute. +%% The target URI will only be normalized in that case. +resolve_link(Link=#{target := TargetURI}, undefined, _) -> + case uri_string:parse(TargetURI) of + URIMap = #{scheme := _} -> + Link#{target => uri_string:normalize(URIMap)}; + _ -> + false + end; +resolve_link(Link=#{attributes := Params}, ContextURI, Opts) -> + AllowAnchor = maps:get(allow_anchor, Opts, true), + case lists:keyfind(<<"anchor">>, 1, Params) of + false -> + do_resolve_link(Link, ContextURI); + {_, Anchor} when AllowAnchor -> + do_resolve_link(Link, resolve(Anchor, ContextURI)); + _ -> + false + end. + +do_resolve_link(Link=#{target := TargetURI}, ContextURI) -> + Link#{target => uri_string:recompose(resolve(TargetURI, ContextURI))}. + +-ifdef(TEST). +resolve_link_test_() -> + Tests = [ + %% No context URI available. + {#{target => <<"http://a/b/./c">>}, undefined, #{}, + #{target => <<"http://a/b/c">>}}, + {#{target => <<"a/b/./c">>}, undefined, #{}, + false}, + %% Context URI available, allow_anchor => true. + {#{target => <<"http://a/b">>, attributes => []}, <<"http://a/c">>, #{}, + #{target => <<"http://a/b">>, attributes => []}}, + {#{target => <<"b">>, attributes => []}, <<"http://a/c">>, #{}, + #{target => <<"http://a/b">>, attributes => []}}, + {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"#frag">>}]}, <<"http://a/c">>, #{}, + #{target => <<"http://a/b">>, attributes => [{<<"anchor">>, <<"#frag">>}]}}, + {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"d/e">>}]}, <<"http://a/c">>, #{}, + #{target => <<"http://a/d/b">>, attributes => [{<<"anchor">>, <<"d/e">>}]}}, + %% Context URI available, allow_anchor => false. + {#{target => <<"http://a/b">>, attributes => []}, <<"http://a/c">>, #{allow_anchor => false}, + #{target => <<"http://a/b">>, attributes => []}}, + {#{target => <<"b">>, attributes => []}, <<"http://a/c">>, #{allow_anchor => false}, + #{target => <<"http://a/b">>, attributes => []}}, + {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"#frag">>}]}, + <<"http://a/c">>, #{allow_anchor => false}, false}, + {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"d/e">>}]}, + <<"http://a/c">>, #{allow_anchor => false}, false} + ], + [{iolist_to_binary(io_lib:format("~0p", [L])), + fun() -> R = resolve_link(L, C, O) end} || {L, C, O, R} <- Tests]. +-endif. + +%% @todo This function should ideally be part of Erlang/OTP's uri_string module. +resolve(URI, BaseURI) -> + case resolve1(ensure_map_uri(URI), BaseURI) of + TargetURI = #{path := Path0} -> + %% We remove dot segments. Normalizing the entire URI + %% will sometimes add an extra slash we don't want. + #{path := Path} = uri_string:normalize(#{path => Path0}, [return_map]), + TargetURI#{path => Path}; + TargetURI -> + TargetURI + end. + +resolve1(URI=#{scheme := _}, _) -> + URI; +resolve1(URI=#{host := _}, BaseURI) -> + #{scheme := Scheme} = ensure_map_uri(BaseURI), + URI#{scheme => Scheme}; +resolve1(URI=#{path := <<>>}, BaseURI0) -> + BaseURI = ensure_map_uri(BaseURI0), + Keys = case maps:is_key(query, URI) of + true -> [scheme, host, port, path]; + false -> [scheme, host, port, path, query] + end, + maps:merge(URI, maps:with(Keys, BaseURI)); +resolve1(URI=#{path := <<"/",_/bits>>}, BaseURI0) -> + BaseURI = ensure_map_uri(BaseURI0), + maps:merge(URI, maps:with([scheme, host, port], BaseURI)); +resolve1(URI=#{path := Path}, BaseURI0) -> + BaseURI = ensure_map_uri(BaseURI0), + maps:merge( + URI#{path := merge_paths(Path, BaseURI)}, + maps:with([scheme, host, port], BaseURI)). + +merge_paths(Path, #{host := _, path := <<>>}) -> + <<$/, Path/binary>>; +merge_paths(Path, #{path := BasePath0}) -> + case string:split(BasePath0, <<$/>>, trailing) of + [BasePath, _] -> <>; + [_] -> <<$/, Path/binary>> + end. + +ensure_map_uri(URI) when is_map(URI) -> URI; +ensure_map_uri(URI) -> uri_string:parse(iolist_to_binary(URI)). + +-ifdef(TEST). +resolve_test_() -> + Tests = [ + %% 5.4.1. Normal Examples + {<<"g:h">>, <<"g:h">>}, + {<<"g">>, <<"http://a/b/c/g">>}, + {<<"./g">>, <<"http://a/b/c/g">>}, + {<<"g/">>, <<"http://a/b/c/g/">>}, + {<<"/g">>, <<"http://a/g">>}, + {<<"//g">>, <<"http://g">>}, + {<<"?y">>, <<"http://a/b/c/d;p?y">>}, + {<<"g?y">>, <<"http://a/b/c/g?y">>}, + {<<"#s">>, <<"http://a/b/c/d;p?q#s">>}, + {<<"g#s">>, <<"http://a/b/c/g#s">>}, + {<<"g?y#s">>, <<"http://a/b/c/g?y#s">>}, + {<<";x">>, <<"http://a/b/c/;x">>}, + {<<"g;x">>, <<"http://a/b/c/g;x">>}, + {<<"g;x?y#s">>, <<"http://a/b/c/g;x?y#s">>}, + {<<"">>, <<"http://a/b/c/d;p?q">>}, + {<<".">>, <<"http://a/b/c/">>}, + {<<"./">>, <<"http://a/b/c/">>}, + {<<"..">>, <<"http://a/b/">>}, + {<<"../">>, <<"http://a/b/">>}, + {<<"../g">>, <<"http://a/b/g">>}, + {<<"../..">>, <<"http://a/">>}, + {<<"../../">>, <<"http://a/">>}, + {<<"../../g">>, <<"http://a/g">>}, + %% 5.4.2. Abnormal Examples + {<<"../../../g">>, <<"http://a/g">>}, + {<<"../../../../g">>, <<"http://a/g">>}, + {<<"/./g">>, <<"http://a/g">>}, + {<<"/../g">>, <<"http://a/g">>}, + {<<"g.">>, <<"http://a/b/c/g.">>}, + {<<".g">>, <<"http://a/b/c/.g">>}, + {<<"g..">>, <<"http://a/b/c/g..">>}, + {<<"..g">>, <<"http://a/b/c/..g">>}, + {<<"./../g">>, <<"http://a/b/g">>}, + {<<"./g/.">>, <<"http://a/b/c/g/">>}, + {<<"g/./h">>, <<"http://a/b/c/g/h">>}, + {<<"g/../h">>, <<"http://a/b/c/h">>}, + {<<"g;x=1/./y">>, <<"http://a/b/c/g;x=1/y">>}, + {<<"g;x=1/../y">>, <<"http://a/b/c/y">>}, + {<<"g?y/./x">>, <<"http://a/b/c/g?y/./x">>}, + {<<"g?y/../x">>, <<"http://a/b/c/g?y/../x">>}, + {<<"g#s/./x">>, <<"http://a/b/c/g#s/./x">>}, + {<<"g#s/../x">>, <<"http://a/b/c/g#s/../x">>}, + {<<"http:g">>, <<"http:g">>} %% for strict parsers + ], + [{V, fun() -> R = uri_string:recompose(resolve(V, <<"http://a/b/c/d;p?q">>)) end} || {V, R} <- Tests]. +-endif. + +%% Build a link header. + +-spec link([link()]) -> iodata(). +link(Links) -> + lists:join(<<", ">>, [do_link(Link) || Link <- Links]). + +do_link(#{target := TargetURI, rel := Rel, attributes := Params}) -> + [ + $<, TargetURI, <<">" + "; rel=\"">>, Rel, $", + [[<<"; ">>, Key, <<"=\"">>, Value, $"] + || {Key, Value} <- Params] + ]. + +-ifdef(TEST). +link_test_() -> + Tests = [ + {<<>>, []}, + %% Examples from the RFC. + {<<"; rel=\"previous\"; title=\"previous chapter\"">>, [ + #{ + target => <<"http://example.com/TheBook/chapter2">>, + rel => <<"previous">>, + attributes => [ + {<<"title">>, <<"previous chapter">>} + ] + } + ]}, + {<<"; rel=\"http://example.net/foo\"">>, [ + #{ + target => <<"/">>, + rel => <<"http://example.net/foo">>, + attributes => [] + } + ]}, + {<<"; rel=\"copyright\"; anchor=\"#foo\"">>, [ + #{ + target => <<"/terms">>, + rel => <<"copyright">>, + attributes => [ + {<<"anchor">>, <<"#foo">>} + ] + } + ]}, +% {<<"; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, " +% "; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [ +% %% @todo +% ]} + {<<"; rel=\"start http://example.net/relation/other\"">>, [ + #{ + target => <<"http://example.org/">>, + rel => <<"start http://example.net/relation/other">>, + attributes => [] + } + ]}, + {<<"; rel=\"start\", " + "; rel=\"index\"">>, [ + #{ + target => <<"https://example.org/">>, + rel => <<"start">>, + attributes => [] + }, + #{ + target => <<"https://example.org/index">>, + rel => <<"index">>, + attributes => [] + } + ]} + ], + [{iolist_to_binary(io_lib:format("~0p", [V])), + fun() -> R = iolist_to_binary(link(V)) end} || {R, V} <- Tests]. +-endif. -- cgit v1.2.3