aboutsummaryrefslogblamecommitdiffstats
path: root/src/cow_ws.erl
blob: de7d0b5e1896161550ff2ef8ad774d54d2f62116 (plain) (tree)



















































































































































































































































































































                                                                                                                                               
%% Copyright (c) 2015, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(cow_ws).

-export([parse_header/3]).
-export([parse_close_code/2]).
-export([parse_payload/9]).
-export([frame/2]).

%% Status code carried by a close frame.
%% Note that parse_close_code/2 rejects 1006 on input even though
%% this type allows it.
-type close_code() :: 1000..1003 | 1006..1011 | 3000..4999.
-export_type([close_code/0]).

%% Fragmentation state threaded through parse_header/3:
%% `undefined' when no fragmented message has been started,
%% `{nofin, Type}' once a non-final frame was seen, and
%% `{fin, Type}' once the final continuation frame was seen.
-type frag_state() :: undefined | {fin | nofin, text | binary}.
-export_type([frag_state/0]).

%% Frames accepted by frame/2.
-type frame() :: close | ping | pong
	| {text | binary | close | ping | pong, iodata()}
	| {close, close_code(), iodata()}.
-export_type([frame/0]).

%% Negotiated extensions. The only key this module matches on
%% is `deflate_frame'.
-type extensions() :: map().
%% Frame type reported by parse_header/3; `fragment' is reported
%% for any frame whose FIN bit is 0 and for continuation frames.
-type frame_type() :: fragment | text | binary | close | ping | pong.
%% 32-bit masking key, or `undefined' when the frame is not masked.
-type mask_key() :: undefined | 0..16#ffffffff.
%% The three RSV bits of the frame header.
-type rsv() :: <<_:3>>.
%% Bytes of an incomplete UTF-8 sequence carried over between calls
%% to parse_payload/9 (at most three bytes).
-type utf8_state() :: <<>> | <<_:8>> | <<_:16>> | <<_:24>>.

%% @doc Parse and validate the Websocket frame header.
%%
%% This function also updates the fragmentation state according to
%% information found in the frame's header.
%%
%% Returns `error' when the header breaks one of the rules checked
%% below, `more' when the buffer does not hold a complete header yet
%% (or, for close frames, not yet the close code), and otherwise
%% `{Type, FragState, Rsv, Len, MaskKey, Rest}' where `MaskKey' is
%% `undefined' for unmasked frames and `Rest' is whatever follows
%% the header in the buffer.
%%
%% Clause order is significant: every rejection that applies to a
%% header is listed before the clauses that accept it.

-spec parse_header(binary(), extensions(), frag_state())
	-> error | more | {frame_type(), frag_state(), rsv(), non_neg_integer(), mask_key(), binary()}.
%% RSV bits MUST be 0 unless an extension is negotiated
%% that defines meanings for non-zero values.
parse_header(<< _:1, Rsv:3, _/bits >>, Extensions, _) when Extensions =:= #{}, Rsv =/= 0 -> error;
%% Last 2 RSV bits MUST be 0 if deflate-frame extension is used.
parse_header(<< _:2, 1:1, _/bits >>, #{deflate_frame := _}, _) -> error;
parse_header(<< _:3, 1:1, _/bits >>, #{deflate_frame := _}, _) -> error;
%% Invalid opcode. Note that these opcodes may be used by extensions.
parse_header(<< _:4, 3:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 4:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 5:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 6:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 7:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 11:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 12:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 13:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 14:4, _/bits >>, _, _) -> error;
parse_header(<< _:4, 15:4, _/bits >>, _, _) -> error;
%% Control frames MUST NOT be fragmented.
parse_header(<< 0:1, _:3, Opcode:4, _/bits >>, _, _) when Opcode >= 8 -> error;
%% A frame MUST NOT use the zero opcode unless fragmentation was initiated.
parse_header(<< _:4, 0:4, _/bits >>, _, undefined) -> error;
%% Non-control opcode when expecting control message or next fragment.
%% Only text (1) and binary (2) need to be listed here: the reserved
%% non-control opcodes 3..7 were already rejected above regardless of
%% the fragmentation state.
parse_header(<< _:4, 1:4, _/bits >>, _, {_, _}) -> error;
parse_header(<< _:4, 2:4, _/bits >>, _, {_, _}) -> error;
%% Close control frame length MUST be 0 or >= 2.
parse_header(<< _:4, 8:4, _:1, 1:7, _/bits >>, _, _) -> error;
%% Close control frame with incomplete close code. Need more data.
%% 4 = 2 header bytes + 2 close code bytes; 8 adds the 4-byte mask key.
parse_header(Data = << _:4, 8:4, 0:1, Len:7, _/bits >>, _, _) when Len > 1, byte_size(Data) < 4 -> more;
parse_header(Data = << _:4, 8:4, 1:1, Len:7, _/bits >>, _, _) when Len > 1, byte_size(Data) < 8 -> more;
%% 7 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, Len:7, Rest/bits >>, _, FragState) when Len < 126 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, Len:7, MaskKey:32, Rest/bits >>, _, FragState) when Len < 126 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% 16 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, 126:7, Len:16, Rest/bits >>, _, FragState) when Len > 125, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, 126:7, Len:16, MaskKey:32, Rest/bits >>, _, FragState) when Len > 125, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% 63 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, 127:7, 0:1, Len:63, Rest/bits >>, _, FragState) when Len > 16#ffff, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, 127:7, 0:1, Len:63, MaskKey:32, Rest/bits >>, _, FragState) when Len > 16#ffff, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% When payload length is over 63 bits, the most significant bit MUST be 0.
parse_header(<< _:9, 127:7, 1:1, _/bits >>, _, _) -> error;
%% For the next four clauses, it can be one of the following:
%%
%% * The minimal number of bytes MUST be used to encode the length
%% * All control frames MUST have a payload length of 125 bytes or less
%%
%% They only match once the complete extended header is in the buffer,
%% which is why they come after the accepting clauses above.
parse_header(<< _:8, 0:1, 126:7, _:16, _/bits >>, _, _) -> error;
parse_header(<< _:8, 1:1, 126:7, _:48, _/bits >>, _, _) -> error;
parse_header(<< _:8, 0:1, 127:7, _:64, _/bits >>, _, _) -> error;
parse_header(<< _:8, 1:1, 127:7, _:96, _/bits >>, _, _) -> error;
%% Need more data.
parse_header(_, _, _) -> more.

%% Build the successful parse_header/3 result from the decoded fields.
%% A frame whose FIN bit is 0 is always reported as `fragment', whatever
%% its opcode; the opcode's own type is still what drives the new
%% fragmentation state.
parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest) ->
	OpType = opcode_to_frame_type(Opcode),
	Reported = case Fin of
		1 -> OpType;
		0 -> fragment
	end,
	NextFragState = frag_state(OpType, Fin, FragState),
	{Reported, NextFragState, Rsv, Len, MaskKey, Rest}.

%% Map a frame opcode to its frame type. Only the opcodes that
%% parse_header/3 lets through are handled.
%% Continuation frame.
opcode_to_frame_type(16#0) -> fragment;
%% Data frames.
opcode_to_frame_type(16#1) -> text;
opcode_to_frame_type(16#2) -> binary;
%% Control frames.
opcode_to_frame_type(16#8) -> close;
opcode_to_frame_type(16#9) -> ping;
opcode_to_frame_type(16#a) -> pong.

%% Compute the fragmentation state that follows a frame, given the
%% frame's type, its FIN bit and the current state.
%% Final continuation frame: the fragmented message is complete.
frag_state(fragment, 1, {nofin, MsgType}) -> {fin, MsgType};
%% Any other frame with FIN set leaves the state alone.
frag_state(_, 1, Current) -> Current;
%% First frame of a fragmented message.
frag_state(FrameType, 0, undefined) -> {nofin, FrameType};
%% Non-final continuation frame.
frag_state(fragment, 0, Current = {nofin, _}) -> Current.

%% @doc Extract and validate the close code found at the start of a
%% close frame's payload.
%%
%% The two code bytes belong to the payload, so they are unmasked
%% before being interpreted. Accepted codes are 1000..1003, 1007..1011
%% and 3000..4999; anything else yields `error'.

-spec parse_close_code(binary(), mask_key()) -> {ok, close_code(), binary()} | error.
parse_close_code(<< MaskedCode:2/binary, Rest/bits >>, MaskKey) ->
	<< Code:16 >> = unmask(MaskedCode, MaskKey, 0),
	Accepted = (Code >= 1000 andalso Code =< 1003)
		orelse (Code >= 1007 andalso Code =< 1011)
		orelse (Code >= 3000 andalso Code =< 4999),
	case Accepted of
		true -> {ok, Code, Rest};
		false -> error
	end.

%% @doc Unmask, optionally inflate, and validate a chunk of payload.
%%
%% At most `Len' bytes of `Data' are consumed. `ParsedLen' is handed to
%% the unmasking step so that it resumes at the right mask byte.
%% Only text and close frames carry a UTF-8 payload and therefore need
%% validation; everything else is passed through.

-spec parse_payload(binary(), mask_key(), utf8_state(), non_neg_integer(),
		frame_type(), non_neg_integer(), frag_state(), extensions(), rsv())
	-> {ok, binary(), utf8_state(), binary()} | {more, binary(), utf8_state()} | error.
%% RSV1 set with deflate-frame negotiated: the chunk is compressed.
parse_payload(Data, MaskKey, Utf8State, ParsedLen, Type, Len, FragState, #{deflate_frame := Inflate}, << 2#100:3 >>) ->
	{Chunk, Rest, Eof} = split_payload(Data, Len),
	Unmasked = unmask(Chunk, MaskKey, ParsedLen),
	Inflated = inflate_frame(Unmasked, Inflate, FragState, Eof),
	validate_payload(Inflated, Rest, Utf8State, ParsedLen, Type, FragState, Eof);
%% No RSV bit set: the chunk only needs unmasking.
parse_payload(Data, MaskKey, Utf8State, ParsedLen, Type, Len, FragState, _, << 2#000:3 >>) ->
	{Chunk, Rest, Eof} = split_payload(Data, Len),
	validate_payload(unmask(Chunk, MaskKey, ParsedLen), Rest, Utf8State, ParsedLen, Type, FragState, Eof).

%% Cut `Data' at `Len' bytes. Returns `{Payload, Rest, Eof}' where `Eof'
%% tells whether `Len' bytes were available; when they were not, all of
%% `Data' is returned as payload and `Rest' is empty.
split_payload(Data, Len) ->
	Available = byte_size(Data),
	if
		Available =:= Len ->
			{Data, <<>>, true};
		Available < Len ->
			{Data, <<>>, false};
		true ->
			<< Payload:Len/binary, Trailing/bits >> = Data,
			{Payload, Trailing, true}
	end.

%% Unmask a chunk of payload.
%%
%% `UnmaskedLen' is the number of payload bytes of the current frame
%% that were unmasked by earlier calls; it selects the mask byte at
%% which this chunk starts.
%%
%% parse_header/3 reports `undefined' as the mask key of frames whose
%% MASK bit is 0. Such payloads are returned untouched: handing
%% `undefined' to do_unmask/3 would crash on the first `bxor' or binary
%% construction.
unmask(Data, undefined, _) ->
	Data;
unmask(Data, MaskKey, 0) ->
	do_unmask(Data, MaskKey, <<>>);
%% We unmask on the fly so we need to continue from the right mask byte.
unmask(Data, MaskKey, UnmaskedLen) ->
	Left = UnmaskedLen rem 4,
	Right = 4 - Left,
	%% Rotate the key left by Left bytes, keeping the result on 32 bits.
	MaskKey2 = ((MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8))) band 16#ffffffff,
	do_unmask(Data, MaskKey2, <<>>).

%% XOR `Data' with the 32-bit key, four bytes at a time, appending the
%% result to the accumulator. A trailing chunk of one to three bytes is
%% XORed with the same number of leading bytes of the key.
do_unmask(<<>>, _, Acc) ->
	Acc;
do_unmask(<< Word:32, Tail/bits >>, MaskKey, Acc) ->
	Clear = Word bxor MaskKey,
	do_unmask(Tail, MaskKey, << Acc/binary, Clear:32 >>);
do_unmask(Tail, MaskKey, Acc) when is_binary(Tail), byte_size(Tail) < 4 ->
	TailBits = bit_size(Tail),
	<< Masked:TailBits >> = Tail,
	<< KeyPrefix:TailBits, _/bits >> = << MaskKey:32 >>,
	Clear = Masked bxor KeyPrefix,
	<< Acc/binary, Clear:TailBits >>.

%% Inflate a chunk of compressed payload with the zlib stream `Inflate'.
%%
%% deflate_frame/2 strips the trailing `00 00 FF FF' bytes from what it
%% sends, so the same four bytes are put back here before inflating the
%% last chunk (`Eof' is true) of a message's last frame. The last frame
%% is recognised by the fragmentation state being `undefined'
%% (unfragmented message) or `{fin, _}' (final continuation frame).
%% The state is never the bare atom `fin'.
%%
%% NOTE(review): non-final fragments are inflated without these bytes;
%% confirm against the negotiated extension whether every compressed
%% frame should get them.
%%
%% @todo Try using iodata() and see if it improves anything.
inflate_frame(Data, Inflate, FragState, true)
		when FragState =:= undefined; element(1, FragState) =:= fin ->
	iolist_to_binary(zlib:inflate(Inflate, << Data/binary, 0, 0, 255, 255 >>));
inflate_frame(Data, Inflate, _, _) ->
	iolist_to_binary(zlib:inflate(Inflate, Data)).

%% Validate a chunk of unmasked payload.
%%
%% Returns `{ok, Payload, Utf8State, Rest}' when the chunk ends the
%% frame (`Eof' is true), `{more, Payload, Utf8State}' when more payload
%% is expected, and `error' when the payload is not valid UTF-8 where it
%% has to be. The returned `Utf8State' holds the bytes of a sequence
%% that is still incomplete at the end of the chunk.
%%
%% Text frames and close control frames MUST have a payload that is valid UTF-8.
validate_payload(Payload, Rest, Utf8State, _, Type, _, Eof) when Type =:= text; Type =:= close ->
	case validate_utf8(<< Utf8State/binary, Payload/binary >>) of
		false -> error;
		%% A fresh variable is required here: matching against the
		%% already bound Utf8State would only succeed when the new
		%% state equals the old one, turning any sequence split
		%% across two chunks into an error.
		Utf8State2 when not Eof -> {more, Payload, Utf8State2};
		<<>> when Eof -> {ok, Payload, <<>>, Rest};
		%% The frame ends in the middle of a sequence.
		_ -> error
	end;
%% Continuation frames of a text message. A sequence may span two
%% frames, but not the end of the message.
validate_payload(Payload, Rest, Utf8State, _, fragment, {Fin, text}, Eof) ->
	case validate_utf8(<< Utf8State/binary, Payload/binary >>) of
		false -> error;
		<<>> when Eof -> {ok, Payload, <<>>, Rest};
		Utf8State2 when Eof, Fin =:= nofin -> {ok, Payload, Utf8State2, Rest};
		Utf8State2 when not Eof -> {more, Payload, Utf8State2};
		_ -> error
	end;
%% Everything else is not validated.
validate_payload(Payload, _, Utf8State, _, _, _, false) ->
	{more, Payload, Utf8State};
validate_payload(Payload, Rest, Utf8State, _, _, _, true) ->
	{ok, Payload, Utf8State, Rest}.

%% Check that the argument is UTF-8.
%%
%% The result is `<<>>' when the whole argument is valid, `false' when
%% it is not, and otherwise the trailing bytes of a sequence that is
%% cut short and needs more data to be judged.
validate_utf8(<<>>) ->
	<<>>;
validate_utf8(<< _/utf8, Tail/bits >>) ->
	validate_utf8(Tail);
%% Lead bytes C0 and C1 can never start a valid sequence; fail early.
validate_utf8(<< Lead, _/bits >>) when Lead =:= 16#c0; Lead =:= 16#c1 ->
	false;
%% Start of a 2 bytes sequence.
validate_utf8(Partial = << Lead >>) when Lead band 16#e0 =:= 16#c0 ->
	Partial;
%% Start of a 3 bytes sequence, with zero or one continuation byte.
validate_utf8(Partial = << Lead >>) when Lead band 16#f0 =:= 16#e0 ->
	Partial;
validate_utf8(Partial = << Lead, C1 >>)
		when Lead band 16#f0 =:= 16#e0, C1 band 16#c0 =:= 16#80 ->
	Partial;
%% Lead byte F4 followed by 90..BF encodes a value above 0x10FFFF.
validate_utf8(<< 16#f4, Second, _/bits >>) when Second >= 16#90, Second =< 16#bf ->
	false;
%% Start of a 4 bytes sequence, with zero to two continuation bytes.
validate_utf8(Partial = << Lead >>) when Lead band 16#f8 =:= 16#f0 ->
	Partial;
validate_utf8(Partial = << Lead, C1 >>)
		when Lead band 16#f8 =:= 16#f0, C1 band 16#c0 =:= 16#80 ->
	Partial;
validate_utf8(Partial = << Lead, C1, C2 >>)
		when Lead band 16#f8 =:= 16#f0, C1 band 16#c0 =:= 16#80, C2 band 16#c0 =:= 16#80 ->
	Partial;
%% Anything else is invalid.
validate_utf8(_) ->
	false.

%% @doc Build an unmasked Websocket frame with the FIN bit set.
%%
%% The first header byte is written as a literal below: 16#8N is
%% FIN + opcode N, 16#cN additionally sets RSV1 (compressed payload).

-spec frame(frame(), extensions()) -> iodata().
%% Control frames without a payload.
frame(close, _) ->
	<< 16#88, 0 >>;
frame(ping, _) ->
	<< 16#89, 0 >>;
frame(pong, _) ->
	<< 16#8a, 0 >>;
%% Control frames with a payload, which must fit in 125 bytes;
%% the call fails otherwise.
frame({close, Payload}, Extensions) ->
	frame({close, 1000, Payload}, Extensions);
frame({close, StatusCode, Payload}, _) ->
	%% The 2 bytes status code counts towards the payload length.
	Size = 2 + iolist_size(Payload),
	true = Size =< 125,
	[<< 16#88, Size:8, StatusCode:16 >>, Payload];
frame({ping, Payload}, _) ->
	Size = iolist_size(Payload),
	true = Size =< 125,
	[<< 16#89, Size:8 >>, Payload];
frame({pong, Payload}, _) ->
	Size = iolist_size(Payload),
	true = Size =< 125,
	[<< 16#8a, Size:8 >>, Payload];
%% Data frames when deflate-frame was negotiated: compress and set RSV1.
frame({text, Payload}, #{deflate_frame := Deflate}) ->
	Compressed = deflate_frame(Payload, Deflate),
	LenBits = payload_length(Compressed),
	[<< 16#c1, 0:1, LenBits/bits >>, Compressed];
frame({binary, Payload}, #{deflate_frame := Deflate}) ->
	Compressed = deflate_frame(Payload, Deflate),
	LenBits = payload_length(Compressed),
	[<< 16#c2, 0:1, LenBits/bits >>, Compressed];
%% Plain data frames.
frame({text, Payload}, _) ->
	LenBits = payload_length(Payload),
	[<< 16#81, 0:1, LenBits/bits >>, Payload];
frame({binary, Payload}, _) ->
	LenBits = payload_length(Payload),
	[<< 16#82, 0:1, LenBits/bits >>, Payload].

%% Encode the payload length field of a frame header, without the
%% leading MASK bit: 7 bits, or the marker 126 followed by 16 bits, or
%% the marker 127 followed by 64 bits, whichever is the shortest that
%% fits.
%%
%% The payload is iodata(), so its size is taken with iolist_size/1;
%% byte_size/1 would fail with badarg as soon as a list is given.
payload_length(Payload) ->
	case iolist_size(Payload) of
		N when N =< 125 -> << N:7 >>;
		N when N =< 16#ffff -> << 126:7, N:16 >>;
		N when N =< 16#7fffffffffffffff -> << 127:7, N:64 >>
	end.

%% Compress a payload with the zlib stream `Deflate' using a sync
%% flush, then drop the trailing `00 00 FF FF' bytes when the output
%% ends with them. The output is returned whole otherwise.
deflate_frame(Payload, Deflate) ->
	Compressed = iolist_to_binary(zlib:deflate(Deflate, Payload, sync)),
	BodySize = byte_size(Compressed) - 4,
	case Compressed of
		<< Body:BodySize/binary, 16#0000ffff:32 >> ->
			Body;
		_ ->
			Compressed
	end.