aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Loïc Hoguin <[email protected]> 2013-01-13 00:10:32 +0100
committer Loïc Hoguin <[email protected]> 2013-01-13 00:10:32 +0100
commit 5dd09737d09f1a9e4b63e514e18973f174417215 (patch)
tree 9b6ed7e8aba2626906da6e2ba4b3f88c83115c41 /src
parent cccc0bc475d903e553c66c844f5bc0b0934e2b59 (diff)
download cowboy-5dd09737d09f1a9e4b63e514e18973f174417215.tar.gz
cowboy-5dd09737d09f1a9e4b63e514e18973f174417215.tar.bz2
cowboy-5dd09737d09f1a9e4b63e514e18973f174417215.zip
Websocket text frames are now checked for UTF-8 correctness
The autobahntestsuite now passes 100% of the tests. We are getting close to fully implementing the Websocket RFC.
Diffstat (limited to 'src')
-rw-r--r-- src/cowboy_websocket.erl 92
1 files changed, 91 insertions, 1 deletions
diff --git a/src/cowboy_websocket.erl b/src/cowboy_websocket.erl
index 50d1c88..a10b008 100644
--- a/src/cowboy_websocket.erl
+++ b/src/cowboy_websocket.erl
@@ -45,7 +45,8 @@
timeout_ref = undefined :: undefined | reference(),
messages = undefined :: undefined | {atom(), atom(), atom()},
hibernate = false :: boolean(),
- frag_state = undefined :: frag_state()
+ frag_state = undefined :: frag_state(),
+ utf8_state = <<>> :: binary()
}).
%% @doc Upgrade an HTTP request to the Websocket protocol.
@@ -285,6 +286,65 @@ websocket_data(State, Req, HandlerState, Opcode, Len, MaskKey, Data, 1) ->
-> {ok, Req, cowboy_middleware:env()}
| {suspend, module(), atom(), [any()]}
when Req::cowboy_req:req().
+%% Text frames must have a payload that is valid UTF-8.
+websocket_payload(State=#state{utf8_state=Incomplete},
+ Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data)
+ when byte_size(Data) < Len ->
+ Unmasked2 = websocket_unmask(Data,
+ rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+ case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+ false ->
+ websocket_close(State, Req, HandlerState, {error, badframe});
+ Utf8State ->
+ websocket_payload_loop(State#state{utf8_state=Utf8State},
+ Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
+ << Unmasked/binary, Unmasked2/binary >>)
+ end;
+websocket_payload(State=#state{utf8_state=Incomplete},
+ Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data) ->
+ << End:Len/binary, Rest/bits >> = Data,
+ Unmasked2 = websocket_unmask(End,
+ rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+ case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+ <<>> ->
+ websocket_dispatch(State#state{utf8_state= <<>>},
+ Req, HandlerState, Rest, Opcode,
+ << Unmasked/binary, Unmasked2/binary >>);
+ _ ->
+ websocket_close(State, Req, HandlerState, {error, badframe})
+ end;
+%% Fragmented text frames may cut payload in the middle of UTF-8 codepoints.
+websocket_payload(State=#state{frag_state={_, 1, _}, utf8_state=Incomplete},
+ Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data)
+ when byte_size(Data) < Len ->
+ Unmasked2 = websocket_unmask(Data,
+ rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+ case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+ false ->
+ websocket_close(State, Req, HandlerState, {error, badframe});
+ Utf8State ->
+ websocket_payload_loop(State#state{utf8_state=Utf8State},
+ Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
+ << Unmasked/binary, Unmasked2/binary >>)
+ end;
+websocket_payload(State=#state{frag_state={Fin, 1, _}, utf8_state=Incomplete},
+ Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data) ->
+ << End:Len/binary, Rest/bits >> = Data,
+ Unmasked2 = websocket_unmask(End,
+ rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+ case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+ <<>> ->
+ websocket_dispatch(State#state{utf8_state= <<>>},
+ Req, HandlerState, Rest, Opcode,
+ << Unmasked/binary, Unmasked2/binary >>);
+ Utf8State when is_binary(Utf8State), Fin =:= nofin ->
+ websocket_dispatch(State#state{utf8_state=Utf8State},
+ Req, HandlerState, Rest, Opcode,
+ << Unmasked/binary, Unmasked2/binary >>);
+ _ ->
+ websocket_close(State, Req, HandlerState, {error, badframe})
+ end;
+%% Other frames have a binary payload.
websocket_payload(State, Req, HandlerState,
Opcode, Len, MaskKey, Unmasked, Data)
when byte_size(Data) < Len ->
@@ -325,6 +385,36 @@ rotate_mask_key(MaskKey, UnmaskedLen) ->
Right = 4 - Left,
(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
+%% Incrementally validates a chunk of UTF-8 data.
+%%
+%% Returns <<>> if the argument is valid UTF-8, false if not,
+%% or the incomplete part of the argument if we need more data.
+%% The incomplete part is the 1 to 3 leading bytes of a codepoint
+%% that is cut off at the end of the argument; it is meant to be
+%% prepended to the next chunk of data before validating again.
+%%
+%% Prefixes that can never become a valid codepoint, whatever bytes
+%% follow, are rejected right away instead of being reported as
+%% incomplete.
+-spec is_utf8(binary()) -> false | binary().
+is_utf8(Valid = <<>>) ->
+ Valid;
+%% Skip one complete, valid codepoint and validate what follows.
+is_utf8(<< _/utf8, Rest/binary >>) ->
+ is_utf8(Rest);
+%% 2 bytes. Codepages C0 and C1 are invalid; fail early.
+is_utf8(<< 2#1100000:7, _/bits >>) ->
+ false;
+is_utf8(Incomplete = << 2#110:3, _:5 >>) ->
+ Incomplete;
+%% 3 bytes. E0 followed by 80..9F is an overlong encoding and
+%% ED followed by A0..BF is a UTF-16 surrogate; fail early.
+is_utf8(<< 16#E0:8, 2#100:3, _:5, _/bits >>) ->
+ false;
+is_utf8(<< 16#ED:8, 2#101:3, _:5, _/bits >>) ->
+ false;
+is_utf8(Incomplete = << 2#1110:4, _:4 >>) ->
+ Incomplete;
+is_utf8(Incomplete = << 2#1110:4, _:4, 2#10:2, _:6 >>) ->
+ Incomplete;
+%% 4 bytes. Lead bytes F5..F7 and codepage F4 followed by 90..BF
+%% encode values greater than 0x10FFFF; F0 followed by 80..8F is
+%% an overlong encoding; fail early.
+is_utf8(<< 2#11110:5, Low:3, _/bits >>) when Low > 2#100 ->
+ false;
+is_utf8(<< 2#11110100:8, 2#10:2, High:6, _/bits >>) when High >= 2#10000 ->
+ false;
+is_utf8(<< 16#F0:8, 2#1000:4, _:4, _/bits >>) ->
+ false;
+is_utf8(Incomplete = << 2#11110:5, _:3 >>) ->
+ Incomplete;
+is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6 >>) ->
+ Incomplete;
+is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6, 2#10:2, _:6 >>) ->
+ Incomplete;
+%% Invalid.
+is_utf8(_) ->
+ false.
+
-spec websocket_payload_loop(#state{}, Req, any(),
opcode(), non_neg_integer(), mask_key(), binary())
-> {ok, Req, cowboy_middleware:env()}