From 7e5494a264267ffb731dda63ee28a7cea20855da Mon Sep 17 00:00:00 2001 From: Daniel White Date: Tue, 3 Mar 2015 16:39:58 +1100 Subject: Fix parsing of multipart bodies without headers This was discovered while verifying if cow_multipart was a suitable replacement for our existing multipart parser. Our suite for parsing an example from RFC 2046 failed. --- src/cow_multipart.erl | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/cow_multipart.erl b/src/cow_multipart.erl index c53cdb4..284c597 100644 --- a/src/cow_multipart.erl +++ b/src/cow_multipart.erl @@ -98,6 +98,29 @@ >>). -define(TEST4_BOUNDARY, <<"boundary">>). +%% RFC 2046, Section 5.1.1: +-define(TEST5_MIME, << + "This is the preamble. It is to be ignored, though it\r\n" + "is a handy place for composition agents to include an\r\n" + "explanatory note to non-MIME conformant readers.\r\n" + "\r\n" + "--simple boundary\r\n", + "\r\n" + "This is implicitly typed plain US-ASCII text.\r\n" + "It does NOT end with a linebreak." + "\r\n" + "--simple boundary\r\n", + "Content-type: text/plain; charset=us-ascii\r\n" + "\r\n" + "This is explicitly typed plain US-ASCII text.\r\n" + "It DOES end with a linebreak.\r\n" + "\r\n" + "--simple boundary--\r\n" + "\r\n" + "This is the epilogue. It is also to be ignored." +>>). +-define(TEST5_BOUNDARY, <<"simple boundary">>). + %% Parsing. %% %% The multipart format is defined in RFC 2045. @@ -179,12 +202,12 @@ skip_preamble(Stream, Boundary) -> end end. -%% There is a line break right after the boundary, skip it. -%% -%% We only skip it now because there might be no headers at all, -%% which means the \r\n\r\n indicating the end of headers also -%% includes this line break. +before_parse_headers(<< "\r\n\r\n", Stream/bits >>) -> + %% This indicates that there are no headers, so we can abort + %% immediately. 
+ {ok, [], Stream}; before_parse_headers(<< "\r\n", Stream/bits >>) -> + %% There is a line break right after the boundary, skip it. parse_hd_name(Stream, [], <<>>). parse_hd_name(<< C, Rest/bits >>, H, SoFar) -> @@ -340,6 +363,21 @@ parse_epilogue_crlf_test() -> {done, Epilogue} = parse_headers(Rest2, ?TEST4_BOUNDARY), ok. +parse_rfc2046_test() -> + %% The following is an example included in RFC 2046, Section 5.1.1. + Body1 = <<"This is implicitly typed plain US-ASCII text.\r\n" + "It does NOT end with a linebreak.">>, + Body2 = <<"This is explicitly typed plain US-ASCII text.\r\n" + "It DOES end with a linebreak.\r\n">>, + H2 = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}], + Epilogue = <<"\r\n\r\nThis is the epilogue. It is also to be ignored.">>, + {ok, [], Rest} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY), + {done, Body1, Rest2} = parse_body(Rest, ?TEST5_BOUNDARY), + {ok, H2, Rest3} = parse_headers(Rest2, ?TEST5_BOUNDARY), + {done, Body2, Rest4} = parse_body(Rest3, ?TEST5_BOUNDARY), + {done, Epilogue} = parse_headers(Rest4, ?TEST5_BOUNDARY), + ok. + parse_partial_test() -> {ok, <<0:8000, "abcdef">>, <<"\rghij">>} = parse_body(<<0:8000, "abcdef\rghij">>, <<"boundary">>), -- cgit v1.2.3