From b24651c3bb6fef59c0e92c24e69151d1a92c4b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=9F=D1=80=D0=BE?= =?UTF-8?q?=D1=85=D0=BE=D1=80=D0=BE=D0=B2?= Date: Mon, 19 Jan 2015 05:06:53 +0300 Subject: Add zlib limited output buffer size functionality This functionality may be useful for compressed streams with high compression ratio (in case of gzip it may be up to x1000), when small amount of compressed data will produce large amount of uncompressed output. This may lead to DoS attacks, because server easily goes out of memory. Example of such high compression ratio stream: ``` dd if=/dev/zero of=sparse.bin bs=1MB count=100 # 100mb of zeroes gzip sparse.bin # 95kb sparse.bin.gz $ erl > {ok, Compressed} = file:read_file("sparse.bin.gz"), > 97082 = size(Compressed), > Uncompressed = zlib:gunzip(Compressed), > 100000000 = iolist_size(Uncompressed). ``` --- erts/doc/src/zlib.xml | 47 ++++++++++++++++++++ erts/emulator/drivers/common/zlib_drv.c | 73 ++++++++++++++++++++++++++++++++ erts/preloaded/ebin/zlib.beam | Bin 13188 -> 14156 bytes erts/preloaded/src/zlib.erl | 39 ++++++++++++++++- lib/kernel/test/zlib_SUITE.erl | 35 ++++++++++++++- 5 files changed, 192 insertions(+), 2 deletions(-) diff --git a/erts/doc/src/zlib.xml b/erts/doc/src/zlib.xml index da8ccdecdf..673b743e2e 100644 --- a/erts/doc/src/zlib.xml +++ b/erts/doc/src/zlib.xml @@ -311,6 +311,53 @@ list_to_binary([B1,B2]) compressor.

+ + + Decompress data with limited output size + +

Like inflate/2, but decompress no more data than + will fit in the buffer configured via setBufSize/2. + Is is useful when decompressing a stream with a high compression + ratio such that a small amount of compressed input may expand up to + 1000 times. + It returns {more, Decompressed}, when there is more output + available, and inflateChunk/1 should be used to read it. + It may introduce some output latency (reading + input without producing any output).

+

If a preset dictionary is needed at this point (see + inflateSetDictionary below), inflateChunk/2 throws a + {need_dictionary,Adler} exception where Adler is + the adler32 checksum of the dictionary chosen by the + compressor.

+ +
+walk(Compressed, Handler) ->
+    Z = zlib:open(),
+    zlib:inflateInit(Z),
+    % Limit single uncompressed chunk size to 512kb
+    zlib:setBufSize(Z, 512 * 1024),
+    loop(Z, Handler, zlib:inflateChunk(Z, Compressed)),
+    zlib:inflateEnd(Z),
+    zlib:close(Z).
+
+loop(Z, Handler, {more, Uncompressed}) ->
+    Handler(Uncompressed),
+    loop(Z, Handler, zlib:inflateChunk(Z));
+loop(Z, Handler, Uncompressed) ->
+    Handler(Uncompressed).
+        
+
+
+ + + Read next uncompressed chunk + +

Read next chunk of uncompressed data, initialized by + inflateChunk/2.

+

This function should be repeatedly called, while it returns + {more, Decompressed}.

+
+
Initialize the decompression dictionary diff --git a/erts/emulator/drivers/common/zlib_drv.c b/erts/emulator/drivers/common/zlib_drv.c index 3143e4511d..f7b2d91d23 100644 --- a/erts/emulator/drivers/common/zlib_drv.c +++ b/erts/emulator/drivers/common/zlib_drv.c @@ -62,8 +62,17 @@ #define CRC32_COMBINE 23 #define ADLER32_COMBINE 24 +#define INFLATE_CHUNK 25 + + #define DEFAULT_BUFSZ 4000 +/* This flag is used in the same places, where zlib return codes + * (Z_OK, Z_STREAM_END, Z_NEED_DICT) are. So, we need to set it to + * relatively large value to avoid possible value clashes in future. + * */ +#define INFLATE_HAS_MORE 100 + static int zlib_init(void); static ErlDrvData zlib_start(ErlDrvPort port, char* buf); static void zlib_stop(ErlDrvData e); @@ -295,6 +304,58 @@ static int zlib_inflate(ZLibData* d, int flush) return res; } +static int zlib_inflate_chunk(ZLibData* d) +{ + int res = Z_OK; + + if ((d->bin == NULL) && (zlib_output_init(d) < 0)) { + errno = ENOMEM; + return Z_ERRNO; + } + + while ((driver_sizeq(d->port) > 0) && (d->s.avail_out > 0) && + (res != Z_STREAM_END)) { + int vlen; + SysIOVec* iov = driver_peekq(d->port, &vlen); + int len; + + d->s.next_in = iov[0].iov_base; + d->s.avail_in = iov[0].iov_len; + while((d->s.avail_in > 0) && (d->s.avail_out > 0) && (res != Z_STREAM_END)) { + res = inflate(&d->s, Z_NO_FLUSH); + if (res == Z_NEED_DICT) { + /* Essential to eat the header bytes that zlib has looked at */ + len = iov[0].iov_len - d->s.avail_in; + driver_deq(d->port, len); + return res; + } + if (res == Z_BUF_ERROR) { + /* Was possible more output, but actually not */ + res = Z_OK; + } + else if (res < 0) { + return res; + } + } + len = iov[0].iov_len - d->s.avail_in; + driver_deq(d->port, len); + } + + /* We are here because all input was consumed or EOS reached or output + * buffer is full */ + if (d->want_crc) { + d->crc = crc32(d->crc, (unsigned char*) d->bin->orig_bytes, + d->binsz - d->s.avail_out); + } + zlib_output(d); + if ((res == Z_OK) && (d->s.avail_in > 0)) + res = INFLATE_HAS_MORE; + else if (res == Z_STREAM_END) { + d->inflate_eos_seen = 1; + } + return res; +} + static int zlib_deflate(ZLibData* d, int flush) { int res = Z_OK; @@ -568,6 +629,18 @@ static ErlDrvSSizeT zlib_ctl(ErlDrvData drv_data, unsigned int command, char *bu return zlib_return(res, rbuf, rlen); } + case INFLATE_CHUNK: + if (d->state != ST_INFLATE) goto badarg; + if (len != 0) goto badarg; + res = zlib_inflate_chunk(d); + if (res == INFLATE_HAS_MORE) { + return zlib_value2(4, 0, rbuf, rlen); + } else if (res == Z_NEED_DICT) { + return zlib_value2(3, d->s.adler, rbuf, rlen); + } else { + return zlib_return(res, rbuf, rlen); + } + case GET_QSIZE: return zlib_value(driver_sizeq(d->port), rbuf, rlen); diff --git a/erts/preloaded/ebin/zlib.beam b/erts/preloaded/ebin/zlib.beam index 7006764d96..ed6ab5b254 100644 Binary files a/erts/preloaded/ebin/zlib.beam and b/erts/preloaded/ebin/zlib.beam differ diff --git a/erts/preloaded/src/zlib.erl b/erts/preloaded/src/zlib.erl index df7b2e6198..5ebc67dcaa 100644 --- a/erts/preloaded/src/zlib.erl +++ b/erts/preloaded/src/zlib.erl @@ -24,6 +24,7 @@ deflate/2,deflate/3,deflateEnd/1, inflateInit/1,inflateInit/2,inflateSetDictionary/2, inflateSync/1,inflateReset/1,inflate/2,inflateEnd/1, + inflateChunk/1, inflateChunk/2, setBufSize/2,getBufSize/1, crc32/1,crc32/2,crc32/3,adler32/2,adler32/3,getQSize/1, crc32_combine/4,adler32_combine/4, @@ -100,6 +101,7 @@ -define(INFLATE_RESET, 12). -define(INFLATE_END, 13). -define(INFLATE, 14). +-define(INFLATE_CHUNK, 25). -define(CRC32_0, 15). -define(CRC32_1, 16). @@ -263,6 +265,39 @@ inflate(Z, Data) -> erlang:error(badarg) end. +-spec inflateChunk(Z, Data) -> Decompressed | {more, Decompressed} when + Z :: zstream(), + Data :: iodata(), + Decompressed :: iolist(). +inflateChunk(Z, Data) -> + try port_command(Z, Data) of + true -> + inflateChunk(Z) + catch + error:_Err -> + flush(Z), + erlang:error(badarg) + end. + +-spec inflateChunk(Z) -> Decompressed | {more, Decompressed} when + Z :: zstream(), + Decompressed :: iolist(). +inflateChunk(Z) -> + Status = call(Z, ?INFLATE_CHUNK, []), + Data = receive + {Z, {data, Bin}} -> + Bin + after 0 -> + [] + end, + + case Status of + Good when (Good == ok) orelse (Good == stream_end) -> + Data; + inflate_has_more -> + {more, Data} + end. + -spec inflateEnd(Z) -> 'ok' when Z :: zstream(). inflateEnd(Z) -> @@ -514,7 +549,9 @@ call(Z, Cmd, Arg) -> [2,A,B,C,D] -> (A bsl 24)+(B bsl 16)+(C bsl 8)+D; [3,A,B,C,D] -> - erlang:error({need_dictionary,(A bsl 24)+(B bsl 16)+(C bsl 8)+D}) + erlang:error({need_dictionary,(A bsl 24)+(B bsl 16)+(C bsl 8)+D}); + [4, _, _, _, _] -> + inflate_has_more catch error:badarg -> %% Rethrow loses port_control from stacktrace. erlang:error(badarg) diff --git a/lib/kernel/test/zlib_SUITE.erl b/lib/kernel/test/zlib_SUITE.erl index 3be6f39d95..e99151284f 100644 --- a/lib/kernel/test/zlib_SUITE.erl +++ b/lib/kernel/test/zlib_SUITE.erl @@ -82,7 +82,7 @@ groups() -> api_deflateSetDictionary, api_deflateReset, api_deflateParams, api_deflate, api_deflateEnd, api_inflateInit, api_inflateSetDictionary, - api_inflateSync, api_inflateReset, api_inflate, + api_inflateSync, api_inflateReset, api_inflate, api_inflateChunk, api_inflateEnd, api_setBufsz, api_getBufsz, api_crc32, api_adler32, api_getQSize, api_un_compress, api_un_zip, api_g_un_zip]}, @@ -357,6 +357,39 @@ api_inflate(Config) when is_list(Config) -> ?m({'EXIT',{data_error,_}}, zlib:inflate(Z1, <<2,1,2,1,2>>)), ?m(ok, zlib:close(Z1)). +api_inflateChunk(doc) -> "Test inflateChunk"; +api_inflateChunk(suite) -> []; +api_inflateChunk(Config) when is_list(Config) -> + ChunkSize = 1024, + Data = << <<(I rem 150)>> || I <- lists:seq(1, 3 * ChunkSize) >>, + Part1 = binary:part(Data, 0, ChunkSize), + Part2 = binary:part(Data, ChunkSize, ChunkSize), + Part3 = binary:part(Data, ChunkSize * 2, ChunkSize), + ?line Compressed = zlib:compress(Data), + ?line Z1 = zlib:open(), + ?line zlib:setBufSize(Z1, ChunkSize), + ?m(ok, zlib:inflateInit(Z1)), + ?m([], zlib:inflateChunk(Z1, <<>>)), + ?m({more, Part1}, zlib:inflateChunk(Z1, Compressed)), + ?m({more, Part2}, zlib:inflateChunk(Z1)), + ?m(Part3, zlib:inflateChunk(Z1)), + ?m(ok, zlib:inflateEnd(Z1)), + + ?m(ok, zlib:inflateInit(Z1)), + ?m({more, Part1}, zlib:inflateChunk(Z1, Compressed)), + + ?m(ok, zlib:inflateReset(Z1)), + + ?line zlib:setBufSize(Z1, size(Data)), + ?m(Data, zlib:inflateChunk(Z1, Compressed)), + ?m(ok, zlib:inflateEnd(Z1)), + + ?m(ok, zlib:inflateInit(Z1)), + ?m(?BARG, zlib:inflateChunk(gurka, Compressed)), + ?m(?BARG, zlib:inflateChunk(Z1, 4384)), + ?m({'EXIT',{data_error,_}}, zlib:inflateEnd(Z1)), + ?m(ok, zlib:close(Z1)). + api_inflateEnd(doc) -> "Test inflateEnd"; api_inflateEnd(suite) -> []; api_inflateEnd(Config) when is_list(Config) -> -- cgit v1.2.3