diff options
Diffstat (limited to 'lib/kernel/src/raw_file_io_inflate.erl')
-rw-r--r-- | lib/kernel/src/raw_file_io_inflate.erl | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/lib/kernel/src/raw_file_io_inflate.erl b/lib/kernel/src/raw_file_io_inflate.erl new file mode 100644 index 0000000000..7e9780310c --- /dev/null +++ b/lib/kernel/src/raw_file_io_inflate.erl @@ -0,0 +1,261 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(raw_file_io_inflate). + +-behavior(gen_statem). + +-export([init/1, callback_mode/0, terminate/3]). +-export([opening/3, opened_gzip/3, opened_passthrough/3]). + +-include("file_int.hrl"). + +-define(INFLATE_CHUNK_SIZE, (1 bsl 10)). +-define(GZIP_WBITS, (16 + 15)). + +callback_mode() -> state_functions. + +init({Owner, Secret, [compressed]}) -> + Monitor = monitor(process, Owner), + %% We're using the undocumented inflateInit/3 to open the stream in + %% 'reset mode', which resets the inflate state at the end of every stream, + %% allowing us to read concatenated gzip files. + Z = zlib:open(), + ok = zlib:inflateInit(Z, ?GZIP_WBITS, reset), + Data = + #{ owner => Owner, + monitor => Monitor, + secret => Secret, + position => 0, + buffer => prim_buffer:new(), + zlib => Z }, + {ok, opening, Data}. + +%% The old driver fell back to plain reads if the file didn't start with the +%% magic gzip bytes. +choose_decompression_state(PrivateFd) -> + State = + case ?CALL_FD(PrivateFd, read, [2]) of + {ok, <<16#1F, 16#8B>>} -> opened_gzip; + _Other -> opened_passthrough + end, + {ok, 0} = ?CALL_FD(PrivateFd, position, [0]), + State. + +opening({call, From}, {'$open', Secret, Filename, Modes}, #{ secret := Secret } = Data) -> + case raw_file_io:open(Filename, Modes) of + {ok, PrivateFd} -> + NextState = choose_decompression_state(PrivateFd), + NewData = Data#{ handle => PrivateFd }, + {next_state, NextState, NewData, [{reply, From, ok}]}; + Other -> + {stop_and_reply, normal, [{reply, From, Other}]} + end; +opening(_Event, _Contents, _Data) -> + {keep_state_and_data, [postpone]}. + +internal_close(From, Data) -> + #{ handle := PrivateFd } = Data, + Response = ?CALL_FD(PrivateFd, close, []), + {stop_and_reply, normal, [{reply, From, Response}]}. + +opened_passthrough(info, {'DOWN', Monitor, process, _Owner, _Reason}, #{ monitor := Monitor }) -> + {stop, shutdown}; + +opened_passthrough(info, _Message, _Data) -> + keep_state_and_data; + +opened_passthrough({call, {Owner, _Tag} = From}, [close], #{ owner := Owner } = Data) -> + internal_close(From, Data); + +opened_passthrough({call, {Owner, _Tag} = From}, [Method | Args], #{ owner := Owner } = Data) -> + #{ handle := PrivateFd } = Data, + Response = ?CALL_FD(PrivateFd, Method, Args), + {keep_state_and_data, [{reply, From, Response}]}; + +opened_passthrough({call, _From}, _Command, _Data) -> + %% The client functions filter this out, so we'll crash if the user does + %% anything stupid on purpose. + {shutdown, protocol_violation}; + +opened_passthrough(_Event, _Request, _Data) -> + keep_state_and_data. + +%% + +opened_gzip(info, {'DOWN', Monitor, process, _Owner, _Reason}, #{ monitor := Monitor }) -> + {stop, shutdown}; + +opened_gzip(info, _Message, _Data) -> + keep_state_and_data; + +opened_gzip({call, {Owner, _Tag} = From}, [close], #{ owner := Owner } = Data) -> + internal_close(From, Data); + +opened_gzip({call, {Owner, _Tag} = From}, [position, Mark], #{ owner := Owner } = Data) -> + case position(Data, Mark) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [read, Size], #{ owner := Owner } = Data) -> + case read(Data, Size) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [read_line], #{ owner := Owner } = Data) -> + case read_line(Data) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [write, _IOData], #{ owner := Owner }) -> + Response = {error, ebadf}, + {keep_state_and_data, [{reply, From, Response}]}; + +opened_gzip({call, {Owner, _Tag} = From}, _Request, #{ owner := Owner }) -> + Response = {error, enotsup}, + {keep_state_and_data, [{reply, From, Response}]}; + +opened_gzip({call, _From}, _Request, _Data) -> + %% The client functions filter this out, so we'll crash if the user does + %% anything stupid on purpose. + {shutdown, protocol_violation}; + +opened_gzip(_Event, _Request, _Data) -> + keep_state_and_data. + +%% + +read(#{ buffer := Buffer } = Data, Size) -> + try read_1(Data, Buffer, prim_buffer:size(Buffer), Size) of + Result -> Result + catch + error:badarg -> {error, badarg}; + error:_ -> {error, eio} + end. +read_1(Data, Buffer, BufferSize, ReadSize) when BufferSize >= ReadSize -> + #{ position := Position } = Data, + Decompressed = prim_buffer:read(Buffer, ReadSize), + {ok, Data#{ position => (Position + ReadSize) }, Decompressed}; +read_1(Data, Buffer, BufferSize, ReadSize) when BufferSize < ReadSize -> + #{ handle := PrivateFd } = Data, + case ?CALL_FD(PrivateFd, read, [?INFLATE_CHUNK_SIZE]) of + {ok, Compressed} -> + #{ zlib := Z } = Data, + Uncompressed = erlang:iolist_to_iovec(zlib:inflate(Z, Compressed)), + prim_buffer:write(Buffer, Uncompressed), + read_1(Data, Buffer, prim_buffer:size(Buffer), ReadSize); + eof when BufferSize > 0 -> + read_1(Data, Buffer, BufferSize, BufferSize); + Other -> + Other + end. + +read_line(#{ buffer := Buffer } = Data) -> + try read_line_1(Data, Buffer, prim_buffer:find_byte_index(Buffer, $\n)) of + {ok, NewData, Decompressed} -> {ok, NewData, Decompressed}; + Other -> Other + catch + error:badarg -> {error, badarg}; + error:_ -> {error, eio} + end. + +read_line_1(Data, Buffer, not_found) -> + #{ handle := PrivateFd, zlib := Z } = Data, + case ?CALL_FD(PrivateFd, read, [?INFLATE_CHUNK_SIZE]) of + {ok, Compressed} -> + Uncompressed = erlang:iolist_to_iovec(zlib:inflate(Z, Compressed)), + prim_buffer:write(Buffer, Uncompressed), + read_line_1(Data, Buffer, prim_buffer:find_byte_index(Buffer, $\n)); + eof -> + case prim_buffer:size(Buffer) of + Size when Size > 0 -> {ok, prim_buffer:read(Buffer, Size)}; + Size when Size =:= 0 -> eof + end; + Error -> + Error + end; +read_line_1(Data, Buffer, {ok, LFIndex}) -> + %% Translate CRLF into just LF, completely ignoring which encoding is used, + %% but treat the file position as including CR. + #{ position := Position } = Data, + NewData = Data#{ position => (Position + LFIndex + 1) }, + CRIndex = (LFIndex - 1), + TranslatedLine = + case prim_buffer:read(Buffer, LFIndex + 1) of + <<Line:CRIndex/binary, "\r\n">> -> <<Line/binary, "\n">>; + Line -> Line + end, + {ok, NewData, TranslatedLine}. + +%% +%% We support seeking in both directions as long as it isn't relative to EOF. +%% +%% Seeking backwards is extremely inefficient since we have to seek to the very +%% beginning and then decompress up to the desired point. +%% + +position(Data, Mark) when is_atom(Mark) -> + position(Data, {Mark, 0}); +position(Data, Offset) when is_integer(Offset) -> + position(Data, {bof, Offset}); +position(Data, {bof, Offset}) when is_integer(Offset) -> + position_1(Data, Offset); +position(Data, {cur, Offset}) when is_integer(Offset) -> + #{ position := Position } = Data, + position_1(Data, Position + Offset); +position(_Data, {eof, Offset}) when is_integer(Offset) -> + {error, einval}; +position(_Data, _Other) -> + {error, badarg}. + +position_1(_Data, Desired) when Desired < 0 -> + {error, einval}; +position_1(#{ position := Desired } = Data, Desired) -> + {ok, Data, Desired}; +position_1(#{ position := Current } = Data, Desired) when Current < Desired -> + case read(Data, min(Desired - Current, ?INFLATE_CHUNK_SIZE)) of + {ok, NewData, _Data} -> position_1(NewData, Desired); + eof -> {ok, Data, Current}; + Other -> Other + end; +position_1(#{ position := Current } = Data, Desired) when Current > Desired -> + #{ handle := PrivateFd, buffer := Buffer, zlib := Z } = Data, + case ?CALL_FD(PrivateFd, position, [bof]) of + {ok, 0} -> + ok = zlib:inflateReset(Z), + prim_buffer:wipe(Buffer), + position_1(Data#{ position => 0 }, Desired); + Other -> + Other + end. + +terminate(_Reason, _State, _Data) -> + ok. |