From ebbd26eeea4115c946d1254d94acd50f150b4455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Fri, 3 Nov 2017 11:49:27 +0100 Subject: Reimplement efile_drv as a dirty NIF This improves the latency of file operations as dirty schedulers are a bit more eager to run jobs than async threads, and use a single global queue rather than per-thread queues, eliminating the risk of a job stalling behind a long-running job on the same thread while other async threads sit idle. There's no such thing as a free lunch though; the lowered latency comes at the cost of increased busy-waiting which may have an adverse effect on some applications. This behavior can be tweaked with the +sbwt flag, but unfortunately it affects all types of schedulers and not just dirty ones. We plan to add type-specific flags at a later stage. sendfile has been moved to inet_drv to lessen the effect of a nasty race; the cooperation between inet_drv and efile has never been airtight and the socket dying at the wrong time (Regardless of reason) could result in fd aliasing. Moving it to the inet driver makes it impossible to trigger this by closing the socket in the middle of a sendfile operation, while still allowing it to be aborted -- something that can't be done if it stays in the file driver. The race still occurs if the controlling process dies in the short window between dispatching the sendfile operation and the dup(2) call in the driver, but it's much less likely to happen now. A proper fix is in the works. -- Notable functional differences: * The use_threads option for file:sendfile/5 no longer has any effect. * The file-specific DTrace probes have been removed. The same effect can be achieved with normal tracing together with the nif__entry/nif__return probes to track scheduling. -- OTP-14256 --- lib/kernel/src/raw_file_io_inflate.erl | 261 +++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 lib/kernel/src/raw_file_io_inflate.erl (limited to 'lib/kernel/src/raw_file_io_inflate.erl') diff --git a/lib/kernel/src/raw_file_io_inflate.erl b/lib/kernel/src/raw_file_io_inflate.erl new file mode 100644 index 0000000000..7e9780310c --- /dev/null +++ b/lib/kernel/src/raw_file_io_inflate.erl @@ -0,0 +1,261 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(raw_file_io_inflate). + +-behavior(gen_statem). + +-export([init/1, callback_mode/0, terminate/3]). +-export([opening/3, opened_gzip/3, opened_passthrough/3]). + +-include("file_int.hrl"). + +-define(INFLATE_CHUNK_SIZE, (1 bsl 10)). +-define(GZIP_WBITS, (16 + 15)). + +callback_mode() -> state_functions. + +init({Owner, Secret, [compressed]}) -> + Monitor = monitor(process, Owner), + %% We're using the undocumented inflateInit/3 to open the stream in + %% 'reset mode', which resets the inflate state at the end of every stream, + %% allowing us to read concatenated gzip files. + Z = zlib:open(), + ok = zlib:inflateInit(Z, ?GZIP_WBITS, reset), + Data = + #{ owner => Owner, + monitor => Monitor, + secret => Secret, + position => 0, + buffer => prim_buffer:new(), + zlib => Z }, + {ok, opening, Data}. + +%% The old driver fell back to plain reads if the file didn't start with the +%% magic gzip bytes. +choose_decompression_state(PrivateFd) -> + State = + case ?CALL_FD(PrivateFd, read, [2]) of + {ok, <<16#1F, 16#8B>>} -> opened_gzip; + _Other -> opened_passthrough + end, + {ok, 0} = ?CALL_FD(PrivateFd, position, [0]), + State. + +opening({call, From}, {'$open', Secret, Filename, Modes}, #{ secret := Secret } = Data) -> + case raw_file_io:open(Filename, Modes) of + {ok, PrivateFd} -> + NextState = choose_decompression_state(PrivateFd), + NewData = Data#{ handle => PrivateFd }, + {next_state, NextState, NewData, [{reply, From, ok}]}; + Other -> + {stop_and_reply, normal, [{reply, From, Other}]} + end; +opening(_Event, _Contents, _Data) -> + {keep_state_and_data, [postpone]}. + +internal_close(From, Data) -> + #{ handle := PrivateFd } = Data, + Response = ?CALL_FD(PrivateFd, close, []), + {stop_and_reply, normal, [{reply, From, Response}]}. + +opened_passthrough(info, {'DOWN', Monitor, process, _Owner, _Reason}, #{ monitor := Monitor }) -> + {stop, shutdown}; + +opened_passthrough(info, _Message, _Data) -> + keep_state_and_data; + +opened_passthrough({call, {Owner, _Tag} = From}, [close], #{ owner := Owner } = Data) -> + internal_close(From, Data); + +opened_passthrough({call, {Owner, _Tag} = From}, [Method | Args], #{ owner := Owner } = Data) -> + #{ handle := PrivateFd } = Data, + Response = ?CALL_FD(PrivateFd, Method, Args), + {keep_state_and_data, [{reply, From, Response}]}; + +opened_passthrough({call, _From}, _Command, _Data) -> + %% The client functions filter this out, so we'll crash if the user does + %% anything stupid on purpose. + {shutdown, protocol_violation}; + +opened_passthrough(_Event, _Request, _Data) -> + keep_state_and_data. + +%% + +opened_gzip(info, {'DOWN', Monitor, process, _Owner, _Reason}, #{ monitor := Monitor }) -> + {stop, shutdown}; + +opened_gzip(info, _Message, _Data) -> + keep_state_and_data; + +opened_gzip({call, {Owner, _Tag} = From}, [close], #{ owner := Owner } = Data) -> + internal_close(From, Data); + +opened_gzip({call, {Owner, _Tag} = From}, [position, Mark], #{ owner := Owner } = Data) -> + case position(Data, Mark) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [read, Size], #{ owner := Owner } = Data) -> + case read(Data, Size) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [read_line], #{ owner := Owner } = Data) -> + case read_line(Data) of + {ok, NewData, Result} -> + Response = {ok, Result}, + {keep_state, NewData, [{reply, From, Response}]}; + Other -> + {keep_state_and_data, [{reply, From, Other}]} + end; + +opened_gzip({call, {Owner, _Tag} = From}, [write, _IOData], #{ owner := Owner }) -> + Response = {error, ebadf}, + {keep_state_and_data, [{reply, From, Response}]}; + +opened_gzip({call, {Owner, _Tag} = From}, _Request, #{ owner := Owner }) -> + Response = {error, enotsup}, + {keep_state_and_data, [{reply, From, Response}]}; + +opened_gzip({call, _From}, _Request, _Data) -> + %% The client functions filter this out, so we'll crash if the user does + %% anything stupid on purpose. + {shutdown, protocol_violation}; + +opened_gzip(_Event, _Request, _Data) -> + keep_state_and_data. + +%% + +read(#{ buffer := Buffer } = Data, Size) -> + try read_1(Data, Buffer, prim_buffer:size(Buffer), Size) of + Result -> Result + catch + error:badarg -> {error, badarg}; + error:_ -> {error, eio} + end. +read_1(Data, Buffer, BufferSize, ReadSize) when BufferSize >= ReadSize -> + #{ position := Position } = Data, + Decompressed = prim_buffer:read(Buffer, ReadSize), + {ok, Data#{ position => (Position + ReadSize) }, Decompressed}; +read_1(Data, Buffer, BufferSize, ReadSize) when BufferSize < ReadSize -> + #{ handle := PrivateFd } = Data, + case ?CALL_FD(PrivateFd, read, [?INFLATE_CHUNK_SIZE]) of + {ok, Compressed} -> + #{ zlib := Z } = Data, + Uncompressed = erlang:iolist_to_iovec(zlib:inflate(Z, Compressed)), + prim_buffer:write(Buffer, Uncompressed), + read_1(Data, Buffer, prim_buffer:size(Buffer), ReadSize); + eof when BufferSize > 0 -> + read_1(Data, Buffer, BufferSize, BufferSize); + Other -> + Other + end. + +read_line(#{ buffer := Buffer } = Data) -> + try read_line_1(Data, Buffer, prim_buffer:find_byte_index(Buffer, $\n)) of + {ok, NewData, Decompressed} -> {ok, NewData, Decompressed}; + Other -> Other + catch + error:badarg -> {error, badarg}; + error:_ -> {error, eio} + end. + +read_line_1(Data, Buffer, not_found) -> + #{ handle := PrivateFd, zlib := Z } = Data, + case ?CALL_FD(PrivateFd, read, [?INFLATE_CHUNK_SIZE]) of + {ok, Compressed} -> + Uncompressed = erlang:iolist_to_iovec(zlib:inflate(Z, Compressed)), + prim_buffer:write(Buffer, Uncompressed), + read_line_1(Data, Buffer, prim_buffer:find_byte_index(Buffer, $\n)); + eof -> + case prim_buffer:size(Buffer) of + Size when Size > 0 -> {ok, prim_buffer:read(Buffer, Size)}; + Size when Size =:= 0 -> eof + end; + Error -> + Error + end; +read_line_1(Data, Buffer, {ok, LFIndex}) -> + %% Translate CRLF into just LF, completely ignoring which encoding is used, + %% but treat the file position as including CR. + #{ position := Position } = Data, + NewData = Data#{ position => (Position + LFIndex + 1) }, + CRIndex = (LFIndex - 1), + TranslatedLine = + case prim_buffer:read(Buffer, LFIndex + 1) of + <> -> <>; + Line -> Line + end, + {ok, NewData, TranslatedLine}. + +%% +%% We support seeking in both directions as long as it isn't relative to EOF. +%% +%% Seeking backwards is extremely inefficient since we have to seek to the very +%% beginning and then decompress up to the desired point. +%% + +position(Data, Mark) when is_atom(Mark) -> + position(Data, {Mark, 0}); +position(Data, Offset) when is_integer(Offset) -> + position(Data, {bof, Offset}); +position(Data, {bof, Offset}) when is_integer(Offset) -> + position_1(Data, Offset); +position(Data, {cur, Offset}) when is_integer(Offset) -> + #{ position := Position } = Data, + position_1(Data, Position + Offset); +position(_Data, {eof, Offset}) when is_integer(Offset) -> + {error, einval}; +position(_Data, _Other) -> + {error, badarg}. + +position_1(_Data, Desired) when Desired < 0 -> + {error, einval}; +position_1(#{ position := Desired } = Data, Desired) -> + {ok, Data, Desired}; +position_1(#{ position := Current } = Data, Desired) when Current < Desired -> + case read(Data, min(Desired - Current, ?INFLATE_CHUNK_SIZE)) of + {ok, NewData, _Data} -> position_1(NewData, Desired); + eof -> {ok, Data, Current}; + Other -> Other + end; +position_1(#{ position := Current } = Data, Desired) when Current > Desired -> + #{ handle := PrivateFd, buffer := Buffer, zlib := Z } = Data, + case ?CALL_FD(PrivateFd, position, [bof]) of + {ok, 0} -> + ok = zlib:inflateReset(Z), + prim_buffer:wipe(Buffer), + position_1(Data#{ position => 0 }, Desired); + Other -> + Other + end. + +terminate(_Reason, _State, _Data) -> + ok. -- cgit v1.2.3