diff options
Diffstat (limited to 'lib/kernel/src/file_io_server.erl')
-rw-r--r-- | lib/kernel/src/file_io_server.erl | 882 |
1 files changed, 882 insertions, 0 deletions
diff --git a/lib/kernel/src/file_io_server.erl b/lib/kernel/src/file_io_server.erl new file mode 100644 index 0000000000..37e803c493 --- /dev/null +++ b/lib/kernel/src/file_io_server.erl @@ -0,0 +1,882 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2000-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(file_io_server). + +%% A simple file server for io to one file instance per server instance. + +-export([format_error/1]). +-export([start/3, start_link/3]). + +-export([count_and_find/3]). + +-record(state, {handle,owner,mref,buf,read_mode,unic}). + +-define(PRIM_FILE, prim_file). +-define(READ_SIZE_LIST, 128). +-define(READ_SIZE_BINARY, (8*1024)). + +-define(eat_message(M, T), receive M -> M after T -> timeout end). + +%%%----------------------------------------------------------------- +%%% Exported functions + +format_error({_Line, ?MODULE, Reason}) -> + io_lib:format("~w", [Reason]); +format_error({_Line, Mod, Reason}) -> + Mod:format_error(Reason); +format_error(ErrorId) -> + erl_posix_msg:message(ErrorId). + +start(Owner, FileName, ModeList) + when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + do_start(spawn, Owner, FileName, ModeList). + +start_link(Owner, FileName, ModeList) + when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + do_start(spawn_link, Owner, FileName, ModeList). + +%%%----------------------------------------------------------------- +%%% Server starter, dispatcher and helpers + +do_start(Spawn, Owner, FileName, ModeList) -> + Self = self(), + Ref = make_ref(), + Pid = + erlang:Spawn( + fun() -> + %% process_flag(trap_exit, true), + case parse_options(ModeList) of + {ReadMode, UnicodeMode, Opts} -> + case ?PRIM_FILE:open(FileName, Opts) of + {error, Reason} = Error -> + Self ! {Ref, Error}, + exit(Reason); + {ok, Handle} -> + %% XXX must I handle R6 nodes here? + M = erlang:monitor(process, Owner), + Self ! {Ref, ok}, + server_loop( + #state{handle = Handle, + owner = Owner, + mref = M, + buf = <<>>, + read_mode = ReadMode, + unic = UnicodeMode}) + end; + {error,Reason1} = Error1 -> + Self ! {Ref, Error1}, + exit(Reason1) + end + end), + Mref = erlang:monitor(process, Pid), + receive + {Ref, {error, _Reason} = Error} -> + erlang:demonitor(Mref), + receive {'DOWN', Mref, _, _, _} -> ok after 0 -> ok end, + Error; + {Ref, ok} -> + erlang:demonitor(Mref), + receive + {'DOWN', Mref, _, _, Reason} -> + {error, Reason} + after 0 -> + {ok, Pid} + end; + {'DOWN', Mref, _, _, Reason} -> + {error, Reason} + end. + +%%% Returns {ReadMode, UnicodeMode, RealOpts} +parse_options(List) -> + parse_options(expand_encoding(List), list, latin1, []). + +parse_options([],list,Uni,Acc) -> + {list,Uni,[binary|lists:reverse(Acc)]}; +parse_options([],binary,Uni,Acc) -> + {binary,Uni,lists:reverse(Acc)}; +parse_options([{encoding, Encoding}|T],RMode,_,Acc) -> + case valid_enc(Encoding) of + {ok, ExpandedEnc} -> + parse_options(T,RMode,ExpandedEnc,Acc); + {error,Reason} -> + {error,Reason} + end; +parse_options([binary|T],_,Uni,Acc) -> + parse_options(T,binary,Uni,[binary|Acc]); +parse_options([H|T],R,U,Acc) -> + parse_options(T,R,U,[H|Acc]). + +expand_encoding([]) -> + []; +expand_encoding([latin1 | T]) -> + [{encoding,latin1} | expand_encoding(T)]; +expand_encoding([unicode | T]) -> + [{encoding,unicode} | expand_encoding(T)]; +expand_encoding([H|T]) -> + [H|expand_encoding(T)]. + +valid_enc(latin1) -> + {ok,latin1}; +valid_enc(utf8) -> + {ok,unicode}; +valid_enc(unicode) -> + {ok,unicode}; +valid_enc(utf16) -> + {ok,{utf16,big}}; +valid_enc({utf16,big}) -> + {ok,{utf16,big}}; +valid_enc({utf16,little}) -> + {ok,{utf16,little}}; +valid_enc(utf32) -> + {ok,{utf32,big}}; +valid_enc({utf32,big}) -> + {ok,{utf32,big}}; +valid_enc({utf32,little}) -> + {ok,{utf32,little}}; +valid_enc(_Other) -> + {error,badarg}. + + + +server_loop(#state{mref = Mref} = State) -> + receive + {file_request, From, ReplyAs, Request} when is_pid(From) -> + case file_request(Request, State) of + {reply, Reply, NewState} -> + file_reply(From, ReplyAs, Reply), + server_loop(NewState); + {error, Reply, NewState} -> + %% error is the same as reply, except that + %% it breaks the io_request_loop further down + file_reply(From, ReplyAs, Reply), + server_loop(NewState); + {stop, Reason, Reply, _NewState} -> + file_reply(From, ReplyAs, Reply), + exit(Reason) + end; + {io_request, From, ReplyAs, Request} when is_pid(From) -> + case io_request(Request, State) of + {reply, Reply, NewState} -> + io_reply(From, ReplyAs, Reply), + server_loop(NewState); + {error, Reply, NewState} -> + %% error is the same as reply, except that + %% it breaks the io_request_loop further down + io_reply(From, ReplyAs, Reply), + server_loop(NewState); + {stop, Reason, Reply, _NewState} -> + io_reply(From, ReplyAs, Reply), + exit(Reason) + end; + {'DOWN', Mref, _, _, Reason} -> + exit(Reason); + _ -> + server_loop(State) + end. + +file_reply(From, ReplyAs, Reply) -> + From ! {file_reply, ReplyAs, Reply}. + +io_reply(From, ReplyAs, Reply) -> + From ! {io_reply, ReplyAs, Reply}. + +%%%----------------------------------------------------------------- +%%% file requests + +file_request({pread,At,Sz}, + #state{handle=Handle,buf=Buf,read_mode=ReadMode}=State) -> + case position(Handle, At, Buf) of + {ok,_Offs} -> + case ?PRIM_FILE:read(Handle, Sz) of + {ok,Bin} when ReadMode =:= list -> + std_reply({ok,binary_to_list(Bin)}, State); + Reply -> + std_reply(Reply, State) + end; + Reply -> + std_reply(Reply, State) + end; +file_request({pwrite,At,Data}, + #state{handle=Handle,buf=Buf}=State) -> + case position(Handle, At, Buf) of + {ok,_Offs} -> + std_reply(?PRIM_FILE:write(Handle, Data), State); + Reply -> + std_reply(Reply, State) + end; +file_request(sync, + #state{handle=Handle}=State) -> + case ?PRIM_FILE:sync(Handle) of + {error,_}=Reply -> + {stop,normal,Reply,State}; + Reply -> + {reply,Reply,State} + end; +file_request(close, + #state{handle=Handle}=State) -> + {stop,normal,?PRIM_FILE:close(Handle),State#state{buf= <<>>}}; +file_request({position,At}, + #state{handle=Handle,buf=Buf}=State) -> + std_reply(position(Handle, At, Buf), State); +file_request(truncate, + #state{handle=Handle}=State) -> + case ?PRIM_FILE:truncate(Handle) of + {error,_Reason}=Reply -> + {stop,normal,Reply,State#state{buf= <<>>}}; + Reply -> + {reply,Reply,State} + end; +file_request(Unknown, + #state{}=State) -> + Reason = {request, Unknown}, + {error,{error,Reason},State}. + +std_reply({error,_}=Reply, State) -> + {error,Reply,State#state{buf= <<>>}}; +std_reply(Reply, State) -> + {reply,Reply,State#state{buf= <<>>}}. + +%%%----------------------------------------------------------------- +%%% I/O request + +%% New protocol with encoding tags (R13) +io_request({put_chars, Enc, Chars}, + #state{buf= <<>>}=State) -> + put_chars(Chars, Enc, State); +io_request({put_chars, Enc, Chars}, + #state{handle=Handle,buf=Buf}=State) -> + case position(Handle, cur, Buf) of + {error,_}=Reply -> + {stop,normal,Reply,State#state{buf= <<>>}}; + _ -> + put_chars(Chars, Enc, State#state{buf= <<>>}) + end; +io_request({put_chars,Enc,Mod,Func,Args}, + #state{}=State) -> + case catch apply(Mod, Func, Args) of + Chars when is_list(Chars); is_binary(Chars) -> + io_request({put_chars,Enc,Chars}, State); + _ -> + {error,{error,Func},State} + end; + + +io_request({get_until,Enc,_Prompt,Mod,Func,XtraArgs}, + #state{}=State) -> + get_chars(io_lib, get_until, {Mod, Func, XtraArgs}, Enc, State); +io_request({get_chars,Enc,_Prompt,N}, + #state{}=State) -> + get_chars(N, Enc, State); + +%% +%% This optimization gives almost nothing - needs more working... +%% Disabled for now. /PaN +%% +%% io_request({get_line,Enc,_Prompt}, +%% #state{unic=latin1}=State) -> +%% get_line(Enc,State); + +io_request({get_line,Enc,_Prompt}, + #state{}=State) -> + get_chars(io_lib, collect_line, [], Enc, State); + + +io_request({setopts, Opts}, + #state{}=State) when is_list(Opts) -> + setopts(Opts, State); + +io_request(getopts, + #state{}=State) -> + getopts(State); + +%% BC with pre-R13 nodes +io_request({put_chars, Chars},#state{}=State) -> + io_request({put_chars, latin1, Chars},State); +io_request({put_chars,Mod,Func,Args}, #state{}=State) -> + io_request({put_chars,latin1,Mod,Func,Args}, State); +io_request({get_until,_Prompt,Mod,Func,XtraArgs}, #state{}=State) -> + io_request({get_until,latin1,_Prompt,Mod,Func,XtraArgs}, State); +io_request({get_chars,_Prompt,N}, #state{}=State) -> + io_request({get_chars,latin1,_Prompt,N}, State); +io_request({get_line,_Prompt}, #state{}=State) -> + io_request({get_line,latin1,_Prompt}, State); + +io_request({requests,Requests}, + #state{}=State) when is_list(Requests) -> + io_request_loop(Requests, {reply,ok,State}); +io_request(Unknown, + #state{}=State) -> + Reason = {request,Unknown}, + {error,{error,Reason},State}. + + + +%% Process a list of requests as long as the results are ok. + +io_request_loop([], Result) -> + Result; +io_request_loop([_Request|_Tail], + {stop,_Reason,_Reply,_State}=Result) -> + Result; +io_request_loop([_Request|_Tail], + {error,_Reply,_State}=Result) -> + Result; +io_request_loop([Request|Tail], + {reply,_Reply,State}) -> + io_request_loop(Tail, io_request(Request, State)). + + + +%% I/O request put_chars +%% +put_chars(Chars, latin1, #state{handle=Handle, unic=latin1}=State) -> + case ?PRIM_FILE:write(Handle, Chars) of + {error,_}=Reply -> + {stop,normal,Reply,State}; + Reply -> + {reply,Reply,State} + end; +put_chars(Chars, InEncoding, #state{handle=Handle, unic=OutEncoding}=State) -> + case unicode:characters_to_binary(Chars,InEncoding,OutEncoding) of + Bin when is_binary(Bin) -> + case ?PRIM_FILE:write(Handle, Bin) of + {error,_}=Reply -> + {stop,normal,Reply,State}; + Reply -> + {reply,Reply,State} + end; + {error,_,_} -> + {stop,normal,{error,{no_translation, InEncoding, OutEncoding}},State} + end. + +%% +%% Process the I/O request get_line for latin1 encoding of file specially +%% Unfortunately this function gives almost nothing, it needs more work +%% I disable it for now /PaN +%% +%% srch(<<>>,_,_) -> +%% nomatch; +%% srch(<<X:8,_/binary>>,X,N) -> +%% {match,N}; +%% srch(<<_:8,T/binary>>,X,N) -> +%% srch(T,X,N+1). +%% get_line(OutEnc, #state{handle=Handle,buf = <<>>,unic=latin1}=State) -> +%% case ?PRIM_FILE:read(Handle,?READ_SIZE_BINARY) of +%% {ok, B} -> +%% get_line(OutEnc, State#state{buf = B}); +%% eof -> +%% {reply,eof,State}; +%% {error,Reason}=Error -> +%% {stop,Reason,Error,State} +%% end; +%% get_line(OutEnc, #state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State) -> +%% case srch(Buf,$\n,0) of +%% nomatch -> +%% case ?PRIM_FILE:read(Handle,?READ_SIZE_BINARY) of +%% {ok, B} -> +%% get_line(OutEnc,State#state{buf = <<Buf/binary,B/binary>>}); +%% eof -> +%% std_reply(cast(Buf, ReadMode,latin1,OutEnc), State); +%% {error,Reason}=Error -> +%% {stop,Reason,Error,State#state{buf= <<>>}} +%% end; +%% {match,Pos} when Pos >= 1-> +%% PosP1 = Pos + 1, +%% <<Res0:PosP1/binary,NewBuf/binary>> = Buf, +%% PosM1 = Pos - 1, +%% Res = case Res0 of +%% <<Chomped:PosM1/binary,$\r:8,$\n:8>> -> +%% cat(Chomped, <<"\n">>, ReadMode,latin1,OutEnc); +%% _Other -> +%% cast(Res0, ReadMode,latin1,OutEnc) +%% end, +%% {reply,Res,State#state{buf=NewBuf}}; +%% {match,Pos} -> +%% PosP1 = Pos + 1, +%% <<Res:PosP1/binary,NewBuf/binary>> = Buf, +%% {reply,Res,State#state{buf=NewBuf}} +%% end; +%% get_line(_, #state{}=State) -> +%% {error,{error,get_line},State}. + +%% +%% Process the I/O request get_chars +%% +get_chars(0, Enc, #state{read_mode=ReadMode,unic=InEncoding}=State) -> + {reply,cast(<<>>, ReadMode,InEncoding, Enc),State}; +get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) + when is_integer(N), N > 0, N =< byte_size(Buf) -> + {B1,B2} = split_binary(Buf, N), + {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; +get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) + when is_integer(N), N > 0, N =< byte_size(Buf) -> + {B1,B2} = split_binary(Buf, N), + {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; +get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State) + when is_integer(N), N > 0 -> + BufSize = byte_size(Buf), + NeedSize = N-BufSize, + Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), + case ?PRIM_FILE:read(Handle, Size) of + {ok, B} -> + if BufSize+byte_size(B) < N -> + std_reply(cat(Buf, B, ReadMode,latin1,OutEnc), State); + true -> + {B1,B2} = split_binary(B, NeedSize), + {reply,cat(Buf, B1, ReadMode, latin1,OutEnc),State#state{buf=B2}} + end; + eof when BufSize =:= 0 -> + {reply,eof,State}; + eof -> + std_reply(cast(Buf, ReadMode,latin1,OutEnc), State); + {error,Reason}=Error -> + {stop,Reason,Error,State#state{buf= <<>>}} + end; +get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=InEncoding}=State) + when is_integer(N), N > 0 -> + try + %% This is rather tricky, we need to count the actual number of characters + %% in the buffer first as unicode characters are not constant in length + {BufCount, SplitPos} = count_and_find(Buf,N,InEncoding), + case BufCount >= N of + true -> + {B1,B2} = case SplitPos of + none -> {Buf,<<>>}; + _ ->split_binary(Buf,SplitPos) + end, + {reply,cast(B1, ReadMode,InEncoding,OutEnc),State#state{buf=B2}}; + false -> + %% Need more, Try to read 4*needed in bytes... + NeedSize = (N - BufCount) * 4, + Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), + case ?PRIM_FILE:read(Handle, Size) of + {ok, B} -> + NewBuf = list_to_binary([Buf,B]), + {NewCount,NewSplit} = count_and_find(NewBuf,N,InEncoding), + case NewCount >= N of + true -> + {B01,B02} = case NewSplit of + none -> {NewBuf,<<>>}; + _ ->split_binary(NewBuf, NewSplit) + end, + {reply,cast(B01, ReadMode,InEncoding,OutEnc), + State#state{buf=B02}}; + false -> + %% Reached end of file + std_reply(cast(NewBuf, ReadMode,InEncoding,OutEnc), + State#state{buf = <<>>}) + end; + eof when BufCount =:= 0 -> + {reply,eof,State}; + eof -> + std_reply(cast(Buf, ReadMode,InEncoding,OutEnc), State#state{buf = <<>>}); + {error,Reason}=Error -> + {stop,Reason,Error,State#state{buf = <<>>}} + end + end + catch + exit:ExError -> + {stop,ExError,{error,ExError},State#state{buf= <<>>}} + end; + +get_chars(_N, _, #state{}=State) -> + {error,{error,get_chars},State}. + +get_chars(Mod, Func, XtraArg, OutEnc, #state{buf= <<>>}=State) -> + get_chars_empty(Mod, Func, XtraArg, start, OutEnc, State); +get_chars(Mod, Func, XtraArg, OutEnc, #state{buf=Buf}=State) -> + get_chars_apply(Mod, Func, XtraArg, start, OutEnc, State#state{buf= <<>>}, Buf). + +get_chars_empty(Mod, Func, XtraArg, S, latin1, + #state{handle=Handle,read_mode=ReadMode, unic=latin1}=State) -> + case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of + {ok,Bin} -> + get_chars_apply(Mod, Func, XtraArg, S, latin1, State, Bin); + eof -> + get_chars_apply(Mod, Func, XtraArg, S, latin1, State, eof); + {error,Reason}=Error -> + {stop,Reason,Error,State} + end; +get_chars_empty(Mod, Func, XtraArg, S, OutEnc, + #state{handle=Handle,read_mode=ReadMode}=State) -> + case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of + {ok,Bin} -> + get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, Bin); + eof -> + get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); + {error,Reason}=Error -> + {stop,Reason,Error,State} + end. +get_chars_notempty(Mod, Func, XtraArg, S, OutEnc, + #state{handle=Handle,read_mode=ReadMode,buf = B}=State) -> + case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of + {ok,Bin} -> + get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, list_to_binary([B,Bin])); + eof -> + case B of + <<>> -> + get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); + _ -> + {stop,invalid_unicode,{error,invalid_unicode},State} + end; + {error,Reason}=Error -> + {stop,Reason,Error,State} + end. + + +get_chars_apply(Mod, Func, XtraArg, S0, latin1, + #state{read_mode=ReadMode,unic=latin1}=State, Data0) -> + Data1 = case ReadMode of + list when is_binary(Data0) -> binary_to_list(Data0); + _ -> Data0 + end, + case catch Mod:Func(S0, Data1, latin1, XtraArg) of + {stop,Result,Buf} -> + {reply,Result,State#state{buf=cast_binary(Buf)}}; + {'EXIT',Reason} -> + {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; + S1 -> + get_chars_empty(Mod, Func, XtraArg, S1, latin1, State) + end; +get_chars_apply(Mod, Func, XtraArg, S0, OutEnc, + #state{read_mode=ReadMode,unic=InEnc}=State, Data0) -> + try + {Data1,NewBuff} = case ReadMode of + list when is_binary(Data0) -> + case unicode:characters_to_list(Data0,InEnc) of + {Tag,Decoded,Rest} when Decoded =/= [], Tag =:= error; Decoded =/= [], Tag =:= incomplete -> + {Decoded,erlang:iolist_to_binary(Rest)}; + {error, [], _} -> + exit(invalid_unicode); + {incomplete, [], R} -> + {[],R}; + List when is_list(List) -> + {List,<<>>} + end; + binary when is_binary(Data0) -> + case unicode:characters_to_binary(Data0,InEnc,OutEnc) of + {Tag2,Decoded2,Rest2} when Decoded2 =/= <<>>, Tag2 =:= error; Decoded2 =/= <<>>, Tag2 =:= incomplete -> + {Decoded2,erlang:iolist_to_binary(Rest2)}; + {error, <<>>, _} -> + exit(invalid_unicode); + {incomplete, <<>>, R} -> + {<<>>,R}; + Binary when is_binary(Binary) -> + {Binary,<<>>} + end; + _ -> %i.e. eof + {Data0,<<>>} + end, + case catch Mod:Func(S0, Data1, OutEnc, XtraArg) of + {stop,Result,Buf} -> + {reply,Result,State#state{buf = (if + is_binary(Buf) -> + list_to_binary([unicode:characters_to_binary(Buf,OutEnc,InEnc),NewBuff]); + is_list(Buf) -> + list_to_binary([unicode:characters_to_binary(Buf,unicode,InEnc),NewBuff]); + true -> + NewBuff + end)}}; + {'EXIT',Reason} -> + {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; + S1 -> + get_chars_notempty(Mod, Func, XtraArg, S1, OutEnc, State#state{buf=NewBuff}) + end + catch + exit:ExReason -> + {stop,ExReason,{error,err_func(Mod, Func, XtraArg)},State}; + error:ErrReason -> + {stop,ErrReason,{error,err_func(Mod, Func, XtraArg)},State} + end. + + + +%% Convert error code to make it look as before +err_func(io_lib, get_until, {_,F,_}) -> + F; +err_func(_, F, _) -> + F. + + + +%% Process the I/O request setopts +%% +%% setopts +setopts(Opts0,State) -> + Opts = proplists:unfold( + proplists:substitute_negations( + [{list,binary}], + expand_encoding(Opts0))), + case check_valid_opts(Opts) of + true -> + do_setopts(Opts,State); + false -> + {error,{error,enotsup},State} + end. +check_valid_opts([]) -> + true; +check_valid_opts([{binary,_}|T]) -> + check_valid_opts(T); +check_valid_opts([{encoding,_Enc}|T]) -> + check_valid_opts(T); +check_valid_opts(_) -> + false. +do_setopts(Opts, State) -> + case valid_enc(proplists:get_value(encoding, Opts, State#state.unic)) of + {ok,NewUnic} -> + case proplists:get_value(binary, Opts) of + true -> + {reply,ok,State#state{read_mode=binary, unic=NewUnic}}; + false -> + {reply,ok,State#state{read_mode=list, unic=NewUnic}}; + undefined -> + {reply,ok,State#state{unic=NewUnic}} + end; + _ -> + {error,{error,badarg},State} + end. + +getopts(#state{read_mode=RM, unic=Unic} = State) -> + Bin = {binary, case RM of + binary -> + true; + _ -> + false + end}, + Uni = {encoding, Unic}, + {reply,[Bin,Uni],State}. + + +%% Concatenate two binaries and convert the result to list or binary +cat(B1, B2, binary,latin1,latin1) -> + list_to_binary([B1,B2]); +cat(B1, B2, binary,InEncoding,OutEncoding) -> + case unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding) of + Good when is_binary(Good) -> + Good; + _ -> + exit({no_translation,InEncoding,OutEncoding}) + end; +%% Dialyzer finds this is never used... +%% cat(B1, B2, list, InEncoding, OutEncoding) when InEncoding =/= latin1 -> +%% % Catch i.e. unicode -> latin1 errors by using the outencoding although otherwise +%% % irrelevant for lists... +%% try +%% unicode:characters_to_list(unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding), +%% OutEncoding) +%% catch +%% error:_ -> +%% exit({no_translation,InEncoding,OutEncoding}) +%% end. +cat(B1, B2, list, latin1,_) -> + binary_to_list(B1)++binary_to_list(B2). + +%% Cast binary to list or binary +cast(B, binary, latin1, latin1) -> + B; +cast(B, binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(B,InEncoding,OutEncoding) of + Good when is_binary(Good) -> + Good; + _ -> + exit({no_translation,InEncoding,OutEncoding}) + end; +cast(B, list, latin1, _) -> + binary_to_list(B); +cast(B, list, InEncoding, OutEncoding) -> + try + unicode:characters_to_list(unicode:characters_to_binary(B,InEncoding,OutEncoding), + OutEncoding) + catch + error:_ -> + exit({no_translation,InEncoding,OutEncoding}) + end. + +%% Convert buffer to binary +cast_binary(Binary) when is_binary(Binary) -> + Binary; +cast_binary(List) when is_list(List) -> + list_to_binary(List); +cast_binary(_EOF) -> + <<>>. + +%% Read size for different read modes +read_size(binary) -> + ?READ_SIZE_BINARY; +read_size(list) -> + ?READ_SIZE_LIST. + +%% Utf utility +count_and_find(Bin,N,Encoding) -> + cafu(Bin,N,0,0,none,case Encoding of + unicode -> utf8; + Oth -> Oth + end). + +cafu(<<>>,0,Count,ByteCount,_SavePos,_) -> + {Count,ByteCount}; +cafu(<<>>,_N,Count,_ByteCount,SavePos,_) -> + {Count,SavePos}; +cafu(<<_/utf8,Rest/binary>>, 0, Count, ByteCount, _SavePos, utf8) -> + cafu(Rest,-1,Count+1,0,ByteCount,utf8); +cafu(<<_/utf8,Rest/binary>>, N, Count, _ByteCount, SavePos, utf8) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos,utf8); +cafu(<<_/utf8,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, utf8) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,utf8); +cafu(<<_/utf16-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,big}) -> + cafu(Rest,-1,Count+1,0,ByteCount,{utf16,big}); +cafu(<<_/utf16-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,big}) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos,{utf16,big}); +cafu(<<_/utf16-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,big}) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,big}); +cafu(<<_/utf16-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,little}) -> + cafu(Rest,-1,Count+1,0,ByteCount,{utf16,little}); +cafu(<<_/utf16-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,little}) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos,{utf16,little}); +cafu(<<_/utf16-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,little}) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,little}); +cafu(<<_/utf32-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,big}) -> + cafu(Rest,-1,Count+1,0,ByteCount,{utf32,big}); +cafu(<<_/utf32-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,big}) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos,{utf32,big}); +cafu(<<_/utf32-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,big}) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,big}); +cafu(<<_/utf32-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,little}) -> + cafu(Rest,-1,Count+1,0,ByteCount,{utf32,little}); +cafu(<<_/utf32-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,little}) when N < 0 -> + cafu(Rest,-1,Count+1,0,SavePos,{utf32,little}); +cafu(<<_/utf32-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,little}) -> + Delta = byte_size(Whole) - byte_size(Rest), + cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,little}); +cafu(_Other,0,Count,ByteCount,_,_) -> % Non Unicode character, + % but found our point, OK this time + {Count,ByteCount}; +cafu(Other,_N,Count,0,SavePos,Enc) -> % Not enough, but valid chomped unicode + % at end. + case cbv(Enc,Other) of + false -> + exit(invalid_unicode); + _ -> + {Count,SavePos} + end; +cafu(Other,_N,Count,ByteCount,none,Enc) -> % Return what we'we got this far + % although not complete, + % it's not (yet) in error + case cbv(Enc,Other) of + false -> + exit(invalid_unicode); + _ -> + {Count,ByteCount} + end; +cafu(Other,_N,Count,_ByteCount,SavePos,Enc) -> % As above but we have + % found a position + case cbv(Enc,Other) of + false -> + exit(invalid_unicode); + _ -> + {Count,SavePos} + end. + +%% +%% Bluntly stolen from stdlib/unicode.erl (cbv means can be valid?) +%% +cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> + 1; +cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> + case R of + <<>> -> + 2; + <<1:1,0:1,_:6>> -> + 1; + _ -> + false + end; +cbv(utf8,<<1:1,1:1,1:1,1:1,0:1,_:3,R/binary>>) -> + case R of + <<>> -> + 3; + <<1:1,0:1,_:6>> -> + 2; + <<1:1,0:1,_:6,1:1,0:1,_:6>> -> + 1; + _ -> + false + end; +cbv(utf8,_) -> + false; + +cbv({utf16,big},<<A:8>>) when A =< 215; A >= 224 -> + 1; +cbv({utf16,big},<<54:6,_:2>>) -> + 3; +cbv({utf16,big},<<54:6,_:10>>) -> + 2; +cbv({utf16,big},<<54:6,_:10,55:6,_:2>>) -> + 1; +cbv({utf16,big},_) -> + false; +cbv({utf16,little},<<_:8>>) -> + 1; % or 3, we'll see +cbv({utf16,little},<<_:8,54:6,_:2>>) -> + 2; +cbv({utf16,little},<<_:8,54:6,_:2,_:8>>) -> + 1; +cbv({utf16,little},_) -> + false; + + +cbv({utf32,big}, <<0:8>>) -> + 3; +cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 -> + 2; +cbv({utf32,big}, <<0:8,X:8,Y:8>>) + when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> + 1; +cbv({utf32,big},_) -> + false; +cbv({utf32,little},<<_:8>>) -> + 3; +cbv({utf32,little},<<_:8,_:8>>) -> + 2; +cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 -> + false; +cbv({utf32,little},<<_:8,Y:8,X:8>>) + when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> + 1; +cbv({utf32,little},_) -> + false. + + +%%%----------------------------------------------------------------- +%%% ?PRIM_FILE helpers + +%% Compensates ?PRIM_FILE:position/2 for the number of bytes +%% we have buffered + +position(Handle, cur, Buf) -> + position(Handle, {cur, 0}, Buf); +position(Handle, {cur, Offs}, Buf) when is_binary(Buf) -> + ?PRIM_FILE:position(Handle, {cur, Offs-byte_size(Buf)}); +position(Handle, At, _Buf) -> + ?PRIM_FILE:position(Handle, At). + |