%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2000-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% -module(file_io_server). %% A simple file server for io to one file instance per server instance. -export([format_error/1]). -export([start/3, start_link/3]). -export([count_and_find/3]). -record(state, {handle,owner,mref,buf,read_mode,unic}). -define(PRIM_FILE, prim_file). -define(READ_SIZE_LIST, 128). -define(READ_SIZE_BINARY, (8*1024)). -define(eat_message(M, T), receive M -> M after T -> timeout end). %%%----------------------------------------------------------------- %%% Exported functions format_error({_Line, ?MODULE, Reason}) -> io_lib:format("~w", [Reason]); format_error({_Line, Mod, Reason}) -> Mod:format_error(Reason); format_error(ErrorId) -> erl_posix_msg:message(ErrorId). start(Owner, FileName, ModeList) when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn, Owner, FileName, ModeList). start_link(Owner, FileName, ModeList) when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn_link, Owner, FileName, ModeList). %%%----------------------------------------------------------------- %%% Server starter, dispatcher and helpers do_start(Spawn, Owner, FileName, ModeList) -> Self = self(), Ref = make_ref(), Pid = erlang:Spawn( fun() -> %% process_flag(trap_exit, true), case parse_options(ModeList) of {ReadMode, UnicodeMode, Opts} -> case ?PRIM_FILE:open(FileName, Opts) of {error, Reason} = Error -> Self ! {Ref, Error}, exit(Reason); {ok, Handle} -> %% XXX must I handle R6 nodes here? M = erlang:monitor(process, Owner), Self ! {Ref, ok}, server_loop( #state{handle = Handle, owner = Owner, mref = M, buf = <<>>, read_mode = ReadMode, unic = UnicodeMode}) end; {error,Reason1} = Error1 -> Self ! {Ref, Error1}, exit(Reason1) end end), Mref = erlang:monitor(process, Pid), receive {Ref, {error, _Reason} = Error} -> erlang:demonitor(Mref), receive {'DOWN', Mref, _, _, _} -> ok after 0 -> ok end, Error; {Ref, ok} -> erlang:demonitor(Mref), receive {'DOWN', Mref, _, _, Reason} -> {error, Reason} after 0 -> {ok, Pid} end; {'DOWN', Mref, _, _, Reason} -> {error, Reason} end. %%% Returns {ReadMode, UnicodeMode, RealOpts} parse_options(List) -> parse_options(expand_encoding(List), list, latin1, []). parse_options([], list, Uni, Acc) -> {list,Uni,[binary|lists:reverse(Acc)]}; parse_options([], binary, Uni, Acc) -> {binary,Uni,lists:reverse(Acc)}; parse_options([{encoding, Encoding}|T], RMode, _, Acc) -> case valid_enc(Encoding) of {ok, ExpandedEnc} -> parse_options(T, RMode, ExpandedEnc, Acc); {error,_Reason} = Error -> Error end; parse_options([binary|T], _, Uni, Acc) -> parse_options(T, binary, Uni, [binary|Acc]); parse_options([H|T], R, U, Acc) -> parse_options(T, R, U, [H|Acc]). expand_encoding([]) -> []; expand_encoding([latin1 | T]) -> [{encoding,latin1} | expand_encoding(T)]; expand_encoding([unicode | T]) -> [{encoding,unicode} | expand_encoding(T)]; expand_encoding([H|T]) -> [H|expand_encoding(T)]. valid_enc(latin1) -> {ok,latin1}; valid_enc(utf8) -> {ok,unicode}; valid_enc(unicode) -> {ok,unicode}; valid_enc(utf16) -> {ok,{utf16,big}}; valid_enc({utf16,big}) -> {ok,{utf16,big}}; valid_enc({utf16,little}) -> {ok,{utf16,little}}; valid_enc(utf32) -> {ok,{utf32,big}}; valid_enc({utf32,big}) -> {ok,{utf32,big}}; valid_enc({utf32,little}) -> {ok,{utf32,little}}; valid_enc(_Other) -> {error,badarg}. server_loop(#state{mref = Mref} = State) -> receive {file_request, From, ReplyAs, Request} when is_pid(From) -> case file_request(Request, State) of {reply, Reply, NewState} -> file_reply(From, ReplyAs, Reply), server_loop(NewState); {error, Reply, NewState} -> %% error is the same as reply, except that %% it breaks the io_request_loop further down file_reply(From, ReplyAs, Reply), server_loop(NewState); {stop, Reason, Reply, _NewState} -> file_reply(From, ReplyAs, Reply), exit(Reason) end; {io_request, From, ReplyAs, Request} when is_pid(From) -> case io_request(Request, State) of {reply, Reply, NewState} -> io_reply(From, ReplyAs, Reply), server_loop(NewState); {error, Reply, NewState} -> %% error is the same as reply, except that %% it breaks the io_request_loop further down io_reply(From, ReplyAs, Reply), server_loop(NewState); {stop, Reason, Reply, _NewState} -> io_reply(From, ReplyAs, Reply), exit(Reason) end; {'DOWN', Mref, _, _, Reason} -> exit(Reason); _ -> server_loop(State) end. file_reply(From, ReplyAs, Reply) -> From ! {file_reply, ReplyAs, Reply}. io_reply(From, ReplyAs, Reply) -> From ! {io_reply, ReplyAs, Reply}. %%%----------------------------------------------------------------- %%% file requests file_request({advise,Offset,Length,Advise}, #state{handle=Handle}=State) -> case ?PRIM_FILE:advise(Handle, Offset, Length, Advise) of {error,_}=Reply -> {stop,normal,Reply,State}; Reply -> {reply,Reply,State} end; file_request({pread,At,Sz}, #state{handle=Handle,buf=Buf,read_mode=ReadMode}=State) -> case position(Handle, At, Buf) of {ok,_Offs} -> case ?PRIM_FILE:read(Handle, Sz) of {ok,Bin} when ReadMode =:= list -> std_reply({ok,binary_to_list(Bin)}, State); Reply -> std_reply(Reply, State) end; Reply -> std_reply(Reply, State) end; file_request({pwrite,At,Data}, #state{handle=Handle,buf=Buf}=State) -> case position(Handle, At, Buf) of {ok,_Offs} -> std_reply(?PRIM_FILE:write(Handle, Data), State); Reply -> std_reply(Reply, State) end; file_request(datasync, #state{handle=Handle}=State) -> case ?PRIM_FILE:datasync(Handle) of {error,_}=Reply -> {stop,normal,Reply,State}; Reply -> {reply,Reply,State} end; file_request(sync, #state{handle=Handle}=State) -> case ?PRIM_FILE:sync(Handle) of {error,_}=Reply -> {stop,normal,Reply,State}; Reply -> {reply,Reply,State} end; file_request(close, #state{handle=Handle}=State) -> {stop,normal,?PRIM_FILE:close(Handle),State#state{buf= <<>>}}; file_request({position,At}, #state{handle=Handle,buf=Buf}=State) -> std_reply(position(Handle, At, Buf), State); file_request(truncate, #state{handle=Handle}=State) -> case ?PRIM_FILE:truncate(Handle) of {error,_Reason}=Reply -> {stop,normal,Reply,State#state{buf= <<>>}}; Reply -> {reply,Reply,State} end; file_request(Unknown, #state{}=State) -> Reason = {request, Unknown}, {error,{error,Reason},State}. std_reply({error,_}=Reply, State) -> {error,Reply,State#state{buf= <<>>}}; std_reply(Reply, State) -> {reply,Reply,State#state{buf= <<>>}}. %%%----------------------------------------------------------------- %%% I/O request %% New protocol with encoding tags (R13) io_request({put_chars, Enc, Chars}, #state{buf= <<>>}=State) -> put_chars(Chars, Enc, State); io_request({put_chars, Enc, Chars}, #state{handle=Handle,buf=Buf}=State) -> case position(Handle, cur, Buf) of {error,_}=Reply -> {stop,normal,Reply,State#state{buf= <<>>}}; _ -> put_chars(Chars, Enc, State#state{buf= <<>>}) end; io_request({put_chars,Enc,Mod,Func,Args}, #state{}=State) -> case catch apply(Mod, Func, Args) of Chars when is_list(Chars); is_binary(Chars) -> io_request({put_chars,Enc,Chars}, State); _ -> {error,{error,Func},State} end; io_request({get_until,Enc,_Prompt,Mod,Func,XtraArgs}, #state{}=State) -> get_chars(io_lib, get_until, {Mod, Func, XtraArgs}, Enc, State); io_request({get_chars,Enc,_Prompt,N}, #state{}=State) -> get_chars(N, Enc, State); %% %% This optimization gives almost nothing - needs more working... %% Disabled for now. /PaN %% %% io_request({get_line,Enc,_Prompt}, %% #state{unic=latin1}=State) -> %% get_line(Enc,State); io_request({get_line,Enc,_Prompt}, #state{}=State) -> get_chars(io_lib, collect_line, [], Enc, State); io_request({setopts, Opts}, #state{}=State) when is_list(Opts) -> setopts(Opts, State); io_request(getopts, #state{}=State) -> getopts(State); %% BC with pre-R13 nodes io_request({put_chars, Chars},#state{}=State) -> io_request({put_chars, latin1, Chars},State); io_request({put_chars,Mod,Func,Args}, #state{}=State) -> io_request({put_chars,latin1,Mod,Func,Args}, State); io_request({get_until,_Prompt,Mod,Func,XtraArgs}, #state{}=State) -> io_request({get_until,latin1,_Prompt,Mod,Func,XtraArgs}, State); io_request({get_chars,_Prompt,N}, #state{}=State) -> io_request({get_chars,latin1,_Prompt,N}, State); io_request({get_line,_Prompt}, #state{}=State) -> io_request({get_line,latin1,_Prompt}, State); io_request({requests,Requests}, #state{}=State) when is_list(Requests) -> io_request_loop(Requests, {reply,ok,State}); io_request(Unknown, #state{}=State) -> Reason = {request,Unknown}, {error,{error,Reason},State}. %% Process a list of requests as long as the results are ok. io_request_loop([], Result) -> Result; io_request_loop([_Request|_Tail], {stop,_Reason,_Reply,_State}=Result) -> Result; io_request_loop([_Request|_Tail], {error,_Reply,_State}=Result) -> Result; io_request_loop([Request|Tail], {reply,_Reply,State}) -> io_request_loop(Tail, io_request(Request, State)). %% I/O request put_chars %% put_chars(Chars, latin1, #state{handle=Handle, unic=latin1}=State) -> case ?PRIM_FILE:write(Handle, Chars) of {error,_}=Reply -> {stop,normal,Reply,State}; Reply -> {reply,Reply,State} end; put_chars(Chars, InEncoding, #state{handle=Handle, unic=OutEncoding}=State) -> case unicode:characters_to_binary(Chars,InEncoding,OutEncoding) of Bin when is_binary(Bin) -> case ?PRIM_FILE:write(Handle, Bin) of {error,_}=Reply -> {stop,normal,Reply,State}; Reply -> {reply,Reply,State} end; {error,_,_} -> {stop,normal,{error,{no_translation, InEncoding, OutEncoding}},State} end. %% %% Process the I/O request get_line for latin1 encoding of file specially %% Unfortunately this function gives almost nothing, it needs more work %% I disable it for now /PaN %% %% srch(<<>>,_,_) -> %% nomatch; %% srch(<<X:8,_/binary>>,X,N) -> %% {match,N}; %% srch(<<_:8,T/binary>>,X,N) -> %% srch(T,X,N+1). %% get_line(OutEnc, #state{handle=Handle,buf = <<>>,unic=latin1}=State) -> %% case ?PRIM_FILE:read(Handle,?READ_SIZE_BINARY) of %% {ok, B} -> %% get_line(OutEnc, State#state{buf = B}); %% eof -> %% {reply,eof,State}; %% {error,Reason}=Error -> %% {stop,Reason,Error,State} %% end; %% get_line(OutEnc, #state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State) -> %% case srch(Buf,$\n,0) of %% nomatch -> %% case ?PRIM_FILE:read(Handle,?READ_SIZE_BINARY) of %% {ok, B} -> %% get_line(OutEnc,State#state{buf = <<Buf/binary,B/binary>>}); %% eof -> %% std_reply(cast(Buf, ReadMode,latin1,OutEnc), State); %% {error,Reason}=Error -> %% {stop,Reason,Error,State#state{buf= <<>>}} %% end; %% {match,Pos} when Pos >= 1-> %% PosP1 = Pos + 1, %% <<Res0:PosP1/binary,NewBuf/binary>> = Buf, %% PosM1 = Pos - 1, %% Res = case Res0 of %% <<Chomped:PosM1/binary,$\r:8,$\n:8>> -> %% cat(Chomped, <<"\n">>, ReadMode,latin1,OutEnc); %% _Other -> %% cast(Res0, ReadMode,latin1,OutEnc) %% end, %% {reply,Res,State#state{buf=NewBuf}}; %% {match,Pos} -> %% PosP1 = Pos + 1, %% <<Res:PosP1/binary,NewBuf/binary>> = Buf, %% {reply,Res,State#state{buf=NewBuf}} %% end; %% get_line(_, #state{}=State) -> %% {error,{error,get_line},State}. %% %% Process the I/O request get_chars %% get_chars(0, Enc, #state{read_mode=ReadMode,unic=InEncoding}=State) -> {reply,cast(<<>>, ReadMode,InEncoding, Enc),State}; get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) when is_integer(N), N > 0, N =< byte_size(Buf) -> {B1,B2} = split_binary(Buf, N), {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; get_chars(N, Enc, #state{buf=Buf,read_mode=ReadMode,unic=latin1}=State) when is_integer(N), N > 0, N =< byte_size(Buf) -> {B1,B2} = split_binary(Buf, N), {reply,cast(B1, ReadMode,latin1,Enc),State#state{buf=B2}}; get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=latin1}=State) when is_integer(N), N > 0 -> BufSize = byte_size(Buf), NeedSize = N-BufSize, Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), case ?PRIM_FILE:read(Handle, Size) of {ok, B} -> if BufSize+byte_size(B) < N -> std_reply(cat(Buf, B, ReadMode,latin1,OutEnc), State); true -> {B1,B2} = split_binary(B, NeedSize), {reply,cat(Buf, B1, ReadMode, latin1,OutEnc),State#state{buf=B2}} end; eof when BufSize =:= 0 -> {reply,eof,State}; eof -> std_reply(cast(Buf, ReadMode,latin1,OutEnc), State); {error,Reason}=Error -> {stop,Reason,Error,State#state{buf= <<>>}} end; get_chars(N, OutEnc,#state{handle=Handle,buf=Buf,read_mode=ReadMode,unic=InEncoding}=State) when is_integer(N), N > 0 -> try %% This is rather tricky, we need to count the actual number of characters %% in the buffer first as unicode characters are not constant in length {BufCount, SplitPos} = count_and_find(Buf,N,InEncoding), case BufCount >= N of true -> {B1,B2} = case SplitPos of none -> {Buf,<<>>}; _ ->split_binary(Buf,SplitPos) end, {reply,cast(B1, ReadMode,InEncoding,OutEnc),State#state{buf=B2}}; false -> %% Need more, Try to read 4*needed in bytes... NeedSize = (N - BufCount) * 4, Size = erlang:max(NeedSize, ?READ_SIZE_BINARY), case ?PRIM_FILE:read(Handle, Size) of {ok, B} -> NewBuf = list_to_binary([Buf,B]), {NewCount,NewSplit} = count_and_find(NewBuf,N,InEncoding), case NewCount >= N of true -> {B01,B02} = case NewSplit of none -> {NewBuf,<<>>}; _ ->split_binary(NewBuf, NewSplit) end, {reply,cast(B01, ReadMode,InEncoding,OutEnc), State#state{buf=B02}}; false -> %% Reached end of file std_reply(cast(NewBuf, ReadMode,InEncoding,OutEnc), State#state{buf = <<>>}) end; eof when BufCount =:= 0 -> {reply,eof,State}; eof -> std_reply(cast(Buf, ReadMode,InEncoding,OutEnc), State#state{buf = <<>>}); {error,Reason}=Error -> {stop,Reason,Error,State#state{buf = <<>>}} end end catch exit:ExError -> {stop,ExError,{error,ExError},State#state{buf= <<>>}} end; get_chars(_N, _, #state{}=State) -> {error,{error,get_chars},State}. get_chars(Mod, Func, XtraArg, OutEnc, #state{buf= <<>>}=State) -> get_chars_empty(Mod, Func, XtraArg, start, OutEnc, State); get_chars(Mod, Func, XtraArg, OutEnc, #state{buf=Buf}=State) -> get_chars_apply(Mod, Func, XtraArg, start, OutEnc, State#state{buf= <<>>}, Buf). get_chars_empty(Mod, Func, XtraArg, S, latin1, #state{handle=Handle,read_mode=ReadMode, unic=latin1}=State) -> case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of {ok,Bin} -> get_chars_apply(Mod, Func, XtraArg, S, latin1, State, Bin); eof -> get_chars_apply(Mod, Func, XtraArg, S, latin1, State, eof); {error,Reason}=Error -> {stop,Reason,Error,State} end; get_chars_empty(Mod, Func, XtraArg, S, OutEnc, #state{handle=Handle,read_mode=ReadMode}=State) -> case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of {ok,Bin} -> get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, Bin); eof -> get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); {error,Reason}=Error -> {stop,Reason,Error,State} end. get_chars_notempty(Mod, Func, XtraArg, S, OutEnc, #state{handle=Handle,read_mode=ReadMode,buf = B}=State) -> case ?PRIM_FILE:read(Handle, read_size(ReadMode)) of {ok,Bin} -> get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, list_to_binary([B,Bin])); eof -> case B of <<>> -> get_chars_apply(Mod, Func, XtraArg, S, OutEnc, State, eof); _ -> {stop,invalid_unicode,{error,invalid_unicode},State} end; {error,Reason}=Error -> {stop,Reason,Error,State} end. get_chars_apply(Mod, Func, XtraArg, S0, latin1, #state{read_mode=ReadMode,unic=latin1}=State, Data0) -> Data1 = case ReadMode of list when is_binary(Data0) -> binary_to_list(Data0); _ -> Data0 end, case catch Mod:Func(S0, Data1, latin1, XtraArg) of {stop,Result,Buf} -> {reply,Result,State#state{buf=cast_binary(Buf)}}; {'EXIT',Reason} -> {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; S1 -> get_chars_empty(Mod, Func, XtraArg, S1, latin1, State) end; get_chars_apply(Mod, Func, XtraArg, S0, OutEnc, #state{read_mode=ReadMode,unic=InEnc}=State, Data0) -> try {Data1,NewBuff} = case ReadMode of list when is_binary(Data0) -> case unicode:characters_to_list(Data0,InEnc) of {Tag,Decoded,Rest} when Decoded =/= [], Tag =:= error; Decoded =/= [], Tag =:= incomplete -> {Decoded,erlang:iolist_to_binary(Rest)}; {error, [], _} -> exit(invalid_unicode); {incomplete, [], R} -> {[],R}; List when is_list(List) -> {List,<<>>} end; binary when is_binary(Data0) -> case unicode:characters_to_binary(Data0,InEnc,OutEnc) of {Tag2,Decoded2,Rest2} when Decoded2 =/= <<>>, Tag2 =:= error; Decoded2 =/= <<>>, Tag2 =:= incomplete -> {Decoded2,erlang:iolist_to_binary(Rest2)}; {error, <<>>, _} -> exit(invalid_unicode); {incomplete, <<>>, R} -> {<<>>,R}; Binary when is_binary(Binary) -> {Binary,<<>>} end; _ -> %i.e. eof {Data0,<<>>} end, case catch Mod:Func(S0, Data1, OutEnc, XtraArg) of {stop,Result,Buf} -> {reply,Result,State#state{buf = (if is_binary(Buf) -> list_to_binary([unicode:characters_to_binary(Buf,OutEnc,InEnc),NewBuff]); is_list(Buf) -> list_to_binary([unicode:characters_to_binary(Buf,unicode,InEnc),NewBuff]); true -> NewBuff end)}}; {'EXIT',Reason} -> {stop,Reason,{error,err_func(Mod, Func, XtraArg)},State}; S1 -> get_chars_notempty(Mod, Func, XtraArg, S1, OutEnc, State#state{buf=NewBuff}) end catch exit:ExReason -> {stop,ExReason,{error,err_func(Mod, Func, XtraArg)},State}; error:ErrReason -> {stop,ErrReason,{error,err_func(Mod, Func, XtraArg)},State} end. %% Convert error code to make it look as before err_func(io_lib, get_until, {_,F,_}) -> F; err_func(_, F, _) -> F. %% Process the I/O request setopts %% %% setopts setopts(Opts0,State) -> Opts = proplists:unfold( proplists:substitute_negations( [{list,binary}], expand_encoding(Opts0))), case check_valid_opts(Opts) of true -> do_setopts(Opts,State); false -> {error,{error,enotsup},State} end. check_valid_opts([]) -> true; check_valid_opts([{binary,_}|T]) -> check_valid_opts(T); check_valid_opts([{encoding,_Enc}|T]) -> check_valid_opts(T); check_valid_opts(_) -> false. do_setopts(Opts, State) -> case valid_enc(proplists:get_value(encoding, Opts, State#state.unic)) of {ok,NewUnic} -> case proplists:get_value(binary, Opts) of true -> {reply,ok,State#state{read_mode=binary, unic=NewUnic}}; false -> {reply,ok,State#state{read_mode=list, unic=NewUnic}}; undefined -> {reply,ok,State#state{unic=NewUnic}} end; _ -> {error,{error,badarg},State} end. getopts(#state{read_mode=RM, unic=Unic} = State) -> Bin = {binary, RM =:= binary}, Uni = {encoding, Unic}, {reply,[Bin,Uni],State}. %% Concatenate two binaries and convert the result to list or binary cat(B1, B2, binary, latin1, latin1) -> list_to_binary([B1,B2]); cat(B1, B2, binary, InEncoding, OutEncoding) -> case unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding) of Good when is_binary(Good) -> Good; _ -> exit({no_translation,InEncoding,OutEncoding}) end; %% Dialyzer finds this is never used... %% cat(B1, B2, list, InEncoding, OutEncoding) when InEncoding =/= latin1 -> %% % Catch i.e. unicode -> latin1 errors by using the outencoding although otherwise %% % irrelevant for lists... %% try %% unicode:characters_to_list(unicode:characters_to_binary([B1,B2],InEncoding,OutEncoding), %% OutEncoding) %% catch %% error:_ -> %% exit({no_translation,InEncoding,OutEncoding}) %% end. cat(B1, B2, list, latin1,_) -> binary_to_list(B1)++binary_to_list(B2). %% Cast binary to list or binary cast(B, binary, latin1, latin1) -> B; cast(B, binary, InEncoding, OutEncoding) -> case unicode:characters_to_binary(B,InEncoding,OutEncoding) of Good when is_binary(Good) -> Good; _ -> exit({no_translation,InEncoding,OutEncoding}) end; cast(B, list, latin1, _) -> binary_to_list(B); cast(B, list, InEncoding, OutEncoding) -> try unicode:characters_to_list(unicode:characters_to_binary(B,InEncoding,OutEncoding), OutEncoding) catch error:_ -> exit({no_translation,InEncoding,OutEncoding}) end. %% Convert buffer to binary cast_binary(Binary) when is_binary(Binary) -> Binary; cast_binary(List) when is_list(List) -> list_to_binary(List); cast_binary(_EOF) -> <<>>. %% Read size for different read modes read_size(binary) -> ?READ_SIZE_BINARY; read_size(list) -> ?READ_SIZE_LIST. %% Utf utility count_and_find(Bin,N,Encoding) -> cafu(Bin,N,0,0,none,case Encoding of unicode -> utf8; Oth -> Oth end). cafu(<<>>,0,Count,ByteCount,_SavePos,_) -> {Count,ByteCount}; cafu(<<>>,_N,Count,_ByteCount,SavePos,_) -> {Count,SavePos}; cafu(<<_/utf8,Rest/binary>>, 0, Count, ByteCount, _SavePos, utf8) -> cafu(Rest,-1,Count+1,0,ByteCount,utf8); cafu(<<_/utf8,Rest/binary>>, N, Count, _ByteCount, SavePos, utf8) when N < 0 -> cafu(Rest,-1,Count+1,0,SavePos,utf8); cafu(<<_/utf8,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, utf8) -> Delta = byte_size(Whole) - byte_size(Rest), cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,utf8); cafu(<<_/utf16-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,big}) -> cafu(Rest,-1,Count+1,0,ByteCount,{utf16,big}); cafu(<<_/utf16-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,big}) when N < 0 -> cafu(Rest,-1,Count+1,0,SavePos,{utf16,big}); cafu(<<_/utf16-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,big}) -> Delta = byte_size(Whole) - byte_size(Rest), cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,big}); cafu(<<_/utf16-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf16,little}) -> cafu(Rest,-1,Count+1,0,ByteCount,{utf16,little}); cafu(<<_/utf16-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf16,little}) when N < 0 -> cafu(Rest,-1,Count+1,0,SavePos,{utf16,little}); cafu(<<_/utf16-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf16,little}) -> Delta = byte_size(Whole) - byte_size(Rest), cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf16,little}); cafu(<<_/utf32-big,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,big}) -> cafu(Rest,-1,Count+1,0,ByteCount,{utf32,big}); cafu(<<_/utf32-big,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,big}) when N < 0 -> cafu(Rest,-1,Count+1,0,SavePos,{utf32,big}); cafu(<<_/utf32-big,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,big}) -> Delta = byte_size(Whole) - byte_size(Rest), cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,big}); cafu(<<_/utf32-little,Rest/binary>>, 0, Count, ByteCount, _SavePos, {utf32,little}) -> cafu(Rest,-1,Count+1,0,ByteCount,{utf32,little}); cafu(<<_/utf32-little,Rest/binary>>, N, Count, _ByteCount, SavePos, {utf32,little}) when N < 0 -> cafu(Rest,-1,Count+1,0,SavePos,{utf32,little}); cafu(<<_/utf32-little,Rest/binary>> = Whole, N, Count, ByteCount, SavePos, {utf32,little}) -> Delta = byte_size(Whole) - byte_size(Rest), cafu(Rest,N-1,Count+1,ByteCount+Delta,SavePos,{utf32,little}); cafu(_Other,0,Count,ByteCount,_,_) -> % Non Unicode character, % but found our point, OK this time {Count,ByteCount}; cafu(Other,_N,Count,0,SavePos,Enc) -> % Not enough, but valid chomped unicode % at end. case cbv(Enc,Other) of false -> exit(invalid_unicode); _ -> {Count,SavePos} end; cafu(Other,_N,Count,ByteCount,none,Enc) -> % Return what we'we got this far % although not complete, % it's not (yet) in error case cbv(Enc,Other) of false -> exit(invalid_unicode); _ -> {Count,ByteCount} end; cafu(Other,_N,Count,_ByteCount,SavePos,Enc) -> % As above but we have % found a position case cbv(Enc,Other) of false -> exit(invalid_unicode); _ -> {Count,SavePos} end. %% %% Bluntly stolen from stdlib/unicode.erl (cbv means can be valid?) %% cbv(utf8,<<1:1,1:1,0:1,_:5>>) -> 1; cbv(utf8,<<1:1,1:1,1:1,0:1,_:4,R/binary>>) -> case R of <<>> -> 2; <<1:1,0:1,_:6>> -> 1; _ -> false end; cbv(utf8,<<1:1,1:1,1:1,1:1,0:1,_:3,R/binary>>) -> case R of <<>> -> 3; <<1:1,0:1,_:6>> -> 2; <<1:1,0:1,_:6,1:1,0:1,_:6>> -> 1; _ -> false end; cbv(utf8,_) -> false; cbv({utf16,big},<<A:8>>) when A =< 215; A >= 224 -> 1; cbv({utf16,big},<<54:6,_:2>>) -> 3; cbv({utf16,big},<<54:6,_:10>>) -> 2; cbv({utf16,big},<<54:6,_:10,55:6,_:2>>) -> 1; cbv({utf16,big},_) -> false; cbv({utf16,little},<<_:8>>) -> 1; % or 3, we'll see cbv({utf16,little},<<_:8,54:6,_:2>>) -> 2; cbv({utf16,little},<<_:8,54:6,_:2,_:8>>) -> 1; cbv({utf16,little},_) -> false; cbv({utf32,big}, <<0:8>>) -> 3; cbv({utf32,big}, <<0:8,X:8>>) when X =< 16 -> 2; cbv({utf32,big}, <<0:8,X:8,Y:8>>) when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> 1; cbv({utf32,big},_) -> false; cbv({utf32,little},<<_:8>>) -> 3; cbv({utf32,little},<<_:8,_:8>>) -> 2; cbv({utf32,little},<<X:8,255:8,0:8>>) when X =:= 254; X =:= 255 -> false; cbv({utf32,little},<<_:8,Y:8,X:8>>) when X =< 16, ((X > 0) or ((Y =< 215) or (Y >= 224))) -> 1; cbv({utf32,little},_) -> false. %%%----------------------------------------------------------------- %%% ?PRIM_FILE helpers %% Compensates ?PRIM_FILE:position/2 for the number of bytes %% we have buffered position(Handle, cur, Buf) -> position(Handle, {cur, 0}, Buf); position(Handle, {cur, Offs}, Buf) when is_binary(Buf) -> ?PRIM_FILE:position(Handle, {cur, Offs-byte_size(Buf)}); position(Handle, At, _Buf) -> ?PRIM_FILE:position(Handle, At).