diff options
Diffstat (limited to 'lib')
66 files changed, 1942 insertions, 1108 deletions
diff --git a/lib/common_test/test/ct_hooks_SUITE.erl b/lib/common_test/test/ct_hooks_SUITE.erl index 405df1e978..796a0832d7 100644 --- a/lib/common_test/test/ct_hooks_SUITE.erl +++ b/lib/common_test/test/ct_hooks_SUITE.erl @@ -64,7 +64,7 @@ end_per_testcase(TestCase, Config) -> suite() -> - [{timetrap,{seconds,20}}]. + [{timetrap,{minutes,1}}]. all() -> all(suite). diff --git a/lib/common_test/test/ct_master_SUITE.erl b/lib/common_test/test/ct_master_SUITE.erl index 56a343a96f..b89d7d4dc5 100644 --- a/lib/common_test/test/ct_master_SUITE.erl +++ b/lib/common_test/test/ct_master_SUITE.erl @@ -154,6 +154,9 @@ make_spec(DataDir, FileName, NodeNames, Suites, Config) -> {init,NodeName,[ {node_start,[{startup_functions,[]}, {monitor_master,true}, + {boot_timeout,10}, + {init_timeout,10}, + {startup_timeout,10}, {env,Env}]}, {eval,{erlang,nodes,[]}}] } diff --git a/lib/common_test/test/ct_netconfc_SUITE.erl b/lib/common_test/test/ct_netconfc_SUITE.erl index 3042a924fe..c89a4cdabe 100644 --- a/lib/common_test/test/ct_netconfc_SUITE.erl +++ b/lib/common_test/test/ct_netconfc_SUITE.erl @@ -43,12 +43,11 @@ %% there will be clashes with logging processes etc). %%-------------------------------------------------------------------- init_per_suite(Config) -> - Config1 = ct_test_support:init_per_suite(Config), case application:load(crypto) of - {error,Reason} -> + {error,Reason} when Reason=/={already_loaded,crypto} -> {skip, Reason}; _ -> - Config1 + ct_test_support:init_per_suite(Config) end. end_per_suite(Config) -> diff --git a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl index d337158bce..54526e8e83 100644 --- a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl +++ b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl @@ -1044,12 +1044,9 @@ gen_dsa(LSize,NSize) when is_integer(LSize), is_integer(NSize) -> Key = gen_dsa2(LSize, NSize), {Key, encode_key(Key)}. -encode_key(Key = #'RSAPrivateKey'{}) -> - {ok, Der} = 'OTP-PUB-KEY':encode('RSAPrivateKey', Key), - {'RSAPrivateKey', list_to_binary(Der), not_encrypted}; encode_key(Key = #'DSAPrivateKey'{}) -> - {ok, Der} = 'OTP-PUB-KEY':encode('DSAPrivateKey', Key), - {'DSAPrivateKey', list_to_binary(Der), not_encrypted}. + Der = public_key:der_encode('DSAPrivateKey', Key), + {'DSAPrivateKey', Der, not_encrypted}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/common_test/test/ct_test_support.erl b/lib/common_test/test/ct_test_support.erl index fc572aa82f..8814570e99 100644 --- a/lib/common_test/test/ct_test_support.erl +++ b/lib/common_test/test/ct_test_support.erl @@ -1043,8 +1043,8 @@ result_match({SkipOrFail,{ErrorInd,{EMod,EFunc,{Why,'_'}}}}, true; result_match({failed,{timetrap_timeout,{'$approx',Num}}}, {failed,{timetrap_timeout,Value}}) -> - if Value >= trunc(Num-0.02*Num), - Value =< trunc(Num+0.02*Num) -> true; + if Value >= trunc(Num-0.05*Num), + Value =< trunc(Num+0.05*Num) -> true; true -> false end; result_match({user_timetrap_error,{Why,'_'}}, diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl index ff6c7c11dc..531968b3c8 100644 --- a/lib/compiler/src/beam_dict.erl +++ b/lib/compiler/src/beam_dict.erl @@ -138,17 +138,7 @@ string(Str, Dict) when is_list(Str) -> -spec lambda(label(), non_neg_integer(), bdict()) -> {non_neg_integer(), bdict()}. -lambda(Lbl, 0, #asm{lambdas=Lambdas0}=Dict) -> - case lists:keyfind(Lbl, 1, Lambdas0) of - {Lbl,{OldIndex,_,_,_,_}} -> - {OldIndex,Dict}; - false -> - new_lambda(Lbl, 0, Dict) - end; -lambda(Lbl, NumFree, Dict) -> - new_lambda(Lbl, NumFree, Dict). - -new_lambda(Lbl, NumFree, #asm{lambdas=Lambdas0}=Dict) -> +lambda(Lbl, NumFree, #asm{lambdas=Lambdas0}=Dict) -> OldIndex = length(Lambdas0), %% Set Index the same as OldIndex. Index = OldIndex, @@ -245,12 +235,10 @@ string_table(#asm{strings=Strings,string_offset=Size}) -> -spec lambda_table(bdict()) -> {non_neg_integer(), [<<_:192>>]}. -lambda_table(#asm{exports=Ext0,locals=Loc0,lambdas=Lambdas0}) -> +lambda_table(#asm{locals=Loc0,lambdas=Lambdas0}) -> Lambdas1 = sofs:relation(Lambdas0), Loc = sofs:relation([{Lbl,{F,A}} || {F,A,Lbl} <- Loc0]), - Ext = sofs:relation([{Lbl,{F,A}} || {F,A,Lbl} <- Ext0]), - All = sofs:union(Loc, Ext), - Lambdas2 = sofs:relative_product1(Lambdas1, All), + Lambdas2 = sofs:relative_product1(Lambdas1, Loc), Lambdas = [<<F:32,A:32,Lbl:32,Index:32,NumFree:32,OldUniq:32>> || {{_,Lbl,Index,NumFree,OldUniq},{F,A}} <- sofs:to_external(Lambdas2)], {length(Lambdas),Lambdas}. diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index b1bff47f69..8ef71e1346 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -235,8 +235,16 @@ gexpr_test_add(Ke, St0) -> %% expr(Cexpr, Sub, State) -> {Kexpr,[PreKexpr],State}. %% Convert a Core expression, flattening it at the same time. -expr(#c_var{anno=A,name={Name,Arity}}, Sub, St) -> - {#k_local{anno=A,name=get_fsub(Name, Arity, Sub),arity=Arity},[],St}; +expr(#c_var{anno=A,name={_Name,Arity}}=Fname, Sub, St) -> + %% A local in an expression. + %% For now, these are wrapped into a fun by reverse + %% etha-conversion, but really, there should be exactly one + %% such "lambda function" for each escaping local name, + %% instead of one for each occurrence as done now. + Vs = [#c_var{name=list_to_atom("V" ++ integer_to_list(V))} || + V <- integers(1, Arity)], + Fun = #c_fun{anno=A,vars=Vs,body=#c_apply{anno=A,op=Fname,args=Vs}}, + expr(Fun, Sub, St); expr(#c_var{anno=A,name=V}, Sub, St) -> {#k_var{anno=A,name=get_vsub(V, Sub)},[],St}; expr(#c_literal{anno=A,val=V}, _Sub, St) -> @@ -1655,19 +1663,6 @@ uexpr(#ifun{anno=A,vars=Vs,body=B0}, {break,Rs}, St0) -> #k_int{val=Index},#k_int{val=Uniq}|Fvs], ret=Rs}, Free,add_local_function(Fun, St)}; -uexpr(#k_local{anno=A,name=Name,arity=Arity}, {break,Rs}, St) -> - Fs = get_free(Name, Arity, St), - FsCount = length(Fs), - Free = lit_list_vars(Fs), - %% Set dummy values for Index and Uniq -- the real values will - %% be assigned by beam_asm. - Index = Uniq = 0, - Bif = #k_bif{anno=#k{us=Free,ns=lit_list_vars(Rs),a=A}, - op=#k_internal{name=make_fun,arity=FsCount+3}, - args=[#k_atom{val=Name},#k_int{val=FsCount+Arity}, - #k_int{val=Index},#k_int{val=Uniq}|Fs], - ret=Rs}, - {Bif,Free,St}; uexpr(Lit, {break,Rs0}, St0) -> %% Transform literals to puts here. %%ok = io:fwrite("uexpr ~w:~p~n", [?LINE,Lit]), @@ -1848,6 +1843,12 @@ make_list(Es) -> #c_cons{hd=E,tl=Acc} end, #c_literal{val=[]}, Es). +%% List of integers in interval [N,M]. Empty list if N > M. + +integers(N, M) when N =< M -> + [N|integers(N + 1, M)]; +integers(_, _) -> []. + %% is_in_guard(State) -> true|false. is_in_guard(#kern{guard_refc=Refc}) -> diff --git a/lib/dialyzer/test/small_SUITE_data/results/empty_list_infimum b/lib/dialyzer/test/small_SUITE_data/results/empty_list_infimum index 75f1b25725..6eaf60b91d 100644 --- a/lib/dialyzer/test/small_SUITE_data/results/empty_list_infimum +++ b/lib/dialyzer/test/small_SUITE_data/results/empty_list_infimum @@ -1,6 +1,2 @@ -<<<<<<< HEAD -empty_list_infimum.erl:39: Invalid type specification for function empty_list_infimum:list_vhost_permissions/1. The success typing is (_) -> [[{_,_}]] -======= -empty_list_infimum.erl:38: Invalid type specification for function empty_list_infimum:list_vhost_permissions/1. The success typing is (_) -> [[{_,_}]] ->>>>>>> sa/dialyzer-list-spec/OTP-10740 +empty_list_infimum.erl:38: Invalid type specification for function empty_list_infimum:list_vhost_permissions/1. The success typing is (_) -> [[{_,_}]]
\ No newline at end of file diff --git a/lib/diameter/src/Makefile b/lib/diameter/src/Makefile index 60bb7e6a10..3cbcbf536e 100644 --- a/lib/diameter/src/Makefile +++ b/lib/diameter/src/Makefile @@ -133,8 +133,10 @@ gen/$(DICT_YRL).erl: compiler/$(DICT_YRL).yrl # Generate the app file. $(APP_TARGET): $(APP_SRC) ../vsn.mk modules.mk $(gen_verbose)M=`echo $(notdir $(APP_MODULES)) | tr ' ' ,`; \ + R=`echo $(REGISTERED) | tr ' ' ,`; \ sed -e 's;%VSN%;$(VSN);' \ -e "s;%MODULES%;$$M;" \ + -e "s;%REGISTERED%;$$R;" \ $< > $@ $(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk diff --git a/lib/diameter/src/base/diameter.app.src b/lib/diameter/src/base/diameter.app.src index c092fdb022..7e17cd6c9f 100644 --- a/lib/diameter/src/base/diameter.app.src +++ b/lib/diameter/src/base/diameter.app.src @@ -21,7 +21,7 @@ [{description, "Diameter protocol"}, {vsn, "%VSN%"}, {modules, [%MODULES%]}, - {registered, []}, + {registered, [%REGISTERED%]}, {applications, [stdlib, kernel]}, {env, []}, {mod, {diameter_app, []}} diff --git a/lib/diameter/src/modules.mk b/lib/diameter/src/modules.mk index 01f9284881..25207625be 100644 --- a/lib/diameter/src/modules.mk +++ b/lib/diameter/src/modules.mk @@ -103,3 +103,12 @@ EXAMPLES = \ dict/rfc4072_eap.dia \ dict/rfc4590_digest.dia \ dict/rfc4740_sip.dia + +# Registered server names. + +REGISTERED = \ + diameter_config \ + diameter_peer \ + diameter_reg \ + diameter_stats \ + diameter_sync diff --git a/lib/erl_interface/doc/src/ei.xml b/lib/erl_interface/doc/src/ei.xml index 539e16d837..117c787da6 100644 --- a/lib/erl_interface/doc/src/ei.xml +++ b/lib/erl_interface/doc/src/ei.xml @@ -82,6 +82,25 @@ function returns the size required (note that for strings an extra byte is needed for the 0 string terminator).</p> </description> + <section> + <title>DATA TYPES</title> + + <taglist> + <tag><marker id="erlang_char_encoding"/>enum erlang_char_encoding</tag> + <item> + <p/> + <code type="none"> +enum erlang_char_encoding { + ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8 +}; +</code> + <p>The character encoding used for atoms. <c>ERLANG_ASCII</c> represents 7-bit ASCII. + Latin1 and UTF8 are different extensions of 7-bit ASCII. All 7-bit ASCII characters + are valid Latin1 and UTF8 characters. ASCII and Latin1 both represent each character + by one byte. A UTF8 character can consist of one to four bytes.</p> + </item> + </taglist> + </section> <funcs> <func> <name><ret>void</ret><nametext>ei_set_compat_rel(release_number)</nametext></name> @@ -225,12 +244,32 @@ <fsummary>Encode an atom</fsummary> <desc> <p>Encodes an atom in the binary format. The <c><![CDATA[p]]></c> parameter - is the name of the atom. Only upto <c><![CDATA[MAXATOMLEN]]></c> bytes + is the name of the atom in latin1 encoding. Only upto <c>MAXATOMLEN-1</c> bytes are encoded. The name should be zero-terminated, except for the <c><![CDATA[ei_x_encode_atom_len()]]></c> function.</p> </desc> </func> <func> + <name><ret>int</ret><nametext>ei_encode_atom_as(char *buf, int *index, const char *p, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc)</nametext></name> + <name><ret>int</ret><nametext>ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc)</nametext></name> + <name><ret>int</ret><nametext>ei_x_encode_atom_as(ei_x_buff* x, const char *p, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc)</nametext></name> + <name><ret>int</ret><nametext>ei_x_encode_atom_len_as(ei_x_buff* x, const char *p, int len, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc)</nametext></name> + <fsummary>Encode an atom</fsummary> + <desc> + <p>Encodes an atom in the binary format with character encoding + <c><seealso marker="#erlang_char_encoding">to_enc</seealso></c> (latin1 or utf8). + The <c>p</c> parameter is the name of the atom with character encoding + <c><seealso marker="#erlang_char_encoding">from_enc</seealso></c> (ascii, latin1 or utf8). + The name must either be zero-terminated or a function variant with a <c>len</c> + parameter must be used.</p> + <p>The encoding will fail if <c>p</c> is not a valid string in encoding <c>from_enc</c>, + if the string is too long or if it can not be represented with character encoding <c>to_enc</c>.</p> + <p>These functions were introduced in R16 release of Erlang/OTP as part of a first step + to support UTF8 atoms. Atoms encoded with <c>ERLANG_UTF8</c> + can not be decoded by earlier releases than R16.</p> + </desc> + </func> + <func> <name><ret>int</ret><nametext>ei_encode_binary(char *buf, int *index, const void *p, long len)</nametext></name> <name><ret>int</ret><nametext>ei_x_encode_binary(ei_x_buff* x, const void *p, long len)</nametext></name> <fsummary>Encode a binary</fsummary> @@ -490,11 +529,32 @@ ei_x_encode_empty_list(&x); <fsummary>Decode an atom</fsummary> <desc> <p>This function decodes an atom from the binary format. The - name of the atom is placed at <c><![CDATA[p]]></c>. There can be at most + null terminated name of the atom is placed at <c><![CDATA[p]]></c>. There can be at most <c><![CDATA[MAXATOMLEN]]></c> bytes placed in the buffer.</p> </desc> </func> <func> + <name><ret>int</ret><nametext>ei_decode_atom_as(const char *buf, int *index, char *p, int plen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result)</nametext></name> + <fsummary>Decode an atom</fsummary> + <desc> + <p>This function decodes an atom from the binary format. The + null terminated name of the atom is placed in buffer at <c>p</c> of length + <c>plen</c> bytes.</p> + <p>The wanted string encoding is specified by <c><seealso marker="#erlang_char_encoding"> + want</seealso></c>. The original encoding used in the + binary format (latin1 or utf8) can be obtained from <c>*was</c>. The actual encoding of the resulting string + (7-bit ascii, latin1 or utf8) can be obtained from <c>*result</c>. Both <c>was</c> and <c>result</c> can be <c>NULL</c>. + + <c>*result</c> may differ from <c>want</c> if <c>want</c> is a bitwise-or'd combination like + <c>ERLANG_LATIN1|ERLANG_UTF8</c> or if <c>*result</c> turn out to be pure 7-bit ascii + (compatible with both latin1 and utf8).</p> + <p>This function fails if the atom is too long for the buffer + or if it can not be represented with encoding <c>want</c>.</p> + <p>This function was introduced in R16 release of Erlang/OTP as part of a first step + to support UTF8 atoms.</p> + </desc> + </func> + <func> <name><ret>int</ret><nametext>ei_decode_binary(const char *buf, int *index, void *p, long *len)</nametext></name> <fsummary>Decode a binary</fsummary> <desc> diff --git a/lib/erl_interface/doc/src/erl_eterm.xml b/lib/erl_interface/doc/src/erl_eterm.xml index f403618c59..c7840d7813 100644 --- a/lib/erl_interface/doc/src/erl_eterm.xml +++ b/lib/erl_interface/doc/src/erl_eterm.xml @@ -77,10 +77,12 @@ </p> <taglist> <tag><c><![CDATA[char *ERL_ATOM_PTR(t)]]></c></tag> + <tag><c><![CDATA[char *ERL_ATOM_PTR_UTF8(t)]]></c></tag> <item>A string representing atom <c><![CDATA[t]]></c>. </item> <tag><c><![CDATA[int ERL_ATOM_SIZE(t)]]></c></tag> - <item>The length (in characters) of atom t.</item> + <tag><c><![CDATA[int ERL_ATOM_SIZE_UTF8(t)]]></c></tag> + <item>The length (in bytes) of atom t.</item> <tag><c><![CDATA[void *ERL_BIN_PTR(t)]]></c></tag> <item>A pointer to the contents of <c><![CDATA[t]]></c></item> <tag><c><![CDATA[int ERL_BIN_SIZE(t)]]></c></tag> @@ -92,6 +94,7 @@ <tag><c><![CDATA[double ERL_FLOAT_VALUE(t)]]></c></tag> <item>The floating point value of <c><![CDATA[t]]></c>.</item> <tag><c><![CDATA[ETERM *ERL_PID_NODE(t)]]></c></tag> + <tag><c><![CDATA[ETERM *ERL_PID_NODE_UTF8(t)]]></c></tag> <item>The Node in pid <c><![CDATA[t]]></c>.</item> <tag><c><![CDATA[int ERL_PID_NUMBER(t)]]></c></tag> <item>The sequence number in pid <c><![CDATA[t]]></c>.</item> @@ -104,6 +107,7 @@ <tag><c><![CDATA[int ERL_PORT_CREATION(t)]]></c></tag> <item>The creation number in port <c><![CDATA[t]]></c>.</item> <tag><c><![CDATA[ETERM *ERL_PORT_NODE(t)]]></c></tag> + <tag><c><![CDATA[ETERM *ERL_PORT_NODE_UTF8(t)]]></c></tag> <item>The node in port <c><![CDATA[t]]></c>.</item> <tag><c><![CDATA[int ERL_REF_NUMBER(t)]]></c></tag> <item>The first part of the reference number in ref <c><![CDATA[t]]></c>. Use @@ -296,7 +300,7 @@ iohead ::= Binary <name><ret>ETERM *</ret><nametext>erl_mk_atom(string)</nametext></name> <fsummary>Creates an atom</fsummary> <type> - <v>char *string;</v> + <v>const char *string;</v> </type> <desc> <p>Creates an atom.</p> @@ -305,10 +309,12 @@ iohead ::= Binary <p>Returns an Erlang term containing an atom. Note that it is the callers responsibility to make sure that <c><![CDATA[string]]></c> contains a valid name for an atom.</p> - <p><c><![CDATA[ERL_ATOM_PTR(atom)]]></c> can be used to retrieve the - atom name (as a string). Note that the string is not - 0-terminated in the atom. <c><![CDATA[ERL_ATOM_SIZE(atom)]]></c>returns - the length of the atom name.</p> + <p><c><![CDATA[ERL_ATOM_PTR(atom)]]></c> and <c><![CDATA[ERL_ATOM_PTR_UTF8(atom)]]></c> + can be used to retrieve the atom name (as a null terminated string). <c><![CDATA[ERL_ATOM_SIZE(atom)]]></c> + and <c><![CDATA[ERL_ATOM_SIZE_UTF8(atom)]]></c> returns the length of the atom name.</p> + <note><p>Note that the UTF8 variants were introduced in Erlang/OTP releases R16 + and the string returned by <c>ERL_ATOM_PTR(atom)</c> was not null terminated on older releases.</p> + </note> </desc> </func> <func> diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h index ae815b414a..2278a28adb 100644 --- a/lib/erl_interface/include/ei.h +++ b/lib/erl_interface/include/ei.h @@ -115,6 +115,9 @@ #define ERL_FLOAT_EXT 'c' #define NEW_FLOAT_EXT 'F' #define ERL_ATOM_EXT 'd' +#define ERL_SMALL_ATOM_EXT 's' +#define ERL_ATOM_UTF8_EXT 'v' +#define ERL_SMALL_ATOM_UTF8_EXT 'w' #define ERL_REFERENCE_EXT 'e' #define ERL_NEW_REFERENCE_EXT 'r' #define ERL_PORT_EXT 'f' @@ -183,12 +186,21 @@ extern volatile int __erl_errno; #define EI_MAXHOSTNAMELEN 64 #define EI_MAXALIVELEN 63 #define EI_MAX_COOKIE_SIZE 512 -#define MAXATOMLEN 255 +#define MAXATOMLEN (255 + 1) +#define MAXATOMLEN_UTF8 (255*4 + 1) #define MAXNODELEN EI_MAXALIVELEN+1+EI_MAXHOSTNAMELEN +enum erlang_char_encoding { + ERLANG_ASCII = 1, + ERLANG_LATIN1 = 2, + ERLANG_UTF8 = 4, + ERLANG_ANY = ERLANG_ASCII|ERLANG_LATIN1|ERLANG_UTF8 +}; + /* a pid */ typedef struct { - char node[MAXATOMLEN+1]; + char node[MAXATOMLEN_UTF8]; + enum erlang_char_encoding node_org_enc; unsigned int num; unsigned int serial; unsigned int creation; @@ -196,14 +208,16 @@ typedef struct { /* a port */ typedef struct { - char node[MAXATOMLEN+1]; + char node[MAXATOMLEN_UTF8]; + enum erlang_char_encoding node_org_enc; unsigned int id; unsigned int creation; } erlang_port; /* a ref */ typedef struct { - char node[MAXATOMLEN+1]; + char node[MAXATOMLEN_UTF8]; + enum erlang_char_encoding node_org_enc; int len; unsigned int n[3]; unsigned int creation; @@ -223,15 +237,16 @@ typedef struct { long msgtype; erlang_pid from; erlang_pid to; - char toname[MAXATOMLEN+1]; - char cookie[MAXATOMLEN+1]; + char toname[MAXATOMLEN_UTF8]; + char cookie[MAXATOMLEN_UTF8]; erlang_trace token; } erlang_msg; /* a fun */ typedef struct { long arity; - char module[MAXATOMLEN+1]; + char module[MAXATOMLEN_UTF8]; + enum erlang_char_encoding module_org_enc; char md5[16]; long index; long old_index; @@ -256,7 +271,7 @@ typedef struct { union { long i_val; double d_val; - char atom_name[MAXATOMLEN+1]; + char atom_name[MAXATOMLEN_UTF8]; erlang_pid pid; erlang_port port; erlang_ref ref; @@ -425,9 +440,17 @@ int ei_encode_string_len(char *buf, int *index, const char *p, int len); int ei_x_encode_string(ei_x_buff* x, const char* s); int ei_x_encode_string_len(ei_x_buff* x, const char* s, int len); int ei_encode_atom(char *buf, int *index, const char *p); +int ei_encode_atom_as(char *buf, int *index, const char *p, + enum erlang_char_encoding from, enum erlang_char_encoding to); int ei_encode_atom_len(char *buf, int *index, const char *p, int len); +int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, + enum erlang_char_encoding from, enum erlang_char_encoding to); int ei_x_encode_atom(ei_x_buff* x, const char* s); +int ei_x_encode_atom_as(ei_x_buff* x, const char* s, + enum erlang_char_encoding from, enum erlang_char_encoding to); int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len); +int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len, + enum erlang_char_encoding from, enum erlang_char_encoding to); int ei_encode_binary(char *buf, int *index, const void *p, long len); int ei_x_encode_binary(ei_x_buff* x, const void* s, int len); int ei_encode_pid(char *buf, int *index, const erlang_pid *p); @@ -477,6 +500,7 @@ int ei_decode_boolean(const char *buf, int *index, int *p); int ei_decode_char(const char *buf, int *index, char *p); int ei_decode_string(const char *buf, int *index, char *p); int ei_decode_atom(const char *buf, int *index, char *p); +int ei_decode_atom_as(const char *buf, int *index, char *p, int destlen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result); int ei_decode_binary(const char *buf, int *index, void *p, long *len); int ei_decode_fun(const char* buf, int* index, erlang_fun* p); void free_fun(erlang_fun* f); diff --git a/lib/erl_interface/include/erl_interface.h b/lib/erl_interface/include/erl_interface.h index 1c4a94700d..98acc0d71d 100644 --- a/lib/erl_interface/include/erl_interface.h +++ b/lib/erl_interface/include/erl_interface.h @@ -95,19 +95,24 @@ #define ERL_FLOAT_VALUE(x) ((x)->uval.fval.f) -#define ERL_ATOM_PTR(x) ((x)->uval.aval.a) -#define ERL_ATOM_SIZE(x) ((x)->uval.aval.len) +#define ERL_ATOM_PTR(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.aval.d) +#define ERL_ATOM_PTR_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.aval.d) +#define ERL_ATOM_SIZE(x) erl_atom_size_latin1((Erl_Atom_data*) &(x)->uval.aval.d) +#define ERL_ATOM_SIZE_UTF8(x) erl_atom_size_utf8((Erl_Atom_data*) &(x)->uval.aval.d) -#define ERL_PID_NODE(x) ((x)->uval.pidval.node) +#define ERL_PID_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.pidval.node) +#define ERL_PID_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.pidval.node) #define ERL_PID_NUMBER(x) ((x)->uval.pidval.number) #define ERL_PID_SERIAL(x) ((x)->uval.pidval.serial) #define ERL_PID_CREATION(x) ((x)->uval.pidval.creation) -#define ERL_PORT_NODE(x) ((x)->uval.portval.node) +#define ERL_PORT_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.portval.node) +#define ERL_PORT_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.portval.node) #define ERL_PORT_NUMBER(x) ((x)->uval.portval.number) #define ERL_PORT_CREATION(x) ((x)->uval.portval.creation) -#define ERL_REF_NODE(x) ((x)->uval.refval.node) +#define ERL_REF_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.refval.node) +#define ERL_REF_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.refval.node) #define ERL_REF_NUMBER(x) ((x)->uval.refval.n[0]) #define ERL_REF_NUMBERS(x) ((x)->uval.refval.n) #define ERL_REF_LEN(x) ((x)->uval.refval.len) @@ -183,14 +188,26 @@ typedef struct { } Erl_Float; typedef struct { + char *utf8; + int lenU; + char *latin1; + int lenL; +} Erl_Atom_data; + +char* erl_atom_ptr_latin1(Erl_Atom_data*); +char* erl_atom_ptr_utf8(Erl_Atom_data*); +int erl_atom_size_latin1(Erl_Atom_data*); +int erl_atom_size_utf8(Erl_Atom_data*); +char* erl_atom_init_latin1(Erl_Atom_data*, const char*); + +typedef struct { Erl_Header h; - int len; - char *a; + Erl_Atom_data d; } Erl_Atom; typedef struct { Erl_Header h; - char * node; + Erl_Atom_data node; unsigned int number; unsigned int serial; unsigned char creation; @@ -198,14 +215,14 @@ typedef struct { typedef struct { Erl_Header h; - char * node; + Erl_Atom_data node; unsigned int number; unsigned char creation; } Erl_Port; typedef struct { Erl_Header h; - char * node; + Erl_Atom_data node; int len; unsigned int n[3]; unsigned char creation; @@ -289,7 +306,7 @@ typedef struct _eterm { } ETERM; -#define MAXREGLEN 255 /* max length of registered (atom) name */ +#define MAXREGLEN (255*4) /* max length of registered (atom) name */ typedef struct { int type; /* one of the message type constants in eiext.h */ @@ -409,6 +426,7 @@ unsigned char erl_ext_type(unsigned char*); /* Note: returned 'char' before R9C unsigned char *erl_peek_ext(unsigned char*,int); int erl_term_len(ETERM*); +int cmp_latin1_vs_utf8(const char* sL, int lenL, const char* sU, int lenU); /* -------------------------------------------------------------------- */ /* Wrappers around ei functions */ diff --git a/lib/erl_interface/src/connect/ei_connect.c b/lib/erl_interface/src/connect/ei_connect.c index 34362b4b9f..4421bbb7fe 100644 --- a/lib/erl_interface/src/connect/ei_connect.c +++ b/lib/erl_interface/src/connect/ei_connect.c @@ -459,6 +459,7 @@ int ei_connect_xinit(ei_cnode* ec, const char *thishostname, /* memmove(&ec->this_ipaddr, thisipaddr, sizeof(ec->this_ipaddr)); */ strcpy(ec->self.node,thisnodename); + ec->self.node_org_enc = ERLANG_LATIN1; ec->self.num = 0; ec->self.serial = 0; ec->self.creation = creation; @@ -1070,7 +1071,7 @@ int ei_rpc(ei_cnode* ec, int fd, char *mod, char *fun, int i, index; ei_term t; erlang_msg msg; - char rex[MAXATOMLEN+1]; + char rex[MAXATOMLEN]; if (ei_rpc_to(ec, fd, mod, fun, inbuf, inbuflen) < 0) { return -1; @@ -1332,7 +1333,9 @@ static int send_name_or_challenge(int fd, char *nodename, | DFLAG_EXTENDED_PIDS_PORTS | DFLAG_FUN_TAGS | DFLAG_NEW_FUN_TAGS - | DFLAG_NEW_FLOATS)); + | DFLAG_NEW_FLOATS + | DFLAG_SMALL_ATOM_TAGS + | DFLAG_UTF8_ATOMS)); if (f_chall) put32be(s, challenge); memcpy(s, nodename, strlen(nodename)); diff --git a/lib/erl_interface/src/connect/ei_connect_int.h b/lib/erl_interface/src/connect/ei_connect_int.h index 3c42b49b82..81c384e38d 100644 --- a/lib/erl_interface/src/connect/ei_connect_int.h +++ b/lib/erl_interface/src/connect/ei_connect_int.h @@ -102,6 +102,8 @@ extern int h_errno; #define DFLAG_NEW_FUN_TAGS 0x80 #define DFLAG_EXTENDED_PIDS_PORTS 0x100 #define DFLAG_NEW_FLOATS 0x800 +#define DFLAG_SMALL_ATOM_TAGS 0x4000 +#define DFLAG_UTF8_ATOMS 0x10000 ei_cnode *ei_fd_to_cnode(int fd); int ei_distversion(int fd); diff --git a/lib/erl_interface/src/connect/eirecv.c b/lib/erl_interface/src/connect/eirecv.c index 86852f947d..075f78e3d2 100644 --- a/lib/erl_interface/src/connect/eirecv.c +++ b/lib/erl_interface/src/connect/eirecv.c @@ -108,7 +108,7 @@ ei_recv_internal (int fd, switch (msg->msgtype) { case ERL_SEND: /* { SEND, Cookie, ToPid } */ if (ei_tracelevel >= 4) show_this_msg = 1; - if (ei_decode_atom(header,&index,msg->cookie) + if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg->to)) { erl_errno = EIO; @@ -120,8 +120,8 @@ ei_recv_internal (int fd, case ERL_REG_SEND: /* { REG_SEND, From, Cookie, ToName } */ if (ei_tracelevel >= 4) show_this_msg = 1; if (ei_decode_pid(header,&index,&msg->from) - || ei_decode_atom(header,&index,msg->cookie) - || ei_decode_atom(header,&index,msg->toname)) + || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL)) { erl_errno = EIO; return -1; @@ -157,7 +157,7 @@ ei_recv_internal (int fd, case ERL_SEND_TT: /* { SEND_TT, Cookie, ToPid, TraceToken } */ if (ei_tracelevel >= 4) show_this_msg = 1; - if (ei_decode_atom(header,&index,msg->cookie) + if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg->to) || ei_decode_trace(header,&index,&msg->token)) { @@ -171,8 +171,8 @@ ei_recv_internal (int fd, case ERL_REG_SEND_TT: /* { REG_SEND_TT, From, Cookie, ToName, TraceToken } */ if (ei_tracelevel >= 4) show_this_msg = 1; if (ei_decode_pid(header,&index,&msg->from) - || ei_decode_atom(header,&index,msg->cookie) - || ei_decode_atom(header,&index,msg->toname) + || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL) || ei_decode_trace(header,&index,&msg->token)) { erl_errno = EIO; diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c index c2e6a0426e..9779ad3f35 100644 --- a/lib/erl_interface/src/decode/decode_atom.c +++ b/lib/erl_interface/src/decode/decode_atom.c @@ -21,24 +21,155 @@ #include "eiext.h" #include "putget.h" + int ei_decode_atom(const char *buf, int *index, char *p) { - const char *s = buf + *index; - const char *s0 = s; - int len; + return ei_decode_atom_as(buf, index, p, MAXATOMLEN, ERLANG_LATIN1, NULL, NULL); +} + +int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen, + enum erlang_char_encoding want_enc, + enum erlang_char_encoding* was_encp, + enum erlang_char_encoding* res_encp) +{ + const char *s = buf + *index; + const char *s0 = s; + int len; + enum erlang_char_encoding got_enc; + + switch (get8(s)) { + case ERL_ATOM_EXT: + len = get16be(s); + got_enc = ERLANG_LATIN1; + break; + case ERL_SMALL_ATOM_EXT: + len = get8(s); + got_enc = ERLANG_LATIN1; + break; + case ERL_ATOM_UTF8_EXT: + len = get16be(s); + got_enc = ERLANG_UTF8; + break; + case ERL_SMALL_ATOM_UTF8_EXT: + len = get8(s); + got_enc = ERLANG_UTF8; + break; + default: + return -1; + } + + if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) { + int i, found_non_ascii = 0; + if (len >= destlen) + return -1; + for (i=0; i<len; i++) { + if (s[i] & 0x80) found_non_ascii = 1; + if (p) p[i] = s[i]; + } + if (p) p[len] = 0; + if (want_enc == ERLANG_ASCII && found_non_ascii) { + return -1; + } + if (res_encp) { + *res_encp = found_non_ascii ? got_enc : ERLANG_ASCII; + } + } + else { + int plen = (got_enc == ERLANG_LATIN1) ? + latin1_to_utf8(p, s, len, destlen-1, res_encp) : + utf8_to_latin1(p, s, len, destlen-1, res_encp); + if (plen < 0) return -1; + if (p) p[plen] = 0; + } + if (was_encp) { + *was_encp = got_enc; + } + + s += len; + *index += s-s0; + return 0; +} - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); +int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, + enum erlang_char_encoding* res_encp) +{ + const char* const dst_start = dst; + const char* const dst_end = dst + destlen; + int found_non_ascii = 0; + + while (slen > 0) { + if (dst >= dst_end) return -1; + if ((src[0] & 0x80) == 0) { + if (dst_start) { + *dst = *src; + } + ++dst; + ++src; + --slen; + } + else if (slen > 1 && + (src[0] & 0xFE) == 0xC2 && + (src[1] & 0xC0) == 0x80) { + if (dst_start) { + *dst = (char) ((src[0] << 6) | (src[1] & 0x3F)); + } + ++dst; + src += 2; + slen -= 2; + found_non_ascii = 1; + } + else return -1; + } + if (res_encp) { + *res_encp = found_non_ascii ? ERLANG_LATIN1 : ERLANG_ASCII; + } + return dst - dst_start; +} - if (len > MAXATOMLEN) return -1; +int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, + enum erlang_char_encoding* res_encp) +{ + const char* const src_end = src + slen; + const char* const dst_start = dst; + const char* const dst_end = dst + destlen; + int found_non_ascii = 0; - if (p) { - memmove(p,s,len); - p[len] = (char)0; - } - s += len; - *index += s-s0; - - return 0; + while (src < src_end) { + if (dst >= dst_end) return -1; + if ((src[0] & 0x80) == 0) { + if (dst_start) { + *dst = *src; + } + ++dst; + } + else { + if (dst_start) { + unsigned char ch = *src; + dst[0] = 0xC0 | (ch >> 6); + dst[1] = 0x80 | (ch & 0x3F); + } + dst += 2; + found_non_ascii = 1; + } + ++src; + } + if (res_encp) { + *res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII; + } + return dst - dst_start; } + + + +int ei_internal_get_atom(const char** bufp, char* p, + enum erlang_char_encoding* was_encp) +{ + int ix = 0; + if (ei_decode_atom_as(*bufp, &ix, p, MAXATOMLEN_UTF8, ERLANG_UTF8, was_encp, NULL) < 0) + return -1; + *bufp += ix; + return 0; +} + + diff --git a/lib/erl_interface/src/decode/decode_boolean.c b/lib/erl_interface/src/decode/decode_boolean.c index 9fd09c63f1..f20690249b 100644 --- a/lib/erl_interface/src/decode/decode_boolean.c +++ b/lib/erl_interface/src/decode/decode_boolean.c @@ -24,34 +24,20 @@ /* c non-zero -> erlang "true" atom, otherwise "false" */ int ei_decode_boolean(const char *buf, int *index, int *p) { - const char *s = buf + *index; - const char *s0 = s; - int len; + char tbuf[6]; int t; - if (get8(s) != ERL_ATOM_EXT) return -1; + if (ei_decode_atom_as(buf, index, tbuf, sizeof(tbuf), ERLANG_ASCII, NULL, NULL) < 0) + return -1; - len = get16be(s); - - switch (len) { - case 4: - /* typecast makes ansi happy */ - if (strncmp((char*)s,"true",4)) return -1; - t = 1; - break; - - case 5: - if (strncmp((char*)s,"false",5)) return -1; - t = 0; - break; - - default: - return -1; - } - - s += len; + if (memcmp(tbuf, "true", 5) == 0) + t = 1; + else if (memcmp(tbuf, "false", 6) == 0) + t = 0; + else + return -1; + if (p) *p = t; - *index += s-s0; - return 0; } + diff --git a/lib/erl_interface/src/decode/decode_fun.c b/lib/erl_interface/src/decode/decode_fun.c index 64fb9e86d8..7bbef5db44 100644 --- a/lib/erl_interface/src/decode/decode_fun.c +++ b/lib/erl_interface/src/decode/decode_fun.c @@ -42,7 +42,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p) if (ei_decode_pid(s, &ix, (p == NULL ? (erlang_pid*)NULL : &p->pid)) < 0) return -1; /* then the module (atom) */ - if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0) + if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module), + MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0) return -1; /* then the index */ if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->index)) < 0) @@ -84,7 +85,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p) if (p != NULL) p->n_free_vars = i; /* then the module (atom) */ ix = 0; - if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0) + if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module), + MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0) return -1; /* then the old_index */ if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->old_index)) < 0) diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c index 9ed1c36db6..e79952195d 100644 --- a/lib/erl_interface/src/decode/decode_pid.c +++ b/lib/erl_interface/src/decode/decode_pid.c @@ -21,26 +21,16 @@ #include "eiext.h" #include "putget.h" + int ei_decode_pid(const char *buf, int *index, erlang_pid *p) { const char *s = buf + *index; const char *s0 = s; - int len; if (get8(s) != ERL_PID_EXT) return -1; /* first the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - - len = get16be(s); - - if (len > MAXATOMLEN) return -1; - - if (p) { - memmove(p->node, s, len); - p->node[len] = (char)0; - } - s += len; + if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; /* now the numbers: num (4), serial (4), creation (1) */ if (p) { diff --git a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c index 28abed801a..5fd96b51a4 100644 --- a/lib/erl_interface/src/decode/decode_port.c +++ b/lib/erl_interface/src/decode/decode_port.c @@ -25,22 +25,11 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p) { const char *s = buf + *index; const char *s0 = s; - int len; if (get8(s) != ERL_PORT_EXT) return -1; /* first the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - - len = get16be(s); - - if (len > MAXATOMLEN) return -1; - - if (p) { - memmove(p->node, s, len); - p->node[len] = (char)0; - } - s += len; + if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; /* now the numbers: num (4), creation (1) */ if (p) { diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c index 7b15808bc5..7294e5d239 100644 --- a/lib/erl_interface/src/decode/decode_ref.c +++ b/lib/erl_interface/src/decode/decode_ref.c @@ -21,27 +21,18 @@ #include "eiext.h" #include "putget.h" + int ei_decode_ref(const char *buf, int *index, erlang_ref *p) { const char *s = buf + *index; const char *s0 = s; - int count, len, i; + int count, i; switch (get8(s)) { case ERL_REFERENCE_EXT: - /* first the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - - len = get16be(s); - - if (len > MAXATOMLEN) return -1; - - if (p) { - memmove(p->node, s, len); - p->node[len] = (char)0; - } - s += len; + /* nodename */ + if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; /* now the numbers: num (4), creation (1) */ if (p) { @@ -62,15 +53,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p) if (p) p->len = count; /* then the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - - if (p) { - memmove(p->node, s, len); - p->node[len] = (char)0; - } - s += len; + if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1; /* creation */ if (p) { @@ -95,3 +78,4 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p) return -1; } } + diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c index 6f41f045e0..044f17cb60 100644 --- a/lib/erl_interface/src/encode/encode_atom.c +++ b/lib/erl_interface/src/encode/encode_atom.c @@ -22,29 +22,108 @@ #include "eiext.h" #include "putget.h" + +static int copy_ascii_atom(char* dst, const char* src, int slen); +static int copy_utf8_atom(char* dst, const char* src, int slen); + + int ei_encode_atom(char *buf, int *index, const char *p) { size_t len = strlen(p); - if (len >= INT_MAX) return -1; - return ei_encode_atom_len(buf, index, p, len); + if (len >= MAXATOMLEN) + len = MAXATOMLEN - 1; + return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1); } int ei_encode_atom_len(char *buf, int *index, const char *p, int len) { + /* This function is documented to truncate at MAXATOMLEN (256) */ + if (len >= MAXATOMLEN) + len = MAXATOMLEN - 1; + return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1); +} + +int ei_encode_atom_as(char *buf, int *index, const char *p, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ + return ei_encode_atom_len_as(buf, index, p, strlen(p), from_enc, to_enc); +} + +int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ char *s = buf + *index; char *s0 = s; + int offs; - /* This function is documented to truncate at MAXATOMLEN (256) */ - if (len > MAXATOMLEN) - len = MAXATOMLEN; + if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) { + return -1; + } - if (!buf) s += 3; - else { - put8(s,ERL_ATOM_EXT); - put16be(s,len); + switch(to_enc) { + case ERLANG_LATIN1: + if (buf) { + put8(s,ERL_ATOM_EXT); + switch (from_enc) { + case ERLANG_UTF8: + len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL); + if (len < 0) return -1; + break; + case ERLANG_ASCII: + if (copy_ascii_atom(s+2, p, len) < 0) return -1; + break; + case ERLANG_LATIN1: + memcpy(s+2, p, len); + break; + default: + return -1; + } + put16be(s,len); + } + else { + s += 3; + if (from_enc == ERLANG_UTF8) { + len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL); + if (len < 0) return -1; + } + } + break; + + case ERLANG_UTF8: + offs = 1 + 1; + switch (from_enc) { + case ERLANG_LATIN1: + if (len >= 256/2) offs++; + len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL); + break; + case ERLANG_ASCII: + if (buf && copy_ascii_atom(s+offs, p, len) < 0) return -1; + break; + case ERLANG_UTF8: + if (len >= 256) offs++; + if (buf && copy_utf8_atom(s+offs, p, len) < 0) return -1; + break; + default: + return -1; + } + if (buf) { + if (offs == 2) { + put8(s, ERL_SMALL_ATOM_UTF8_EXT); + put8(s, len); + } + else { + put8(s, ERL_ATOM_UTF8_EXT); + put16be(s, len); + } + } + else s+= offs; + break; - memmove(s,p,len); /* unterminated string */ + default: + return -1; } s += len; @@ -53,3 +132,58 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len) return 0; } +int +ei_internal_put_atom(char** bufp, const char* p, int slen, + enum erlang_char_encoding to_enc) +{ + int ix = 0; + if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0) + return -1; + *bufp += ix; + return 0; +} + + +int copy_ascii_atom(char* dst, const char* src, int slen) +{ + while (slen > 0) { + if ((src[0] & 0x80) != 0) return -1; + *dst++ = *src++; + slen--; + } + return 0; +} + +int copy_utf8_atom(char* dst, const char* src, int slen) +{ + int num_chars = 0; + + while (slen > 0) { + if (++num_chars >= MAXATOMLEN) return -1; + if ((src[0] & 0x80) != 0) { + if ((src[0] & 0xE0) == 0xC0) { + if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1; + *dst++ = *src++; + slen--; + } + else if ((src[0] & 0xF0) == 0xE0) { + if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1; + *dst++ = *src++; + *dst++ = *src++; + slen -= 2; + } + else if ((src[0] & 0xF8) == 0xF0) { + if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + slen -= 3; + } + else return -1; + } + *dst++ = *src++; + slen--; + } + return 0; +} + diff --git a/lib/erl_interface/src/encode/encode_fun.c b/lib/erl_interface/src/encode/encode_fun.c index 54ee2083d6..4daee32648 100644 --- a/lib/erl_interface/src/encode/encode_fun.c +++ b/lib/erl_interface/src/encode/encode_fun.c @@ -35,7 +35,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p) ix += sizeof(char) + 4; if (ei_encode_pid(buf, &ix, &p->pid) < 0) return -1; - if (ei_encode_atom(buf, &ix, p->module) < 0) + if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0) return -1; if (ei_encode_long(buf, &ix, p->index) < 0) return -1; @@ -60,7 +60,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p) } else size_p = NULL; ix += 1 + 4 + 1 + sizeof(p->md5) + 4 + 4; - if (ei_encode_atom(buf, &ix, p->module) < 0) + if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0) return -1; if (ei_encode_long(buf, &ix, p->old_index) < 0) return -1; diff --git a/lib/erl_interface/src/encode/encode_pid.c b/lib/erl_interface/src/encode/encode_pid.c index ee7f235c17..0cf3ef4efb 100644 --- a/lib/erl_interface/src/encode/encode_pid.c +++ b/lib/erl_interface/src/encode/encode_pid.c @@ -24,29 +24,23 @@ int ei_encode_pid(char *buf, int *index, const erlang_pid *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); - - if (!buf) s += 13 + len; - else { - put8(s,ERL_PID_EXT); - /* first the nodename */ - put8(s,ERL_ATOM_EXT); + ++(*index); /* skip ERL_PID_EXT */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, p->node_org_enc) < 0) + return -1; + + if (buf) { + put8(s,ERL_PID_EXT); - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put32be(s,p->num & 0x7fff); /* 15 bits */ put32be(s,p->serial & 0x1fff); /* 13 bits */ put8(s,(p->creation & 0x03)); /* 2 bits */ } - - *index += s-s0; - + + *index += 4 + 4 + 1; return 0; } diff --git a/lib/erl_interface/src/encode/encode_port.c b/lib/erl_interface/src/encode/encode_port.c index fbbb33182e..2bf9e26d78 100644 --- a/lib/erl_interface/src/encode/encode_port.c +++ b/lib/erl_interface/src/encode/encode_port.c @@ -24,28 +24,23 @@ int ei_encode_port(char *buf, int *index, const erlang_port *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); - - if (!buf) s += 9 + len; - else { - put8(s,ERL_PORT_EXT); - /* first the nodename */ - put8(s,ERL_ATOM_EXT); + ++(*index); /* skip ERL_PORT_EXT */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, + p->node_org_enc) < 0) { + return -1; + } + if (buf) { + put8(s,ERL_PORT_EXT); - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put32be(s,p->id & 0x0fffffff /* 28 bits */); put8(s,(p->creation & 0x03)); } - *index += s-s0; - + *index += 4 + 1; return 0; } diff --git a/lib/erl_interface/src/encode/encode_ref.c b/lib/erl_interface/src/encode/encode_ref.c index 292b452864..e8b3173315 100644 --- a/lib/erl_interface/src/encode/encode_ref.c +++ b/lib/erl_interface/src/encode/encode_ref.c @@ -24,36 +24,32 @@ int ei_encode_ref(char *buf, int *index, const erlang_ref *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); int i; + (*index) += 1 + 2; /* skip to node atom */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, + p->node_org_enc) < 0) { + return -1; + } + /* Always encode as an extended reference; all participating parties are now expected to be able to decode extended references. */ - if (!buf) s += 1 + 2 + (3+len) + p->len*4 + 1; - else { + if (buf) { put8(s,ERL_NEW_REFERENCE_EXT); /* first, number of integers */ put16be(s, p->len); /* then the nodename */ - put8(s,ERL_ATOM_EXT); - - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put8(s,(p->creation & 0x03)); for (i = 0; i < p->len; i++) put32be(s,p->n[i]); - - } - - *index += s-s0; + } + *index += p->len*4 + 1; return 0; } diff --git a/lib/erl_interface/src/legacy/erl_connect.c b/lib/erl_interface/src/legacy/erl_connect.c index 41d4fa3138..f82704ea8b 100644 --- a/lib/erl_interface/src/legacy/erl_connect.c +++ b/lib/erl_interface/src/legacy/erl_connect.c @@ -125,7 +125,7 @@ static ei_cnode erl_if_ec; int erl_connect_init(int this_node_number, char *cookie, short creation) { - char nn[MAXATOMLEN+1]; + char nn[MAXATOMLEN]; sprintf(nn, "c%d", this_node_number); @@ -247,9 +247,15 @@ int erl_send(int fd, ETERM *to ,ETERM *msg) erl_errno = EINVAL; return -1; } - - strncpy(topid.node, (char *)ERL_PID_NODE(to), sizeof(topid.node)); - topid.node[sizeof(topid.node)-1] = '\0'; + + if (to->uval.pidval.node.latin1) { + strcpy(topid.node, to->uval.pidval.node.latin1); + topid.node_org_enc = ERLANG_LATIN1; + } + else { + strcpy(topid.node, to->uval.pidval.node.utf8); + topid.node_org_enc = ERLANG_UTF8; + } topid.num = ERL_PID_NUMBER(to); topid.serial = ERL_PID_SERIAL(to); topid.creation = ERL_PID_CREATION(to); @@ -263,7 +269,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg) erlang_msg msg; int r; - msg.from.node[0] = msg.to.node[0] = '\0'; + msg.from.node[0] = msg.to.node[0] = msg.toname[0] = '\0'; r = ei_do_receive_msg(fd, 0, &msg, x, 0); if (r == ERL_MSG) { @@ -299,7 +305,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg) emsg->to = erl_mk_pid(msg.to.node, msg.to.num, msg.to.serial, msg.to.creation); else emsg->to = NULL; - memcpy(emsg->to_name, msg.toname, MAXATOMLEN+1); + strcpy(emsg->to_name, msg.toname); return r; } diff --git a/lib/erl_interface/src/legacy/erl_eterm.c b/lib/erl_interface/src/legacy/erl_eterm.c index 8d559f0f55..aa0fd5ddcf 100644 --- a/lib/erl_interface/src/legacy/erl_eterm.c +++ b/lib/erl_interface/src/legacy/erl_eterm.c @@ -36,6 +36,7 @@ #include "erl_error.h" #include "erl_internal.h" #include "ei_internal.h" +#include "putget.h" #define ERL_IS_BYTE(x) (ERL_IS_INTEGER(x) && (ERL_INT_VALUE(x) & ~0xFF) == 0) @@ -142,9 +143,7 @@ ETERM *erl_mk_atom (const char *s) ep = erl_alloc_eterm(ERL_ATOM); ERL_COUNT(ep) = 1; - ERL_ATOM_SIZE(ep) = strlen(s); - if ((ERL_ATOM_PTR(ep) = strsave(s)) == NULL) - { + if (erl_atom_init_latin1(&ep->uval.aval.d, s) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; @@ -152,6 +151,65 @@ ETERM *erl_mk_atom (const char *s) return ep; } +char* erl_atom_ptr_latin1(Erl_Atom_data* a) +{ + if (a->latin1 == NULL) { + enum erlang_char_encoding enc; + a->lenL = utf8_to_latin1(NULL, a->utf8, a->lenU, a->lenU, &enc); + if (a->lenL < 0) { + a->lenL = 0; + return NULL; + } + if (enc == ERLANG_ASCII) { + a->latin1 = a->utf8; + } + else { + a->latin1 = malloc(a->lenL+1); + utf8_to_latin1(a->latin1, a->utf8, a->lenU, a->lenL, NULL); + a->latin1[a->lenL] = '\0'; + } + } + return a->latin1; +} + +char* erl_atom_ptr_utf8(Erl_Atom_data* a) +{ + if (a->utf8 == NULL) { + int dlen = a->lenL * 2; /* over estimation */ + a->utf8 = malloc(dlen + 1); + a->lenU = latin1_to_utf8(a->utf8, a->latin1, a->lenL, dlen, NULL); + a->utf8[a->lenU] = '\0'; + } + return a->utf8; + +} +int erl_atom_size_latin1(Erl_Atom_data* a) +{ + if (a->latin1 == NULL) { + erl_atom_ptr_latin1(a); + } + return a->lenL; +} +int erl_atom_size_utf8(Erl_Atom_data* a) +{ + if (a->utf8 == NULL) { + erl_atom_ptr_utf8(a); + } + return a->lenU; +} +char* erl_atom_init_latin1(Erl_Atom_data* a, const char* s) +{ + a->lenL = strlen(s); + if ((a->latin1 = strsave(s)) == NULL) + { + return NULL; + } + a->utf8 = NULL; + a->lenU = 0; + return a->latin1; +} + + /* * Given a string as input, creates a list. */ @@ -208,12 +266,19 @@ ETERM *erl_mk_pid(const char *node, ep = erl_alloc_eterm(ERL_PID); ERL_COUNT(ep) = 1; - if ((ERL_PID_NODE(ep) = strsave(node)) == NULL) + if (erl_atom_init_latin1(&ep->uval.pidval.node, node) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; } + erl_mk_pid_helper(ep, number, serial, creation); + return ep; +} + +void erl_mk_pid_helper(ETERM *ep, unsigned int number, + unsigned int serial, unsigned char creation) +{ ERL_PID_NUMBER(ep) = number & 0x7fff; /* 15 bits */ if (ei_internal_use_r9_pids_ports()) { ERL_PID_SERIAL(ep) = serial & 0x07; /* 3 bits */ @@ -222,7 +287,6 @@ ETERM *erl_mk_pid(const char *node, ERL_PID_SERIAL(ep) = serial & 0x1fff; /* 13 bits */ } ERL_PID_CREATION(ep) = creation & 0x03; /* 2 bits */ - return ep; } /* @@ -239,12 +303,18 @@ ETERM *erl_mk_port(const char *node, ep = erl_alloc_eterm(ERL_PORT); ERL_COUNT(ep) = 1; - if ((ERL_PORT_NODE(ep) = strsave(node)) == NULL) + if (erl_atom_init_latin1(&ep->uval.portval.node, node) == NULL) { erl_free_term(ep); erl_errno = ENOMEM; return NULL; } + erl_mk_port_helper(ep, number, creation); + return ep; +} + +void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation) +{ if (ei_internal_use_r9_pids_ports()) { ERL_PORT_NUMBER(ep) = number & 0x3ffff; /* 18 bits */ } @@ -252,29 +322,29 @@ ETERM *erl_mk_port(const char *node, ERL_PORT_NUMBER(ep) = number & 0x0fffffff; /* 18 bits */ } ERL_PORT_CREATION(ep) = creation & 0x03; /* 2 bits */ - return ep; } /* * Create any kind of reference. */ -ETERM *__erl_mk_reference (const char *node, +ETERM *__erl_mk_reference (ETERM* t, + const char *node, size_t len, unsigned int n[], unsigned char creation) { - ETERM * t; - - if (node == NULL) return NULL; - - t = erl_alloc_eterm(ERL_REF); - ERL_COUNT(t) = 1; - - if ((ERL_REF_NODE(t) = strsave(node)) == NULL) - { - erl_free_term(t); - erl_errno = ENOMEM; - return NULL; + if (t == NULL) { + if (node == NULL) return NULL; + + t = erl_alloc_eterm(ERL_REF); + ERL_COUNT(t) = 1; + + if (erl_atom_init_latin1(&t->uval.refval.node, node) == NULL) + { + erl_free_term(t); + erl_errno = ENOMEM; + return NULL; + } } ERL_REF_LEN(t) = len; ERL_REF_NUMBERS(t)[0] = n[0] & 0x3ffff; /* 18 bits */ @@ -294,7 +364,7 @@ ETERM *erl_mk_ref (const char *node, { unsigned int n[3] = {0, 0, 0}; n[0] = number; - return __erl_mk_reference(node, 1, n, creation); + return __erl_mk_reference(NULL, node, 1, n, creation); } /* @@ -307,7 +377,7 @@ erl_mk_long_ref (const char *node, { unsigned int n[3] = {0, 0, 0}; n[0] = n3; n[1] = n2; n[2] = n1; - return __erl_mk_reference(node, 3, n, creation); + return __erl_mk_reference(NULL, node, 3, n, creation); } /* @@ -758,6 +828,28 @@ int erl_iolist_length (const ETERM* term) return -1; } +static int erl_atom_copy(Erl_Atom_data* dst, const Erl_Atom_data* src) +{ + if (src->latin1 == src->utf8) { + dst->latin1 = dst->utf8 = strsave(src->latin1); + dst->lenL = dst->lenU = strlen(src->latin1); + } + else if (src->latin1) { + dst->latin1 = strsave(src->latin1); + dst->lenL = strlen(src->latin1); + dst->utf8 = NULL; + dst->lenU = 0; + } + else { + dst->utf8 = strsave(src->utf8); + dst->lenU = strlen(src->utf8); + dst->latin1 = NULL; + dst->lenL = 0; + } + return (dst->latin1 != NULL || dst->utf8 == NULL); +} + + /* * Return a brand NEW COPY of an ETERM. */ @@ -796,9 +888,7 @@ ETERM *erl_copy_term(const ETERM *ep) ERL_FLOAT_VALUE(cp) = ERL_FLOAT_VALUE(ep); break; case ERL_ATOM: - ERL_ATOM_SIZE(cp) = ERL_ATOM_SIZE(ep); - ERL_ATOM_PTR(cp) = strsave(ERL_ATOM_PTR(ep)); - if (ERL_ATOM_PTR(cp) == NULL) + if (!erl_atom_copy(&cp->uval.aval.d, &ep->uval.aval.d)) { erl_free_term(cp); erl_errno = ENOMEM; @@ -810,17 +900,17 @@ ETERM *erl_copy_term(const ETERM *ep) name and plug in. Somewhat ugly (also done with port and ref below). */ memcpy(&cp->uval.pidval, &ep->uval.pidval, sizeof(Erl_Pid)); - ERL_PID_NODE(cp) = strsave(ERL_PID_NODE(ep)); + erl_atom_copy(&cp->uval.pidval.node, &ep->uval.pidval.node); ERL_COUNT(cp) = 1; break; case ERL_PORT: memcpy(&cp->uval.portval, &ep->uval.portval, sizeof(Erl_Port)); - ERL_PORT_NODE(cp) = strsave(ERL_PORT_NODE(ep)); + erl_atom_copy(&cp->uval.portval.node, &ep->uval.portval.node); ERL_COUNT(cp) = 1; break; case ERL_REF: memcpy(&cp->uval.refval, &ep->uval.refval, sizeof(Erl_Ref)); - ERL_REF_NODE(cp) = strsave(ERL_REF_NODE(ep)); + erl_atom_copy(&cp->uval.refval.node, &ep->uval.refval.node); ERL_COUNT(cp) = 1; break; case ERL_LIST: @@ -883,29 +973,29 @@ int erl_print_term(FILE *fp, const ETERM *ep) j = i = doquote = 0; switch(ERL_TYPE(ep)) { - case ERL_ATOM: + case ERL_ATOM: { + char* adata = ERL_ATOM_PTR(ep); /* FIXME: what if some weird locale is in use? */ - if (!islower((int)ERL_ATOM_PTR(ep)[0])) + if (!islower(adata[0])) doquote = 1; for (i = 0; !doquote && i < ERL_ATOM_SIZE(ep); i++) { - doquote = !(isalnum((int)ERL_ATOM_PTR(ep)[i]) - || (ERL_ATOM_PTR(ep)[i] == '_')); + doquote = !(isalnum(adata[i]) || (adata[i] == '_')); } if (doquote) { putc('\'', fp); ch_written++; } - fputs(ERL_ATOM_PTR(ep), fp); + fputs(adata, fp); ch_written += ERL_ATOM_SIZE(ep); if (doquote) { putc('\'', fp); ch_written++; } break; - + } case ERL_VARIABLE: if (!isupper((int)ERL_VAR_NAME(ep)[0])) { doquote = 1; diff --git a/lib/erl_interface/src/legacy/erl_eterm.h b/lib/erl_interface/src/legacy/erl_eterm.h index 41b008f04f..2e8129d9cd 100644 --- a/lib/erl_interface/src/legacy/erl_eterm.h +++ b/lib/erl_interface/src/legacy/erl_eterm.h @@ -55,7 +55,9 @@ typedef struct _heapmark { } Erl_HeapMark; -ETERM * __erl_mk_reference(const char *, size_t, unsigned int n[], unsigned char); +void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation); +void erl_mk_pid_helper(ETERM*, unsigned,unsigned, unsigned char); +ETERM * __erl_mk_reference(ETERM*, const char *, size_t, unsigned int n[], unsigned char); int erl_current_fix_desc(void); #endif /* _ERL_ETERM_H */ diff --git a/lib/erl_interface/src/legacy/erl_format.c b/lib/erl_interface/src/legacy/erl_format.c index dc85806c36..533241e396 100644 --- a/lib/erl_interface/src/legacy/erl_format.c +++ b/lib/erl_interface/src/legacy/erl_format.c @@ -574,10 +574,22 @@ static int ematch(ETERM *p, ETERM *t) switch (type_p) { - case ERL_ATOM: - return p->uval.aval.len == t->uval.aval.len && - memcmp(p->uval.aval.a, t->uval.aval.a, p->uval.aval.len) == 0; - + case ERL_ATOM: { + Erl_Atom_data* pa = &p->uval.aval.d; + Erl_Atom_data* ta = &t->uval.aval.d; + if (pa->utf8 && ta->utf8) { + return pa->lenU == ta->lenU && memcmp(pa->utf8, ta->utf8, pa->lenU)==0; + } + else if (pa->latin1 && ta->latin1) { + return pa->lenL == ta->lenL && memcmp(pa->latin1, ta->latin1, pa->lenL)==0; + } + else if (pa->latin1) { + return cmp_latin1_vs_utf8(pa->latin1, pa->lenL, ta->utf8, ta->lenU)==0; + } + else { + return cmp_latin1_vs_utf8(ta->latin1, ta->lenL, pa->utf8, pa->lenU)==0; + } + } case ERL_VARIABLE: if (strcmp(p->uval.vval.name, "_") == 0) /* anon. variable */ return ERL_TRUE; diff --git a/lib/erl_interface/src/legacy/erl_malloc.c b/lib/erl_interface/src/legacy/erl_malloc.c index f51a6c69b3..d09239e02d 100644 --- a/lib/erl_interface/src/legacy/erl_malloc.c +++ b/lib/erl_interface/src/legacy/erl_malloc.c @@ -112,6 +112,18 @@ do { \ (ptr) = NULL; \ } while (0) +static void erl_atom_free(Erl_Atom_data* p) +{ + erl_free(p->latin1); + if (p->utf8 != p->latin1) { + erl_free(p->utf8); + } + p->latin1 = NULL; + p->utf8 = NULL; + p->lenL = 0; + p->lenU = 0; +} + static void _erl_free_term (ETERM *ep, int external, int compound) { restart: @@ -122,7 +134,7 @@ restart: switch(ERL_TYPE(ep)) { case ERL_ATOM: - FREE_AND_CLEAR(ERL_ATOM_PTR(ep)); + erl_atom_free(&ep->uval.aval.d); break; case ERL_VARIABLE: FREE_AND_CLEAR(ERL_VAR_NAME(ep)); @@ -161,13 +173,13 @@ restart: FREE_AND_CLEAR(ERL_BIN_PTR(ep)); break; case ERL_PID: - FREE_AND_CLEAR(ERL_PID_NODE(ep)); + erl_atom_free(&ep->uval.pidval.node); break; case ERL_PORT: - FREE_AND_CLEAR(ERL_PORT_NODE(ep)); + erl_atom_free(&ep->uval.portval.node); break; case ERL_REF: - FREE_AND_CLEAR(ERL_REF_NODE(ep)); + erl_atom_free(&ep->uval.refval.node); break; case ERL_EMPTY_LIST: case ERL_INTEGER: diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c index dad715c762..4c45cebb02 100644 --- a/lib/erl_interface/src/legacy/erl_marshal.c +++ b/lib/erl_interface/src/legacy/erl_marshal.c @@ -44,6 +44,9 @@ int erl_fp_compare(unsigned *a, unsigned *b); static void erl_long_to_fp(long l, unsigned *d); #endif +static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2); +static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1, unsigned char* s2, int l2, unsigned char tag2); + /* Used when comparing two encoded byte arrays */ /* this global data is ok (from threading point of view) since it is * initialized once and never changed @@ -51,7 +54,13 @@ static void erl_long_to_fp(long l, unsigned *d); #define CMP_ARRAY_SIZE 256 /* FIXME problem for threaded ? */ -static char cmp_array[CMP_ARRAY_SIZE]; + +static enum +{ + ERL_NUM_CMP=1, ERL_ATOM_CMP, ERL_REF_CMP, ERL_FUN_CMP, ERL_PORT_CMP, + ERL_PID_CMP, ERL_TUPLE_CMP, ERL_NIL_CMP, ERL_LIST_CMP, ERL_BIN_CMP +}cmp_array[CMP_ARRAY_SIZE]; + static int init_cmp_array_p=1; /* initialize array, the first time */ #if defined(VXWORKS) && CPU == PPC860 @@ -69,10 +78,8 @@ static int init_cmp_array_p=1; /* initialize array, the first time */ static int cmp_floats(double f1, double f2); static INLINE double to_float(long l); -#define ERL_NUM_CMP 1 -#define ERL_REF_CMP 3 - #define IS_ERL_NUM(t) (cmp_array[t]==ERL_NUM_CMP) +#define IS_ERL_ATOM(t) (cmp_array[t]==ERL_ATOM_CMP) #define CMP_NUM_CLASS_SIZE 256 static unsigned char cmp_num_class[CMP_NUM_CLASS_SIZE]; @@ -100,25 +107,28 @@ void erl_init_marshal(void) { if (init_cmp_array_p) { memset(cmp_array, 0, CMP_ARRAY_SIZE); - cmp_array[ERL_SMALL_INTEGER_EXT] = 1; - cmp_array[ERL_INTEGER_EXT] = 1; - cmp_array[ERL_FLOAT_EXT] = 1; - cmp_array[NEW_FLOAT_EXT] = 1; - cmp_array[ERL_SMALL_BIG_EXT] = 1; - cmp_array[ERL_LARGE_BIG_EXT] = 1; - cmp_array[ERL_ATOM_EXT] = 2; - cmp_array[ERL_REFERENCE_EXT] = 3; - cmp_array[ERL_NEW_REFERENCE_EXT] = 3; - cmp_array[ERL_FUN_EXT] = 4; - cmp_array[ERL_NEW_FUN_EXT] = 4; - cmp_array[ERL_PORT_EXT] = 5; - cmp_array[ERL_PID_EXT] = 6; - cmp_array[ERL_SMALL_TUPLE_EXT] = 7; - cmp_array[ERL_LARGE_TUPLE_EXT] = 7; - cmp_array[ERL_NIL_EXT] = 8; - cmp_array[ERL_STRING_EXT] = 9; - cmp_array[ERL_LIST_EXT] = 9; - cmp_array[ERL_BINARY_EXT] = 10; + cmp_array[ERL_SMALL_INTEGER_EXT] = ERL_NUM_CMP; + cmp_array[ERL_INTEGER_EXT] = ERL_NUM_CMP; + cmp_array[ERL_FLOAT_EXT] = ERL_NUM_CMP; + cmp_array[NEW_FLOAT_EXT] = ERL_NUM_CMP; + cmp_array[ERL_SMALL_BIG_EXT] = ERL_NUM_CMP; + cmp_array[ERL_LARGE_BIG_EXT] = ERL_NUM_CMP; + cmp_array[ERL_ATOM_EXT] = ERL_ATOM_CMP; + cmp_array[ERL_ATOM_UTF8_EXT] = ERL_ATOM_CMP; + cmp_array[ERL_SMALL_ATOM_EXT] = ERL_ATOM_CMP; + cmp_array[ERL_SMALL_ATOM_UTF8_EXT] = ERL_ATOM_CMP; + cmp_array[ERL_REFERENCE_EXT] = ERL_REF_CMP; + cmp_array[ERL_NEW_REFERENCE_EXT] = ERL_REF_CMP; + cmp_array[ERL_FUN_EXT] = ERL_FUN_CMP; + cmp_array[ERL_NEW_FUN_EXT] = ERL_FUN_CMP; + cmp_array[ERL_PORT_EXT] = ERL_PORT_CMP; + cmp_array[ERL_PID_EXT] = ERL_PID_CMP; + cmp_array[ERL_SMALL_TUPLE_EXT] = ERL_TUPLE_CMP; + cmp_array[ERL_LARGE_TUPLE_EXT] = ERL_TUPLE_CMP; + cmp_array[ERL_NIL_EXT] = ERL_NIL_CMP; + cmp_array[ERL_STRING_EXT] = ERL_LIST_CMP; + cmp_array[ERL_LIST_EXT] = ERL_LIST_CMP; + cmp_array[ERL_BINARY_EXT] = ERL_BIN_CMP; init_cmp_array_p = 0; } if (init_cmp_num_class_p) { @@ -156,6 +166,21 @@ static int erl_length_x(const ETERM *ep) { *============================================================== */ +static void encode_atom(Erl_Atom_data* a, unsigned char **ext) +{ + int ix = 0; + if (a->latin1) { + ei_encode_atom_len_as((char*)*ext, &ix, a->latin1, a->lenL, + ERLANG_LATIN1, ERLANG_LATIN1); + } + else if (ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU, + ERLANG_UTF8, ERLANG_LATIN1) < 0) { + ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU, + ERLANG_UTF8, ERLANG_UTF8); + } + *ext += ix; +} + /* * The actual ENCODE engine. * Returns 0 on success, otherwise 1. @@ -170,12 +195,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist) switch(ERL_TYPE(ep)) { case ERL_ATOM: - i = ep->uval.aval.len; - *(*ext)++ = ERL_ATOM_EXT; - *(*ext)++ = (i >>8) &0xff; - *(*ext)++ = i &0xff; - memcpy((void *) *ext, (const void *) ep->uval.aval.a, i); - *ext += i; + encode_atom(&ep->uval.aval.d, ext); return 0; case ERL_INTEGER: @@ -286,12 +306,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist) case ERL_PID: *(*ext)++ = ERL_PID_EXT; /* First poke in node as an atom */ - i = strlen((char *)ERL_PID_NODE(ep)); - *(*ext)++ = ERL_ATOM_EXT; - *(*ext)++ = (i >>8) &0xff; - *(*ext)++ = i &0xff; - memcpy(*ext, ERL_PID_NODE(ep), i); - *ext += i; + encode_atom(&ep->uval.pidval.node, ext); /* And then fill in the integer fields */ i = ERL_PID_NUMBER(ep); *(*ext)++ = (i >> 24) &0xff; @@ -319,11 +334,8 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist) *(*ext)++ = (len >> 8) &0xff; *(*ext)++ = len &0xff; - *(*ext)++ = ERL_ATOM_EXT; - *(*ext)++ = (i >> 8) &0xff; - *(*ext)++ = i &0xff; - memcpy(*ext, ERL_REF_NODE(ep), i); - *ext += i; + encode_atom(&ep->uval.refval.node, ext); + *(*ext)++ = ERL_REF_CREATION(ep); /* Then the integer fields */ for (j = 0; j < ERL_REF_LEN(ep); j++) { @@ -338,12 +350,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist) case ERL_PORT: *(*ext)++ = ERL_PORT_EXT; /* First poke in node as an atom */ - i = strlen((char *)ERL_PORT_NODE(ep)); - *(*ext)++ = ERL_ATOM_EXT; - *(*ext)++ = (i >>8) &0xff; - *(*ext)++ = i &0xff; - memcpy(*ext, ERL_PORT_NODE(ep), i); - *ext += i; + encode_atom(&ep->uval.portval.node, ext); /* Then the integer fields */ i = ERL_PORT_NUMBER(ep); *(*ext)++ = (i >> 24) &0xff; @@ -494,6 +501,16 @@ int erl_term_len(ETERM *ep) return 1+erl_term_len_helper(ep, 4); } +static int atom_len_helper(Erl_Atom_data* a) +{ + if (erl_atom_ptr_latin1(a)) { + return 1 + 2 + a->lenL; /* ERL_ATOM_EXT */ + } + else { + return 1 + 1 + (a->lenU > 255) + a->lenU; + } +} + static int erl_term_len_helper(ETERM *ep, int dist) { int len = 0; @@ -505,8 +522,7 @@ static int erl_term_len_helper(ETERM *ep, int dist) if (ep) { switch (ERL_TYPE(ep)) { case ERL_ATOM: - i = ep->uval.aval.len; - len = i + 3; + len = atom_len_helper(&ep->uval.aval.d); break; case ERL_INTEGER: @@ -538,20 +554,15 @@ static int erl_term_len_helper(ETERM *ep, int dist) break; case ERL_PID: - /* 1 + N + 4 + 4 + 1 where N = 3 + strlen */ - i = strlen((char *)ERL_PID_NODE(ep)); - len = 13 + i; + len = 1 + atom_len_helper(&ep->uval.pidval.node) + 4 + 4 + 1; break; case ERL_REF: - i = strlen((char *)ERL_REF_NODE(ep)); - len = 1 + 2 + (i+3) + 1 + ERL_REF_LEN(ep) * 4; + len = 1 + 2 + atom_len_helper(&ep->uval.refval.node) + 1 + ERL_REF_LEN(ep) * 4; break; case ERL_PORT: - /* 1 + N + 4 + 1 where N = 3 + strlen */ - i = strlen((char *)ERL_PORT_NODE(ep)); - len = 9 + i; + len = 1 + atom_len_helper(&ep->uval.portval.node) + 4 + 1; break; case ERL_EMPTY_LIST: @@ -644,31 +655,36 @@ int erl_encode_buf(ETERM *ep, unsigned char **ext) } /* erl_encode_buf */ -/* - * A nice macro to make it look cleaner in the - * cases of PID's,PORT's and REF's below. - * It reads the NODE name from a buffer. - */ -#define READ_THE_NODE(ext,cp,len,i) \ -/* eat first atom, repr. the node */ \ -if (**ext != ERL_ATOM_EXT) \ - return (ETERM *) NULL; \ -*ext += 1; \ -i = (**ext << 8) | (*ext)[1]; \ -cp = (char *) *(ext) + 2; \ -*ext += (i + 2); \ -len = i - -#define STATIC_NODE_BUF_SZ 30 - -#define SET_NODE(node,node_buf,cp,len) \ -if (len >= STATIC_NODE_BUF_SZ) node = erl_malloc(len+1); \ -else node = node_buf; \ -memcpy(node, cp, len); \ -node[len] = '\0' - -#define RESET_NODE(node,len) \ -if (len >= STATIC_NODE_BUF_SZ) free(node) + +static int read_atom(unsigned char** ext, Erl_Atom_data* a) +{ + char buf[MAXATOMLEN_UTF8]; + int offs = 0; + enum erlang_char_encoding enc; + int ret = ei_decode_atom_as((char*)*ext, &offs, buf, MAXATOMLEN_UTF8, + ERLANG_LATIN1|ERLANG_UTF8, NULL, &enc); + *ext += offs; + + if (ret == 0) { + int i = strlen(buf); + char* clone = erl_malloc(i+1); + memcpy(clone, buf, i+1); + + a->latin1 = NULL; + a->lenL = 0; + a->utf8 = NULL; + a->lenU = 0; + if (enc & (ERLANG_LATIN1 | ERLANG_ASCII)) { + a->latin1 = clone; + a->lenL = i; + } + if (enc & (ERLANG_UTF8 | ERLANG_ASCII)) { + a->utf8 = clone; + a->lenU = i; + } + } + return ret; +} /* * The actual DECODE engine. @@ -679,13 +695,13 @@ static ETERM *erl_decode_it(unsigned char **ext) char *cp; ETERM *ep,*tp,*np; unsigned int u,sign; - int i,j,len,arity; + int i,j,arity; double ff; /* Assume we are going to decode an integer */ ep = erl_alloc_eterm(ERL_INTEGER); ERL_COUNT(ep) = 1; - + switch (*(*ext)++) { case ERL_INTEGER_EXT: @@ -774,138 +790,90 @@ static ETERM *erl_decode_it(unsigned char **ext) return ep; case ERL_ATOM_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + ERL_TYPE(ep) = ERL_ATOM; - i = (**ext << 8) | (*ext)[1]; - cp = (char *) *(ext) + 2; - *ext += (i + 2); - ep->uval.aval.len = i; - ep->uval.aval.a = (char *) erl_malloc(i+1); - memcpy(ep->uval.aval.a, cp, i); - ep->uval.aval.a[i]='\0'; + --(*ext); + if (read_atom(ext, &ep->uval.aval.d) < 0) return NULL; return ep; case ERL_PID_EXT: - erl_free_term(ep); - { /* Why not use the constructors? */ - char *node; - char node_buf[STATIC_NODE_BUF_SZ]; + { unsigned int number, serial; unsigned char creation; - ETERM *eterm_p; - READ_THE_NODE(ext,cp,len,i); - SET_NODE(node,node_buf,cp,len); + ERL_TYPE(ep) = ERL_PID; + if (read_atom(ext, &ep->uval.pidval.node) < 0) return NULL; /* get the integers */ -#if 0 - /* FIXME: Remove code or whatever.... - Ints on the wire are big-endian (== network byte order) - so use ntoh[sl]. (But some are little-endian! Arrrgh!) - Also, the libc authors can be expected to optimize them - heavily. However, the marshalling makes no guarantees - about alignments -- so it won't work at all. */ - number = ntohl(*((unsigned int *)*ext)++); - serial = ntohl(*((unsigned int *)*ext)++); -#else number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | ((*ext)[2]) << 8 | ((*ext)[3]); *ext += 4; serial = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | ((*ext)[2]) << 8 | ((*ext)[3]); *ext += 4; -#endif creation = *(*ext)++; - eterm_p = erl_mk_pid(node, number, serial, creation); - RESET_NODE(node,len); - return eterm_p; + erl_mk_pid_helper(ep, number, serial, creation); + return ep; } case ERL_REFERENCE_EXT: - erl_free_term(ep); { - char *node; - char node_buf[STATIC_NODE_BUF_SZ]; - unsigned int number; + unsigned int n[3] = {0, 0, 0}; unsigned char creation; - ETERM *eterm_p; - READ_THE_NODE(ext,cp,len,i); - SET_NODE(node,node_buf,cp,len); + ERL_TYPE(ep) = ERL_REF; + if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL; /* get the integers */ -#if 0 - number = ntohl(*((unsigned int *)*ext)++); -#else - number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | + n[0] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | ((*ext)[2]) << 8 | ((*ext)[3]); *ext += 4; -#endif creation = *(*ext)++; - eterm_p = erl_mk_ref(node, number, creation); - RESET_NODE(node,len); - return eterm_p; + __erl_mk_reference(ep, NULL, 1, n, creation); + return ep; } case ERL_NEW_REFERENCE_EXT: - erl_free_term(ep); { - char *node; - char node_buf[STATIC_NODE_BUF_SZ]; size_t cnt, i; unsigned int n[3]; unsigned char creation; - ETERM *eterm_p; -#if 0 - cnt = ntohs(*((unsigned short *)*ext)++); -#else + ERL_TYPE(ep) = ERL_REF; cnt = ((*ext)[0] << 8) | (*ext)[1]; *ext += 2; -#endif - READ_THE_NODE(ext,cp,len,i); - SET_NODE(node,node_buf,cp,len); + if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL; /* get the integers */ creation = *(*ext)++; for(i = 0; i < cnt; i++) { -#if 0 - n[i] = ntohl(*((unsigned int *)*ext)++); -#else n[i] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | ((*ext)[2]) << 8 | ((*ext)[3]); *ext += 4; -#endif } - eterm_p = __erl_mk_reference(node, cnt, n, creation); - RESET_NODE(node,len); - return eterm_p; + __erl_mk_reference(ep, NULL, cnt, n, creation); + return ep; } case ERL_PORT_EXT: - erl_free_term(ep); { - char *node; - char node_buf[STATIC_NODE_BUF_SZ]; unsigned int number; unsigned char creation; - ETERM *eterm_p; - READ_THE_NODE(ext,cp,len,i); - SET_NODE(node,node_buf,cp,len); + ERL_TYPE(ep) = ERL_PORT; + if (read_atom(ext, &ep->uval.portval.node) < 0) return NULL; /* get the integers */ -#if 0 - number = ntohl(*((unsigned int *)*ext)++); -#else number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 | ((*ext)[2]) << 8 | ((*ext)[3]); *ext += 4; -#endif creation = *(*ext)++; - eterm_p = erl_mk_port(node, number, creation); - RESET_NODE(node,len); - return eterm_p; + erl_mk_port_helper(ep, number, creation); + return ep; } case ERL_NIL_EXT: @@ -1140,6 +1108,9 @@ unsigned char erl_ext_type(unsigned char *ext) case ERL_INTEGER_EXT: return ERL_INTEGER; case ERL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: return ERL_ATOM; case ERL_PID_EXT: return ERL_PID; @@ -1191,6 +1162,9 @@ int erl_ext_size(unsigned char *t) case ERL_SMALL_INTEGER_EXT: case ERL_INTEGER_EXT: case ERL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: case ERL_PID_EXT: case ERL_PORT_EXT: case ERL_REFERENCE_EXT: @@ -1229,15 +1203,32 @@ int erl_ext_size(unsigned char *t) } /* ext_size */ -/* - * A nice macro that eats up the atom pointed to. - */ -#define JUMP_ATOM(ext,i) \ -if (**ext != ERL_ATOM_EXT) \ - return 0; \ -*ext += 1; \ -i = (**ext << 8) | (*ext)[1]; \ -*ext += (i + 2) + +static int jump_atom(unsigned char** ext) +{ + unsigned char* e = *ext; + int len; + + switch (*e++) { + case ERL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + len = (e[0] << 8) | e[1]; + e += (len + 2); + break; + + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + len = e[0]; + e += (len + 1); + break; + + default: + return 0; + } + *ext = e; + return 1; +} + /* * MOVE the POINTER PAST the ENCODED ETERM we @@ -1259,25 +1250,27 @@ static int jump(unsigned char **ext) *ext += 1; break; case ERL_ATOM_EXT: - i = (**ext << 8) | (*ext)[1]; - *ext += (i + 2); + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + jump_atom(ext); break; case ERL_PID_EXT: /* eat first atom */ - JUMP_ATOM(ext,i); + if (!jump_atom(ext)) return 0; *ext += 9; /* Two int's and the creation field */ break; case ERL_REFERENCE_EXT: case ERL_PORT_EXT: /* first field is an atom */ - JUMP_ATOM(ext,i); + if (!jump_atom(ext)) return 0; *ext += 5; /* One int and the creation field */ break; case ERL_NEW_REFERENCE_EXT: n = (**ext << 8) | (*ext)[1]; *ext += 2; /* first field is an atom */ - JUMP_ATOM(ext,i); + if (!jump_atom(ext)) return 0; *ext += 4*n+1; break; case ERL_NIL_EXT: @@ -1425,6 +1418,58 @@ static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2) } /* cmpbytes */ +#define tag2enc(T) ((T)==ERL_ATOM_EXT || (T)==ERL_SMALL_ATOM_EXT ? ERLANG_LATIN1 : ERLANG_UTF8) + +static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1, + unsigned char* s2, int l2, unsigned char tag2) +{ + enum erlang_char_encoding enc1 = tag2enc(tag1); + enum erlang_char_encoding enc2 = tag2enc(tag2); + + if (enc1 == enc2) { + return cmpbytes(s1, l1,s2,l2); + } + + if (enc1 == ERLANG_LATIN1) { + return cmp_latin1_vs_utf8((char*)s1, l1, (char*)s2, l2); + } + else { + return -cmp_latin1_vs_utf8((char*)s2, l2, (char*)s1, l1); + } +} + +int cmp_latin1_vs_utf8(const char* strL, int lenL, const char* strU, int lenU) +{ + unsigned char* sL = (unsigned char*)strL; + unsigned char* sU = (unsigned char*)strU; + unsigned char* sL_end = sL + lenL; + unsigned char* sU_end = sU + lenU; + + while(sL < sL_end && sU < sU_end) { + unsigned char UasL; + if (*sL >= 0x80) { + if (*sU < 0xC4 && (sU+1) < sU_end) { + UasL = ((sU[0] & 0x3) << 6) | (sU[1] & 0x3F); + } + else return -1; + } + else { + UasL = *sU; + } + if (*sL < UasL) return -1; + if (*sL > UasL) return 1; + + sL++; + if (*sU < 0x80) sU++; + else if (*sU < 0xE0) sU += 2; + else if (*sU < 0xF0) sU += 3; + else /*if (*sU < 0xF8)*/ sU += 4; + } + + return (sU >= sU_end) - (sL >= sL_end); /* -1, 0 or 1 */ +} + + #define CMP_EXT_ERROR_CODE 4711 #define CMP_EXT_INT32_BE(AP, BP) \ @@ -1437,9 +1482,8 @@ do { \ #define CMP_EXT_SKIP_ATOM(EP) \ do { \ - if ((EP)[0] != ERL_ATOM_EXT) \ + if (!jump_atom(&(EP))) \ return CMP_EXT_ERROR_CODE; \ - (EP) += 3 + ((EP)[1] << 8 | (EP)[2]); \ } while (0) /* @@ -1561,6 +1605,7 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2) int min, ret,i,j,k; double ff1, ff2; unsigned char *tmp1, *tmp2; + unsigned char tag1, tag2; if ( ((*e1)[0] == ERL_STRING_EXT) && ((*e2)[0] == ERL_LIST_EXT) ) { return cmp_string_list(e1, e2); @@ -1568,8 +1613,10 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2) return -cmp_string_list(e2, e1); } - *e2 += 1; - switch (*(*e1)++) + tag1 = *(*e1)++; + tag2 = *(*e2)++; + i = j = 0; + switch (tag1) { case ERL_SMALL_INTEGER_EXT: if (**e1 < **e2) ret = -1; @@ -1589,11 +1636,17 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2) *e1 += 4; *e2 += 4; return ret; case ERL_ATOM_EXT: - i = (**e1 << 8) | (*e1)[1]; - j = (**e2 << 8) | (*e2)[1]; - ret = cmpbytes(*e1 +2, i, *e2 +2, j); - *e1 += (i + 2); - *e2 += (j + 2); + case ERL_ATOM_UTF8_EXT: + i = (**e1) << 8; (*e1)++; + j = (**e2) << 8; (*e2)++; + /*fall through*/ + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + i |= (**e1); (*e1)++; + j |= (**e2); (*e2)++; + ret = cmpatoms(*e1, i, tag1, *e2, j, tag2); + *e1 += i; + *e2 += j; return ret; case ERL_PID_EXT: { unsigned char *n1 = *e1; @@ -1622,7 +1675,7 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2) } case ERL_PORT_EXT: /* First compare node names ... */ - if (**e1 != ERL_ATOM_EXT || **e2 != ERL_ATOM_EXT) + if (!IS_ERL_ATOM(**e1) || !IS_ERL_ATOM(**e2)) return CMP_EXT_ERROR_CODE; ret = cmp_exe2(e1, e2); *e1 += 5; *e2 += 5; diff --git a/lib/erl_interface/src/legacy/global_whereis.c b/lib/erl_interface/src/legacy/global_whereis.c index 2afb193504..e6c556d907 100644 --- a/lib/erl_interface/src/legacy/global_whereis.c +++ b/lib/erl_interface/src/legacy/global_whereis.c @@ -85,7 +85,16 @@ ETERM *erl_global_whereis(int fd, const char *name, char *node) opid = erl_decode((unsigned char*)buf); /* extract the nodename for the caller */ - if (node) strcpy(node,epid.node); + if (node) { + char* node_str = ERL_PID_NODE(opid); + if (node_str) { + strcpy(node, node_str); + } + else { + erl_free_term(opid); + return NULL; + } + } return opid; } diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c index 0b82ef0e35..65afee89cc 100644 --- a/lib/erl_interface/src/misc/ei_decode_term.c +++ b/lib/erl_interface/src/misc/ei_decode_term.c @@ -32,7 +32,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) { const char* s = buf + *index, * s0 = s; - int len, i, n, sign; + int i, n, sign; char c; if (term == NULL) return -1; @@ -48,20 +48,13 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) case NEW_FLOAT_EXT: return ei_decode_double(buf, index, &term->value.d_val); case ERL_ATOM_EXT: - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.atom_name, s, len); - term->value.atom_name[len] = '\0'; - s += len; - break; + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + return ei_decode_atom(buf, index, term->value.atom_name); case ERL_REFERENCE_EXT: /* first the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.ref.node, s, len); - term->value.ref.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1; /* now the numbers: num (4), creation (1) */ term->value.ref.n[0] = get32be(s); term->value.ref.len = 1; @@ -71,12 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) /* first the integer count */ term->value.ref.len = get16be(s); /* then the nodename */ - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.ref.node, s, len); - term->value.ref.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1; /* creation */ term->value.ref.creation = get8(s) & 0x03; /* finally the id integers */ @@ -88,22 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term) } break; case ERL_PORT_EXT: - if (get8(s) != ERL_ATOM_EXT) return -1; - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.port.node, s, len); - term->value.port.node[len] = '\0'; + if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1; term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */; term->value.port.creation = get8(s) & 0x03; break; case ERL_PID_EXT: - if (get8(s) != ERL_ATOM_EXT) return -1; - /* name first */ - len = get16be(s); - if (len > MAXATOMLEN) return -1; - memcpy(term->value.pid.node, s, len); - term->value.pid.node[len] = '\0'; - s += len; + if (get_atom(&s, term->value.pid.node, &term->value.port.node_org_enc) < 0) return -1; /* now the numbers: num (4), serial (4), creation (1) */ term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */ term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */ diff --git a/lib/erl_interface/src/misc/ei_format.c b/lib/erl_interface/src/misc/ei_format.c index 281a192535..b5f11e618e 100644 --- a/lib/erl_interface/src/misc/ei_format.c +++ b/lib/erl_interface/src/misc/ei_format.c @@ -139,8 +139,8 @@ static int patom(const char** fmt, ei_x_buff* x) --(*fmt); len = *fmt - start; /* FIXME why truncate atom name and not fail?! */ - if (len > MAXATOMLEN) - len = MAXATOMLEN; + if (len >= MAXATOMLEN) + len = MAXATOMLEN-1; return ei_x_encode_atom_len(x, start, len); } diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c index 5fc6b3542c..f3003a6172 100644 --- a/lib/erl_interface/src/misc/ei_printterm.c +++ b/lib/erl_interface/src/misc/ei_printterm.c @@ -115,7 +115,7 @@ static int print_term(FILE* fp, ei_x_buff* x, const char* buf, int* index) { int i, doquote, n, m, ty, r; - char a[MAXATOMLEN+1], *p; + char a[MAXATOMLEN], *p; int ch_written = 0; /* counter of written chars */ erlang_pid pid; erlang_port port; @@ -132,7 +132,10 @@ static int print_term(FILE* fp, ei_x_buff* x, doquote = 0; ei_get_type_internal(buf, index, &ty, &n); switch (ty) { - case ERL_ATOM_EXT: + case ERL_ATOM_EXT: + case ERL_ATOM_UTF8_EXT: + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: if (ei_decode_atom(buf, index, a) < 0) goto err; doquote = !islower((int)a[0]); diff --git a/lib/erl_interface/src/misc/ei_x_encode.c b/lib/erl_interface/src/misc/ei_x_encode.c index fa1e26ccbb..44dcff7664 100644 --- a/lib/erl_interface/src/misc/ei_x_encode.c +++ b/lib/erl_interface/src/misc/ei_x_encode.c @@ -197,18 +197,33 @@ int ei_x_encode_tuple_header(ei_x_buff* x, long n) int ei_x_encode_atom(ei_x_buff* x, const char* s) { - return ei_x_encode_atom_len(x, s, strlen(s)); + return ei_x_encode_atom_len_as(x, s, strlen(s), ERLANG_LATIN1, ERLANG_LATIN1); } int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len) { + return ei_x_encode_atom_len_as(x, s, len, ERLANG_LATIN1, ERLANG_LATIN1); +} + +int ei_x_encode_atom_as(ei_x_buff* x, const char* s, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ + return ei_x_encode_atom_len_as(x, s, strlen(s), from_enc, to_enc); +} + +int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ int i = x->index; - ei_encode_atom_len(NULL, &i, s, len); + ei_encode_atom_len_as(NULL, &i, s, len, from_enc, to_enc); if (!x_fix_buff(x, i)) return -1; - return ei_encode_atom_len(x->buff, &x->index, s, len); + return ei_encode_atom_len_as(x->buff, &x->index, s, len, from_enc, to_enc); } + int ei_x_encode_pid(ei_x_buff* x, const erlang_pid* pid) { int i = x->index; diff --git a/lib/erl_interface/src/misc/get_type.c b/lib/erl_interface/src/misc/get_type.c index 2a680d0f94..54465196b0 100644 --- a/lib/erl_interface/src/misc/get_type.c +++ b/lib/erl_interface/src/misc/get_type.c @@ -33,78 +33,6 @@ int ei_get_type(const char *buf, const int *index, int *type, int *len) return ei_get_type_internal(buf, index, type, len); } -#if 0 -int ei_get_type(const char *buf, const int *index, int *type, int *len) -{ - const char *s = buf + *index; - int itype = get8(s); /* Internal type */ - - *len = 0; - - switch (*type) { - - case ERL_SMALL_INTEGER_EXT: - case ERL_INTEGER_EXT: - case ERL_SMALL_BIG_EXT: - case ERL_LARGE_BIG_EXT: - *type = EI_TYPE_INTEGER; - break; - - case ERL_FLOAT_EXT: - *type = EI_TYPE_FLOAT; - break; - - case ERL_SMALL_TUPLE_EXT: - *len = get8(s); - break; - - case ERL_ATOM_EXT: - case ERL_STRING_EXT: - *len = get16be(s); - break; - - case ERL_LARGE_TUPLE_EXT: - case ERL_LIST_EXT: - case ERL_BINARY_EXT: - *len = get32be(s); - break; - - case ERL_SMALL_BIG_EXT: - *len = (get8(s)+1)/2; /* big arity */ - break; - - case ERL_LARGE_BIG_EXT: - *len = (get32be(s)+1)/2; /* big arity */ - break; - - case ERL_BINARY_EXT: - *type = EI_TYPE_BINARY; - break; - - case ERL_PID_EXT: - *type = EI_TYPE_PID; - break; - - case ERL_PORT_EXT: - *type = EI_TYPE_PORT; - break; - - case ERL_REFERENCE_EXT: - case ERL_NEW_REFERENCE_EXT: - *type = EI_TYPE_REF; - break; - - default: - break; - } - - /* leave index unchanged */ - return 0; -} -#endif - - -/* Old definition of function above */ int ei_get_type_internal(const char *buf, const int *index, int *type, int *len) @@ -114,10 +42,15 @@ int ei_get_type_internal(const char *buf, const int *index, *type = get8(s); switch (*type) { + case ERL_SMALL_ATOM_EXT: + case ERL_SMALL_ATOM_UTF8_EXT: + *type = ERL_ATOM_EXT; case ERL_SMALL_TUPLE_EXT: *len = get8(s); break; - + + case ERL_ATOM_UTF8_EXT: + *type = ERL_ATOM_EXT; case ERL_ATOM_EXT: case ERL_STRING_EXT: *len = get16be(s); diff --git a/lib/erl_interface/src/misc/putget.h b/lib/erl_interface/src/misc/putget.h index 7a43de324b..77ae168f8c 100644 --- a/lib/erl_interface/src/misc/putget.h +++ b/lib/erl_interface/src/misc/putget.h @@ -105,6 +105,13 @@ ((EI_ULONGLONG)((unsigned char *)(s))[-2] << 8) | \ (EI_ULONGLONG)((unsigned char *)(s))[-1])) +int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp); +int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp); +int ei_internal_get_atom(const char** bufp, char* p, enum erlang_char_encoding*); +int ei_internal_put_atom(char** bufp, const char* p, int slen, enum erlang_char_encoding); +#define get_atom ei_internal_get_atom +#define put_atom ei_internal_put_atom + typedef union float_ext { double d; EI_ULONGLONG val; diff --git a/lib/erl_interface/src/misc/show_msg.c b/lib/erl_interface/src/misc/show_msg.c index 194296798b..33b09643ca 100644 --- a/lib/erl_interface/src/misc/show_msg.c +++ b/lib/erl_interface/src/misc/show_msg.c @@ -132,13 +132,13 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf) switch (msg.msgtype) { case ERL_SEND: - if (ei_decode_atom(header,&index,msg.cookie) + if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg.to)) return -1; mbuf = msgbuf; break; case ERL_SEND_TT: - if (ei_decode_atom(header,&index,msg.cookie) + if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) || ei_decode_pid(header,&index,&msg.to) || ei_decode_trace(header,&index,&msg.token)) return -1; mbuf = msgbuf; @@ -146,15 +146,15 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf) case ERL_REG_SEND: if (ei_decode_pid(header,&index,&msg.from) - || ei_decode_atom(header,&index,msg.cookie) - || ei_decode_atom(header,&index,msg.toname)) return -1; + || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)) return -1; mbuf = msgbuf; break; case ERL_REG_SEND_TT: if (ei_decode_pid(header,&index,&msg.from) - || ei_decode_atom(header,&index,msg.cookie) - || ei_decode_atom(header,&index,msg.toname) + || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL) + || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL) || ei_decode_trace(header,&index,&msg.token)) return -1; mbuf = msgbuf; break; @@ -457,7 +457,7 @@ static void show_term(const char *termbuf, int *index, FILE *stream) break; case ERL_FUN_EXT: { - char atom[MAXATOMLEN+1]; + char atom[MAXATOMLEN]; long idx; long uniq; const char* s = termbuf + *index, * s0 = s; diff --git a/lib/erl_interface/src/prog/ei_fake_prog.c b/lib/erl_interface/src/prog/ei_fake_prog.c index 68eb537211..34101a2851 100644 --- a/lib/erl_interface/src/prog/ei_fake_prog.c +++ b/lib/erl_interface/src/prog/ei_fake_prog.c @@ -96,6 +96,7 @@ int main(void) EI_ULONGLONG *ulonglongp = (EI_ULONGLONG*)NULL; EI_ULONGLONG ulonglongx = 0; #endif + enum erlang_char_encoding enc; intx = erl_errno; @@ -148,9 +149,13 @@ int main(void) ei_x_encode_string(&eix, charp); ei_x_encode_string_len(&eix, charp, intx); ei_encode_atom(charp, intp, charp); + ei_encode_atom_as(charp, intp, charp, ERLANG_LATIN1, ERLANG_UTF8); ei_encode_atom_len(charp, intp, charp, intx); + ei_encode_atom_len_as(charp, intp, charp, intx, ERLANG_ASCII, ERLANG_LATIN1); ei_x_encode_atom(&eix, charp); + ei_x_encode_atom_as(&eix, charp, ERLANG_LATIN1, ERLANG_UTF8); ei_x_encode_atom_len(&eix, charp, intx); + ei_x_encode_atom_len_as(&eix, charp, intx, ERLANG_LATIN1, ERLANG_UTF8); ei_encode_binary(charp, intp, (void *)0, longx); ei_x_encode_binary(&eix, (void*)0, intx); ei_encode_pid(charp, intp, &epid); @@ -181,6 +186,7 @@ int main(void) ei_decode_char(charp, intp, charp); ei_decode_string(charp, intp, charp); ei_decode_atom(charp, intp, charp); + ei_decode_atom_as(charp, intp, charp, MAXATOMLEN_UTF8, ERLANG_WHATEVER, &enc, &enc); ei_decode_binary(charp, intp, (void *)0, longp); ei_decode_fun(charp, intp, &efun); free_fun(&efun); diff --git a/lib/erl_interface/test/ei_decode_encode_SUITE.erl b/lib/erl_interface/test/ei_decode_encode_SUITE.erl index 85cb62239b..0c98b494ec 100644 --- a/lib/erl_interface/test/ei_decode_encode_SUITE.erl +++ b/lib/erl_interface/test/ei_decode_encode_SUITE.erl @@ -118,6 +118,13 @@ test_ei_decode_encode(Config) when is_list(Config) -> ?line send_rec(P, OXPort), ?line send_rec(P, OXRef), + %% Unicode atoms + [begin send_rec(P, Atom), + send_rec(P, mk_pid({Atom,1}, 23434, 3434)), + send_rec(P, mk_port({Atom,1}, 2343434)), + send_rec(P, mk_ref({Atom,1}, [262143, 8723648, 24097245])), + void + end || Atom <- unicode_atom_data()], ?line runner:recv_eot(P), ok. @@ -127,7 +134,7 @@ test_ei_decode_encode(Config) when is_list(Config) -> % We read two packets for each test, the ei_decode_encode and ei_x_decode_encode version.... send_rec(P, Term) when is_port(P) -> - ?t:format("Testing: ~p~n", [Term]), + %%?t:format("Testing: ~p~n", [Term]), P ! {self(), {command, term_to_binary(Term)}}, {_B,Term} = get_buf_and_term(P). @@ -146,7 +153,7 @@ get_buf_and_term(P) -> _ -> B1 = list_to_binary([131,B]), % No magic, add T = binary_to_term(B1), - io:format("~w\n~w\n(got no magic)\n",[B,T]), + %io:format("~w\n~w\n(got no magic)\n",[B,T]), {B,T} end. @@ -160,7 +167,7 @@ get_binary(P) -> case runner:get_term(P) of {bytes,L} -> B = list_to_binary(L), - io:format("~w\n",[L]), + %%io:format("~w\n",[L]), % For strange reasons <<131>> show up as <>.... % io:format("~w\n",[B]), B; @@ -226,38 +233,36 @@ uint8(Uint) -> mk_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) -> - mk_pid({atom_to_list(NodeName), Creation}, Number, Serial); -mk_pid({NodeName, Creation}, Number, Serial) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_pid({NodeNameExt, Creation}, Number, Serial); +mk_pid({NodeNameExt, Creation}, Number, Serial) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?PID_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint32_be(Serial), uint8(Creation)])) of Pid when is_pid(Pid) -> Pid; {'EXIT', {badarg, _}} -> - exit({badarg, mk_pid, [{NodeName, Creation}, Number, Serial]}); + exit({badarg, mk_pid, [{NodeNameExt, Creation}, Number, Serial]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. mk_port({NodeName, Creation}, Number) when is_atom(NodeName) -> - mk_port({atom_to_list(NodeName), Creation}, Number); -mk_port({NodeName, Creation}, Number) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_port({NodeNameExt, Creation}, Number); +mk_port({NodeNameExt, Creation}, Number) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?PORT_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint8(Creation)])) of Port when is_port(Port) -> Port; {'EXIT', {badarg, _}} -> - exit({badarg, mk_port, [{NodeName, Creation}, Number]}); + exit({badarg, mk_port, [{NodeNameExt, Creation}, Number]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. @@ -265,33 +270,30 @@ mk_port({NodeName, Creation}, Number) -> mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName), is_integer(Creation), is_list(Numbers) -> - mk_ref({atom_to_list(NodeName), Creation}, Numbers); -mk_ref({NodeName, Creation}, [Number]) when is_list(NodeName), - is_integer(Creation), - is_integer(Number) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_ref({NodeNameExt, Creation}, Numbers); +mk_ref({NodeNameExt, Creation}, [Number]) when is_binary(NodeNameExt), + is_integer(Creation), + is_integer(Number) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?REFERENCE_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint8(Creation)])) of Ref when is_reference(Ref) -> Ref; {'EXIT', {badarg, _}} -> - exit({badarg, mk_ref, [{NodeName, Creation}, [Number]]}); + exit({badarg, mk_ref, [{NodeNameExt, Creation}, [Number]]}); Other -> exit({unexpected_binary_to_term_result, Other}) end; -mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName), - is_integer(Creation), - is_list(Numbers) -> +mk_ref({NodeNameExt, Creation}, Numbers) when is_binary(NodeNameExt), + is_integer(Creation), + is_list(Numbers) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?NEW_REFERENCE_EXT, uint16_be(length(Numbers)), - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint8(Creation), lists:map(fun (N) -> uint32_be(N) @@ -300,8 +302,67 @@ mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName), Ref when is_reference(Ref) -> Ref; {'EXIT', {badarg, _}} -> - exit({badarg, mk_ref, [{NodeName, Creation}, Numbers]}); + exit({badarg, mk_ref, [{NodeNameExt, Creation}, Numbers]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. + + +unicode_atom_data() -> + [uc_atup(lists:seq(16#1f600, 16#1f600+254)), + uc_atup(lists:seq(16#1f600, 16#1f600+63)), + uc_atup(lists:seq(1, 255)), + uc_atup(lists:seq(100, 163)), + uc_atup(lists:seq(200, 354)), + uc_atup(lists:seq(200, 263)), + uc_atup(lists:seq(2000, 2254)), + uc_atup(lists:seq(2000, 2063)), + uc_atup(lists:seq(65500, 65754)), + uc_atup(lists:seq(65500, 65563)) + | lists:map(fun (N) -> + Pow2 = (1 bsl N), + uc_atup(lists:seq(Pow2 - 127, Pow2 + 127)) + end, + lists:seq(7, 20)) + ]. + +uc_atup(ATxt) -> + string_to_atom(ATxt). + +string_to_atom(String) -> + Utf8List = string_to_utf8_list(String), + Len = length(Utf8List), + TagLen = case Len < 256 of + true -> [119, Len]; + false -> [118, Len bsr 8, Len band 16#ff] + end, + binary_to_term(list_to_binary([131, TagLen, Utf8List])). + +string_to_utf8_list([]) -> + []; +string_to_utf8_list([CP|CPs]) when is_integer(CP), + 0 =< CP, + CP =< 16#7F -> + [CP | string_to_utf8_list(CPs)]; +string_to_utf8_list([CP|CPs]) when is_integer(CP), + 16#80 =< CP, + CP =< 16#7FF -> + [16#C0 bor (CP bsr 6), + 16#80 bor (16#3F band CP) + | string_to_utf8_list(CPs)]; +string_to_utf8_list([CP|CPs]) when is_integer(CP), + 16#800 =< CP, + CP =< 16#FFFF -> + [16#E0 bor (CP bsr 12), + 16#80 bor (16#3F band (CP bsr 6)), + 16#80 bor (16#3F band CP) + | string_to_utf8_list(CPs)]; +string_to_utf8_list([CP|CPs]) when is_integer(CP), + 16#10000 =< CP, + CP =< 16#10FFFF -> + [16#F0 bor (CP bsr 18), + 16#80 bor (16#3F band (CP bsr 12)), + 16#80 bor (16#3F band (CP bsr 6)), + 16#80 bor (16#3F band CP) + | string_to_utf8_list(CPs)]. diff --git a/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c b/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c index 406f02ecfb..e57663f984 100644 --- a/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c +++ b/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c @@ -29,171 +29,229 @@ * Author: [email protected] */ -#define EI_DECODE_ENCODE(FUNC,TYPE) \ - { \ - char *buf; \ - char buf2[1024]; \ - TYPE p; \ - int size1 = 0; \ - int size2 = 0; \ - int size3 = 0; \ - int err; \ - ei_x_buff arg; \ -\ - message("ei_decode_" #FUNC ", arg is type " #TYPE); \ - buf = read_packet(NULL); \ - err = ei_decode_ ## FUNC(buf+1, &size1, &p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("decode returned non zero but not -1"); \ - } else { \ - fail("decode returned non zero"); \ - } \ - return; \ - } \ - if (size1 < 1) { \ - fail("size is < 1"); \ - return; \ - } \ -\ - message("ei_encode_" #FUNC " buf is NULL, arg is type " #TYPE); \ - err = ei_encode_ ## FUNC(NULL, &size2, &p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("size calculation returned non zero but not -1"); \ - return; \ - } else { \ - fail("size calculation returned non zero"); \ - return; \ - } \ - } \ - if (size1 != size2) { \ - message("size1 = %d, size2 = %d\n",size1,size2); \ - fail("decode and encode size differs when buf is NULL"); \ - return; \ - } \ - message("ei_encode_" #FUNC ", arg is type " #TYPE); \ - err = ei_encode_ ## FUNC(buf2, &size3, &p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("returned non zero but not -1"); \ - } else { \ - fail("returned non zero"); \ - } \ - return; \ - } \ - if (size1 != size3) { \ - message("size1 = %d, size2 = %d\n",size1,size3); \ - fail("decode and encode size differs"); \ - return; \ - } \ - send_buffer(buf2, size1); \ -\ - message("ei_x_encode_" #FUNC ", arg is type " #TYPE); \ - ei_x_new(&arg); \ - err = ei_x_encode_ ## FUNC(&arg, &p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("returned non zero but not -1"); \ - } else { \ - fail("returned non zero"); \ - } \ - ei_x_free(&arg); \ - return; \ - } \ - if (arg.index < 1) { \ - fail("size is < 1"); \ - ei_x_free(&arg); \ - return; \ - } \ - send_buffer(arg.buff, arg.index); \ - ei_x_free(&arg); \ - } - -#define EI_DECODE_ENCODE_BIG(FUNC,TYPE) \ - { \ - char *buf; \ - char buf2[2048]; \ - TYPE *p; \ - int size1 = 0; \ - int size2 = 0; \ - int size3 = 0; \ - int err, index = 0, len, type; \ - ei_x_buff arg; \ -\ - message("ei_decode_" #FUNC ", arg is type " #TYPE); \ - buf = read_packet(NULL); \ - ei_get_type(buf+1, &index, &type, &len); \ - p = ei_alloc_big(len); \ - err = ei_decode_ ## FUNC(buf+1, &size1, p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("decode returned non zero but not -1"); \ - } else { \ - fail("decode returned non zero"); \ - } \ - return; \ - } \ - if (size1 < 1) { \ - fail("size is < 1"); \ - return; \ - } \ -\ - message("ei_encode_" #FUNC " buf is NULL, arg is type " #TYPE); \ - err = ei_encode_ ## FUNC(NULL, &size2, p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("size calculation returned non zero but not -1"); \ - return; \ - } else { \ - fail("size calculation returned non zero"); \ - return; \ - } \ - } \ - if (size1 != size2) { \ - message("size1 = %d, size2 = %d\n",size1,size2); \ - fail("decode and encode size differs when buf is NULL"); \ - return; \ - } \ - message("ei_encode_" #FUNC ", arg is type " #TYPE); \ - err = ei_encode_ ## FUNC(buf2, &size3, p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("returned non zero but not -1"); \ - } else { \ - fail("returned non zero"); \ - } \ - return; \ - } \ - if (size1 != size3) { \ - message("size1 = %d, size2 = %d\n",size1,size3); \ - fail("decode and encode size differs"); \ - return; \ - } \ - send_buffer(buf2, size1); \ -\ - message("ei_x_encode_" #FUNC ", arg is type " #TYPE); \ - ei_x_new(&arg); \ - err = ei_x_encode_ ## FUNC(&arg, p); \ - if (err != 0) { \ - if (err != -1) { \ - fail("returned non zero but not -1"); \ - } else { \ - fail("returned non zero"); \ - } \ - ei_x_free(&arg); \ - return; \ - } \ - if (arg.index < 1) { \ - fail("size is < 1"); \ - ei_x_free(&arg); \ - return; \ - } \ - send_buffer(arg.buff, arg.index); \ - ei_x_free(&arg); \ - ei_free_big(p); \ - } +/*#define MESSAGE(FMT,A1,A2) message(FMT,A1,A2)*/ +#define MESSAGE(FMT,A1,A2) +typedef int decodeFT(const char *buf, int *index, void*); +typedef int encodeFT(char *buf, int *index, void*); +typedef int x_encodeFT(ei_x_buff*, void*); + +struct Type { + char* name; + char* type; + decodeFT* ei_decode_fp; + encodeFT* ei_encode_fp; + x_encodeFT* ei_x_encode_fp; +}; + +typedef struct +{ + char name[MAXATOMLEN_UTF8]; + enum erlang_char_encoding enc; +}my_atom; + +int ei_decode_my_atom(const char *buf, int *index, my_atom* a) +{ + return ei_decode_atom_as(buf, index, a->name, sizeof(a->name), ERLANG_UTF8, &a->enc, NULL); +} +int ei_encode_my_atom(char *buf, int *index, my_atom* a) +{ + return ei_encode_atom_as(buf, index, a->name, ERLANG_UTF8, a->enc); +} +int ei_x_encode_my_atom(ei_x_buff* x, my_atom* a) +{ + return ei_x_encode_atom_as(x, a->name, ERLANG_UTF8, a->enc); +} + +#define BUFSZ 2000 + +void decode_encode(struct Type* t, void* obj) +{ + char *buf; + char buf2[BUFSZ]; + int size1 = 0; + int size2 = 0; + int size3 = 0; + int err; + ei_x_buff arg; + + MESSAGE("ei_decode_%s, arg is type %s", t->name, t->type); + buf = read_packet(NULL); + err = t->ei_decode_fp(buf+1, &size1, obj); + if (err != 0) { + if (err != -1) { + fail("decode returned non zero but not -1"); + } else { + fail("decode returned non zero"); + } + return; + } + if (size1 < 1) { + fail("size is < 1"); + return; + } + + if (size1 > BUFSZ) { + fail("size is > BUFSZ"); + return; + } + + MESSAGE("ei_encode_%s buf is NULL, arg is type %s", t->name, t->type); + err = t->ei_encode_fp(NULL, &size2, obj); + if (err != 0) { + if (err != -1) { + fail("size calculation returned non zero but not -1"); + return; + } else { + fail("size calculation returned non zero"); + return; + } + } + if (size1 != size2) { + MESSAGE("size1 = %d, size2 = %d\n",size1,size2); + fail("decode and encode size differs when buf is NULL"); + return; + } + MESSAGE("ei_encode_%s, arg is type %s", t->name, t->type); + err = t->ei_encode_fp(buf2, &size3, obj); + if (err != 0) { + if (err != -1) { + fail("returned non zero but not -1"); + } else { + fail("returned non zero"); + } + return; + } + if (size1 != size3) { + MESSAGE("size1 = %d, size2 = %d\n",size1,size3); + fail("decode and encode size differs"); + return; + } + send_buffer(buf2, size1); + + MESSAGE("ei_x_encode_%s, arg is type %s", t->name, t->type); + ei_x_new(&arg); + err = t->ei_x_encode_fp(&arg, obj); + if (err != 0) { + if (err != -1) { + fail("returned non zero but not -1"); + } else { + fail("returned non zero"); + } + ei_x_free(&arg); + return; + } + if (arg.index < 1) { + fail("size is < 1"); + ei_x_free(&arg); + return; + } + send_buffer(arg.buff, arg.index); + ei_x_free(&arg); +} + + +#define EI_DECODE_ENCODE(TYPE, ERLANG_TYPE) { \ + struct Type type_struct = {#TYPE, #ERLANG_TYPE, \ + (decodeFT*)ei_decode_##TYPE, \ + (encodeFT*)ei_encode_##TYPE, \ + (x_encodeFT*)ei_x_encode_##TYPE }; \ + ERLANG_TYPE type_obj; \ + decode_encode(&type_struct, &type_obj); \ + } + + +void decode_encode_big(struct Type* t) +{ + char *buf; + char buf2[2048]; + void *p; /* (TYPE*) */ + int size1 = 0; + int size2 = 0; + int size3 = 0; + int err, index = 0, len, type; + ei_x_buff arg; + + MESSAGE("ei_decode_%s, arg is type %s", t->name, t->type); + buf = read_packet(NULL); + ei_get_type(buf+1, &index, &type, &len); + p = ei_alloc_big(len); + err = t->ei_decode_fp(buf+1, &size1, p); + if (err != 0) { + if (err != -1) { + fail("decode returned non zero but not -1"); + } else { + fail("decode returned non zero"); + } + return; + } + if (size1 < 1) { + fail("size is < 1"); + return; + } + + MESSAGE("ei_encode_%s buf is NULL, arg is type %s", t->name, t->type); + err = t->ei_encode_fp(NULL, &size2, p); + if (err != 0) { + if (err != -1) { + fail("size calculation returned non zero but not -1"); + return; + } else { + fail("size calculation returned non zero"); + return; + } + } + if (size1 != size2) { + MESSAGE("size1 = %d, size2 = %d\n",size1,size2); + fail("decode and encode size differs when buf is NULL"); + return; + } + MESSAGE("ei_encode_%s, arg is type %s", t->name, t->type); + err = t->ei_encode_fp(buf2, &size3, p); + if (err != 0) { + if (err != -1) { + fail("returned non zero but not -1"); + } else { + fail("returned non zero"); + } + return; + } + if (size1 != size3) { + MESSAGE("size1 = %d, size2 = %d\n",size1,size3); + fail("decode and encode size differs"); + return; + } + send_buffer(buf2, size1); + + MESSAGE("ei_x_encode_%s, arg is type %s", t->name, t->type); + ei_x_new(&arg); + err = t->ei_x_encode_fp(&arg, p); + if (err != 0) { + if (err != -1) { + fail("returned non zero but not -1"); + } else { + fail("returned non zero"); + } + ei_x_free(&arg); + return; + } + if (arg.index < 1) { + fail("size is < 1"); + ei_x_free(&arg); + return; + } + send_buffer(arg.buff, arg.index); + ei_x_free(&arg); + ei_free_big(p); +} + +#define EI_DECODE_ENCODE_BIG(TYPE, ERLANG_TYPE) { \ + struct Type type_struct = {#TYPE, #ERLANG_TYPE, \ + (decodeFT*)ei_decode_##TYPE, \ + (encodeFT*)ei_encode_##TYPE, \ + (x_encodeFT*)ei_x_encode_##TYPE }; \ + decode_encode_big(&type_struct); \ + } @@ -201,6 +259,8 @@ TESTCASE(test_ei_decode_encode) { + int i; + EI_DECODE_ENCODE(fun , erlang_fun); EI_DECODE_ENCODE(pid , erlang_pid); EI_DECODE_ENCODE(port , erlang_port); @@ -223,6 +283,14 @@ TESTCASE(test_ei_decode_encode) EI_DECODE_ENCODE(port , erlang_port); EI_DECODE_ENCODE(ref , erlang_ref); + /* Unicode atoms */ + for (i=0; i<24; i++) { + EI_DECODE_ENCODE(my_atom, my_atom); + EI_DECODE_ENCODE(pid, erlang_pid); + EI_DECODE_ENCODE(port, erlang_port); + EI_DECODE_ENCODE(ref, erlang_ref); + } + report(1); } diff --git a/lib/ic/examples/all-against-all/client.c b/lib/ic/examples/all-against-all/client.c index e0a52b142d..5dece9cfa6 100644 --- a/lib/ic/examples/all-against-all/client.c +++ b/lib/ic/examples/all-against-all/client.c @@ -88,6 +88,7 @@ int main(){ /* Initiating pid*/ strcpy(pid.node,client_node); + pid.node_org_enc = ERLANG_LATIN1; pid.num = 99; pid.serial = 0; pid.creation = 0; diff --git a/lib/ic/examples/c-client/client.c b/lib/ic/examples/c-client/client.c index 816477cf15..5b11510ce3 100644 --- a/lib/ic/examples/c-client/client.c +++ b/lib/ic/examples/c-client/client.c @@ -64,6 +64,7 @@ int main() /* Initiating pid*/ strcpy(pid.node,CLNODE); + pid.node_org_enc = ERLANG_LATIN1; pid.num = 99; pid.serial = 0; pid.creation = 0; diff --git a/lib/ic/examples/c-server/client.c b/lib/ic/examples/c-server/client.c index fa570089b5..605e41ddb1 100644 --- a/lib/ic/examples/c-server/client.c +++ b/lib/ic/examples/c-server/client.c @@ -58,6 +58,7 @@ int main() /* Initiating pid*/ strcpy(pid.node, CLNODE); + pid.node_org_enc = ERLANG_LATIN1; pid.num = 99; pid.serial = 0; pid.creation = 0; diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java index 16cb544a16..c76fad5e45 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java @@ -90,6 +90,8 @@ public class AbstractNode { static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED static final int dFlagBitBinaries = 0x400; static final int dFlagNewFloats = 0x800; + static final int dFlagUnicodeIo = 0x1000; + static final int dFlagUtf8Atoms = 0x10000; int ntype = NTYPE_R6; int proto = 0; // tcp/ip @@ -98,7 +100,7 @@ public class AbstractNode { int creation = 0; int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts | dFlagBitBinaries | dFlagNewFloats | dFlagFunTags - | dflagNewFunTags; + | dflagNewFunTags | dFlagUtf8Atoms; /* initialize hostname and default cookie */ static { diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java index ced4dbb8c2..2768edc6fa 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java @@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable, "null string value"); } - if (atom.length() > maxAtomLength) { + if (atom.codePointCount(0, atom.length()) > maxAtomLength) { throw new java.lang.IllegalArgumentException("Atom may not exceed " + maxAtomLength + " characters: " + atom); } diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java index e70b9a786b..2a4cd4fa2d 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java @@ -88,6 +88,12 @@ public class OtpExternal { /** The tag used for old Funs */ public static final int funTag = 117; + /** The tag used for unicode atoms */ + public static final int atomUtf8Tag = 118; + + /** The tag used for small unicode atoms */ + public static final int smallAtomUtf8Tag = 119; + /** The tag used for compressed terms */ public static final int compressedTag = 80; diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java index ae5f4ee072..c2a79af841 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java @@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream { */ public String read_atom() throws OtpErlangDecodeException { int tag; - int len; + int len = -1; byte[] strbuf; String atom; tag = read1skip_version(); - if (tag != OtpExternal.atomTag) { - throw new OtpErlangDecodeException( - "wrong tag encountered, expected " + OtpExternal.atomTag - + ", got " + tag); - } + switch (tag) { - len = read2BE(); + case OtpExternal.atomTag: + len = read2BE(); + strbuf = new byte[len]; + this.readN(strbuf); + try { + atom = new String(strbuf, "ISO-8859-1"); + } catch (final java.io.UnsupportedEncodingException e) { + throw new OtpErlangDecodeException( + "Failed to decode ISO-8859-1 atom"); + } + if (atom.length() > OtpExternal.maxAtomLength) { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + atom = atom.substring(0, OtpExternal.maxAtomLength); + } + break; - strbuf = new byte[len]; - this.readN(strbuf); - atom = OtpErlangString.newString(strbuf); + case OtpExternal.smallAtomUtf8Tag: + len = read1(); + /* fall through */ + case OtpExternal.atomUtf8Tag: + if (len < 0) { + len = read2BE(); + } + strbuf = new byte[len]; + this.readN(strbuf); + try { + atom = new String(strbuf, "UTF-8"); + } catch (final java.io.UnsupportedEncodingException e) { + throw new OtpErlangDecodeException( + "Failed to decode UTF-8 atom"); + } + if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + final int[] cps = OtpErlangString.stringToCodePoints(atom); + atom = new String(cps, 0, OtpExternal.maxAtomLength); + } + break; - if (atom.length() > OtpExternal.maxAtomLength) { - atom = atom.substring(0, OtpExternal.maxAtomLength); + default: + throw new OtpErlangDecodeException( + "wrong tag encountered, expected " + OtpExternal.atomTag + + ", or " + OtpExternal.atomUtf8Tag + ", got " + tag); } return atom; @@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream { return new OtpErlangLong(this); case OtpExternal.atomTag: + case OtpExternal.smallAtomUtf8Tag: + case OtpExternal.atomUtf8Tag: return new OtpErlangAtom(this); case OtpExternal.floatTag: diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java index 22ebb4688a..10bdf389cd 100644 --- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java +++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java @@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream { * the string to write. */ public void write_atom(final String atom) { - write1(OtpExternal.atomTag); - write2BE(atom.length()); - writeN(atom.getBytes()); + String enc_atom; + byte[] bytes; + boolean isLatin1 = true; + + if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) { + enc_atom = atom; + } + else { + /* + * Throwing an exception would be better I think, + * but truncation seems to be the way it has + * been done in other parts of OTP... + */ + enc_atom = new String(OtpErlangString.stringToCodePoints(atom), + 0, OtpExternal.maxAtomLength); + } + + for (int offset = 0; offset < enc_atom.length();) { + final int cp = enc_atom.codePointAt(offset); + if ((cp & ~0xFF) != 0) { + isLatin1 = false; + break; + } + offset += Character.charCount(cp); + } + try { + if (isLatin1) { + bytes = enc_atom.getBytes("ISO-8859-1"); + write1(OtpExternal.atomTag); + write2BE(bytes.length); + } + else { + bytes = enc_atom.getBytes("UTF-8"); + final int length = bytes.length; + if (length < 256) { + write1(OtpExternal.smallAtomUtf8Tag); + write1(length); + } + else { + write1(OtpExternal.atomUtf8Tag); + write2BE(length); + } + } + writeN(bytes); + } catch (final java.io.UnsupportedEncodingException e) { + /* + * Sigh, why didn't the API designer add an + * OtpErlangEncodeException to these encoding + * functions?!? Instead of changing the API we + * write an invalid atom and let it fail for + * whoever trying to decode this... Sigh, + * again... + */ + write1(OtpExternal.smallAtomUtf8Tag); + write1(2); + write2BE(0xffff); /* Invalid UTF-8 */ + } } /** diff --git a/lib/jinterface/test/nc_SUITE.erl b/lib/jinterface/test/nc_SUITE.erl index c91c743498..546c600116 100644 --- a/lib/jinterface/test/nc_SUITE.erl +++ b/lib/jinterface/test/nc_SUITE.erl @@ -23,6 +23,15 @@ -include_lib("common_test/include/ct.hrl"). -include("test_server_line.hrl"). +-define(VERSION_MAGIC, 131). + +-define(ATOM_EXT, 100). +-define(REFERENCE_EXT, 101). +-define(PORT_EXT, 102). +-define(PID_EXT, 103). +-define(NEW_REFERENCE_EXT, 114). +-define(ATOM_UTF8_EXT, 118). +-define(SMALL_ATOM_UTF8_EXT, 119). -export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2, init_per_suite/1, @@ -46,6 +55,10 @@ unicode/1, unicode_list_to_string/1, unicode_string_to_list/1, + utf8_atom/1, + utf8_pid/1, + utf8_port/1, + utf8_ref/1, connect/1]). @@ -59,7 +72,9 @@ all() -> decompress_roundtrip, compress_roundtrip, integer_roundtrip, fun_roundtrip, lists_roundtrip, lists_roundtrip_2, lists_iterator, unicode, - unicode_list_to_string, unicode_string_to_list, connect]. + unicode_list_to_string, unicode_string_to_list, + utf8_atom, utf8_pid, utf8_port, utf8_ref, + connect]. groups() -> []. @@ -449,6 +464,71 @@ unicode_string_to_list(Config) when is_list(Config) -> end, ["unicode"]). +evil_smiley() -> + <<240,159,152,136>>. + +evil_smileys(0) -> + []; +evil_smileys(N) -> + [evil_smiley() | evil_smileys(N-1)]. + +utf8_atom(Config) when is_list(Config) -> + ES = evil_smiley(), + SmallUA = binary_to_term(list_to_binary([?VERSION_MAGIC, + ?SMALL_ATOM_UTF8_EXT, + size(ES), + ES])), + true = is_atom(SmallUA), + NoESs = 300 div size(ES), + ESs = evil_smileys(NoESs), + LargeUA = binary_to_term(list_to_binary([?VERSION_MAGIC, + ?ATOM_UTF8_EXT, + uint16_be(NoESs*size(ES)), + ESs])), + true = is_atom(LargeUA), + erlang:display({atom, SmallUA, LargeUA}), + do_echo([SmallUA, LargeUA], Config). + +utf8_nodenames_ext() -> + H = "@host", + ES = evil_smiley(), + SmallUANodeExt = list_to_binary([?SMALL_ATOM_UTF8_EXT, + size(ES)+length(H), + ES, + H]), + NoESs = 300 div size(ES), + ESs = evil_smileys(NoESs), + LargeUANodeExt = list_to_binary([?ATOM_UTF8_EXT, + uint16_be(NoESs*size(ES)+length(H)), + ESs, + H]), + {SmallUANodeExt, LargeUANodeExt}. + +utf8_pid(Config) when is_list(Config) -> + {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(), + SmallPid = mk_pid({SmallUANodeExt, 2}, 4711, 4711), + LargePid = mk_pid({LargeUANodeExt, 2}, 4711, 4711), + erlang:display({pid, SmallPid, node(SmallPid)}), + erlang:display({pid, LargePid, node(LargePid)}), + do_echo([SmallPid, LargePid], Config). + +utf8_port(Config) when is_list(Config) -> + {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(), + SmallPort = mk_port({SmallUANodeExt, 2}, 4711), + erlang:display({port, SmallPort, node(SmallPort)}), + LargePort = mk_port({LargeUANodeExt, 2}, 4711), + erlang:display({port, LargePort, node(LargePort)}), + do_echo([SmallPort, LargePort], Config). + +utf8_ref(Config) when is_list(Config) -> + {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(), + SmallRef = mk_ref({SmallUANodeExt, 2}, [4711, 4711, 4711]), + erlang:display({ref, SmallRef, node(SmallRef)}), + LargeRef = mk_ref({LargeUANodeExt, 2}, [4711, 4711, 4711]), + erlang:display({ref, LargeRef, node(LargeRef)}), + do_echo([SmallRef, LargeRef], Config). + + %% Lazy list cp_gen(N) -> cp_gen(N, -1, 16#110000). @@ -647,16 +727,6 @@ make_name() -> ++ "-" ++ integer_to_list(B) ++ "-" ++ integer_to_list(C)). - - --define(VERSION_MAGIC, 131). - --define(ATOM_EXT, 100). --define(REFERENCE_EXT, 101). --define(PORT_EXT, 102). --define(PID_EXT, 103). --define(NEW_REFERENCE_EXT, 114). - uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 -> [(Uint bsr 24) band 16#ff, (Uint bsr 16) band 16#ff, @@ -680,72 +750,70 @@ uint8(Uint) -> mk_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) -> - mk_pid({atom_to_list(NodeName), Creation}, Number, Serial); -mk_pid({NodeName, Creation}, Number, Serial) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_pid({NodeNameExt, Creation}, Number, Serial); +mk_pid({NodeNameExt, Creation}, Number, Serial) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?PID_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint32_be(Serial), uint8(Creation)])) of Pid when is_pid(Pid) -> Pid; {'EXIT', {badarg, _}} -> - exit({badarg, mk_pid, [{NodeName, Creation}, Number, Serial]}); + exit({badarg, mk_pid, [{NodeNameExt, Creation}, Number, Serial]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. mk_port({NodeName, Creation}, Number) when is_atom(NodeName) -> - mk_port({atom_to_list(NodeName), Creation}, Number); -mk_port({NodeName, Creation}, Number) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_port({NodeNameExt, Creation}, Number); +mk_port({NodeNameExt, Creation}, Number) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?PORT_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint8(Creation)])) of Port when is_port(Port) -> Port; {'EXIT', {badarg, _}} -> - exit({badarg, mk_port, [{NodeName, Creation}, Number]}); + exit({badarg, mk_port, [{NodeNameExt, Creation}, Number]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. -mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName), - is_integer(Creation), - is_list(Numbers) -> - mk_ref({atom_to_list(NodeName), Creation}, Numbers); -mk_ref({NodeName, Creation}, [Number]) when is_list(NodeName), - is_integer(Creation), - is_integer(Number) -> +mk_ref({NodeName, Creation}, [Number] = NL) when is_atom(NodeName), + is_integer(Creation), + is_integer(Number) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_ref({NodeNameExt, Creation}, NL); +mk_ref({NodeNameExt, Creation}, [Number]) when is_integer(Creation), + is_integer(Number) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?REFERENCE_EXT, - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint32_be(Number), uint8(Creation)])) of Ref when is_reference(Ref) -> Ref; {'EXIT', {badarg, _}} -> - exit({badarg, mk_ref, [{NodeName, Creation}, [Number]]}); + exit({badarg, mk_ref, [{NodeNameExt, Creation}, [Number]]}); Other -> exit({unexpected_binary_to_term_result, Other}) end; -mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName), +mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName), is_integer(Creation), is_list(Numbers) -> + <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName), + mk_ref({NodeNameExt, Creation}, Numbers); +mk_ref({NodeNameExt, Creation}, Numbers) when is_integer(Creation), + is_list(Numbers) -> case catch binary_to_term(list_to_binary([?VERSION_MAGIC, ?NEW_REFERENCE_EXT, uint16_be(length(Numbers)), - ?ATOM_EXT, - uint16_be(length(NodeName)), - NodeName, + NodeNameExt, uint8(Creation), lists:map(fun (N) -> uint32_be(N) @@ -754,7 +822,7 @@ mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName), Ref when is_reference(Ref) -> Ref; {'EXIT', {badarg, _}} -> - exit({badarg, mk_ref, [{NodeName, Creation}, Numbers]}); + exit({badarg, mk_ref, [{NodeNameExt, Creation}, Numbers]}); Other -> exit({unexpected_binary_to_term_result, Other}) end. diff --git a/lib/kernel/include/dist.hrl b/lib/kernel/include/dist.hrl index 5b52f6f294..91e13d99a9 100644 --- a/lib/kernel/include/dist.hrl +++ b/lib/kernel/include/dist.hrl @@ -36,3 +36,4 @@ -define(DFLAG_UNICODE_IO,16#1000). -define(DFLAG_DIST_HDR_ATOM_CACHE,16#2000). -define(DFLAG_SMALL_ATOM_TAGS, 16#4000). +-define(DFLAG_UTF8_ATOMS, 16#10000). diff --git a/lib/kernel/internal_doc/distribution_handshake.txt b/lib/kernel/internal_doc/distribution_handshake.txt index 6a3ee22ed3..d00c4ceb02 100644 --- a/lib/kernel/internal_doc/distribution_handshake.txt +++ b/lib/kernel/internal_doc/distribution_handshake.txt @@ -1,215 +1 @@ -HOW THE DISTRIBUTION HANDSHAKE WORKS ------------------------------------- - -This document describes the distribution handshake introduced in -the R6 release of Erlang/OTP. - -GENERAL -------- - -The TCP/IP distribution uses a handshake which expects a -connection based protocol, i.e. the protocol does not include -any authentication after the handshake procedure. - -This is not entirely safe, as it is vulnerable against takeover -attacks, but it is a tradeoff between fair safety and performance. - -The cookies are never sent in cleartext and the handshake procedure -expects the client (called A) to be the first one to prove that it can -generate a sufficient digest. The digest is generated with the -MD5 message digest algorithm and the challenges are expected to be very -random numbers. - -DEFINITIONS ------------ - -A challenge is a 32 bit integer number in big endian order. Below the function -gen_challenge() returns a random 32 bit integer used as a challenge. - -A digest is a (16 bytes) MD5 hash of [the Challenge (as text) concatenated -with the cookie (as text)]. Below, the function gen_digest(Challenge, Cookie) -generates a digest as described above. - -An out_cookie is the cookie used in outgoing communication to a certain node, -so that A's out_cookie for B should correspond with B's in_cookie for A and -the other way around. A's out_cookie for B and A's in_cookie for B need *NOT* -be the same. Below the function out_cookie(Node) returns the current -node's out_cookie for Node. - -An in_cookie is the cookie expected to be used by another node when -communicating with us, so that A's in_cookie for B corresponds with B's -out_cookie for A. Below the function in_cookie(Node) returns the current -node's in_cookie for Node. - -The cookies are text strings that can be viewed as passwords. - -Every message in the handshake starts with a 16 bit big endian integer -which contains the length of the message (not counting the two initial bytes). -In erlang this corresponds to the gen_tcp option {packet, 2}. Note that after -the handshake, the distribution switches to 4 byte packet headers. - -THE HANDSHAKE IN DETAIL ------------------------ - -Imagine two nodes, node A, which initiates the handshake and node B, which -accepts the connection. - -1) connect/accept: A connects to B via TCP/IP and B accepts the connection. - -2) send_name/receive_name: A sends an initial identification to B. -B receives the message. The message looks -like this (every "square" being one byte and the packet header removed): - -+---+--------+--------+-----+-----+-----+-----+-----+-----+-...-+-----+ -|'n'|Version0|Version1|Flag0|Flag1|Flag2|Flag3|Name0|Name1| ... |NameN| -+---+--------+--------+-----+-----+-----+-----+-----+-----+-... +-----+ - -The 'n' is just a message tag, -Version0 & Version1 is the distribution version selected by node A, - based on information from EPMD. (16 bit big endian) -Flag0 ... Flag3 are capability flags, the capabilities defined in dist.hrl. - (32 bit big endian) -Name0 ... NameN is the full nodename of A, as a string of bytes (the - packet length denotes how long it is). - -3) recv_status/send_status: B sends a status message to A, which indicates -if the connection is allowed. Four different status codes are defined: -ok: The handshake will continue. -ok_simultaneous: The handshake will continue, but A is informed that B - has another ongoing connection attempt that will be - shut down (simultaneous connect where A's name is - greater than B's name, compared literally), -nok: The handshake will not continue, as B already has an ongoing handshake - which it itself has initiated. (simultaneous connect where B's name is - greater than A's) -not_allowed: The connection is disallowed for some (unspecified) security - reason. -alive: A connection to the node is already active, which either means - that node A is confused or that the TCP connection breakdown - of a previous node with this name has not yet reached node B. - See 3B below. - -This is the format of the status message: - -+---+-------+-------+-...-+-------+ -|'s'|Status0|Status1| ... |StatusN| -+---+-------+-------+-...-+-------+ - -'s' is the message tag -Status0 ... StatusN is the status as a string (not terminated) - -3B) send_status/recv_status: If status was 'alive', node A will answer with -another status message containing either 'true' which means that the -connection should continue (The old connection from this node is broken), or -'false', which simply means that the connection should be closed, the -connection attempt was a mistake. - -4) recv_challenge/send_challenge: If the status was 'ok' or 'ok_simultaneous', -The handshake continues with B sending A another message, the challenge. -The challenge contains the same type of information as the "name" message -initially sent from A to B, with the addition of a 32 bit challenge: - -+---+--------+--------+-----+-----+-----+-----+-----+-----+-----+-----+--- -|'n'|Version0|Version1|Flag0|Flag1|Flag2|Flag3|Chal0|Chal1|Chal2|Chal3| -+---+--------+--------+-----+-----+-----+-----+-----+-----+-----+-----+--- - ------+-----+-...-+-----+ - Name0|Name1| ... |NameN| - ------+-----+-... +-----+ - -Where Chal0 ... Chal3 is the challenge as a 32 bit big endian integer -and the other fields are B's version, flags and full nodename. - -5) send_challenge_reply/recv_challenge_reply: Now A has generated -a digest and its own challenge. Those are sent together in a package -to B: - -+---+-----+-----+-----+-----+-----+-----+-----+-----+-...-+------+ -|'r'|Chal0|Chal1|Chal2|Chal3|Dige0|Dige1|Dige2|Dige3| ... |Dige15| -+---+-----+-----+-----+-----+-----+-----+-----+-----+-...-+------+ - -Where 'r' is the tag, Chal0 ... Chal3 is A's challenge for B to handle and -Dige0 ... Dige15 is the digest that A constructed from the challenge B sent -in the previous step. - -6) recv_challenge_ack/send_challenge_ack: B checks that the digest received -from A is correct and generates a digest from the challenge received from -A. The digest is then sent to A. The message looks like this: - -+---+-----+-----+-----+-----+-...-+------+ -|'a'|Dige0|Dige1|Dige2|Dige3| ... |Dige15| -+---+-----+-----+-----+-----+-...-+------+ - -Where 'a' is the tag and Dige0 ... Dige15 is the digest calculated by B -for A's challenge. - -7) A checks the digest from B and the connection is up. - -SEMIGRAPHIC VIEW ----------------- - -A (initiator) B (acceptor) - -TCP connect -----------------------------------------> - TCP accept - -send_name -----------------------------------------> - recv_name - - <---------------------------------------- send_status -recv_status -(if status was 'alive' - send_status - - - - - - - - - - - - - - - - - - - -> - recv_status) - ChB = gen_challenge() - (ChB) - <---------------------------------------- send_challenge -recv_challenge - -ChA = gen_challenge(), -OCA = out_cookie(B), -DiA = gen_digest(ChB,OCA) - (ChA, DiA) -send_challenge_reply --------------------------------> - recv_challenge_reply - ICB = in_cookie(A), - check: - DiA == gen_digest - (ChB, ICB) ? - - if OK: - OCB = out_cookie(A), - DiB = gen_digest - (DiB) (ChA, OCB) - <----------------------------------------- send_challenge_ack -recv_challenge_ack DONE -ICA = in_cookie(B), - else -check: CLOSE -DiB == gen_digest(ChA,ICA) ? -- if OK - DONE -- else - CLOSE - - -THE CURRENTLY DEFINED FLAGS ---------------------------- -Currently the following capability flags are defined: - -%% The node should be published and part of the global namespace --define(DFLAG_PUBLISHED,1). - -%% The node implements an atom cache --define(DFLAG_ATOM_CACHE,2). - -%% The node implements extended (3 * 32 bits) references --define(DFLAG_EXTENDED_REFERENCES,4). - -%% The node implements distributed process monitoring. --define(DFLAG_DIST_MONITOR,8). - -%% The node uses separate tag for fun's (lambdas) in the distribution protocol. --define(DFLAG_FUN_TAGS,16). - -An R6 erlang node implements all of the above, while a C or Java node only -implements DFLAG_EXTENDED_REFERENCES. - -Last modified 1999-11-08 -- Patrik Nyblom, OTP +This information has been moved to the "Distribution Protocol" chapter of "ERTS User's Guide". diff --git a/lib/kernel/src/dist_util.erl b/lib/kernel/src/dist_util.erl index e3511988a6..bbb212cebe 100644 --- a/lib/kernel/src/dist_util.erl +++ b/lib/kernel/src/dist_util.erl @@ -115,7 +115,8 @@ make_this_flags(RequestType, OtherNode) -> ?DFLAG_NEW_FLOATS bor ?DFLAG_UNICODE_IO bor ?DFLAG_DIST_HDR_ATOM_CACHE bor - ?DFLAG_SMALL_ATOM_TAGS). + ?DFLAG_SMALL_ATOM_TAGS bor + ?DFLAG_UTF8_ATOMS). handshake_other_started(#hs_data{request_type=ReqType}=HSData0) -> {PreOtherFlags,Node,Version} = recv_name(HSData0), diff --git a/lib/kernel/src/group.erl b/lib/kernel/src/group.erl index 4d2e31a429..c66e823a04 100644 --- a/lib/kernel/src/group.erl +++ b/lib/kernel/src/group.erl @@ -515,6 +515,27 @@ get_line1({undefined,{_A,Mode,Char},Cs,Cont,Rs}, Drv, Ls0, Encoding) Drv, Ls, Encoding) end; +%% ^R = backward search, ^S = forward search. +%% Search is tricky to implement and does a lot of back-and-forth +%% work with edlin.erl (from stdlib). Edlin takes care of writing +%% and handling lines and escape characters to get out of search, +%% whereas this module does the actual searching and appending to lines. +%% Erlang's shell wasn't exactly meant to traverse the wall between +%% line and line stack, so we at least restrict it by introducing +%% new modes: search, search_quit, search_found. These are added to +%% the regular ones (none, meta_left_sq_bracket) and handle special +%% cases of history search. +get_line1({undefined,{_A,Mode,Char},Cs,Cont,Rs}, Drv, Ls, Encoding) + when ((Mode =:= none) and (Char =:= $\^R)) -> + send_drv_reqs(Drv, Rs), + %% drop current line, move to search mode. We store the current + %% prompt ('N>') and substitute it with the search prompt. + send_drv_reqs(Drv, edlin:erase_line(Cont)), + put(search_quit_prompt, edlin:prompt(Cont)), + Pbs = prompt_bytes("(search)`': ", Encoding), + {more_chars,Ncont,Nrs} = edlin:start(Pbs, search), + send_drv_reqs(Drv, Nrs), + get_line1(edlin:edit_line1(Cs, Ncont), Drv, Ls, Encoding); get_line1({expand, Before, Cs0, Cont,Rs}, Drv, Ls0, Encoding) -> send_drv_reqs(Drv, Rs), ExpandFun = get(expand_fun), @@ -535,8 +556,59 @@ get_line1({undefined,_Char,Cs,Cont,Rs}, Drv, Ls, Encoding) -> send_drv_reqs(Drv, Rs), send_drv(Drv, beep), get_line1(edlin:edit_line(Cs, Cont), Drv, Ls, Encoding); +%% The search item was found and accepted (new line entered on the exact +%% result found) +get_line1({_What,Cont={line,_Prompt,_Chars,search_found},Rs}, Drv, Ls0, Encoding) -> + Line = edlin:current_line(Cont), + %% this may create duplicate entries. + Ls = save_line(new_stack(get_lines(Ls0)), Line), + get_line1({done, Line, "", Rs}, Drv, Ls, Encoding); +%% The search mode has been exited, but the user wants to remain in line +%% editing mode wherever that was, but editing the search result. +get_line1({What,Cont={line,_Prompt,_Chars,search_quit},Rs}, Drv, Ls, Encoding) -> + Line = edlin:current_chars(Cont), + %% Load back the old prompt with the correct line number. + case get(search_quit_prompt) of + undefined -> % should not happen. Fallback. + LsFallback = save_line(new_stack(get_lines(Ls)), Line), + get_line1({done, "\n", Line, Rs}, Drv, LsFallback, Encoding); + Prompt -> % redraw the line and keep going with the same stack position + NCont = {line,Prompt,{lists:reverse(Line),[]},none}, + send_drv_reqs(Drv, Rs), + send_drv_reqs(Drv, edlin:erase_line(Cont)), + send_drv_reqs(Drv, edlin:redraw_line(NCont)), + get_line1({What, NCont ,[]}, Drv, pad_stack(Ls), Encoding) + end; +%% Search mode is entered. +get_line1({What,{line,Prompt,{RevCmd0,_Aft},search},Rs}, + Drv, Ls0, Encoding) -> + send_drv_reqs(Drv, Rs), + %% Figure out search direction. ^S and ^R are returned through edlin + %% whenever we received a search while being already in search mode. + {Search, Ls1, RevCmd} = case RevCmd0 of + [$\^S|RevCmd1] -> + {fun search_down_stack/2, Ls0, RevCmd1}; + [$\^R|RevCmd1] -> + {fun search_up_stack/2, Ls0, RevCmd1}; + _ -> % new search, rewind stack for a proper search. + {fun search_up_stack/2, new_stack(get_lines(Ls0)), RevCmd0} + end, + Cmd = lists:reverse(RevCmd), + {Ls, NewStack} = case Search(Ls1, Cmd) of + {none, Ls2} -> + send_drv(Drv, beep), + {Ls2, {RevCmd, "': "}}; + {Line, Ls2} -> % found. Complete the output edlin couldn't have done. + send_drv_reqs(Drv, [{put_chars, Encoding, Line}]), + {Ls2, {RevCmd, "': "++Line}} + end, + Cont = {line,Prompt,NewStack,search}, + more_data(What, Cont, Drv, Ls, Encoding); get_line1({What,Cont0,Rs}, Drv, Ls, Encoding) -> send_drv_reqs(Drv, Rs), + more_data(What, Cont0, Drv, Ls, Encoding). + +more_data(What, Cont0, Drv, Ls, Encoding) -> receive {Drv,{data,Cs}} -> get_line1(edlin:edit_line(Cs, Cont0), Drv, Ls, Encoding); @@ -557,7 +629,6 @@ get_line1({What,Cont0,Rs}, Drv, Ls, Encoding) -> get_line1(edlin:edit_line([], Cont0), Drv, Ls, Encoding) end. - get_line_echo_off(Chars, Pbs, Drv) -> send_drv_reqs(Drv, [{put_chars, unicode,Pbs}]), get_line_echo_off1(edit_line(Chars,[]), Drv). @@ -632,12 +703,46 @@ save_line({stack, U, {}, []}, Line) -> save_line({stack, U, _L, D}, Line) -> {stack, U, Line, D}. -get_lines({stack, U, {}, []}) -> +get_lines(Ls) -> get_all_lines(Ls). +%get_lines({stack, U, {}, []}) -> +% U; +%get_lines({stack, U, {}, D}) -> +% tl(lists:reverse(D, U)); +%get_lines({stack, U, L, D}) -> +% get_lines({stack, U, {}, [L|D]}). + +%% There's a funny behaviour whenever the line stack doesn't have a "\n" +%% at its end -- get_lines() seemed to work on the assumption it *will* be +%% there, but the manipulations done with search history do not require it. +%% +%% It is an assumption because the function was built with either the full +%% stack being on the 'Up' side (we're on the new line) where it isn't +%% stripped. The only other case when it isn't on the 'Up' side is when +%% someone has used the up/down arrows (or ^P and ^N) to navigate lines, +%% in which case, a line with only a \n is stored at the end of the stack +%% (the \n is returned by edlin:current_line/1). +%% +%% get_all_lines works the same as get_lines, but only strips the trailing +%% character if it's a linebreak. Otherwise it's kept the same. This is +%% because traversing the stack due to search history will *not* insert +%% said empty line in the stack at the same time as other commands do, +%% and thus it should not always be stripped unless we know a new line +%% is the last entry. +get_all_lines({stack, U, {}, []}) -> U; -get_lines({stack, U, {}, D}) -> - tl(lists:reverse(D, U)); -get_lines({stack, U, L, D}) -> - get_lines({stack, U, {}, [L|D]}). +get_all_lines({stack, U, {}, D}) -> + case lists:reverse(D, U) of + ["\n"|Lines] -> Lines; + Lines -> Lines + end; +get_all_lines({stack, U, L, D}) -> + get_all_lines({stack, U, {}, [L|D]}). + +%% For the same reason as above, though, we need to expand the stack +%% in some cases to make sure we play nice with up/down arrows. We need +%% to insert newlines, but not always. +pad_stack({stack, U, L, D}) -> + {stack, U, L, D++["\n"]}. save_line_buffer("\n", Lines) -> save_line_buffer(Lines); @@ -649,6 +754,27 @@ save_line_buffer(Line, Lines) -> save_line_buffer(Lines) -> put(line_buffer, Lines). +search_up_stack(Stack, Substr) -> + case up_stack(Stack) of + {none,NewStack} -> {none,NewStack}; + {L, NewStack} -> + case string:str(L, Substr) of + 0 -> search_up_stack(NewStack, Substr); + _ -> {string:strip(L,right,$\n), NewStack} + end + end. + +search_down_stack(Stack, Substr) -> + case down_stack(Stack) of + {none,NewStack} -> {none,NewStack}; + {L, NewStack} -> + case string:str(L, Substr) of + 0 -> search_down_stack(NewStack, Substr); + _ -> {string:strip(L,right,$\n), NewStack} + end + end. + + %% This is get_line without line editing (except for backspace) and %% without echo. get_password_line(Chars, Drv) -> @@ -687,7 +813,7 @@ edit_password([$\177|Cs],[_|Chars]) ->%% is backspace enough? edit_password([Char|Cs],Chars) -> edit_password(Cs,[Char|Chars]). -%% prompt_bytes(Prompt) +%% prompt_bytes(Prompt, Encoding) %% Return a flat list of characters for the Prompt. prompt_bytes(Prompt, Encoding) -> lists:flatten(io_lib:format_prompt(Prompt, Encoding)). diff --git a/lib/kernel/test/file_SUITE.erl b/lib/kernel/test/file_SUITE.erl index 914f0d6127..f34341f561 100644 --- a/lib/kernel/test/file_SUITE.erl +++ b/lib/kernel/test/file_SUITE.erl @@ -60,7 +60,8 @@ -export([ read_not_really_compressed/1, read_compressed_cooked/1, read_compressed_cooked_binary/1, read_cooked_tar_problem/1, - write_compressed/1, compress_errors/1, catenated_gzips/1]). + write_compressed/1, compress_errors/1, catenated_gzips/1, + compress_async_crash/1]). -export([ make_link/1, read_link_info_for_non_link/1, symlinks/1]). @@ -135,7 +136,8 @@ groups() -> {compression, [], [read_compressed_cooked, read_compressed_cooked_binary, read_cooked_tar_problem, read_not_really_compressed, - write_compressed, compress_errors, catenated_gzips]}, + write_compressed, compress_errors, catenated_gzips, + compress_async_crash]}, {links, [], [make_link, read_link_info_for_non_link, symlinks]}]. @@ -2312,6 +2314,57 @@ compress_errors(Config) when is_list(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +compress_async_crash(suite) -> []; +compress_async_crash(doc) -> []; +compress_async_crash(Config) when is_list(Config) -> + ?line DataDir = ?config(data_dir, Config), + ?line Path = filename:join(DataDir, "test.gz"), + ExpectedData = <<"qwerty">>, + + ?line _ = ?FILE_MODULE:delete(Path), + ?line {ok, Fd} = ?FILE_MODULE:open(Path, [write, binary, compressed]), + ?line ok = ?FILE_MODULE:write(Fd, ExpectedData), + ?line ok = ?FILE_MODULE:close(Fd), + + % Test that when using async thread pool, the emulator doesn't crash + % when the efile port driver is stopped while a compressed file operation + % is in progress (being carried by an async thread). + ?line ok = compress_async_crash_loop(10000, Path, ExpectedData), + ?line ok = ?FILE_MODULE:delete(Path), + ok. + +compress_async_crash_loop(0, _Path, _ExpectedData) -> + ok; +compress_async_crash_loop(N, Path, ExpectedData) -> + Parent = self(), + {Pid, Ref} = spawn_monitor( + fun() -> + ?line {ok, Fd} = ?FILE_MODULE:open( + Path, [read, compressed, raw, binary]), + Len = byte_size(ExpectedData), + Parent ! {self(), continue}, + ?line {ok, ExpectedData} = ?FILE_MODULE:read(Fd, Len), + ?line ok = ?FILE_MODULE:close(Fd), + receive foobar -> ok end + end), + receive + {Pid, continue} -> + exit(Pid, shutdown), + receive + {'DOWN', Ref, _, _, Reason} -> + ?line shutdown = Reason + end; + {'DOWN', Ref, _, _, Reason2} -> + test_server:fail({worker_exited, Reason2}) + after 60000 -> + exit(Pid, shutdown), + erlang:demonitor(Ref, [flush]), + test_server:fail(worker_timeout) + end, + compress_async_crash_loop(N - 1, Path, ExpectedData). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + altname(doc) -> "Test the file:altname/1 function"; altname(suite) -> diff --git a/lib/kernel/test/gen_tcp_misc_SUITE.erl b/lib/kernel/test/gen_tcp_misc_SUITE.erl index 93dc2a69d1..a72e76f813 100644 --- a/lib/kernel/test/gen_tcp_misc_SUITE.erl +++ b/lib/kernel/test/gen_tcp_misc_SUITE.erl @@ -42,11 +42,12 @@ killing_acceptor/1,killing_multi_acceptors/1,killing_multi_acceptors2/1, several_accepts_in_one_go/1, accept_system_limit/1, active_once_closed/1, send_timeout/1, send_timeout_active/1, - otp_7731/1, zombie_sockets/1, otp_7816/1, otp_8102/1, + otp_7731/1, zombie_sockets/1, otp_7816/1, otp_8102/1, wrapping_oct/1, otp_9389/1]). %% Internal exports. -export([sender/3, not_owner/1, passive_sockets_server/2, priority_server/1, + oct_acceptor/1, otp_7731_server/1, zombie_server/2, do_iter_max_socks/2]). init_per_testcase(_Func, Config) when is_list(Config) -> @@ -75,6 +76,7 @@ all() -> killing_acceptor, killing_multi_acceptors, killing_multi_acceptors2, several_accepts_in_one_go, accept_system_limit, active_once_closed, send_timeout, send_timeout_active, otp_7731, + wrapping_oct, zombie_sockets, otp_7816, otp_8102, otp_9389]. groups() -> @@ -2581,3 +2583,71 @@ otp_9389_loop(S, OrigLinkHdr, State) -> 3000 -> ?line error({timeout,header}) end. + +wrapping_oct(doc) -> + "Check that 64bit octet counters work."; +wrapping_oct(suite) -> + []; +wrapping_oct(Config) when is_list(Config) -> + Dog = test_server:timetrap(test_server:seconds(600)), + {ok,Sock} = gen_tcp:listen(0,[{active,false},{mode,binary}]), + {ok,Port} = inet:port(Sock), + spawn_link(?MODULE,oct_acceptor,[Sock]), + Res = oct_datapump(Port,16#1FFFFFFFF), + gen_tcp:close(Sock), + test_server:timetrap_cancel(Dog), + ok = Res, + ok. + +oct_datapump(Port,N) -> + {ok,Sock} = gen_tcp:connect("localhost",Port, + [{active,false},{mode,binary}]), + oct_pump(Sock,N,binary:copy(<<$a:8>>,100000),0). + +oct_pump(S,N,_,_) when N =< 0 -> + gen_tcp:close(S), + ok; +oct_pump(S,N,Bin,Last) -> + case gen_tcp:send(S,Bin) of + ok -> + {ok,Stat}=inet:getstat(S), + {_,R}=lists:keyfind(send_oct,1,Stat), + case (R < Last) of + true -> + io:format("ERROR (output) ~p < ~p~n",[R,Last]), + output_counter_error; + false -> + oct_pump(S,N-byte_size(Bin),Bin,R) + end; + _ -> + input_counter_error + end. + + +oct_acceptor(Sock) -> + {ok,Data} = gen_tcp:accept(Sock), + oct_aloop(Data,0,0). + +oct_aloop(S,X,Times) -> + case gen_tcp:recv(S,0) of + {ok,_} -> + {ok,Stat}=inet:getstat(S), + {_,R}=lists:keyfind(recv_oct,1,Stat), + case (R < X) of + true -> + io:format("ERROR ~p < ~p~n",[R,X]), + gen_tcp:close(S), + input_counter_error; + false -> + case Times rem 16#FFFFF of + 0 -> + io:format("Read: ~p~n",[R]); + _ -> + ok + end, + oct_aloop(S,R,Times+1) + end; + _ -> + gen_tcp:close(S), + closed + end. diff --git a/lib/megaco/src/flex/megaco_flex_scanner_drv.flex.src b/lib/megaco/src/flex/megaco_flex_scanner_drv.flex.src index b8146c345d..5faddb08c5 100644 --- a/lib/megaco/src/flex/megaco_flex_scanner_drv.flex.src +++ b/lib/megaco/src/flex/megaco_flex_scanner_drv.flex.src @@ -76,6 +76,7 @@ typedef struct { ErlDrvPort port; + ErlDrvTermData port_id; char* digit_map_name_ptr; int digit_map_name_len; char* digit_map_value_ptr; @@ -1497,6 +1498,7 @@ static ErlDrvData mfs_start(ErlDrvPort port, char *buf) DBG( ("mfs_start -> entry\n") ); dataP->port = port; + dataP->port_id = driver_mk_port(port); dataP->digit_map_name_ptr = NULL; dataP->digit_map_name_len = 0; dataP->digit_map_value_ptr = NULL; @@ -1841,10 +1843,10 @@ static ErlDrvSSizeT mfs_control(ErlDrvData handle, "\n term_spec_size: %d\n", dataP->term_spec_index, dataP->term_spec_size) ); - driver_send_term(dataP->port, - driver_caller(dataP->port), - dataP->term_spec, - dataP->term_spec_index); + erl_drv_send_term(dataP->port_id, + driver_caller(dataP->port), + dataP->term_spec, + dataP->term_spec_index); if (dataP->text_buf != NULL) FREE(dataP->text_buf); if (dataP->term_spec != NULL) FREE(dataP->term_spec); diff --git a/lib/ssh/src/ssh.erl b/lib/ssh/src/ssh.erl index b5a0aa2e05..193f877b98 100644 --- a/lib/ssh/src/ssh.erl +++ b/lib/ssh/src/ssh.erl @@ -31,11 +31,6 @@ stop_listener/1, stop_listener/2, stop_daemon/1, stop_daemon/2, shell/1, shell/2, shell/3]). --deprecated({sign_data, 2, next_major_release}). --deprecated({verify_data, 3, next_major_release}). - --export([sign_data/2, verify_data/3]). - %%-------------------------------------------------------------------- %% Function: start([, Type]) -> ok %% @@ -394,8 +389,8 @@ handle_ssh_option({public_key_alg, Value} = Opt) when Value == 'ssh-rsa'; Value Opt; handle_ssh_option({pref_public_key_algs, Value} = Opt) when is_list(Value), length(Value) >= 1 -> case handle_pref_algs(Value, []) of - true -> - Opt; + {true, NewOpts} -> + NewOpts; _ -> throw({error, {eoptions, Opt}}) end; @@ -503,38 +498,3 @@ inetopt(false) -> %%% %% Deprecated %%% - -%%-------------------------------------------------------------------- -%% Function: sign_data(Data, Algorithm) -> binary() | -%% {error, Reason} -%% -%% Data = binary() -%% Algorithm = "ssh-rsa" -%% -%% Description: Use SSH key to sign data. -%%-------------------------------------------------------------------- -sign_data(Data, Algorithm) when is_binary(Data) -> - case ssh_file:user_key(Algorithm,[]) of - {ok, Key} when Algorithm == "ssh-rsa" -> - public_key:sign(Data, sha, Key); - Error -> - Error - end. - -%%-------------------------------------------------------------------- -%% Function: verify_data(Data, Signature, Algorithm) -> ok | -%% {error, Reason} -%% -%% Data = binary() -%% Signature = binary() -%% Algorithm = "ssh-rsa" -%% -%% Description: Use SSH signature to verify data. -%%-------------------------------------------------------------------- -verify_data(Data, Signature, Algorithm) when is_binary(Data), is_binary(Signature) -> - case ssh_file:user_key(Algorithm, []) of - {ok, #'RSAPrivateKey'{publicExponent = E, modulus = N}} when Algorithm == "ssh-rsa" -> - public_key:verify(Data, sha, Signature, #'RSAPublicKey'{publicExponent = E, modulus = N}); - Error -> - Error - end. diff --git a/lib/stdlib/src/edlin.erl b/lib/stdlib/src/edlin.erl index 1164ee49eb..3192879f09 100644 --- a/lib/stdlib/src/edlin.erl +++ b/lib/stdlib/src/edlin.erl @@ -22,10 +22,10 @@ %% A simple Emacs-like line editor. %% About Latin-1 characters: see the beginning of erl_scan.erl. --export([init/0,start/1,edit_line/2,prefix_arg/1]). +-export([init/0,start/1,start/2,edit_line/2,prefix_arg/1]). -export([erase_line/1,erase_inp/1,redraw_line/1]). -export([length_before/1,length_after/1,prompt/1]). --export([current_line/1]). +-export([current_line/1, current_chars/1]). %%-export([expand/1]). -export([edit_line1/2]). @@ -54,7 +54,12 @@ init() -> %% {undefined,Char,Rest,Cont,Requests} start(Pbs) -> - {more_chars,{line,Pbs,{[],[]},none},[{put_chars,unicode,Pbs}]}. + start(Pbs, none). + +%% Only two modes used: 'none' and 'search'. Other modes can be +%% handled inline through specific character handling. +start(Pbs, Mode) -> + {more_chars,{line,Pbs,{[],[]},Mode},[{put_chars,unicode,Pbs}]}. edit_line(Cs, {line,P,L,{blink,N}}) -> edit(Cs, P, L, none, [{move_rel,N}]); @@ -76,6 +81,10 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) -> edit(Cs, P, {Bef,Aft}, meta, Rs0); meta_left_sq_bracket -> edit(Cs, P, {Bef,Aft}, meta_left_sq_bracket, Rs0); + search_meta -> + edit(Cs, P, {Bef,Aft}, search_meta, Rs0); + search_meta_left_sq_bracket -> + edit(Cs, P, {Bef,Aft}, search_meta_left_sq_bracket, Rs0); ctlx -> edit(Cs, P, {Bef,Aft}, ctlx, Rs0); new_line -> @@ -115,6 +124,8 @@ edit([C|Cs], P, {Bef,Aft}, Prefix, Rs0) -> case do_op(Op, Bef, Aft, Rs0) of {blink,N,Line,Rs} -> edit(Cs, P, Line, {blink,N}, Rs); + {Line, Rs, Mode} -> % allow custom modes from do_op + edit(Cs, P, Line, Mode, Rs); {Line,Rs} -> edit(Cs, P, Line, none, Rs) end @@ -168,9 +179,15 @@ key_map($\^], none) -> auto_blink; key_map($\^X, none) -> ctlx; key_map($\^Y, none) -> yank; key_map($\e, none) -> meta; -key_map($), Prefix) when Prefix =/= meta -> {blink,$),$(}; -key_map($}, Prefix) when Prefix =/= meta -> {blink,$},${}; -key_map($], Prefix) when Prefix =/= meta -> {blink,$],$[}; +key_map($), Prefix) when Prefix =/= meta, + Prefix =/= search, + Prefix =/= search_meta -> {blink,$),$(}; +key_map($}, Prefix) when Prefix =/= meta, + Prefix =/= search, + Prefix =/= search_meta -> {blink,$},${}; +key_map($], Prefix) when Prefix =/= meta, + Prefix =/= search, + Prefix =/= search_meta -> {blink,$],$[}; key_map($B, meta) -> backward_word; key_map($D, meta) -> kill_word; key_map($F, meta) -> forward_word; @@ -188,6 +205,32 @@ key_map($D, meta_left_sq_bracket) -> backward_char; key_map($C, meta_left_sq_bracket) -> forward_char; key_map(C, none) when C >= $\s -> {insert,C}; +%% for search, we need smarter line handling and so +%% we cheat a bit on the dispatching, and allow to +%% return a mode. +key_map($\^H, search) -> {search, backward_delete_char}; +key_map($\177, search) -> {search, backward_delete_char}; +key_map($\^R, search) -> {search, skip_up}; +key_map($\^S, search) -> {search, skip_down}; +key_map($\n, search) -> {search, search_found}; +key_map($\r, search) -> {search, search_found}; +key_map($\^A, search) -> {search, search_quit}; +key_map($\^B, search) -> {search, search_quit}; +key_map($\^D, search) -> {search, search_quit}; +key_map($\^E, search) -> {search, search_quit}; +key_map($\^F, search) -> {search, search_quit}; +key_map($\t, search) -> {search, search_quit}; +key_map($\^L, search) -> {search, search_quit}; +key_map($\^T, search) -> {search, search_quit}; +key_map($\^U, search) -> {search, search_quit}; +key_map($\^], search) -> {search, search_quit}; +key_map($\^X, search) -> {search, search_quit}; +key_map($\^Y, search) -> {search, search_quit}; +key_map($\e, search) -> search_meta; +key_map($[, search_meta) -> search_meta_left_sq_bracket; +key_map(_, search_meta) -> {search, search_quit}; +key_map(_C, search_meta_left_sq_bracket) -> {search, search_quit}; +key_map(C, search) -> {insert_search,C}; key_map(C, _) -> {undefined,C}. %% do_op(Action, Before, After, Requests) @@ -196,6 +239,57 @@ do_op({insert,C}, Bef, [], Rs) -> {{[C|Bef],[]},[{put_chars, unicode,[C]}|Rs]}; do_op({insert,C}, Bef, Aft, Rs) -> {{[C|Bef],Aft},[{insert_chars, unicode, [C]}|Rs]}; +%% Search mode prompt always looks like (search)`$TERMS': $RESULT. +%% the {insert_search, _} handlings allow to share this implementation +%% correctly with group.erl. This module provides $TERMS, and group.erl +%% is in charge of providing $RESULT. +%% This require a bit of trickery. Because search disables moving around +%% on the line (left/right arrow keys and other shortcuts that just exit +%% search mode), we can use the Bef and Aft variables to hold each +%% part of the line. Bef takes charge of "(search)`$TERMS" and Aft +%% takes charge of "': $RESULT". +do_op({insert_search, C}, Bef, [], Rs) -> + Aft="': ", + {{[C|Bef],Aft}, + [{insert_chars, unicode, [C]++Aft}, {delete_chars,-3} | Rs], + search}; +do_op({insert_search, C}, Bef, Aft, Rs) -> + Offset= length(Aft), + NAft = "': ", + {{[C|Bef],NAft}, + [{insert_chars, unicode, [C]++NAft}, {delete_chars,-Offset} | Rs], + search}; +do_op({search, backward_delete_char}, [_|Bef], Aft, Rs) -> + Offset= length(Aft)+1, + NAft = "': ", + {{Bef,NAft}, + [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs], + search}; +do_op({search, backward_delete_char}, [], _Aft, Rs) -> + Aft="': ", + {{[],Aft}, Rs, search}; +do_op({search, skip_up}, Bef, Aft, Rs) -> + Offset= length(Aft), + NAft = "': ", + {{[$\^R|Bef],NAft}, % we insert ^R as a flag to whoever called us + [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs], + search}; +do_op({search, skip_down}, Bef, Aft, Rs) -> + Offset= length(Aft), + NAft = "': ", + {{[$\^S|Bef],NAft}, % we insert ^S as a flag to whoever called us + [{insert_chars, unicode, NAft}, {delete_chars,-Offset}|Rs], + search}; +do_op({search, search_found}, _Bef, Aft, Rs) -> + "': "++NAft = Aft, + {{[],NAft}, + [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs], + search_found}; +do_op({search, search_quit}, _Bef, Aft, Rs) -> + "': "++NAft = Aft, + {{[],NAft}, + [{put_chars, unicode, "\n"}, {move_rel,-length(Aft)} | Rs], + search_quit}; %% do blink after $$ do_op({blink,C,M}, Bef=[$$,$$|_], Aft, Rs) -> N = over_paren(Bef, C, M), @@ -453,6 +547,9 @@ prompt({line,Pbs,_,_}) -> current_line({line,_,{Bef, Aft},_}) -> reverse(Bef, Aft ++ "\n"). +current_chars({line,_,{Bef,Aft},_}) -> + reverse(Bef, Aft). + %% %% expand(CurrentBefore) -> %% %% {yes,Expansion} | no %% %% Try to expand the word before as either a module name or a function diff --git a/lib/stdlib/src/erl_internal.erl b/lib/stdlib/src/erl_internal.erl index 3063881890..bf2fffbd97 100644 --- a/lib/stdlib/src/erl_internal.erl +++ b/lib/stdlib/src/erl_internal.erl @@ -278,6 +278,7 @@ bif(exit, 1) -> true; bif(exit, 2) -> true; bif(float, 1) -> true; bif(float_to_list, 1) -> true; +bif(float_to_list, 2) -> true; bif(garbage_collect, 0) -> true; bif(garbage_collect, 1) -> true; bif(get, 0) -> true; diff --git a/lib/tools/test/cprof_SUITE.erl b/lib/tools/test/cprof_SUITE.erl index 93caee0c8f..ce5cf66a14 100644 --- a/lib/tools/test/cprof_SUITE.erl +++ b/lib/tools/test/cprof_SUITE.erl @@ -230,10 +230,10 @@ on_load_test(Config) -> %% ?line N4 = cprof:restart(), ?line {ok,Module} = c:c(File, [{outdir,Priv}]), - ?line L = Module:seq(1, M, fun (I) -> succ(I) end), - ?line Lr = Module:seq_r(1, M, fun (I) -> succ(I) end), - ?line L = seq(1, M, fun (I) -> succ(I) end), - ?line Lr = seq_r(1, M, fun (I) -> succ(I) end), + ?line L = Module:seq(1, M, fun succ/1), + ?line Lr = Module:seq_r(1, M, fun succ/1), + ?line L = seq(1, M, fun succ/1), + ?line Lr = seq_r(1, M, fun succ/1), ?line N2 = cprof:pause(), ?line {Module,0,[]} = cprof:analyse(Module), ?line M_1 = M - 1, @@ -265,10 +265,10 @@ modules_test(Config) -> ?line M2__1 = M2 + 1, ?line erlang:yield(), ?line N = cprof:start(), - ?line L = Module:seq(1, M, fun (I) -> succ(I) end), - ?line Lr = Module:seq_r(1, M, fun (I) -> succ(I) end), - ?line L = seq(1, M, fun (I) -> succ(I) end), - ?line Lr = seq_r(1, M, fun (I) -> succ(I) end), + ?line L = Module:seq(1, M, fun succ/1), + ?line Lr = Module:seq_r(1, M, fun succ/1), + ?line L = seq(1, M, fun succ/1), + ?line Lr = seq_r(1, M, fun succ/1), ?line N = cprof:pause(), ?line Lr = lists:reverse(L), ?line M_1 = M - 1, |