From c3ddb0fc41efcada961920a26f5286f12793614e Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 27 Dec 2017 19:17:37 +0100 Subject: Fix encoding of filenames in stacktraces --- erts/emulator/beam/beam_load.c | 8 +- erts/emulator/beam/beam_ranges.c | 6 +- erts/emulator/beam/erl_unicode.c | 152 +++++++++++++++++++------ erts/emulator/beam/erl_unicode.h | 5 +- erts/emulator/test/code_SUITE.erl | 50 +++++++- erts/emulator/test/code_SUITE_data/erl_544.erl | 35 ++++++ 6 files changed, 210 insertions(+), 46 deletions(-) create mode 100644 erts/emulator/test/code_SUITE_data/erl_544.erl diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index adf8779f11..494b7543ec 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2017. All Rights Reserved. + * Copyright Ericsson AB 1996-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ #include "erl_zlib.h" #include "erl_map.h" #include "erl_process_dict.h" +#include "erl_unicode.h" #ifdef HIPE #include "hipe_bif0.h" @@ -1792,7 +1793,7 @@ read_line_table(LoaderState* stp) GetInt(stp, 2, n); GetString(stp, fname, n); - stp->fname[i] = erts_atom_put(fname, n, ERTS_ATOM_ENC_LATIN1, 1); + stp->fname[i] = erts_atom_put(fname, n, ERTS_ATOM_ENC_UTF8, 1); } } @@ -5891,8 +5892,7 @@ erts_build_mfa_item(FunctionInfo* fi, Eterm* hp, Eterm args, Eterm* mfa_p) file_term = buf_to_intlist(&hp, ".erl", 4, NIL); file_term = buf_to_intlist(&hp, (char*)ap->name, ap->len, file_term); } else { - Atom* ap = atom_tab(atom_val((fi->fname_ptr)[file-1])); - file_term = buf_to_intlist(&hp, (char*)ap->name, ap->len, NIL); + file_term = erts_atom_to_string(&hp, (fi->fname_ptr)[file-1]); } tuple = TUPLE2(hp, am_line, make_small(line)); diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index fac4289271..c7b17d58f3 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2012-2016. All Rights Reserved. + * Copyright Ericsson AB 2012-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include "erl_vm.h" #include "global.h" #include "beam_load.h" +#include "erl_unicode.h" typedef struct { BeamInstr* start; /* Pointer to start of module. */ @@ -341,8 +342,7 @@ lookup_loc(FunctionInfo* fi, const BeamInstr* pc, Atom* mod_atom = atom_tab(atom_val(fi->mfa->module)); fi->needed += 2*(mod_atom->len+4); } else { - Atom* ap = atom_tab(atom_val((fi->fname_ptr)[file-1])); - fi->needed += 2*ap->len; + fi->needed += 2*erts_atom_to_string_length((fi->fname_ptr)[file-1]); } return; } else { diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 2d1d1443a7..c5fc6f9815 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2008-2017. All Rights Reserved. + * Copyright Ericsson AB 2008-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1158,14 +1158,15 @@ static ERTS_INLINE int analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left, Sint *num_latin1_chars, Uint max_chars) { + int res = ERTS_UTF8_OK; Uint latin1_count; int is_latin1; + Uint nchars = 0; *err_pos = source; if (num_latin1_chars) { is_latin1 = 1; latin1_count = 0; } - *num_chars = 0; while (size) { if (((*source) & ((byte) 0x80)) == 0) { source++; @@ -1174,11 +1175,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left latin1_count++; } else if (((*source) & ((byte) 0xE0)) == 0xC0) { if (size < 2) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((*source) < 0xC2) /* overlong */) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if (num_latin1_chars) { latin1_count++; @@ -1189,16 +1192,19 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left size -= 2; } else if (((*source) & ((byte) 0xF0)) == 0xE0) { if (size < 3) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if ((((*source) & ((byte) 0xF)) == 0xD) && ((source[1] & 0x20) != 0)) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } source += 3; size -= 3; @@ -1206,37 +1212,47 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left is_latin1 = 0; } else if (((*source) & ((byte) 0xF8)) == 0xF0) { if (size < 4) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || ((source[3] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if ((((*source) & ((byte)0x7)) > 0x4U) || ((((*source) & ((byte)0x7)) == 0x4U) && ((source[1] & ((byte)0x3F)) > 0xFU))) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } source += 4; size -= 4; if (num_latin1_chars) is_latin1 = 0; } else { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } - ++(*num_chars); + ++nchars; *err_pos = source; - if (max_chars && size > 0 && *num_chars == max_chars) - return ERTS_UTF8_OK_MAX_CHARS; + if (max_chars && size > 0 && nchars == max_chars) { + res = ERTS_UTF8_OK_MAX_CHARS; + break; + } if (left && --(*left) <= 0 && size) { - return ERTS_UTF8_ANALYZE_MORE; + res = ERTS_UTF8_ANALYZE_MORE; + break; } } + + *num_chars = nchars; if (num_latin1_chars) *num_latin1_chars = is_latin1 ? latin1_count : -1; - return ERTS_UTF8_OK; + + return res; } int erts_analyze_utf8(byte *source, Uint size, @@ -1252,29 +1268,18 @@ int erts_analyze_utf8_x(byte *source, Uint size, return analyze_utf8(source, size, err_pos, num_chars, left, num_latin1_chars, max_chars); } -/* - * No errors should be able to occur - no overlongs, no malformed, no nothing - */ -static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, - Uint left, - Uint *num_built, Uint *num_eaten, Eterm tail) +static ERTS_INLINE Eterm +make_list_from_utf8_buf(Eterm **hpp, Uint num, + byte *bytes, Uint sz, + Uint *num_built, Uint *num_eaten, + Eterm tail) { Eterm *hp; Eterm ret; + Uint left = num; byte *source, *ssource; Uint unipoint; - - ASSERT(num > 0); - if (left < num) { - if (left > 0) - num = left; - else - num = 1; - } - - *num_built = num; /* Always */ - - hp = HAlloc(p,num * 2); + hp = *hpp; ret = tail; source = bytes + sz; ssource = source; @@ -1302,20 +1307,97 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, } ret = CONS(hp,make_small(unipoint),ret); hp += 2; - if (--num <= 0) { + if (--left <= 0) { break; } } + *hpp = hp; + *num_built = num; /* Always */ *num_eaten = (ssource - source); return ret; } +/* + * No errors should be able to occur - no overlongs, no malformed, no nothing + */ +static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, + Uint left, + Uint *num_built, Uint *num_eaten, Eterm tail) +{ + Eterm *hp; + + ASSERT(num > 0); + if (left < num) { + if (left > 0) + num = left; + else + num = 1; + } + + hp = HAlloc(p,num * 2); + + return make_list_from_utf8_buf(&hp, num, bytes, sz, + num_built, num_eaten, + tail); +} Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail) { return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail); } +Uint erts_atom_to_string_length(Eterm atom) +{ + Atom *ap; + + ASSERT(is_atom(atom)); + ap = atom_tab(atom_val(atom)); + + if (ap->latin1_chars >= 0) + return (Uint) ap->len; + else { + byte* err_pos; + Uint num_chars; +#ifdef DEBUG + int ares = +#endif + erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + ASSERT(ares == ERTS_UTF8_OK); + + return num_chars; + } +} + +Eterm erts_atom_to_string(Eterm **hpp, Eterm atom) +{ + Atom *ap; + + ASSERT(is_atom(atom)); + ap = atom_tab(atom_val(atom)); + if (ap->latin1_chars >= 0) + return buf_to_intlist(hpp, (char*)ap->name, ap->len, NIL); + else { + Eterm res; + byte* err_pos; + Uint num_chars, num_built, num_eaten; +#ifdef DEBUG + Eterm *hp_start = *hpp; + int ares = +#endif + erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + ASSERT(ares == ERTS_UTF8_OK); + + res = make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len, + &num_built, &num_eaten, NIL); + + ASSERT(num_built == num_chars); + ASSERT(num_eaten == ap->len); + ASSERT(*hpp - hp_start == 2*num_chars); + + return res; + } +} + static int is_candidate(Uint cp) { int index,pos; diff --git a/erts/emulator/beam/erl_unicode.h b/erts/emulator/beam/erl_unicode.h index e01eaa787e..31369fc8f9 100644 --- a/erts/emulator/beam/erl_unicode.h +++ b/erts/emulator/beam/erl_unicode.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2008-2016. All Rights Reserved. + * Copyright Ericsson AB 2008-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,4 +21,7 @@ #ifndef _ERL_UNICODE_H #define _ERL_UNICODE_H +Uint erts_atom_to_string_length(Eterm atom); +Eterm erts_atom_to_string(Eterm **hpp, Eterm atom); + #endif /* _ERL_UNICODE_H */ diff --git a/erts/emulator/test/code_SUITE.erl b/erts/emulator/test/code_SUITE.erl index dca600bc7b..fcef070f08 100644 --- a/erts/emulator/test/code_SUITE.erl +++ b/erts/emulator/test/code_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1999-2017. All Rights Reserved. +%% Copyright Ericsson AB 1999-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -27,7 +27,8 @@ constant_pools/1,constant_refc_binaries/1, fake_literals/1, false_dependency/1,coverage/1,fun_confusion/1, - t_copy_literals/1, t_copy_literals_frags/1]). + t_copy_literals/1, t_copy_literals_frags/1, + erl_544/1]). -define(line_trace, 1). -include_lib("common_test/include/ct.hrl"). @@ -41,7 +42,8 @@ all() -> module_md5, constant_pools, constant_refc_binaries, fake_literals, false_dependency, - coverage, fun_confusion, t_copy_literals, t_copy_literals_frags]. + coverage, fun_confusion, t_copy_literals, t_copy_literals_frags, + erl_544]. init_per_suite(Config) -> erts_debug:set_internal_state(available_internal_state, true), @@ -918,6 +920,48 @@ reloader(Mod,Code,Time) -> reloader(Mod,Code,Time) end. +erl_544(Config) when is_list(Config) -> + case file:native_name_encoding() of + utf8 -> + {ok, CWD} = file:get_cwd(), + try + Mod = erl_544, + FileName = atom_to_list(Mod) ++ ".erl", + Priv = proplists:get_value(priv_dir, Config), + Data = proplists:get_value(data_dir, Config), + {ok, FileContent} = file:read_file(filename:join(Data, + FileName)), + Dir = filename:join(Priv, [16#2620,16#2620,16#2620]), + File = filename:join(Dir, FileName), + io:format("~ts~n", [File]), + ok = file:make_dir(Dir), + ok = file:set_cwd(Dir), + ok = file:write_file(File, [FileContent]), + {ok, Mod} = compile:file(File), + Res1 = (catch Mod:err()), + io:format("~p~n", [Res1]), + {'EXIT', {err, [{Mod, err, 0, Info1}|_]}} = Res1, + File = proplists:get_value(file, Info1), + Me = self(), + Go = make_ref(), + Tester = spawn_link(fun () -> + Mod:wait(Me, Go), + Mod:err() + end), + receive Go -> ok end, + Res2 = process_info(Tester, current_stacktrace), + io:format("~p~n", [Res2]), + {current_stacktrace, + [{Mod, wait, 2, Info2}|_]} = Res2, + File = proplists:get_value(file, Info2), + ok + after + ok = file:set_cwd(CWD) + end, + ok; + _Enc -> + {skipped, "Only run when native file name encoding is utf8"} + end. %% Utilities. diff --git a/erts/emulator/test/code_SUITE_data/erl_544.erl b/erts/emulator/test/code_SUITE_data/erl_544.erl new file mode 100644 index 0000000000..c93f3ef5bc --- /dev/null +++ b/erts/emulator/test/code_SUITE_data/erl_544.erl @@ -0,0 +1,35 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(erl_544). + +-export([err/0, wait/2]). + +err() -> + erlang:error(err). + +wait(Pid, Msg) -> + erlang:yield(), + Pid ! Msg, + receive + after infinity -> + ok + end, + err(). -- cgit v1.2.3