diff options
author | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
commit | 7215c49aff685e93765598cd428baf1d4320f752 (patch) | |
tree | 76409af3ed6abe7502af3aa438e47b1be98df1e3 /erts | |
parent | 14820e983856654e68e08244e4dfc689f0804fd8 (diff) | |
parent | 2a79b74ac371387ce338bacf979f9ca32447b302 (diff) | |
download | otp-7215c49aff685e93765598cd428baf1d4320f752.tar.gz otp-7215c49aff685e93765598cd428baf1d4320f752.tar.bz2 otp-7215c49aff685e93765598cd428baf1d4320f752.zip |
Merge branch 'pan/unicode_printable_ranges'
* pan/unicode_printable_ranges:
Adapt stdlib tests to ~tp detecting latin1 binaries
Update primary bootstrap
Make wx debugger use +pc flag when applicable
Correct misspelled comments and space at line ends
Make ~tp output latin1 binaries as strings if possible
Leave the +pc handling to io and io_lib_pretty
Remove newly introduced warning in erlexec.c
Make shell_SUITE:otp_10302 use +pc unicode when needed
Fix io_proto_SUITE to handle the new io_lib_pretty:print
Add testcase for +pc and io:printable_range/0
Make printing of UTF-8 in binaries behave like lists.
Document +pc flag and io:printable_range/0
Add usage of and spec for io:printable_range/0
Add +pc {latin1|unicode} switch and io:printable_range/0
Fix some Unicode issues
OTP-18084
Diffstat (limited to 'erts')
-rw-r--r-- | erts/doc/src/erl.xml | 22 | ||||
-rw-r--r-- | erts/emulator/beam/bif.tab | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_init.c | 29 | ||||
-rw-r--r-- | erts/emulator/beam/erl_unicode.c | 8 | ||||
-rw-r--r-- | erts/emulator/beam/sys.h | 13 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_sys_common_misc.c | 16 | ||||
-rw-r--r-- | erts/etc/common/erlexec.c | 10 |
7 files changed, 93 insertions, 7 deletions
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index 5bfa518266..bd03fb4970 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -615,6 +615,28 @@ debugging.</item> </taglist> </item> + <tag><marker id="+pc"/><marker id="max_processes"><c><![CDATA[+pc Range]]></c></marker></tag> + <item> + <p>Sets the range of characters that the system will consider printable in heuristic detection of strings. This typically affects the shell, debugger and io:format functions (when ~tp is used in the format string).</p> + <p>Currently two values for the <c>Range</c> are supported: + <taglist> + <tag><c>latin1</c></tag> <item>The default. Only characters + in the ISO-latin-1 range can be considered printable, which means + that a character with a code point > 255 will never be + considered printable and that lists containing such + characters will be displayed as lists of integers rather + than text strings by tools.</item> + <tag><c>unicode</c></tag> + <item>All printable Unicode characters are considered when + determining if a list of integers is to be displayed in + string syntax. 
This may give unexpected results if for + example your font does not cover all Unicode + characters.</item> + </taglist> + </p> + <p>Se also <seealso marker="stdlib:io#printable_range/0"> + io:printable_range/0</seealso>.</p> + </item> <tag><marker id="+P"/><marker id="max_processes"><c><![CDATA[+P Number]]></c></marker></tag> <item> <p>Sets the maximum number of simultaneously existing processes for this diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index b74dc5c3fe..8bc994c8c3 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -569,6 +569,8 @@ bif erlang:float_to_binary/1 bif erlang:float_to_binary/2 bif erlang:binary_to_float/1 +bif io:printable_range/0 + # # Obsolete # diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index ec3e0d54cb..83853dcd43 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -496,7 +496,7 @@ void erts_usage(void) erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); - + erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", H_DEFAULT_SIZE); erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n", @@ -509,7 +509,7 @@ void erts_usage(void) erts_fprintf(stderr, " Note that this flag is deprecated!\n"); erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n"); erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n"); - + erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n"); erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n"); erts_fprintf(stderr, " valid range is [%d-%d]\n", ERTS_MIN_PROCESSES, ERTS_MAX_PROCESSES); @@ -979,13 +979,30 @@ erl_start(int argc, char **argv) VERBOSE(DEBUG_SYSTEM, 
("using display items %d\n",display_items)); break; + case 'p': + if (!strncmp(argv[i],"-pc",3)) { + int printable_chars = ERL_PRINTABLE_CHARACTERS_LATIN1; + arg = get_arg(argv[i]+3, argv[i+1], &i); + if (!strcmp(arg,"unicode")) { + printable_chars = ERL_PRINTABLE_CHARACTERS_UNICODE; + } else if (strcmp(arg,"latin1")) { + erts_fprintf(stderr, "bad range of printable " + "characters: %s\n", arg); + erts_usage(); + } + erts_set_printable_characters(printable_chars); + break; + } else { + erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]); + erts_usage(); + } case 'f': if (!strncmp(argv[i],"-fn",3)) { int warning_type = ERL_FILENAME_WARNING_WARNING; arg = get_arg(argv[i]+3, argv[i+1], &i); switch (*arg) { case 'u': - switch (*(argv[i]+4)) { + switch (*(arg+1)) { case 'w': case 0: break; @@ -997,7 +1014,7 @@ erl_start(int argc, char **argv) break; default: erts_fprintf(stderr, "bad type of warnings for " - "wrongly coded filename: %s\n", argv[i]+4); + "wrongly coded filename: %s\n", arg+1); erts_usage(); } erts_set_user_requested_filename_encoding @@ -1014,7 +1031,7 @@ erl_start(int argc, char **argv) ); break; case 'a': - switch (*(argv[i]+4)) { + switch (*(arg+1)) { case 'w': case 0: break; @@ -1026,7 +1043,7 @@ erl_start(int argc, char **argv) break; default: erts_fprintf(stderr, "bad type of warnings for " - "wrongly coded filename: %s\n", argv[i]+4); + "wrongly coded filename: %s\n", arg+1); erts_usage(); } erts_set_user_requested_filename_encoding diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 80982f3760..fa53fd0937 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2747,3 +2747,11 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen) return dp - dest; } +BIF_RETTYPE io_printable_range_0(BIF_ALIST_0) +{ + if (erts_get_printable_characters() == ERL_PRINTABLE_CHARACTERS_UNICODE) { + BIF_RET(am_unicode); + } else { + BIF_RET(am_latin1); + } +} diff --git 
a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 9416a91480..05bff430e3 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1040,11 +1040,24 @@ char* win32_errorstr(int); #define ERL_FILENAME_WARNING_IGNORE (1) #define ERL_FILENAME_WARNING_ERROR (2) +/*********************************************************************** + * The user can request a range of character that he/she consider + * printable. Currently this can be either latin1 or unicode, but + * in the future a set of ranges, or languages, could be specified. + ***********************************************************************/ +#define ERL_PRINTABLE_CHARACTERS_LATIN1 (0) +#define ERL_PRINTABLE_CHARACTERS_UNICODE (1) + int erts_get_native_filename_encoding(void); /* The set function is only to be used by erl_init! */ void erts_set_user_requested_filename_encoding(int encoding, int warning); int erts_get_user_requested_filename_encoding(void); int erts_get_filename_warning_type(void); +/* This function is called from erl_init. The setting is read by BIF's + in io/io_lib. Setting is not atomic. 
*/ +void erts_set_printable_characters(int range); +/* Get the setting (ERL_PRINTABLE_CHARACTERS_{LATIN1|UNICODE} */ +int erts_get_printable_characters(void); void erts_init_sys_common_misc(void); diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 0b31c125e5..31ad3b82d5 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -49,10 +49,15 @@ static int filename_encoding = ERL_FILENAME_UNKNOWN; static int filename_warning = ERL_FILENAME_WARNING_WARNING; #if defined(__WIN32__) || defined(__DARWIN__) -static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */ +/* Default unicode on windows and MacOS X */ +static int user_filename_encoding = ERL_FILENAME_UTF8; #else static int user_filename_encoding = ERL_FILENAME_LATIN1; #endif +/* This controls the heuristic in printing characters in shell and w/ + io:format("~tp", ...) etc. */ +static int printable_character_set = ERL_PRINTABLE_CHARACTERS_LATIN1; + void erts_set_user_requested_filename_encoding(int encoding, int warning) { user_filename_encoding = encoding; @@ -69,6 +74,15 @@ int erts_get_filename_warning_type(void) return filename_warning; } +void erts_set_printable_characters(int range) { + /* Not an atomic */ + printable_character_set = range; +} + +int erts_get_printable_characters(void) { + return printable_character_set; +} + void erts_init_sys_common_misc(void) { #if defined(__WIN32__) diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 577554c43d..9d674a7c65 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -912,6 +912,16 @@ int main(int argc, char **argv) i++; } break; + case 'p': + if (argv[i][2] != 'c' || argv[i][3] != '\0') + goto the_default; + if (i+1 >= argc) + usage(argv[i]); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + break; case 'z': if (!is_one_of_strings(&argv[i][2], 
plusz_val_switches)) { goto the_default; |