From bfe9aeb47b494ec4243549340f9abf2a246e35a0 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Feb 2013 12:24:34 +0100 Subject: Add +pc {latin1|unicode} switch and io:printable_range/0 This is the base for implementing configurable ~tp printouts, so that the user can define which characters to view as actually printable in the shell and by io_lib:format. The functionality is neither documented nor used in this commit --- erts/emulator/beam/bif.tab | 2 ++ erts/emulator/beam/erl_init.c | 29 ++++++++++++++++++++------ erts/emulator/beam/erl_unicode.c | 8 +++++++ erts/emulator/beam/sys.h | 13 ++++++++++++ erts/emulator/sys/common/erl_sys_common_misc.c | 16 +++++++++++++- erts/etc/common/erlexec.c | 17 +++++++++++++++ 6 files changed, 78 insertions(+), 7 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index b74dc5c3fe..8bc994c8c3 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -569,6 +569,8 @@ bif erlang:float_to_binary/1 bif erlang:float_to_binary/2 bif erlang:binary_to_float/1 +bif io:printable_range/0 + # # Obsolete # diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index ec3e0d54cb..83853dcd43 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -496,7 +496,7 @@ void erts_usage(void) erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); - + erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", H_DEFAULT_SIZE); erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n", @@ -509,7 +509,7 @@ void erts_usage(void) erts_fprintf(stderr, " Note that this flag is deprecated!\n"); erts_fprintf(stderr, "-M memory allocator switches,\n"); erts_fprintf(stderr, " see the erts_alloc(3) 
documentation for more info.\n"); - + erts_fprintf(stderr, "-pc Control what characters are considered printable (default latin1)\n"); erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n"); erts_fprintf(stderr, " valid range is [%d-%d]\n", ERTS_MIN_PROCESSES, ERTS_MAX_PROCESSES); @@ -979,13 +979,30 @@ erl_start(int argc, char **argv) VERBOSE(DEBUG_SYSTEM, ("using display items %d\n",display_items)); break; + case 'p': + if (!strncmp(argv[i],"-pc",3)) { + int printable_chars = ERL_PRINTABLE_CHARACTERS_LATIN1; + arg = get_arg(argv[i]+3, argv[i+1], &i); + if (!strcmp(arg,"unicode")) { + printable_chars = ERL_PRINTABLE_CHARACTERS_UNICODE; + } else if (strcmp(arg,"latin1")) { + erts_fprintf(stderr, "bad range of printable " + "characters: %s\n", arg); + erts_usage(); + } + erts_set_printable_characters(printable_chars); + break; + } else { + erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]); + erts_usage(); + } case 'f': if (!strncmp(argv[i],"-fn",3)) { int warning_type = ERL_FILENAME_WARNING_WARNING; arg = get_arg(argv[i]+3, argv[i+1], &i); switch (*arg) { case 'u': - switch (*(argv[i]+4)) { + switch (*(arg+1)) { case 'w': case 0: break; @@ -997,7 +1014,7 @@ erl_start(int argc, char **argv) break; default: erts_fprintf(stderr, "bad type of warnings for " - "wrongly coded filename: %s\n", argv[i]+4); + "wrongly coded filename: %s\n", arg+1); erts_usage(); } erts_set_user_requested_filename_encoding @@ -1014,7 +1031,7 @@ erl_start(int argc, char **argv) ); break; case 'a': - switch (*(argv[i]+4)) { + switch (*(arg+1)) { case 'w': case 0: break; @@ -1026,7 +1043,7 @@ erl_start(int argc, char **argv) break; default: erts_fprintf(stderr, "bad type of warnings for " - "wrongly coded filename: %s\n", argv[i]+4); + "wrongly coded filename: %s\n", arg+1); erts_usage(); } erts_set_user_requested_filename_encoding diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 80982f3760..fa53fd0937 100644 --- 
a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2747,3 +2747,11 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen) return dp - dest; } +BIF_RETTYPE io_printable_range_0(BIF_ALIST_0) +{ + if (erts_get_printable_characters() == ERL_PRINTABLE_CHARACTERS_UNICODE) { + BIF_RET(am_unicode); + } else { + BIF_RET(am_latin1); + } +} diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 9416a91480..05bff430e3 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -1040,11 +1040,24 @@ char* win32_errorstr(int); #define ERL_FILENAME_WARNING_IGNORE (1) #define ERL_FILENAME_WARNING_ERROR (2) +/*********************************************************************** + * The user can request a range of character that he/she consider + * printable. Currently this can be either latin1 or unicode, but + * in the future a set of ranges, or languages, could be specified. + ***********************************************************************/ +#define ERL_PRINTABLE_CHARACTERS_LATIN1 (0) +#define ERL_PRINTABLE_CHARACTERS_UNICODE (1) + int erts_get_native_filename_encoding(void); /* The set function is only to be used by erl_init! */ void erts_set_user_requested_filename_encoding(int encoding, int warning); int erts_get_user_requested_filename_encoding(void); int erts_get_filename_warning_type(void); +/* This function is called from erl_init. The setting is read by BIF's + in io/io_lib. Setting is not atomic. 
*/ +void erts_set_printable_characters(int range); +/* Get the setting (ERL_PRINTABLE_CHARACTERS_{LATIN1|UNICODE} */ +int erts_get_printable_characters(void); void erts_init_sys_common_misc(void); diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 0b31c125e5..31ad3b82d5 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -49,10 +49,15 @@ static int filename_encoding = ERL_FILENAME_UNKNOWN; static int filename_warning = ERL_FILENAME_WARNING_WARNING; #if defined(__WIN32__) || defined(__DARWIN__) -static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */ +/* Default unicode on windows and MacOS X */ +static int user_filename_encoding = ERL_FILENAME_UTF8; #else static int user_filename_encoding = ERL_FILENAME_LATIN1; #endif +/* This controls the heuristic in printing characters in shell and w/ + io:format("~tp", ...) etc. */ +static int printable_character_set = ERL_PRINTABLE_CHARACTERS_LATIN1; + void erts_set_user_requested_filename_encoding(int encoding, int warning) { user_filename_encoding = encoding; @@ -69,6 +74,15 @@ int erts_get_filename_warning_type(void) return filename_warning; } +void erts_set_printable_characters(int range) { + /* Not an atomic */ + printable_character_set = range; +} + +int erts_get_printable_characters(void) { + return printable_character_set; +} + void erts_init_sys_common_misc(void) { #if defined(__WIN32__) diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 577554c43d..02615f9ef1 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -145,6 +145,13 @@ static char *plusr_val_switches[] = { NULL }; +/* +pc arguments with values */ +static char *pluspc_val_switches[] = { + "latin1", + "unicode", + NULL +}; + /* +z arguments with values */ static char *plusz_val_switches[] = { "dbbl", @@ -912,6 +919,16 @@ int main(int argc, char **argv) i++; } 
break; + case 'p': + if (argv[i][2] != 'c' || argv[i][3] != '\0') + goto the_default; + if (i+1 >= argc) + usage(argv[i]); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + break; case 'z': if (!is_one_of_strings(&argv[i][2], plusz_val_switches)) { goto the_default; -- cgit v1.2.3 From 1b4280cd2e8dcab457562bbaacc4e1d16d372185 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Feb 2013 17:24:05 +0100 Subject: Document +pc flag and io:printable_range/0 --- erts/doc/src/erl.xml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'erts') diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index f354d68d45..88f7cd9bf1 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -615,6 +615,28 @@ debugging. + + +

Sets the range of characters that the system will consider printable in heuristic detection of strings. This typically affects the shell, debugger and io:format functions (when ~tp is used in the format string).

+

Currently two values for the Range are supported: + + latin1 The default. Only characters + in the ISO-latin-1 range can be considered printable, which means + that a character with a code point > 255 will never be + considered printable and that lists containing such + characters will be displayed as lists of integers rather + than text strings by tools. + unicode + All printable Unicode characters are considered when + determining if a list of integers is to be displayed in + string syntax. This may give unexpected results if for + example your font does not cover all Unicode + characters. + +

+

See also + io:printable_range/0.

+

Sets the maximum number of simultaneously existing processes for this -- cgit v1.2.3 From d017d6d6dff3c709989fec37fff40bc0dd715128 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 18 Feb 2013 15:12:00 +0100 Subject: Remove newly introduced warning in erlexec.c --- erts/etc/common/erlexec.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'erts') diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 02615f9ef1..9d674a7c65 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -145,13 +145,6 @@ static char *plusr_val_switches[] = { NULL }; -/* +pc arguments with values */ -static char *pluspc_val_switches[] = { - "latin1", - "unicode", - NULL -}; - /* +z arguments with values */ static char *plusz_val_switches[] = { "dbbl", -- cgit v1.2.3