aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-02-22 12:06:41 +0100
committerPatrik Nyblom <[email protected]>2013-02-22 12:06:41 +0100
commit7215c49aff685e93765598cd428baf1d4320f752 (patch)
tree76409af3ed6abe7502af3aa438e47b1be98df1e3 /erts
parent14820e983856654e68e08244e4dfc689f0804fd8 (diff)
parent2a79b74ac371387ce338bacf979f9ca32447b302 (diff)
downloadotp-7215c49aff685e93765598cd428baf1d4320f752.tar.gz
otp-7215c49aff685e93765598cd428baf1d4320f752.tar.bz2
otp-7215c49aff685e93765598cd428baf1d4320f752.zip
Merge branch 'pan/unicode_printable_ranges'
* pan/unicode_printable_ranges:
  Adapt stdlib tests to ~tp detecting latin1 binaries
  Update primary bootstrap
  Make wx debugger use +pc flag when applicable
  Correct misspelled comments and space at lin ends
  Make ~tp output latin1 binaries as strings if possible
  Leave the +pc handling to io and io_lib_pretty
  Remove newly introduced warning in erlexec.c
  Make shell_SUITE:otp_10302 use +pc unicode when needed
  Fix io_proto_SUITE to handle the new io_lib_pretty:print
  Add testcase for +pc and io:printable_range/0
  Make printing of UTF-8 in binaries behave like lists.
  Document +pc flag and io:printable_range/0
  Add usage of and spec for io:printable_range/0
  Add +pc {latin1|unicode} switch and io:printable_range/0
  Fix some Unicode issues

OTP-18084
Diffstat (limited to 'erts')
-rw-r--r--erts/doc/src/erl.xml22
-rw-r--r--erts/emulator/beam/bif.tab2
-rw-r--r--erts/emulator/beam/erl_init.c29
-rw-r--r--erts/emulator/beam/erl_unicode.c8
-rw-r--r--erts/emulator/beam/sys.h13
-rw-r--r--erts/emulator/sys/common/erl_sys_common_misc.c16
-rw-r--r--erts/etc/common/erlexec.c10
7 files changed, 93 insertions, 7 deletions
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml
index 5bfa518266..bd03fb4970 100644
--- a/erts/doc/src/erl.xml
+++ b/erts/doc/src/erl.xml
@@ -615,6 +615,28 @@
debugging.</item>
</taglist>
</item>
+ <tag><marker id="+pc"/><marker id="printable_character_range"><c><![CDATA[+pc Range]]></c></marker></tag>
+ <item>
+ <p>Sets the range of characters that the system will consider printable in heuristic detection of strings. This typically affects the shell, debugger and io:format functions (when ~tp is used in the format string).</p>
+ <p>Currently two values for the <c>Range</c> are supported:
+ <taglist>
+ <tag><c>latin1</c></tag> <item>The default. Only characters
+ in the ISO-latin-1 range can be considered printable, which means
+ that a character with a code point &gt; 255 will never be
+ considered printable and that lists containing such
+ characters will be displayed as lists of integers rather
+ than text strings by tools.</item>
+ <tag><c>unicode</c></tag>
+ <item>All printable Unicode characters are considered when
+ determining if a list of integers is to be displayed in
+ string syntax. This may give unexpected results if for
+ example your font does not cover all Unicode
+ characters.</item>
+ </taglist>
+ </p>
+ <p>See also <seealso marker="stdlib:io#printable_range/0">
+ io:printable_range/0</seealso>.</p>
+ </item>
<tag><marker id="+P"/><marker id="max_processes"><c><![CDATA[+P Number]]></c></marker></tag>
<item>
<p>Sets the maximum number of simultaneously existing processes for this
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index b74dc5c3fe..8bc994c8c3 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -569,6 +569,8 @@ bif erlang:float_to_binary/1
bif erlang:float_to_binary/2
bif erlang:binary_to_float/1
+bif io:printable_range/0
+
#
# Obsolete
#
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index ec3e0d54cb..83853dcd43 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -496,7 +496,7 @@ void erts_usage(void)
erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n");
erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n");
-
+ erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n");
erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n",
H_DEFAULT_SIZE);
erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n",
@@ -509,7 +509,7 @@ void erts_usage(void)
erts_fprintf(stderr, " Note that this flag is deprecated!\n");
erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n");
erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n");
-
+ erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n");
erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n");
erts_fprintf(stderr, " valid range is [%d-%d]\n",
ERTS_MIN_PROCESSES, ERTS_MAX_PROCESSES);
@@ -979,13 +979,30 @@ erl_start(int argc, char **argv)
VERBOSE(DEBUG_SYSTEM,
("using display items %d\n",display_items));
break;
+ case 'p':
+ if (!strncmp(argv[i],"-pc",3)) {
+ int printable_chars = ERL_PRINTABLE_CHARACTERS_LATIN1;
+ arg = get_arg(argv[i]+3, argv[i+1], &i);
+ if (!strcmp(arg,"unicode")) {
+ printable_chars = ERL_PRINTABLE_CHARACTERS_UNICODE;
+ } else if (strcmp(arg,"latin1")) {
+ erts_fprintf(stderr, "bad range of printable "
+ "characters: %s\n", arg);
+ erts_usage();
+ }
+ erts_set_printable_characters(printable_chars);
+ break;
+ } else {
+ erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]);
+ erts_usage();
+ }
case 'f':
if (!strncmp(argv[i],"-fn",3)) {
int warning_type = ERL_FILENAME_WARNING_WARNING;
arg = get_arg(argv[i]+3, argv[i+1], &i);
switch (*arg) {
case 'u':
- switch (*(argv[i]+4)) {
+ switch (*(arg+1)) {
case 'w':
case 0:
break;
@@ -997,7 +1014,7 @@ erl_start(int argc, char **argv)
break;
default:
erts_fprintf(stderr, "bad type of warnings for "
- "wrongly coded filename: %s\n", argv[i]+4);
+ "wrongly coded filename: %s\n", arg+1);
erts_usage();
}
erts_set_user_requested_filename_encoding
@@ -1014,7 +1031,7 @@ erl_start(int argc, char **argv)
);
break;
case 'a':
- switch (*(argv[i]+4)) {
+ switch (*(arg+1)) {
case 'w':
case 0:
break;
@@ -1026,7 +1043,7 @@ erl_start(int argc, char **argv)
break;
default:
erts_fprintf(stderr, "bad type of warnings for "
- "wrongly coded filename: %s\n", argv[i]+4);
+ "wrongly coded filename: %s\n", arg+1);
erts_usage();
}
erts_set_user_requested_filename_encoding
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 80982f3760..fa53fd0937 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -2747,3 +2747,11 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen)
return dp - dest;
}
+BIF_RETTYPE io_printable_range_0(BIF_ALIST_0)
+{
+ if (erts_get_printable_characters() == ERL_PRINTABLE_CHARACTERS_UNICODE) {
+ BIF_RET(am_unicode);
+ } else {
+ BIF_RET(am_latin1);
+ }
+}
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 9416a91480..05bff430e3 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -1040,11 +1040,24 @@ char* win32_errorstr(int);
#define ERL_FILENAME_WARNING_IGNORE (1)
#define ERL_FILENAME_WARNING_ERROR (2)
+/***********************************************************************
+ * The user can request a range of characters that he/she considers
+ * printable. Currently this can be either latin1 or unicode, but
+ * in the future a set of ranges, or languages, could be specified.
+ ***********************************************************************/
+#define ERL_PRINTABLE_CHARACTERS_LATIN1 (0)
+#define ERL_PRINTABLE_CHARACTERS_UNICODE (1)
+
int erts_get_native_filename_encoding(void);
/* The set function is only to be used by erl_init! */
void erts_set_user_requested_filename_encoding(int encoding, int warning);
int erts_get_user_requested_filename_encoding(void);
int erts_get_filename_warning_type(void);
+/* This function is called from erl_init. The setting is read by BIFs
+ in io/io_lib. Setting is not atomic. */
+void erts_set_printable_characters(int range);
+/* Get the setting (ERL_PRINTABLE_CHARACTERS_{LATIN1|UNICODE}) */
+int erts_get_printable_characters(void);
void erts_init_sys_common_misc(void);
diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c
index 0b31c125e5..31ad3b82d5 100644
--- a/erts/emulator/sys/common/erl_sys_common_misc.c
+++ b/erts/emulator/sys/common/erl_sys_common_misc.c
@@ -49,10 +49,15 @@
static int filename_encoding = ERL_FILENAME_UNKNOWN;
static int filename_warning = ERL_FILENAME_WARNING_WARNING;
#if defined(__WIN32__) || defined(__DARWIN__)
-static int user_filename_encoding = ERL_FILENAME_UTF8; /* Default unicode on windows */
+/* Default unicode on windows and MacOS X */
+static int user_filename_encoding = ERL_FILENAME_UTF8;
#else
static int user_filename_encoding = ERL_FILENAME_LATIN1;
#endif
+/* This controls the heuristic in printing characters in shell and w/
+ io:format("~tp", ...) etc. */
+static int printable_character_set = ERL_PRINTABLE_CHARACTERS_LATIN1;
+
void erts_set_user_requested_filename_encoding(int encoding, int warning)
{
user_filename_encoding = encoding;
@@ -69,6 +74,15 @@ int erts_get_filename_warning_type(void)
return filename_warning;
}
+void erts_set_printable_characters(int range) {
+ /* Not an atomic */
+ printable_character_set = range;
+}
+
+int erts_get_printable_characters(void) {
+ return printable_character_set;
+}
+
void erts_init_sys_common_misc(void)
{
#if defined(__WIN32__)
diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c
index 577554c43d..9d674a7c65 100644
--- a/erts/etc/common/erlexec.c
+++ b/erts/etc/common/erlexec.c
@@ -912,6 +912,16 @@ int main(int argc, char **argv)
i++;
}
break;
+ case 'p':
+ if (argv[i][2] != 'c' || argv[i][3] != '\0')
+ goto the_default;
+ if (i+1 >= argc)
+ usage(argv[i]);
+ argv[i][0] = '-';
+ add_Eargs(argv[i]);
+ add_Eargs(argv[i+1]);
+ i++;
+ break;
case 'z':
if (!is_one_of_strings(&argv[i][2], plusz_val_switches)) {
goto the_default;