From 6ea8348174c62812057dd552d0890b2d9d4a3c16 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 09:51:00 +0100 Subject: Add documentation to erlang.xml and slight correction to unicode_usage.xml --- erts/doc/src/erlang.xml | 46 +++++++++++++++++++++++++++++++++--- lib/stdlib/doc/src/unicode_usage.xml | 2 +- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 638f7eef10..78d58a1e56 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -2781,14 +2781,17 @@ os_prompt% open_port(PortName, PortSettings) -> port() Open a port - PortName = {spawn, Command} | {spawn_driver, Command} | {spawn_executable, Command} | {fd, In, Out} + PortName = {spawn, Command} | {spawn_driver, Command} | {spawn_executable, FileName} | {fd, In, Out}  Command = string() +  FileName = [ FileNameChar ] | binary() +  FileNameChar = int() (1..255 or any Unicode codepoint, see description)  In = Out = int() PortSettings = [Opt] -  Opt = {packet, N} | stream | {line, L} | {cd, Dir} | {env, Env} | {args, [ string() ]} | {arg0, string()} | exit_status | use_stdio | nouse_stdio | stderr_to_stdout | in | out | binary | eof +  Opt = {packet, N} | stream | {line, L} | {cd, Dir} | {env, Env} | {args, [ ArgString ]} | {arg0, ArgString} | exit_status | use_stdio | nouse_stdio | stderr_to_stdout | in | out | binary | eof   N = 1 | 2 | 4   L = int()   Dir = string() +   ArgString = [ FileNameChar ] | binary()   Env = [{Name, Val}]    Name = string()    Val = string() | false @@ -2851,7 +2854,26 @@ os_prompt% executed, the appropriate command interpreter will implicitly be invoked, but there will still be no command argument expansion or implicit PATH search.

- + +

The name of the executable as well as the arguments + given in args and arg0 are subject to + Unicode file name translation if the system is running + in Unicode file name mode. To avoid + translation or to force, e.g., UTF-8, supply the executable + and/or arguments as a binary in the correct + encoding. See the file module, the + + file:native_name_encoding/0 function and the + stdlib user's guide + for details.

+ + The characters in the name (if given as a list) + can only be > 255 if the Erlang VM is started in + Unicode file name translation mode; otherwise the name + of the executable is limited to the ISO-latin-1 + character set. +

If the Command cannot be run, an error exception, with the posix error code as the reason, is raised. The error reason may differ between operating @@ -2954,6 +2976,21 @@ os_prompt% should not be given in this list. The proper executable name will automatically be used as argv[0] where applicable.

+

When the Erlang VM is running in Unicode file name + mode, the arguments can contain any Unicode characters and + will be translated into whatever is appropriate on the + underlying OS, which means UTF-8 for all platforms except + Windows, which has other (more transparent) ways of + dealing with Unicode arguments to programs. To avoid + Unicode translation of arguments, they can be supplied as + binaries in whatever encoding is deemed appropriate.

+ + The characters in the arguments (if given as a + list of characters) can only be > 255 if the Erlang + VM is started in Unicode file name mode; + otherwise the arguments are limited to the + ISO-latin-1 character set. +

If one, for any reason, wants to explicitly set the program name in the argument vector, the arg0 option can be used.

@@ -2969,6 +3006,9 @@ os_prompt% responds to this is highly system dependent and no specific effect is guaranteed.

+

The Unicode file name translation rules of the + args option apply to this option as well.

+ exit_status diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index df8e6c6b47..c02ea3cbcb 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -189,7 +189,7 @@ Eshell V5.7 (abort with ^G)

For most systems, turning on Unicode file name translation is no problem even if it uses transparent file naming. Very few systems have mixed file name encodings. A consistent UTF-8 named system will work perfectly in Unicode file name mode. It is still however considered experimental in R14B01. Unicode file name translation is turned on with the +fnu switch to the erl program. If the VM is started in Unicode file name translation mode, file:native_name_encoding/0 will return the atom utf8.

-

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the argv option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

+

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the args option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries; see the discussion about raw file names below.

It is worth noting that the file encoding options given when opening a file has nothing to do with the file name encoding convention. You can very well open files containing UTF-8 but having file names in ISO-latin-1 or vice versa.

-- cgit v1.2.3