From bb0b43eae854125688f3143e53c8974cafed4ad2 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 6 Sep 2017 17:00:14 +0200 Subject: Don't allow null chars in various strings Various places that now reject null chars inside strings - Primitive file operations reject it in filenames. - Primitive environment variable operations reject it in names and values. - os:cmd() reject it in its input. Also '=' characters are rejected by primitive environment variable operations in environment variable names. Documentation has been updated to document null characters in these types of data as invalid. Currently these operations accept null chars at the end of strings, but that will change in the future. --- lib/kernel/doc/src/file.xml | 43 ++++++++++++-- lib/kernel/doc/src/os.xml | 109 ++++++++++++++++++++++++++++++++++++ lib/kernel/src/kernel.app.src | 2 +- lib/kernel/src/os.erl | 63 ++++++++++++++------- lib/kernel/test/file_name_SUITE.erl | 2 + lib/kernel/test/os_SUITE.erl | 14 ++++- 6 files changed, 204 insertions(+), 29 deletions(-) (limited to 'lib/kernel') diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml index b674b3ca93..2ab35b9b05 100644 --- a/lib/kernel/doc/src/file.xml +++ b/lib/kernel/doc/src/file.xml @@ -41,7 +41,7 @@

Regarding filename encoding, the Erlang VM can operate in two modes. The current mode can be queried using function - native_name_encoding/0. + native_name_encoding/0. It returns latin1 or utf8.

In latin1 mode, the Erlang VM does not change the @@ -59,7 +59,7 @@ terminal supports UTF-8, otherwise latin1. The default can be overridden using +fnl (to force latin1 mode) or +fnu (to force utf8 mode) when starting - erts:erl.

+ erl.

On operating systems with transparent naming, files can be inconsistently named, for example, some files are encoded in UTF-8 while @@ -81,6 +81,23 @@

See also section Notes About Raw Filenames in the STDLIB User's Guide.

+

+ File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now rejected and will cause primitive + file operations fail. +

+

+ Currently null characters at the end of the filename + will be accepted by primitive file operations. Such + filenames are however still documented as invalid. The + implementation will also change in the future and + reject such filenames. +

+ @@ -96,9 +113,21 @@ + +

+ See also the documentation of the + name_all() type. +

+
+ +

+ See also the documentation of the + name_all() type. +

+
@@ -112,21 +141,23 @@

If VM is in Unicode filename mode, string() and char() - are allowed to be > 255. + are allowed to be > 255. See also the documentation of the + name_all() type.

-

If VM is in Unicode filename mode, string() and char() +

If VM is in Unicode filename mode, characters are allowed to be > 255. RawFilename is a filename not subject to Unicode translation, meaning that it can contain characters not conforming to the Unicode encoding expected from the file system (that is, non-UTF-8 characters although the VM is started - in Unicode filename mode). + in Unicode filename mode). Null characters (integer value zero) + are not allowed in filenames (not even at the end).

@@ -1825,7 +1856,7 @@ f.txt: {person, "kalle", 25}.

The functions in the module file usually treat binaries as raw filenames, that is, they are passed "as is" even when the encoding of the binary does not agree with - native_name_encoding(). + native_name_encoding(). However, this function expects binaries to be encoded according to the value returned by native_name_encoding().

Typical error reasons are:

diff --git a/lib/kernel/doc/src/os.xml b/lib/kernel/doc/src/os.xml index 0e9add4161..0a08e2c78a 100644 --- a/lib/kernel/doc/src/os.xml +++ b/lib/kernel/doc/src/os.xml @@ -36,8 +36,99 @@ only run on a specific platform. On the other hand, with careful use, these functions can be of help in enabling a program to run on most platforms.

+ + +

+ File operations used to accept filenames containing + null characters (integer value zero). This caused + the name to be truncated and in some cases arguments + to primitive operations to be mixed up. Filenames + containing null characters inside the filename + are now rejected and will cause primitive + file operations to fail. +

+

+ Also environment variable operations used to accept + names and values of environment variables containing + null characters (integer value zero). This caused + operations to silently produce erroneous results. + Environment variable names and values containing + null characters inside the name or value are now + rejected and will cause environment variable + operations to fail. +

+
+ +

+ Currently null characters at the end of filenames, + environment variable names and values will be accepted + by the primitive operations. Such filenames, environment + variable names and values are however still documented as + invalid. The implementation will also change in the + future and reject such filenames, environment variable + names and values. +

+
+ + + + +

A string containing valid characters on the specific + OS for environment variable names using + file:native_name_encoding() + encoding. Note that specifically null characters (integer + value zero) and $= characters are not allowed. + However, note that not all invalid characters necessarily + will cause the primitiv operations to fail, but may instead + produce invalid results. +

+
+
+ + + +

A string containing valid characters on the specific + OS for environment variable values using + file:native_name_encoding() + encoding. Note that specifically null characters (integer + value zero) are not allowed. However, note that not all + invalid characters necessarily will cause the primitiv + operations to fail, but may instead produce invalid results. +

+
+
+ + + +

+ Assuming that environment variables has been correctly + set, a strings containing valid characters on the specific + OS for environment variable names and values using + file:native_name_encoding() + encoding. The first $= characters appearing in + the string separates environment variable name (on the + left) from environment variable value (on the right). +

+
+
+ + + +

All characters needs to be valid characters on the + specific OS using + file:native_name_encoding() + encoding. Note that specifically null characters (integer + value zero) are not allowed. However, note that not all + invalid characters not necessarily will cause + os:cmd/1 + to fail, but may instead produce invalid results. +

+
+
+
+ @@ -49,6 +140,15 @@ result as a string. This function is a replacement of the previous function unix:cmd/1; they are equivalent on a Unix platform.

+

Previous implementation used to allow all characters + as long as they were integer values greater than or equal to zero. + This sometimes lead to unwanted results since null characters + (integer value zero) often are interpreted as string termination. + Current implementation still accepts null characters at the end + of Command even though the documentation + states that no null characters are allowed. This will however + be changed in the future so that no null characters at all will + be accepted.

Examples:

LsOut = os:cmd("ls"), % on unix platform @@ -152,6 +252,15 @@ DirOut = os:cmd("dir"), % on Win32 platform

On Unix platforms, the environment is set using UTF-8 encoding if Unicode filename translation is in effect. On Windows, the environment is set using wide character interfaces.

+ +

+ VarName is not allowed to contain + an $= character. Previous implementations used + to just let the $= character through which + silently caused erroneous results. Current implementation + will instead throw a badarg exception. +

+
diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src index 2a88cc7e26..080b11fc4d 100644 --- a/lib/kernel/src/kernel.app.src +++ b/lib/kernel/src/kernel.app.src @@ -120,6 +120,6 @@ {applications, []}, {env, [{error_logger, tty}]}, {mod, {kernel, []}}, - {runtime_dependencies, ["erts-9.1", "stdlib-3.4", "sasl-3.0"]} + {runtime_dependencies, ["erts-10.0", "stdlib-3.5", "sasl-3.0"]} ] }. diff --git a/lib/kernel/src/os.erl b/lib/kernel/src/os.erl index 0250783632..3675e923a3 100644 --- a/lib/kernel/src/os.erl +++ b/lib/kernel/src/os.erl @@ -25,6 +25,8 @@ -include("file.hrl"). +-export_type([env_var_name/0, env_var_value/0, env_var_name_value/0, command_input/0]). + %%% BIFs -export([getenv/0, getenv/1, getenv/2, getpid/0, @@ -32,21 +34,29 @@ putenv/2, set_signal/2, system_time/0, system_time/1, timestamp/0, unsetenv/1]). --spec getenv() -> [string()]. +-type env_var_name() :: nonempty_string(). + +-type env_var_value() :: string(). + +-type env_var_name_value() :: nonempty_string(). + +-type command_input() :: atom() | io_lib:chars(). + +-spec getenv() -> [env_var_name_value()]. getenv() -> erlang:nif_error(undef). -spec getenv(VarName) -> Value | false when - VarName :: string(), - Value :: string(). + VarName :: env_var_name(), + Value :: env_var_value(). getenv(_) -> erlang:nif_error(undef). -spec getenv(VarName, DefaultValue) -> Value when - VarName :: string(), - DefaultValue :: string(), - Value :: string(). + VarName :: env_var_name(), + DefaultValue :: env_var_value(), + Value :: env_var_value(). getenv(VarName, DefaultValue) -> case os:getenv(VarName) of @@ -75,8 +85,8 @@ perf_counter(Unit) -> erlang:convert_time_unit(os:perf_counter(), perf_counter, Unit). -spec putenv(VarName, Value) -> true when - VarName :: string(), - Value :: string(). + VarName :: env_var_name(), + Value :: env_var_value(). putenv(_, _) -> erlang:nif_error(undef). @@ -99,7 +109,7 @@ timestamp() -> erlang:nif_error(undef). -spec unsetenv(VarName) -> true when - VarName :: string(). + VarName :: env_var_name(). unsetenv(_) -> erlang:nif_error(undef). @@ -232,10 +242,9 @@ extensions() -> %% Executes the given command in the default shell for the operating system. -spec cmd(Command) -> string() when - Command :: atom() | io_lib:chars(). + Command :: os:command_input(). cmd(Cmd) -> - validate(Cmd), - {SpawnCmd, SpawnOpts, SpawnInput, Eot} = mk_cmd(os:type(), Cmd), + {SpawnCmd, SpawnOpts, SpawnInput, Eot} = mk_cmd(os:type(), validate(Cmd)), Port = open_port({spawn, SpawnCmd}, [binary, stderr_to_stdout, stream, in, hide | SpawnOpts]), MonRef = erlang:monitor(port, Port), @@ -255,8 +264,6 @@ mk_cmd({win32,Wtype}, Cmd) -> {Cspec,_} -> lists:concat([Cspec," /c",Cmd]) end, {Command, [], [], <<>>}; -mk_cmd(OsType,Cmd) when is_atom(Cmd) -> - mk_cmd(OsType, atom_to_list(Cmd)); mk_cmd(_,Cmd) -> %% Have to send command in like this in order to make sh commands like %% cd and ulimit available @@ -279,17 +286,33 @@ mk_cmd(_,Cmd) -> <<$\^D>>}. validate(Atom) when is_atom(Atom) -> - ok; + validate(atom_to_list(Atom)); validate(List) when is_list(List) -> - validate1(List). + case validate1(List) of + false -> + List; + true -> + %% Had zeros at end; remove them... + string:trim(List, trailing, [0]) + end. -validate1([C|Rest]) when is_integer(C) -> +validate1([0|Rest]) -> + validate2(Rest); +validate1([C|Rest]) when is_integer(C), C > 0 -> validate1(Rest); validate1([List|Rest]) when is_list(List) -> - validate1(List), - validate1(Rest); + validate1(List) or validate1(Rest); validate1([]) -> - ok. + false. + +%% Ensure that the rest is zero only... +validate2([]) -> + true; +validate2([0|Rest]) -> + validate2(Rest); +validate2([List|Rest]) when is_list(List) -> + validate2(List), + validate2(Rest). get_data(Port, MonRef, Eot, Sofar) -> receive diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 899102c908..f23529fec9 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -302,7 +302,9 @@ check_normal(Mod) -> {ok, BC} = Mod:read(FD,1024), ok = file:close(FD) end || {regular,Name,Content} <- NormalDir ], + {error, badarg} = Mod:rename("fil1\0tmp_fil2","tmp_fil1"), Mod:rename("fil1","tmp_fil1"), + {error, badarg} = Mod:read_file("tmp_fil1\0.txt"), {ok, <<"fil1">>} = Mod:read_file("tmp_fil1"), {error,enoent} = Mod:read_file("fil1"), Mod:rename("tmp_fil1","fil1"), diff --git a/lib/kernel/test/os_SUITE.erl b/lib/kernel/test/os_SUITE.erl index 53a9e168ef..8056321448 100644 --- a/lib/kernel/test/os_SUITE.erl +++ b/lib/kernel/test/os_SUITE.erl @@ -22,7 +22,8 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, init_per_testcase/2,end_per_testcase/2]). --export([space_in_cwd/1, quoting/1, cmd_unicode/1, space_in_name/1, bad_command/1, +-export([space_in_cwd/1, quoting/1, cmd_unicode/1, + null_in_command/1, space_in_name/1, bad_command/1, find_executable/1, unix_comment_in_command/1, deep_list_command/1, large_output_command/1, background_command/0, background_command/1, message_leak/1, close_stdin/0, close_stdin/1, perf_counter_api/1]). @@ -34,7 +35,8 @@ suite() -> {timetrap,{minutes,1}}]. all() -> - [space_in_cwd, quoting, cmd_unicode, space_in_name, bad_command, + [space_in_cwd, quoting, cmd_unicode, null_in_command, + space_in_name, bad_command, find_executable, unix_comment_in_command, deep_list_command, large_output_command, background_command, message_leak, close_stdin, perf_counter_api]. @@ -125,6 +127,14 @@ cmd_unicode(Config) when is_list(Config) -> [] = receive_all(), ok. +null_in_command(Config) -> + {Ok, Error} = case os:type() of + {win32,_} -> {"dir", "di\0r"}; + _ -> {"ls", "l\0s"} + end, + true = is_list(try os:cmd(Ok) catch Class0:_ -> Class0 end), + error = try os:cmd(Error) catch Class1:_ -> Class1 end, + ok. %% Test that program with a space in its name can be executed. space_in_name(Config) when is_list(Config) -> -- cgit v1.2.3