Don't allow null in filenames

author: Rickard Green <rickard@erlang.org> 2017-09-06 17:00:14 +0200
committer: Rickard Green <rickard@erlang.org> 2017-09-27 17:47:01 +0200
commit: eae496a72e270fd7af411714738e99a7fadfd19b (patch)
tree: 4fed861eddc50d9676e2fe03bb0dc46c6b8f98bf /lib/stdlib
parent: 02fd746c40e829adbe77cc526c7df904698e2534 (diff)
download: otp-eae496a72e270fd7af411714738e99a7fadfd19b.tar.gz
otp-eae496a72e270fd7af411714738e99a7fadfd19b.tar.bz2
otp-eae496a72e270fd7af411714738e99a7fadfd19b.zip
6 files changed, 230 insertions, 5 deletions
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 80c4acffdb..57c4348745 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -45,6 +45,30 @@
 
     <p>For more information about raw filenames, see the
       <seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+    <note>
+      <p>
+	Functionality in this module generally assumes valid input and
+	does not necessarily fail on input that does not use a valid
+	encoding. You can validate the encoding of a filename using
+	<seealso marker="stdlib:filename#validate/1">filename:validate/1</seealso>.
+      </p>
+      <p>
+	File operations used to accept filenames containing
+	null characters (integer value zero). This caused
+	the name to be truncated at the first null character.
+	Filenames containing null characters inside the filename
+	are now <em>rejected</em> and will cause primitive
+	file operations to fail.
+      </p>
+    </note>
+    <warning><p>
+      Currently null characters at the end of the filename
+      will be accepted by primitive file operations. Such
+      filenames are however still documented as invalid. The
+      implementation will also change in the future and
+      reject such filenames.
+    </p></warning>
   </description>
 
   <datatypes>
diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml
index 14fd5ef787..b6028fc066 100644
--- a/lib/stdlib/doc/src/filename.xml
+++ b/lib/stdlib/doc/src/filename.xml
@@ -46,7 +46,10 @@
       filename by removing redundant directory separators, use
       <seealso marker="#join/1"><c>join/1</c></seealso>.</p>
 
-    <p>The module supports raw filenames in the way that if a binary is
+    <p>
+      The module supports
+      <seealso marker="unicode_usage#notes-about-raw-filenames">raw
+      filenames</seealso> in the way that if a binary is
       present, or the filename cannot be interpreted according to the return
       value of <seealso marker="kernel:file#native_name_encoding/0">
       <c>file:native_name_encoding/0</c></seealso>, a raw filename is also
@@ -56,6 +59,30 @@
       (the join operation is performed of course). For more information
       about raw filenames, see the
       <seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+    <note>
+      <p>
+	Functionality in this module generally assumes valid input and
+	does not necessarily fail on input that does not use a valid
+	encoding. You can validate the encoding of a filename using
+	<seealso marker="#validate/1">filename:validate/1</seealso>.
+      </p>
+      <p>
+	File operations used to accept filenames containing
+	null characters (integer value zero). This caused
+	the name to be truncated at the first null character.
+	Filenames containing null characters inside the filename
+	are now <em>rejected</em> and will cause primitive
+	file operations to fail.
+      </p>
+    </note>
+    <warning><p>
+      Currently null characters at the end of the filename
+      will be accepted by primitive file operations. Such
+      filenames are however still documented as invalid. The
+      implementation will also change in the future and
+      reject such filenames.
+    </p></warning>
   </description>
   <datatypes>
       <datatype>
@@ -555,6 +582,55 @@ unsafe</pre>
 ["a:/","msdev","include"]</pre>
       </desc>
     </func>
+
+    <func>
+      <name name="validate" arity="1"/>
+      <fsummary>Validate encoding of filename</fsummary>
+      <desc>
+	<p>
+	  Validates filename encoding. Returns <c>true</c> if
+	  <c><anno>FileName</anno></c> has a valid encoding;
+	  otherwise, returns <c>false</c>.
+	</p>
+	<taglist>
+	  <tag>Ordinary Filename</tag>
+	  <item>
+	    <p>
+	      Type: <c><anno>FileName</anno> = </c><seealso marker="kernel:file#type-name"><c>file:name()</c></seealso>
+	    </p>
+	    <p>
+	      Validates encoding against the
+	      <seealso marker="kernel:file#native_name_encoding/0">native file
+	      name encoding</seealso>, and the
+	      capabilities of the operating system used.
+	      Regardless of configuration and OS, null
+	      characters (integer value zero) will be
+	      rejected by the validation (even when only
+	      present at the end of the filename).
+	    </p>
+	  </item>
+	  <tag><seealso marker="unicode_usage#notes-about-raw-filenames">Raw
+	  Filename</seealso></tag>
+	  <item>
+	    <p>
+	      Type: <c><anno>FileName</anno> = binary()</c>
+	    </p>
+	    <p>
+	      The encoding will not be interpreted, but
+	      null bytes (integer value zero) will be
+	      rejected by the validation (even when only
+	      present at the end of the filename).
+	    </p>
+	  </item>
+	</taglist>
+	<p>
+	  For information on filename encoding see the documentation
+	  of Unicode filenames in
+	  <seealso marker="stdlib:unicode_usage#unicode_file_names">STDLIB
+	  Users Guide ➜ Using Unicode in Erlang ➜ Unicode Filenames</seealso>.
+	</p>
+      </desc>
+    </func>
   </funcs>
 </erlref>
 
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 26dc46719e..ff1f864e22 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -719,8 +719,8 @@ Eshell V5.10.1  (abort with ^G)
   </section> 
 
   <section>
-    <title>Unicode Filenames</title>
     <marker id="unicode_file_names"/>
+    <title>Unicode Filenames</title>
     <p>Most modern operating systems support Unicode filenames in some way.
       There are many different ways to do this and Erlang by default treats the
       different approaches differently:</p>
@@ -855,8 +855,8 @@ Eshell V5.10.1  (abort with ^G)
     </note>
 
     <section>
-      <title>Notes About Raw Filenames</title>
       <marker id="notes-about-raw-filenames"/>
+      <title>Notes About Raw Filenames</title>
       <p>Raw filenames were introduced together with Unicode filename support
         in ERTS 5.8.2 (Erlang/OTP R14B01). The reason &quot;raw
         filenames&quot; were introduced in the system was
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index 9bf4290916..1c3ab6d274 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -41,6 +41,7 @@
          safe_relative_path/1]).
 -export([find_src/1, find_src/2]). % deprecated
 -export([basedir/2, basedir/3]).
+-export([validate/1]).
 
 %% Undocumented and unsupported exports.
 -export([append/2]).
@@ -1135,3 +1136,72 @@ basedir_os_type() ->
         {win32,_}     -> windows;
         _             -> linux
     end.
+
+%%
+%% validate/1
+%%
+
+-spec validate(FileName) -> boolean() when
+      FileName :: file:name_all().
+
+validate(FileName) when is_binary(FileName) ->
+    %% Raw filename...
+    validate_bin(FileName);
+validate(FileName) when is_list(FileName);
+                        is_atom(FileName) ->
+    validate_list(FileName,
+                  file:native_name_encoding(),
+                  os:type()).
+
+validate_list(FileName, Enc, Os) ->
+    try
+        true = validate_list(FileName, Enc, Os, 0) > 0
+    catch
+        _ : _ -> false
+    end.
+
+validate_list([], _Enc, _Os, Chars) ->
+    Chars;
+validate_list(C, Enc, Os, Chars) when is_integer(C) ->
+    validate_char(C, Enc, Os),
+    Chars+1;
+validate_list(A, Enc, Os, Chars) when is_atom(A) ->
+    validate_list(atom_to_list(A), Enc, Os, Chars);
+validate_list([H|T], Enc, Os, Chars) ->
+    NewChars = validate_list(H, Enc, Os, Chars),
+    validate_list(T, Enc, Os, NewChars).
+
+%% C is always an integer...
+% validate_char(C, _, _) when not is_integer(C) ->
+%     throw(invalid);
+validate_char(C, _, _) when C < 1 ->
+    throw(invalid); %% No negative or null characters...
+validate_char(C, latin1, _) when C > 255 ->
+    throw(invalid);
+validate_char(C, utf8, _) when C >= 16#110000 ->
+    throw(invalid);
+validate_char(C, utf8, {win32, _}) when C > 16#ffff ->
+    throw(invalid); %% invalid win wchar...
+validate_char(_C, utf8, {win32, _}) ->
+    ok; %% Range below is accepted on windows...
+validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF ->
+    throw(invalid); %% invalid unicode range...
+validate_char(_, _, _) ->
+    ok.
+
+validate_bin(Bin) ->
+    %% Raw filename. That is, we do not interpret
+    %% the encoding, but we still do not accept
+    %% null characters...
+    try
+        true = validate_bin(Bin, 0) > 0
+    catch
+        _ : _ -> false
+    end.
+
+validate_bin(<<>>, Bs) ->
+    Bs;
+validate_bin(<<0, _Rest/binary>>, _Bs) ->
+    throw(invalid); %% No null characters allowed...
+validate_bin(<<_B, Rest/binary>>, Bs) ->
+    validate_bin(Rest, Bs+1).
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 3c449d3cb9..41c89270aa 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -107,7 +107,7 @@
                dets]},
   {applications, [kernel]},
   {env, []},
-  {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3",
+  {runtime_dependencies, ["sasl-3.0","kernel-5.4.1","erts-9.1.1","crypto-3.3",
 			  "compiler-5.0"]}
 ]}.
 
diff --git a/lib/stdlib/test/filename_SUITE.erl b/lib/stdlib/test/filename_SUITE.erl
index fc77593bb8..4c82ec1c22 100644
--- a/lib/stdlib/test/filename_SUITE.erl
+++ b/lib/stdlib/test/filename_SUITE.erl
@@ -30,6 +30,7 @@
 -export([pathtype_bin/1,rootname_bin/1,split_bin/1]).
 -export([t_basedir_api/1, t_basedir_xdg/1, t_basedir_windows/1]).
 -export([safe_relative_path/1]).
+-export([validate/1]).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -43,7 +44,8 @@ all() ->
      absname_bin, absname_bin_2,
      {group,p},
      t_basedir_xdg, t_basedir_windows,
-     safe_relative_path].
+     safe_relative_path,
+     validate].
 
 groups() -> 
     [{p, [parallel],
@@ -1011,3 +1013,56 @@ basedir_xdg_def(Type,Home,Name) ->
                         Dir <- ["/usr/local/share/","/usr/share/"]];
         site_config -> [filename:join(["/etc/xdg",Name])]
     end.
+
+validate(Config) when is_list(Config) ->
+    true = filename:validate(blipp),
+    false = filename:validate('bli\0pp'),
+    false = filename:validate('blipp\0'),
+    true = filename:validate("blipp"),
+    false = filename:validate("bli"++[0]++"pp"),
+    false = filename:validate("blipp"++[0]),
+    true = filename:validate(["one ", blipp, "blopp"]),
+    false = filename:validate(["one ", 'bli\0pp', "blopp"]),
+    false = filename:validate(["one ", 'blipp\0', "blopp"]),
+    false = filename:validate(["one ", 'blipp', "blopp\0"]),
+    false = filename:validate([0]),
+    false = filename:validate([]),
+    false = filename:validate([[[]],[[[[],[[[[[[[[]]], '', [[[[[]]]]]]]]]]]]]]),
+    false = filename:validate([16#110000]),
+    false = filename:validate([16#110001]),
+    false = filename:validate([16#110000*2]),
+    case file:native_name_encoding() of
+        latin1 ->
+            true = filename:validate(lists:seq(1, 255)),
+            false = filename:validate([256]);
+        utf8 ->
+            true = filename:validate(lists:seq(1, 16#D7FF)),
+            true = filename:validate(lists:seq(16#E000, 16#FFFF)),
+            true = filename:validate([16#FFFF]),
+            case os:type() of
+                {win32, _} ->
+                    false = filename:validate([16#10000]),
+                    true = filename:validate(lists:seq(16#D800,16#DFFF));
+                _ ->
+                    true = filename:validate([16#10000]),
+                    true = filename:validate([16#10FFFF]),
+                    lists:foreach(fun (C) ->
+                                          false = filename:validate([C])
+                                  end,
+                                  lists:seq(16#D800,16#DFFF))
+            end
+                        
+    end,
+    true = filename:validate(<<1,17,255>>),
+    false = filename:validate(<<1,0,17,255>>),
+    false = filename:validate(<<1,17,255,0>>),
+    false = filename:validate(<<>>),
+    lists:foreach(fun (N) ->
+                          true = filename:validate(N)
+                  end,
+                  code:get_path()),
+    ok.
+
+            
+    
+
author	Rickard Green <rickard@erlang.org>	2017-09-06 17:00:14 +0200
committer	Rickard Green <rickard@erlang.org>	2017-09-27 17:47:01 +0200
commit	eae496a72e270fd7af411714738e99a7fadfd19b (patch)
tree	4fed861eddc50d9676e2fe03bb0dc46c6b8f98bf /lib/stdlib
parent	02fd746c40e829adbe77cc526c7df904698e2534 (diff)
download	otp-eae496a72e270fd7af411714738e99a7fadfd19b.tar.gz otp-eae496a72e270fd7af411714738e99a7fadfd19b.tar.bz2 otp-eae496a72e270fd7af411714738e99a7fadfd19b.zip