aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2017-09-06 17:00:14 +0200
committerRickard Green <[email protected]>2017-10-11 16:59:40 +0200
commitbb0b43eae854125688f3143e53c8974cafed4ad2 (patch)
tree648c103b0777d951b0e4a44b6615753d2748a2ea
parent02fd746c40e829adbe77cc526c7df904698e2534 (diff)
downloadotp-bb0b43eae854125688f3143e53c8974cafed4ad2.tar.gz
otp-bb0b43eae854125688f3143e53c8974cafed4ad2.tar.bz2
otp-bb0b43eae854125688f3143e53c8974cafed4ad2.zip
Don't allow null chars in various strings
Various places that now reject null chars inside strings - Primitive file operations reject it in filenames. - Primitive environment variable operations reject it in names and values. - os:cmd() reject it in its input. Also '=' characters are rejected by primitive environment variable operations in environment variable names. Documentation has been updated to document null characters in these types of data as invalid. Currently these operations accept null chars at the end of strings, but that will change in the future.
-rw-r--r--erts/doc/src/erlang.xml14
-rw-r--r--erts/emulator/beam/erl_alloc.types1
-rw-r--r--erts/emulator/beam/erl_bif_os.c75
-rw-r--r--erts/emulator/beam/erl_bif_port.c175
-rw-r--r--erts/emulator/beam/erl_unicode.c97
-rw-r--r--erts/emulator/beam/sys.h48
-rw-r--r--erts/preloaded/ebin/erlang.beambin106344 -> 106352 bytes
-rw-r--r--erts/preloaded/src/erlang.erl2
-rw-r--r--erts/preloaded/src/erts.app.src2
-rw-r--r--lib/kernel/doc/src/file.xml43
-rw-r--r--lib/kernel/doc/src/os.xml109
-rw-r--r--lib/kernel/src/kernel.app.src2
-rw-r--r--lib/kernel/src/os.erl63
-rw-r--r--lib/kernel/test/file_name_SUITE.erl2
-rw-r--r--lib/kernel/test/os_SUITE.erl14
-rw-r--r--lib/stdlib/doc/src/filelib.xml24
-rw-r--r--lib/stdlib/doc/src/filename.xml30
-rw-r--r--lib/stdlib/doc/src/unicode_usage.xml8
-rw-r--r--lib/stdlib/src/filename.erl102
-rw-r--r--lib/stdlib/src/stdlib.app.src2
20 files changed, 691 insertions, 122 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 2465f49581..aa24eeedd5 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -3683,6 +3683,12 @@ RealSystem = system + MissedSystem</code>
</item>
<tag><c>{env, <anno>Env</anno>}</c></tag>
<item>
+ <p>
+ Types:<br/>
+ &nbsp;&nbsp;<c><anno>Name</anno> = </c><seealso marker="kernel:os#type-env_var_name"><c>os:env_var_name()</c></seealso><br/>
+ &nbsp;&nbsp;<c><anno>Val</anno> = </c><seealso marker="kernel:os#type-env_var_value"><c>os:env_var_value()</c></seealso><c> | false</c><br/>
+ &nbsp;&nbsp;<c>Env = [{<anno>Name</anno>, <anno>Val</anno>}]</c>
+ </p>
<p>Only valid for <c>{spawn, <anno>Command</anno>}</c>, and
<c>{spawn_executable, <anno>FileName</anno>}</c>.
The environment of the started process is extended using
@@ -3697,7 +3703,13 @@ RealSystem = system + MissedSystem</code>
exception is <c><anno>Val</anno></c> being the atom
<c>false</c> (in analogy with
<seealso marker="kernel:os#getenv/1"><c>os:getenv/1</c></seealso>,
- which removes the environment variable.</p>
+ which removes the environment variable.
+ </p>
+ <p>
+ For information about encoding requirements, see documentation
+ of the types for <c><anno>Name</anno></c> and
+ <c><anno>Val</anno></c>.
+ </p>
</item>
<tag><c>{args, [ string() | binary() ]}</c></tag>
<item>
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 50a1d97dd5..f8c089247e 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -285,6 +285,7 @@ type MREF_ENT STANDARD SYSTEM magic_ref_entry
type MREF_TAB_BKTS STANDARD SYSTEM magic_ref_table_buckets
type MREF_TAB LONG_LIVED SYSTEM magic_ref_table
type MINDIRECTION FIXED_SIZE SYSTEM magic_indirection
+type OPEN_PORT_ENV TEMPORARY SYSTEM open_port_env
+if threads_no_smp
# Need thread safe allocs, but std_alloc and fix_alloc are not;
diff --git a/erts/emulator/beam/erl_bif_os.c b/erts/emulator/beam/erl_bif_os.c
index 5cd172c26f..910325a2f4 100644
--- a/erts/emulator/beam/erl_bif_os.c
+++ b/erts/emulator/beam/erl_bif_os.c
@@ -37,6 +37,8 @@
#include "dist.h"
#include "erl_version.h"
+static int check_env_name(char *name);
+
/*
* Return the pid for the Erlang process in the host OS.
*/
@@ -101,8 +103,10 @@ BIF_RETTYPE os_getenv_1(BIF_ALIST_1)
key_str = erts_convert_filename_to_native(BIF_ARG_1,buf,STATIC_BUF_SIZE,
ERTS_ALC_T_TMP,1,0,&len);
- if (!key_str) {
- BIF_ERROR(p, BADARG);
+ if (!check_env_name(key_str)) {
+ if (key_str && key_str != &buf[0])
+ erts_free(ERTS_ALC_T_TMP, key_str);
+ BIF_ERROR(p, BADARG);
}
if (key_str != &buf[0])
@@ -143,25 +147,20 @@ BIF_RETTYPE os_putenv_2(BIF_ALIST_2)
{
char def_buf_key[STATIC_BUF_SIZE];
char def_buf_value[STATIC_BUF_SIZE];
- char *key_buf, *value_buf;
+ char *key_buf = NULL, *value_buf = NULL;
key_buf = erts_convert_filename_to_native(BIF_ARG_1,def_buf_key,
STATIC_BUF_SIZE,
ERTS_ALC_T_TMP,0,0,NULL);
- if (!key_buf) {
- BIF_ERROR(BIF_P, BADARG);
- }
+ if (!check_env_name(key_buf))
+ goto badarg;
+
value_buf = erts_convert_filename_to_native(BIF_ARG_2,def_buf_value,
STATIC_BUF_SIZE,
ERTS_ALC_T_TMP,1,0,
NULL);
- if (!value_buf) {
- if (key_buf != def_buf_key) {
- erts_free(ERTS_ALC_T_TMP, key_buf);
- }
- BIF_ERROR(BIF_P, BADARG);
- }
-
+ if (!value_buf)
+ goto badarg;
if (erts_sys_putenv(key_buf, value_buf)) {
if (key_buf != def_buf_key) {
@@ -179,6 +178,13 @@ BIF_RETTYPE os_putenv_2(BIF_ALIST_2)
erts_free(ERTS_ALC_T_TMP, value_buf);
}
BIF_RET(am_true);
+
+badarg:
+ if (key_buf && key_buf != def_buf_key)
+ erts_free(ERTS_ALC_T_TMP, key_buf);
+ if (value_buf && value_buf != def_buf_value)
+ erts_free(ERTS_ALC_T_TMP, value_buf);
+ BIF_ERROR(BIF_P, BADARG);
}
BIF_RETTYPE os_unsetenv_1(BIF_ALIST_1)
@@ -188,20 +194,21 @@ BIF_RETTYPE os_unsetenv_1(BIF_ALIST_1)
key_buf = erts_convert_filename_to_native(BIF_ARG_1,buf,STATIC_BUF_SIZE,
ERTS_ALC_T_TMP,0,0,NULL);
- if (!key_buf) {
- BIF_ERROR(BIF_P, BADARG);
- }
+ if (!check_env_name(key_buf))
+ goto badarg;
+
+ if (erts_sys_unsetenv(key_buf))
+ goto badarg;
- if (erts_sys_unsetenv(key_buf)) {
- if (key_buf != buf) {
- erts_free(ERTS_ALC_T_TMP, key_buf);
- }
- BIF_ERROR(BIF_P, BADARG);
- }
if (key_buf != buf) {
erts_free(ERTS_ALC_T_TMP, key_buf);
}
BIF_RET(am_true);
+
+badarg:
+ if (key_buf && key_buf != buf)
+ erts_free(ERTS_ALC_T_TMP, key_buf);
+ BIF_ERROR(BIF_P, BADARG);
}
BIF_RETTYPE os_set_signal_2(BIF_ALIST_2) {
@@ -217,3 +224,27 @@ BIF_RETTYPE os_set_signal_2(BIF_ALIST_2) {
error:
BIF_ERROR(BIF_P, BADARG);
}
+
+static int
+check_env_name(char *raw_name)
+{
+ byte *c = (byte *) raw_name;
+ int encoding;
+
+ if (!c)
+ return 0;
+
+ encoding = erts_get_native_filename_encoding();
+
+ if (erts_raw_env_char_is_7bit_ascii_char('\0', c, encoding))
+ return 0; /* Do not allow empty name... */
+
+ /* Verify no '=' characters in variable name... */
+ do {
+ if (erts_raw_env_char_is_7bit_ascii_char('=', c, encoding))
+ return 0;
+ c = erts_raw_env_next_char(c, encoding);
+ } while (!erts_raw_env_char_is_7bit_ascii_char('\0', c, encoding));
+
+ return 1; /* Seems ok... */
+}
diff --git a/erts/emulator/beam/erl_bif_port.c b/erts/emulator/beam/erl_bif_port.c
index ff03151619..ad8be8053f 100644
--- a/erts/emulator/beam/erl_bif_port.c
+++ b/erts/emulator/beam/erl_bif_port.c
@@ -45,7 +45,7 @@
#include "dtrace-wrapper.h"
static Port *open_port(Process* p, Eterm name, Eterm settings, int *err_typep, int *err_nump);
-static byte* convert_environment(Process* p, Eterm env);
+static char* convert_environment(Eterm env);
static char **convert_args(Eterm);
static void free_args(char **);
@@ -726,11 +726,11 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_typep, int *err_nump)
goto badarg;
}
} else if (option == am_env) {
- byte* bytes;
- if ((bytes = convert_environment(p, *tp)) == NULL) {
+ if (opts.envir) /* ignore previous env option... */
+ erts_free(ERTS_ALC_T_OPEN_PORT_ENV, opts.envir);
+ opts.envir = convert_environment(*tp);
+ if (!opts.envir)
goto badarg;
- }
- opts.envir = (char *) bytes;
} else if (option == am_args) {
char **av;
char **oav = opts.argv;
@@ -964,6 +964,8 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_typep, int *err_nump)
erts_atomic32_read_bor_relb(&port->state, sflgs);
do_return:
+ if (opts.envir)
+ erts_free(ERTS_ALC_T_OPEN_PORT_ENV, opts.envir);
if (name_buf)
erts_free(ERTS_ALC_T_TMP, (void *) name_buf);
if (opts.argv) {
@@ -1029,74 +1031,129 @@ static void free_args(char **av)
}
erts_free(ERTS_ALC_T_TMP, av);
}
-
-static byte* convert_environment(Process* p, Eterm env)
+#ifdef DEBUG
+#define ERTS_CONV_ENV_BUF_EXTRA 2
+#else
+#define ERTS_CONV_ENV_BUF_EXTRA 1024
+#endif
+
+static char* convert_environment(Eterm env)
{
- Eterm all;
- Eterm* temp_heap;
- Eterm* hp;
- Uint heap_size;
- Sint n;
- Sint size;
+ /*
+ * Returns environment buffer in memory allocated
+ * as ERTS_ALC_T_OPEN_PORT_ENV. Caller *needs*
+ * to deallocate...
+ */
+
+ Sint size, alloc_size;
byte* bytes;
int encoding = erts_get_native_filename_encoding();
- if ((n = erts_list_length(env)) < 0) {
- return NULL;
- }
- heap_size = 2*(5*n+1);
- temp_heap = hp = (Eterm *) erts_alloc(ERTS_ALC_T_TMP, heap_size*sizeof(Eterm));
- bytes = NULL; /* Indicating error */
+ alloc_size = ERTS_CONV_ENV_BUF_EXTRA;
+ bytes = erts_alloc(ERTS_ALC_T_OPEN_PORT_ENV,
+ alloc_size);
+ size = 0;
- /*
- * All errors below are handled by jumping to 'done', to ensure that the memory
- * gets deallocated. Do NOT return directly from this function.
- */
+ /* ERTS_CONV_ENV_BUF_EXTRA >= for end delimiter... */
+ ERTS_CT_ASSERT(ERTS_CONV_ENV_BUF_EXTRA >= 2);
- all = CONS(hp, make_small(0), NIL);
- hp += 2;
+ while (is_list(env)) {
+ Sint var_sz, val_sz, need;
+ byte *str, *limit;
+ Eterm tmp, *tp, *consp;
- while(is_list(env)) {
- Eterm tmp;
- Eterm* tp;
+ consp = list_val(env);
+ tmp = CAR(consp);
+ if (is_not_tuple_arity(tmp, 2))
+ goto error;
- tmp = CAR(list_val(env));
- if (is_not_tuple_arity(tmp, 2)) {
- goto done;
- }
tp = tuple_val(tmp);
- tmp = CONS(hp, make_small(0), NIL);
- hp += 2;
- if (tp[2] != am_false) {
- tmp = CONS(hp, tp[2], tmp);
- hp += 2;
- }
- tmp = CONS(hp, make_small('='), tmp);
- hp += 2;
- tmp = CONS(hp, tp[1], tmp);
- hp += 2;
- all = CONS(hp, tmp, all);
- hp += 2;
- env = CDR(list_val(env));
- }
- if (is_not_nil(env)) {
- goto done;
- }
- if ((size = erts_native_filename_need(all,encoding)) < 0) {
- goto done;
+ /* Check encoding of env variable... */
+ if (is_not_list(tp[1]))
+ goto error;
+ var_sz = erts_native_filename_need(tp[1], encoding);
+ if (var_sz <= 0)
+ goto error;
+ /* Check encoding of value... */
+ if (tp[2] == am_false || is_nil(tp[2]))
+ val_sz = 0;
+ else if (is_not_list(tp[2]))
+ goto error;
+ else {
+ val_sz = erts_native_filename_need(tp[2], encoding);
+ if (val_sz < 0)
+ goto error;
+ }
+
+ /* Ensure enough memory... */
+ need = size;
+ need += var_sz + val_sz;
+ /* '=' and '\0' */
+ need += 2 * erts_raw_env_7bit_ascii_char_need(encoding);
+ if (need > alloc_size) {
+ alloc_size = (need - alloc_size) + alloc_size;
+ alloc_size += ERTS_CONV_ENV_BUF_EXTRA;
+ bytes = erts_realloc(ERTS_ALC_T_OPEN_PORT_ENV,
+ bytes, alloc_size);
+ }
+
+ /* Write environment variable name... */
+ str = bytes + size;
+ erts_native_filename_put(tp[1], encoding, str);
+ /* empty variable name is not allowed... */
+ if (erts_raw_env_char_is_7bit_ascii_char('\0', str, encoding))
+ goto error;
+
+ /*
+ * Drop null characters at the end and verify that we do
+ * not have any '=' characters in the name...
+ */
+ limit = str + var_sz;
+ while (str < limit) {
+ if (erts_raw_env_char_is_7bit_ascii_char('\0', str, encoding))
+ break;
+ if (erts_raw_env_char_is_7bit_ascii_char('=', str, encoding))
+ goto error;
+ str = erts_raw_env_next_char(str, encoding);
+ }
+
+ /* Write the equals sign... */
+ str = erts_raw_env_7bit_ascii_char_put('=', str, encoding);
+
+ /* Write the value... */
+ if (val_sz > 0) {
+ limit = str + val_sz;
+ erts_native_filename_put(tp[2], encoding, str);
+ while (str < limit) {
+ if (erts_raw_env_char_is_7bit_ascii_char('\0', str, encoding))
+ break;
+ str = erts_raw_env_next_char(str, encoding);
+ }
+ }
+
+ /* Delimit... */
+ str = erts_raw_env_7bit_ascii_char_put('\0', str, encoding);
+
+ size = str - bytes;
+ ASSERT(size <= alloc_size);
+
+ env = CDR(consp);
}
- /*
- * Put the result in a binary (no risk for a memory leak that way).
- */
- (void) erts_new_heap_binary(p, NULL, size, &bytes);
- erts_native_filename_put(all,encoding,bytes);
+ /* End delimit... */
+ (void) erts_raw_env_7bit_ascii_char_put('\0', &bytes[size], encoding);
+
+ if (is_nil(env))
+ return (char *) bytes;
+
+error:
+
+ if (bytes)
+ erts_free(ERTS_ALC_T_OPEN_PORT_ENV, bytes);
- done:
- erts_free(ERTS_ALC_T_TMP, temp_heap);
- return bytes;
+ return (char *) NULL; /* error... */
}
/* ------------ decode_packet() and friends: */
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 2d1d1443a7..b7a5c45fea 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1988,7 +1988,7 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
is_list(name) ||
(allow_empty && is_nil(name))) {
Sint need;
- if ((need = erts_native_filename_need(name,encoding)) < 0) {
+ if ((need = erts_native_filename_need(name, encoding)) < 0) {
return NULL;
}
if (encoding == ERL_FILENAME_WIN_WCHAR) {
@@ -2152,12 +2152,13 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
}
-Sint erts_native_filename_need(Eterm ioterm, int encoding)
+Sint erts_native_filename_need(Eterm ioterm, int encoding)
{
Eterm *objp;
Eterm obj;
DECLARE_ESTACK(stack);
Sint need = 0;
+ int seen_null = 0;
if (is_atom(ioterm)) {
Atom* ap;
@@ -2194,6 +2195,22 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
default:
need = -1;
}
+ /*
+ * Do not allow null in
+ * the middle of filenames
+ */
+ if (need > 0) {
+ byte *name = ap->name;
+ int len = ap->len;
+ for (i = 0; i < len; i++) {
+ if (name[i] == 0)
+ seen_null = 1;
+ else if (seen_null) {
+ need = -1;
+ break;
+ }
+ }
+ }
DESTROY_ESTACK(stack);
return need;
}
@@ -2224,6 +2241,16 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
if (is_small(obj)) { /* Always small */
for(;;) {
Uint x = unsigned_val(obj);
+ /*
+ * Do not allow null in
+ * the middle of filenames
+ */
+ if (x == 0)
+ seen_null = 1;
+ else if (seen_null) {
+ DESTROY_ESTACK(stack);
+ return ((Sint) -1);
+ }
switch (encoding) {
case ERL_FILENAME_LATIN1:
if (x > 255) {
@@ -2497,6 +2524,38 @@ void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars)
}
/*
+ * *** Requirements on Raw Filename Format ***
+ *
+ * These requirements are due to the 'filename' module
+ * in stdlib. This since it is documented that it
+ * should be able to operate on raw filenames as well
+ * as ordinary filenames.
+ *
+ * A raw filename *must* be a byte sequence where:
+ * 1. Codepoints 0-127 (7-bit ascii) *must* be encoded
+ * as a byte with the corresponding value. That is,
+ * the most significant bit in the byte encoding the
+ * codepoint is never set.
+ * 2. Codepoints greater than 127 *must* be encoded
+ * with the most significant bit set in *every* byte
+ * encoding it.
+ *
+ * Latin1 and UTF-8 meet these requirements while
+ * UTF-16 and UTF-32 don't.
+ *
+ * On Windows filenames are natively stored as malformed
+ * UTF-16LE (lonely surrogates may appear). A more correct
+ * description than UTF-16 would be an array of 16-bit
+ * words... In order to meet the requirements of the
+ * raw file format we convert the malformed UTF-16LE to
+ * malformed UTF-8 which meet the requirements.
+ *
+ * Note that these requirements are today only OTP
+ * internal (erts-stdlib internal) requirements that
+ * could be changed.
+ */
+
+/*
* This internal bif converts a filename to whatever format is suitable for the file driver
* It also adds zero termination so that prim_file needn't bother with the character encoding
* of the file driver
@@ -2507,6 +2566,12 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
Sint need;
Eterm bin_term;
byte* bin_p;
+
+ /*
+ * See comment on "Requirements on Raw Filename Format"
+ * above.
+ */
+
/* Prim file explicitly does not allow atoms, although we could
very well cope with it. Instead of letting 'file' handle them,
it would probably be more efficient to handle them here. Subject to
@@ -2515,6 +2580,7 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
BIF_ERROR(BIF_P,BADARG);
}
if (is_binary(BIF_ARG_1)) {
+ int seen_null = 0;
byte *temp_alloc = NULL;
byte *bytes;
byte *err_pos;
@@ -2524,10 +2590,18 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
size = binary_size(BIF_ARG_1);
bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
if (encoding != ERL_FILENAME_WIN_WCHAR) {
+ Uint i;
/*Add 0 termination only*/
bin_term = new_binary(BIF_P, NULL, size+1);
bin_p = binary_bytes(bin_term);
- memcpy(bin_p,bytes,size);
+ for (i = 0; i < size; i++) {
+ /* Don't allow null in the middle of filenames... */
+ if (bytes[i] == 0)
+ seen_null = 1;
+ else if (seen_null)
+ goto bin_name_error;
+ bin_p[i] = bytes[i];
+ }
bin_p[size]=0;
erts_free_aligned_binary_bytes(temp_alloc);
BIF_RET(bin_term);
@@ -2541,6 +2615,11 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
bin_term = new_binary(BIF_P, 0, (size+1)*2);
bin_p = binary_bytes(bin_term);
while (size--) {
+ /* Don't allow null in the middle of filenames... */
+ if (*bytes == 0)
+ seen_null = 1;
+ else if (seen_null)
+ goto bin_name_error;
*bin_p++ = *bytes++;
*bin_p++ = 0;
}
@@ -2558,11 +2637,14 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
bin_p[num_chars*2+1] = 0;
erts_free_aligned_binary_bytes(temp_alloc);
BIF_RET(bin_term);
+ bin_name_error:
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(BIF_P,BADARG);
} /* binary */
- if ((need = erts_native_filename_need(BIF_ARG_1,encoding)) < 0) {
- BIF_ERROR(BIF_P,BADARG);
+ if ((need = erts_native_filename_need(BIF_ARG_1, encoding)) < 0) {
+ BIF_ERROR(BIF_P,BADARG);
}
if (encoding == ERL_FILENAME_WIN_WCHAR) {
need += 2;
@@ -2596,6 +2678,11 @@ BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1)
Eterm ret;
int mac = 0;
+ /*
+ * See comment on "Requirements on Raw Filename Format"
+ * above.
+ */
+
if (is_not_binary(BIF_ARG_1)) {
BIF_ERROR(BIF_P,BADARG);
}
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 704d567337..ce751acdd3 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -1341,4 +1341,52 @@ int erts_get_printable_characters(void);
void erts_init_sys_common_misc(void);
+ERTS_GLB_INLINE Sint erts_raw_env_7bit_ascii_char_need(int encoding);
+ERTS_GLB_INLINE byte *erts_raw_env_7bit_ascii_char_put(byte c, byte *p,
+ int encoding);
+ERTS_GLB_INLINE int erts_raw_env_char_is_7bit_ascii_char(byte c, byte *p,
+ int encoding);
+ERTS_GLB_INLINE byte *erts_raw_env_next_char(byte *p, int encoding);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE Sint
+erts_raw_env_7bit_ascii_char_need(int encoding)
+{
+ return (encoding == ERL_FILENAME_WIN_WCHAR) ? 2 : 1;
+}
+
+ERTS_GLB_INLINE byte *
+erts_raw_env_7bit_ascii_char_put(byte c,
+ byte *p,
+ int encoding)
+{
+ *(p++) = c;
+ if (encoding == ERL_FILENAME_WIN_WCHAR)
+ *(p++) = 0;
+ return p;
+}
+
+ERTS_GLB_INLINE int
+erts_raw_env_char_is_7bit_ascii_char(byte c,
+ byte *p,
+ int encoding)
+{
+ if (encoding == ERL_FILENAME_WIN_WCHAR)
+ return (p[0] == c) & (p[1] == 0);
+ else
+ return p[0] == c;
+}
+
+ERTS_GLB_INLINE byte *
+erts_raw_env_next_char(byte *p, int encoding)
+{
+ if (encoding == ERL_FILENAME_WIN_WCHAR)
+ return p + 2;
+ else
+ return p + 1;
+}
+
+#endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index 6fa48e8582..cc7b989e2f 100644
--- a/erts/preloaded/ebin/erlang.beam
+++ b/erts/preloaded/ebin/erlang.beam
Binary files differ
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index f796ea64d3..e8616fa260 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -2100,7 +2100,7 @@ nodes(_Arg) ->
| stream
| {line, L :: non_neg_integer()}
| {cd, Dir :: string() | binary()}
- | {env, Env :: [{Name :: string(), Val :: string() | false}]}
+ | {env, Env :: [{Name :: os:env_var_name(), Val :: os:env_var_value() | false}]}
| {args, [string() | binary()]}
| {arg0, string() | binary()}
| exit_status
diff --git a/erts/preloaded/src/erts.app.src b/erts/preloaded/src/erts.app.src
index 7ab06164b4..e2ab8686d2 100644
--- a/erts/preloaded/src/erts.app.src
+++ b/erts/preloaded/src/erts.app.src
@@ -37,7 +37,7 @@
{registered, []},
{applications, []},
{env, []},
- {runtime_dependencies, ["stdlib-3.0", "kernel-5.0", "sasl-3.0.1"]}
+ {runtime_dependencies, ["stdlib-3.5", "kernel-6.0", "sasl-3.0.1"]}
]}.
%% vim: ft=erlang
diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml
index b674b3ca93..2ab35b9b05 100644
--- a/lib/kernel/doc/src/file.xml
+++ b/lib/kernel/doc/src/file.xml
@@ -41,7 +41,7 @@
<p>Regarding filename encoding, the Erlang VM can operate in
two modes. The current mode can be queried using function
- <seealso marker="#native_name_encoding"><c>native_name_encoding/0</c></seealso>.
+ <seealso marker="#native_name_encoding/0"><c>native_name_encoding/0</c></seealso>.
It returns <c>latin1</c> or <c>utf8</c>.</p>
<p>In <c>latin1</c> mode, the Erlang VM does not change the
@@ -59,7 +59,7 @@
terminal supports UTF-8, otherwise <c>latin1</c>. The default can
be overridden using <c>+fnl</c> (to force <c>latin1</c> mode)
or <c>+fnu</c> (to force <c>utf8</c> mode) when starting
- <seealso marker="erts:erl"><c>erts:erl</c></seealso>.</p>
+ <seealso marker="erts:erl"><c>erl</c></seealso>.</p>
<p>On operating systems with transparent naming, files can be
inconsistently named, for example, some files are encoded in UTF-8 while
@@ -81,6 +81,23 @@
<p>See also section <seealso marker="stdlib:unicode_usage#notes-about-raw-filenames">Notes About Raw Filenames</seealso> in the STDLIB User's Guide.</p>
+ <note><p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations fail.
+ </p></note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
+
</description>
<datatypes>
@@ -96,9 +113,21 @@
</datatype>
<datatype>
<name name="filename"/>
+ <desc>
+ <p>
+ See also the documentation of the
+ <seealso marker="#type-name_all"><c>name_all()</c></seealso> type.
+ </p>
+ </desc>
</datatype>
<datatype>
<name name="filename_all"/>
+ <desc>
+ <p>
+ See also the documentation of the
+ <seealso marker="#type-name_all"><c>name_all()</c></seealso> type.
+ </p>
+ </desc>
</datatype>
<datatype>
<name name="io_device"/>
@@ -112,21 +141,23 @@
<name name="name"/>
<desc>
<p>If VM is in Unicode filename mode, <c>string()</c> and <c>char()</c>
- are allowed to be &gt; 255.
+ are allowed to be &gt; 255. See also the documentation of the
+ <seealso marker="#type-name_all"><c>name_all()</c></seealso> type.
</p>
</desc>
</datatype>
<datatype>
<name name="name_all"/>
<desc>
- <p>If VM is in Unicode filename mode, <c>string()</c> and <c>char()</c>
+ <p>If VM is in Unicode filename mode, characters
are allowed to be &gt; 255.
<c><anno>RawFilename</anno></c> is a filename not subject to
Unicode translation,
meaning that it can contain characters not conforming to
the Unicode encoding expected from the file system
(that is, non-UTF-8 characters although the VM is started
- in Unicode filename mode).
+ in Unicode filename mode). Null characters (integer value zero)
+ are <em>not</em> allowed in filenames (not even at the end).
</p>
</desc>
</datatype>
@@ -1825,7 +1856,7 @@ f.txt: {person, "kalle", 25}.
<p>The functions in the module <c>file</c> usually treat binaries
as raw filenames, that is, they are passed "as is" even when the
encoding of the binary does not agree with
- <seealso marker="#native_name_encoding"><c>native_name_encoding()</c></seealso>.
+ <seealso marker="#native_name_encoding/0"><c>native_name_encoding()</c></seealso>.
However, this function expects binaries to be encoded according to the
value returned by <c>native_name_encoding()</c>.</p>
<p>Typical error reasons are:</p>
diff --git a/lib/kernel/doc/src/os.xml b/lib/kernel/doc/src/os.xml
index 0e9add4161..0a08e2c78a 100644
--- a/lib/kernel/doc/src/os.xml
+++ b/lib/kernel/doc/src/os.xml
@@ -36,8 +36,99 @@
only run on a specific platform. On the other hand, with careful
use, these functions can be of help in enabling a program to run on
most platforms.</p>
+
+ <note>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ <p>
+ Also environment variable operations used to accept
+ names and values of environment variables containing
+ null characters (integer value zero). This caused
+ operations to silently produce erroneous results.
+ Environment variable names and values containing
+ null characters inside the name or value are now
+ <em>rejected</em> and will cause environment variable
+ operations to fail.
+ </p>
+ </note>
+ <warning>
+ <p>
+ Currently null characters at the end of filenames,
+ environment variable names and values will be accepted
+ by the primitive operations. Such filenames, environment
+ variable names and values are however still documented as
+ invalid. The implementation will also change in the
+ future and reject such filenames, environment variable
+ names and values.
+ </p>
+ </warning>
</description>
+ <datatypes>
+ <datatype>
+ <name name="env_var_name"/>
+ <desc>
+ <p>A string containing valid characters on the specific
+ OS for environment variable names using
+ <seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding()</c></seealso>
+ encoding. Note that specifically null characters (integer
+ value zero) and <c>$=</c> characters are not allowed.
+ However, note that not all invalid characters necessarily
+ will cause the primitiv operations to fail, but may instead
+ produce invalid results.
+ </p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="env_var_value"/>
+ <desc>
+ <p>A string containing valid characters on the specific
+ OS for environment variable values using
+ <seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding()</c></seealso>
+ encoding. Note that specifically null characters (integer
+ value zero) are not allowed. However, note that not all
+ invalid characters necessarily will cause the primitiv
+ operations to fail, but may instead produce invalid results.
+ </p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="env_var_name_value"/>
+ <desc>
+ <p>
+ Assuming that environment variables has been correctly
+ set, a strings containing valid characters on the specific
+ OS for environment variable names and values using
+ <seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding()</c></seealso>
+ encoding. The first <c>$=</c> characters appearing in
+ the string separates environment variable name (on the
+ left) from environment variable value (on the right).
+ </p>
+ </desc>
+ </datatype>
+ <datatype>
+ <name name="command_input"/>
+ <desc>
+ <p>All characters needs to be valid characters on the
+ specific OS using
+ <seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding()</c></seealso>
+ encoding. Note that specifically null characters (integer
+ value zero) are not allowed. However, note that not all
+ invalid characters not necessarily will cause
+ <seealso marker="#cmd/1"><c>os:cmd/1</c></seealso>
+ to fail, but may instead produce invalid results.
+ </p>
+ </desc>
+ </datatype>
+ </datatypes>
+
<funcs>
<func>
<name name="cmd" arity="1"/>
@@ -49,6 +140,15 @@
result as a string. This function is a replacement of
the previous function <c>unix:cmd/1</c>; they are equivalent on a
Unix platform.</p>
+ <warning><p>Previous implementation used to allow all characters
+ as long as they were integer values greater than or equal to zero.
+ This sometimes lead to unwanted results since null characters
+ (integer value zero) often are interpreted as string termination.
+ Current implementation still accepts null characters at the end
+ of <c><anno>Command</anno></c> even though the documentation
+ states that no null characters are allowed. This will however
+ be changed in the future so that no null characters at all will
+ be accepted.</p></warning>
<p><em>Examples:</em></p>
<code type="none">
LsOut = os:cmd("ls"), % on unix platform
@@ -152,6 +252,15 @@ DirOut = os:cmd("dir"), % on Win32 platform</code>
<p>On Unix platforms, the environment is set using UTF-8 encoding
if Unicode filename translation is in effect. On Windows, the
environment is set using wide character interfaces.</p>
+ <note>
+ <p>
+ <c><anno>VarName</anno></c> is not allowed to contain
+ an <c>$=</c> character. Previous implementations used
+ to just let the <c>$=</c> character through which
+ silently caused erroneous results. Current implementation
+ will instead throw a <c>badarg</c> exception.
+ </p>
+ </note>
</desc>
</func>
diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src
index 2a88cc7e26..080b11fc4d 100644
--- a/lib/kernel/src/kernel.app.src
+++ b/lib/kernel/src/kernel.app.src
@@ -120,6 +120,6 @@
{applications, []},
{env, [{error_logger, tty}]},
{mod, {kernel, []}},
- {runtime_dependencies, ["erts-9.1", "stdlib-3.4", "sasl-3.0"]}
+ {runtime_dependencies, ["erts-10.0", "stdlib-3.5", "sasl-3.0"]}
]
}.
diff --git a/lib/kernel/src/os.erl b/lib/kernel/src/os.erl
index 0250783632..3675e923a3 100644
--- a/lib/kernel/src/os.erl
+++ b/lib/kernel/src/os.erl
@@ -25,6 +25,8 @@
-include("file.hrl").
+-export_type([env_var_name/0, env_var_value/0, env_var_name_value/0, command_input/0]).
+
%%% BIFs
-export([getenv/0, getenv/1, getenv/2, getpid/0,
@@ -32,21 +34,29 @@
putenv/2, set_signal/2, system_time/0, system_time/1,
timestamp/0, unsetenv/1]).
--spec getenv() -> [string()].
+-type env_var_name() :: nonempty_string().
+
+-type env_var_value() :: string().
+
+-type env_var_name_value() :: nonempty_string().
+
+-type command_input() :: atom() | io_lib:chars().
+
+-spec getenv() -> [env_var_name_value()].
getenv() -> erlang:nif_error(undef).
-spec getenv(VarName) -> Value | false when
- VarName :: string(),
- Value :: string().
+ VarName :: env_var_name(),
+ Value :: env_var_value().
getenv(_) ->
erlang:nif_error(undef).
-spec getenv(VarName, DefaultValue) -> Value when
- VarName :: string(),
- DefaultValue :: string(),
- Value :: string().
+ VarName :: env_var_name(),
+ DefaultValue :: env_var_value(),
+ Value :: env_var_value().
getenv(VarName, DefaultValue) ->
case os:getenv(VarName) of
@@ -75,8 +85,8 @@ perf_counter(Unit) ->
erlang:convert_time_unit(os:perf_counter(), perf_counter, Unit).
-spec putenv(VarName, Value) -> true when
- VarName :: string(),
- Value :: string().
+ VarName :: env_var_name(),
+ Value :: env_var_value().
putenv(_, _) ->
erlang:nif_error(undef).
@@ -99,7 +109,7 @@ timestamp() ->
erlang:nif_error(undef).
-spec unsetenv(VarName) -> true when
- VarName :: string().
+ VarName :: env_var_name().
unsetenv(_) ->
erlang:nif_error(undef).
@@ -232,10 +242,9 @@ extensions() ->
%% Executes the given command in the default shell for the operating system.
-spec cmd(Command) -> string() when
- Command :: atom() | io_lib:chars().
+ Command :: os:command_input().
cmd(Cmd) ->
- validate(Cmd),
- {SpawnCmd, SpawnOpts, SpawnInput, Eot} = mk_cmd(os:type(), Cmd),
+ {SpawnCmd, SpawnOpts, SpawnInput, Eot} = mk_cmd(os:type(), validate(Cmd)),
Port = open_port({spawn, SpawnCmd}, [binary, stderr_to_stdout,
stream, in, hide | SpawnOpts]),
MonRef = erlang:monitor(port, Port),
@@ -255,8 +264,6 @@ mk_cmd({win32,Wtype}, Cmd) ->
{Cspec,_} -> lists:concat([Cspec," /c",Cmd])
end,
{Command, [], [], <<>>};
-mk_cmd(OsType,Cmd) when is_atom(Cmd) ->
- mk_cmd(OsType, atom_to_list(Cmd));
mk_cmd(_,Cmd) ->
%% Have to send command in like this in order to make sh commands like
%% cd and ulimit available
@@ -279,17 +286,33 @@ mk_cmd(_,Cmd) ->
<<$\^D>>}.
validate(Atom) when is_atom(Atom) ->
- ok;
+ validate(atom_to_list(Atom));
validate(List) when is_list(List) ->
- validate1(List).
+ case validate1(List) of
+ false ->
+ List;
+ true ->
+ %% Had zeros at end; remove them...
+ string:trim(List, trailing, [0])
+ end.
-validate1([C|Rest]) when is_integer(C) ->
+validate1([0|Rest]) ->
+ validate2(Rest);
+validate1([C|Rest]) when is_integer(C), C > 0 ->
validate1(Rest);
validate1([List|Rest]) when is_list(List) ->
- validate1(List),
- validate1(Rest);
+ validate1(List) or validate1(Rest);
validate1([]) ->
- ok.
+ false.
+
+%% Ensure that the rest is zero only...
+validate2([]) ->
+ true;
+validate2([0|Rest]) ->
+ validate2(Rest);
+validate2([List|Rest]) when is_list(List) ->
+ validate2(List),
+ validate2(Rest).
get_data(Port, MonRef, Eot, Sofar) ->
receive
diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl
index 899102c908..f23529fec9 100644
--- a/lib/kernel/test/file_name_SUITE.erl
+++ b/lib/kernel/test/file_name_SUITE.erl
@@ -302,7 +302,9 @@ check_normal(Mod) ->
{ok, BC} = Mod:read(FD,1024),
ok = file:close(FD)
end || {regular,Name,Content} <- NormalDir ],
+ {error, badarg} = Mod:rename("fil1\0tmp_fil2","tmp_fil1"),
Mod:rename("fil1","tmp_fil1"),
+ {error, badarg} = Mod:read_file("tmp_fil1\0.txt"),
{ok, <<"fil1">>} = Mod:read_file("tmp_fil1"),
{error,enoent} = Mod:read_file("fil1"),
Mod:rename("tmp_fil1","fil1"),
diff --git a/lib/kernel/test/os_SUITE.erl b/lib/kernel/test/os_SUITE.erl
index 53a9e168ef..8056321448 100644
--- a/lib/kernel/test/os_SUITE.erl
+++ b/lib/kernel/test/os_SUITE.erl
@@ -22,7 +22,8 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2,
init_per_testcase/2,end_per_testcase/2]).
--export([space_in_cwd/1, quoting/1, cmd_unicode/1, space_in_name/1, bad_command/1,
+-export([space_in_cwd/1, quoting/1, cmd_unicode/1,
+ null_in_command/1, space_in_name/1, bad_command/1,
find_executable/1, unix_comment_in_command/1, deep_list_command/1,
large_output_command/1, background_command/0, background_command/1,
message_leak/1, close_stdin/0, close_stdin/1, perf_counter_api/1]).
@@ -34,7 +35,8 @@ suite() ->
{timetrap,{minutes,1}}].
all() ->
- [space_in_cwd, quoting, cmd_unicode, space_in_name, bad_command,
+ [space_in_cwd, quoting, cmd_unicode, null_in_command,
+ space_in_name, bad_command,
find_executable, unix_comment_in_command, deep_list_command,
large_output_command, background_command, message_leak,
close_stdin, perf_counter_api].
@@ -125,6 +127,14 @@ cmd_unicode(Config) when is_list(Config) ->
[] = receive_all(),
ok.
+null_in_command(Config) ->
+ {Ok, Error} = case os:type() of
+ {win32,_} -> {"dir", "di\0r"};
+ _ -> {"ls", "l\0s"}
+ end,
+ true = is_list(try os:cmd(Ok) catch Class0:_ -> Class0 end),
+ error = try os:cmd(Error) catch Class1:_ -> Class1 end,
+ ok.
%% Test that program with a space in its name can be executed.
space_in_name(Config) when is_list(Config) ->
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 80c4acffdb..c531b10d50 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -45,6 +45,30 @@
<p>For more information about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml
index 14fd5ef787..d2608ad542 100644
--- a/lib/stdlib/doc/src/filename.xml
+++ b/lib/stdlib/doc/src/filename.xml
@@ -46,7 +46,10 @@
filename by removing redundant directory separators, use
<seealso marker="#join/1"><c>join/1</c></seealso>.</p>
- <p>The module supports raw filenames in the way that if a binary is
+ <p>
+ The module supports
+ <seealso marker="unicode_usage#notes-about-raw-filenames">raw
+ filenames</seealso> in the way that if a binary is
present, or the filename cannot be interpreted according to the return
value of <seealso marker="kernel:file#native_name_encoding/0">
<c>file:native_name_encoding/0</c></seealso>, a raw filename is also
@@ -56,6 +59,30 @@
(the join operation is performed of course). For more information
about raw filenames, see the
<seealso marker="kernel:file"><c>file</c></seealso> module.</p>
+
+ <note>
+ <p>
+ Functionality in this module generally assumes valid input and
+ does not necessarily fail on input that does not use a valid
+ encoding, but may instead very likely produce invalid output.
+ </p>
+ <p>
+ File operations used to accept filenames containing
+ null characters (integer value zero). This caused
+ the name to be truncated and in some cases arguments
+ to primitive operations to be mixed up. Filenames
+ containing null characters inside the filename
+ are now <em>rejected</em> and will cause primitive
+ file operations to fail.
+ </p>
+ </note>
+ <warning><p>
+ Currently null characters at the end of the filename
+ will be accepted by primitive file operations. Such
+ filenames are however still documented as invalid. The
+ implementation will also change in the future and
+ reject such filenames.
+ </p></warning>
</description>
<datatypes>
<datatype>
@@ -555,6 +582,7 @@ unsafe</pre>
["a:/","msdev","include"]</pre>
</desc>
</func>
+
</funcs>
</erlref>
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 26dc46719e..789e063c12 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -719,8 +719,8 @@ Eshell V5.10.1 (abort with ^G)
</section>
<section>
- <title>Unicode Filenames</title>
<marker id="unicode_file_names"/>
+ <title>Unicode Filenames</title>
<p>Most modern operating systems support Unicode filenames in some way.
There are many different ways to do this and Erlang by default treats the
different approaches differently:</p>
@@ -855,8 +855,12 @@ Eshell V5.10.1 (abort with ^G)
</note>
<section>
- <title>Notes About Raw Filenames</title>
<marker id="notes-about-raw-filenames"/>
+ <title>Notes About Raw Filenames</title>
+ <note><p>
+ Note that raw filenames <em>not</em> necessarily are encoded the
+ same way as on the OS level.
+ </p></note>
<p>Raw filenames were introduced together with Unicode filename support
in ERTS 5.8.2 (Erlang/OTP R14B01). The reason &quot;raw
filenames&quot; were introduced in the system was
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index 9bf4290916..9a85642c17 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -34,6 +34,38 @@
%% we flatten the arguments immediately on function entry as that makes
%% it easier to ensure that the code works.
+%%
+%% *** Requirements on Raw Filename Format ***
+%%
+%% These requirements are due to the 'filename' module
+%% in stdlib. This since it is documented that it
+%% should be able to operate on raw filenames as well
+%% as ordinary filenames.
+%%
+%% A raw filename *must* be a byte sequence where:
+%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded
+%% as a byte with the corresponding value. That is,
+%% the most significant bit in the byte encoding the
+%% codepoint is never set.
+%% 2. Codepoints greater than 127 *must* be encoded
+%% with the most significant bit set in *every* byte
+%% encoding it.
+%%
+%% Latin1 and UTF-8 meet these requirements while
+%% UTF-16 and UTF-32 don't.
+%%
+%% On Windows filenames are natively stored as malformed
+%% UTF-16LE (lonely surrogates may appear). A more correct
+%% description than UTF-16 would be an array of 16-bit
+%% words... In order to meet the requirements of the
+%% raw file format we convert the malformed UTF-16LE to
+%% malformed UTF-8 which meet the requirements.
+%%
+%% Note that these requirements are today only OTP
+%% internal (erts-stdlib internal) requirements that
+%% could be changed.
+%%
+
-export([absname/1, absname/2, absname_join/2,
basename/1, basename/2, dirname/1,
extension/1, join/1, join/2, pathtype/1,
@@ -41,6 +73,7 @@
safe_relative_path/1]).
-export([find_src/1, find_src/2]). % deprecated
-export([basedir/2, basedir/3]).
+-export([validate/1]).
%% Undocumented and unsupported exports.
-export([append/2]).
@@ -1135,3 +1168,72 @@ basedir_os_type() ->
{win32,_} -> windows;
_ -> linux
end.
+
+%%
+%% validate/1
+%%
+
+-spec validate(FileName) -> boolean() when
+ FileName :: file:name_all().
+
+validate(FileName) when is_binary(FileName) ->
+ %% Raw filename...
+ validate_bin(FileName);
+validate(FileName) when is_list(FileName);
+ is_atom(FileName) ->
+ validate_list(FileName,
+ file:native_name_encoding(),
+ os:type()).
+
+validate_list(FileName, Enc, Os) ->
+ try
+ true = validate_list(FileName, Enc, Os, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_list([], _Enc, _Os, Chars) ->
+ Chars;
+validate_list(C, Enc, Os, Chars) when is_integer(C) ->
+ validate_char(C, Enc, Os),
+ Chars+1;
+validate_list(A, Enc, Os, Chars) when is_atom(A) ->
+ validate_list(atom_to_list(A), Enc, Os, Chars);
+validate_list([H|T], Enc, Os, Chars) ->
+ NewChars = validate_list(H, Enc, Os, Chars),
+ validate_list(T, Enc, Os, NewChars).
+
+%% C is always an integer...
+% validate_char(C, _, _) when not is_integer(C) ->
+% throw(invalid);
+validate_char(C, _, _) when C < 1 ->
+ throw(invalid); %% No negative or null characters...
+validate_char(C, latin1, _) when C > 255 ->
+ throw(invalid);
+validate_char(C, utf8, _) when C >= 16#110000 ->
+ throw(invalid);
+validate_char(C, utf8, {win32, _}) when C > 16#ffff ->
+ throw(invalid); %% invalid win wchar...
+validate_char(_C, utf8, {win32, _}) ->
+ ok; %% Range below is accepted on windows...
+validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF ->
+ throw(invalid); %% invalid unicode range...
+validate_char(_, _, _) ->
+ ok.
+
+validate_bin(Bin) ->
+ %% Raw filename. That is, we do not interpret
+ %% the encoding, but we still do not accept
+ %% null characters...
+ try
+ true = validate_bin(Bin, 0) > 0
+ catch
+ _ : _ -> false
+ end.
+
+validate_bin(<<>>, Bs) ->
+ Bs;
+validate_bin(<<0, _Rest/binary>>, _Bs) ->
+ throw(invalid); %% No null characters allowed...
+validate_bin(<<_B, Rest/binary>>, Bs) ->
+ validate_bin(Rest, Bs+1).
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 3c449d3cb9..ab0824ca17 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -107,7 +107,7 @@
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.0","crypto-3.3",
"compiler-5.0"]}
]}.