aboutsummaryrefslogtreecommitdiffstats
path: root/erts/etc/common/erlexec.c
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2012-07-26 18:35:46 +0200
committerPatrik Nyblom <[email protected]>2012-08-14 15:04:06 +0200
commite11bbb37273ebd2408d0c83c62770f9ef023879d (patch)
tree58ca0f2f6381e171c7c99ba3022a25c137f7b689 /erts/etc/common/erlexec.c
parent0c9d90f314f364e5b1301ec89d762baabc57c7aa (diff)
downloadotp-e11bbb37273ebd2408d0c83c62770f9ef023879d.tar.gz
otp-e11bbb37273ebd2408d0c83c62770f9ef023879d.tar.bz2
otp-e11bbb37273ebd2408d0c83c62770f9ef023879d.zip
Make get/putenv and erlexec understand Unicode
Putenv and getenv needs to convert to the proper environment strings in Unicode depending on platform and user settings for filename encoding. Also erlexec needs to pass environment strings in an appropriate way for kernel to pick up. All environment strings on the command line, as well as home directory, is now passed in UTF8 on windows and in whatever encoding you have on Unix, kernel tries to convert all parameters and environments from UTF8 before making strings.
Diffstat (limited to 'erts/etc/common/erlexec.c')
-rw-r--r--erts/etc/common/erlexec.c171
1 files changed, 164 insertions, 7 deletions
diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c
index 790b0ed400..cba7429fab 100644
--- a/erts/etc/common/erlexec.c
+++ b/erts/etc/common/erlexec.c
@@ -245,7 +245,9 @@ static char* config_script = NULL; /* used by option -start_erl and -config */
static HANDLE this_module_handle;
static int run_werl;
-
+static WCHAR *utf8_to_utf16(unsigned char *bytes);
+static char *utf16_to_utf8(WCHAR *wstr);
+static WCHAR *latin1_to_utf16(char *str);
#endif
/*
@@ -263,8 +265,12 @@ static void
set_env(char *key, char *value)
{
#ifdef __WIN32__
- if (!SetEnvironmentVariable((LPCTSTR) key, (LPCTSTR) value))
+ WCHAR *wkey = latin1_to_utf16(key);
+ WCHAR *wvalue = utf8_to_utf16(value);
+ if (!SetEnvironmentVariableW(wkey, wvalue))
error("SetEnvironmentVariable(\"%s\", \"%s\") failed!", key, value);
+ efree(wkey);
+ efree(wvalue);
#else
size_t size = strlen(key) + 1 + strlen(value) + 1;
char *str = emalloc(size);
@@ -277,25 +283,33 @@ set_env(char *key, char *value)
#endif
}
+
static char *
get_env(char *key)
{
#ifdef __WIN32__
DWORD size = 32;
- char *value = NULL;
+ WCHAR *value = NULL;
+ WCHAR *wkey = latin1_to_utf16(key);
+ char *res;
while (1) {
DWORD nsz;
if (value)
efree(value);
- value = emalloc(size);
+ value = emalloc(size*sizeof(WCHAR));
SetLastError(0);
- nsz = GetEnvironmentVariable((LPCTSTR) key, (LPTSTR) value, size);
+ nsz = GetEnvironmentVariableW(wkey, value, size);
if (nsz == 0 && GetLastError() == ERROR_ENVVAR_NOT_FOUND) {
efree(value);
+ efree(wkey);
return NULL;
}
- if (nsz <= size)
- return value;
+ if (nsz <= size) {
+ efree(wkey);
+ res = utf16_to_utf8(value);
+ efree(value);
+ return res;
+ }
size = nsz;
}
#else
@@ -2080,4 +2094,147 @@ possibly_quote(char* arg)
return narg;
}
+/*
+ * Unicode helpers to handle environment and command line parameters on
+ * Windows. We internally handle all environment variables in UTF8,
+ * but put and get the environment using the WCHAR (limited UTF16) interface
+ *
+ * These are simplified to only handle Unicode characters that can fit in
+ * Windows simplified UTF16, i.e. characters that fit in 16 bits.
+ */
+
+static int utf8_len(unsigned char first)
+{
+ if ((first & ((unsigned char) 0x80)) == 0) {
+ return 1;
+ } else if ((first & ((unsigned char) 0xE0)) == 0xC0) {
+ return 2;
+ } else if ((first & ((unsigned char) 0xF0)) == 0xE0) {
+ return 3;
+ } else if ((first & ((unsigned char) 0xF8)) == 0xF0) {
+ return 4;
+ }
+ return 1; /* will be a '?' */
+}
+
+static WCHAR *utf8_to_utf16(unsigned char *bytes)
+{
+ unsigned int unipoint;
+ unsigned char *tmp = bytes;
+ WCHAR *target, *res;
+ int num = 0;
+
+ while (*tmp) {
+ num++;
+ tmp += utf8_len(*tmp);
+ }
+ res = target = emalloc((num + 1) * sizeof(WCHAR));
+ while (*bytes) {
+ if (((*bytes) & ((unsigned char) 0x80)) == 0) {
+ unipoint = (Uint) *bytes;
+ ++bytes;
+ } else if (((*bytes) & ((unsigned char) 0xE0)) == 0xC0) {
+ unipoint =
+ (((Uint) ((*bytes) & ((unsigned char) 0x1F))) << 6) |
+ ((Uint) (bytes[1] & ((unsigned char) 0x3F)));
+ bytes += 2;
+ } else if (((*bytes) & ((unsigned char) 0xF0)) == 0xE0) {
+ unipoint =
+ (((Uint) ((*bytes) & ((unsigned char) 0xF))) << 12) |
+ (((Uint) (bytes[1] & ((unsigned char) 0x3F))) << 6) |
+ ((Uint) (bytes[2] & ((unsigned char) 0x3F)));
+ if (unipoint > 0xFFFF) {
+ unipoint = (unsigned int) '?';
+ }
+ bytes +=3;
+ } else if (((*bytes) & ((unsigned char) 0xF8)) == 0xF0) {
+ unipoint = (unsigned int) '?'; /* Cannot put in a wchar */
+ bytes += 4;
+ } else {
+ unipoint = (unsigned int) '?';
+ }
+ *target++ = (WCHAR) unipoint;
+ }
+ *target = L'\0';
+ return res;
+}
+
+static int put_utf8(WCHAR ch, unsigned char *target, int sz, int *pos)
+{
+ Uint x = (Uint) ch;
+ if (x < 0x80) {
+ if (*pos >= sz) {
+ return -1;
+ }
+ target[(*pos)++] = (unsigned char) x;
+ }
+ else if (x < 0x800) {
+ if (((*pos) + 1) >= sz) {
+ return -1;
+ }
+ target[(*pos)++] = (((unsigned char) (x >> 6)) |
+ ((unsigned char) 0xC0));
+ target[(*pos)++] = (((unsigned char) (x & 0x3F)) |
+ ((unsigned char) 0x80));
+ } else {
+ if ((x >= 0xD800 && x <= 0xDFFF) ||
+ (x == 0xFFFE) ||
+ (x == 0xFFFF)) { /* Invalid unicode range */
+ return -1;
+ }
+ if (((*pos) + 2) >= sz) {
+ return -1;
+ }
+
+ target[(*pos)++] = (((unsigned char) (x >> 12)) |
+ ((unsigned char) 0xE0));
+ target[(*pos)++] = ((((unsigned char) (x >> 6)) & 0x3F) |
+ ((unsigned char) 0x80));
+ target[(*pos)++] = (((unsigned char) (x & 0x3F)) |
+ ((unsigned char) 0x80));
+ }
+ return 0;
+}
+
+static int need_bytes_for_utf8(WCHAR x)
+{
+ if (x < 0x80)
+ return 1;
+ else if (x < 0x800)
+ return 2;
+ else
+ return 3;
+}
+
+static WCHAR *latin1_to_utf16(char *str)
+{
+ int len = strlen(str);
+ int i;
+ WCHAR *wstr = emalloc((len+1) * sizeof(WCHAR));
+ for(i=0;i<len;++i)
+ wstr[i] = (WCHAR) str[i];
+ wstr[len] = L'\0';
+ return wstr;
+}
+
+static char *utf16_to_utf8(WCHAR *wstr)
+{
+ int len = wcslen(wstr);
+ char *result;
+ int i,pos;
+ int reslen = 0;
+ for(i=0;i<len;++i) {
+ reslen += need_bytes_for_utf8(wstr[i]);
+ }
+ result = emalloc(reslen+1);
+ pos = 0;
+ for(i=0;i<len;++i) {
+ if (put_utf8((int) wstr[i], result, reslen, &pos) < 0) {
+ break;
+ }
+ }
+ result[pos] = '\0';
+ return result;
+}
+
#endif