aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sverker Eriksson <[email protected]> 2018-04-20 11:36:21 +0200
committer: Sverker Eriksson <[email protected]> 2018-04-20 11:36:21 +0200
commit: 21754c8d402855af5081dad0aff465a8f0f7eda7 (patch)
tree: 7af7d292a13b8131bb273f5b77919ff956f1ae78
parent: fd8ce7e0592b4d873773dba405de9cfd4e4ec184 (diff)
parent: ef0284670c2f108e1ef8980e422b48ec04c3728c (diff)
download: otp-21754c8d402855af5081dad0aff465a8f0f7eda7.tar.gz
otp-21754c8d402855af5081dad0aff465a8f0f7eda7.tar.bz2
otp-21754c8d402855af5081dad0aff465a8f0f7eda7.zip
Merge branch 'sverker/master/optimize-atom-enc-dec/ERIERL-150'
-rw-r--r-- erts/aclocal.m4 86
-rw-r--r-- erts/configure.in 78
-rw-r--r-- lib/erl_interface/configure.in 13
-rw-r--r-- lib/erl_interface/src/decode/decode_atom.c 62
-rw-r--r-- lib/ic/c_src/oe_ei_encode_atom.c 43
5 files changed, 189 insertions, 93 deletions
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4
index 887babc13f..a4d09810bd 100644
--- a/erts/aclocal.m4
+++ b/erts/aclocal.m4
@@ -2770,3 +2770,89 @@ rm -f conftest*])
#define UNSAFE_MASK 0xc0000000 /* Mask for bits that must be constant */
+dnl ----------------------------------------------------------------------
+dnl
+dnl LM_HARDWARE_ARCH
+dnl
+dnl Determine target hardware in ARCH
+dnl
+dnl The CPU name is taken from $host_cpu when both $host_alias and
+dnl $host_cpu are non-empty, otherwise from `uname -m`. It is mapped to
+dnl a canonical ARCH name, with noarch as the fallback for anything not
+dnl listed. ARCH is then adjusted against $ac_cv_sizeof_void_p so that it
+dnl follows the pointer size the compiler targets; when that variable is
+dnl empty none of the adjustment patterns match and ARCH is kept as is.
+dnl The final value is exported with AC_SUBST.
+dnl
+AC_DEFUN([LM_HARDWARE_ARCH], [
+    AC_MSG_CHECKING([target hardware architecture])
+    dnl Two separate test commands joined by && are used here since
+    dnl the -a operator of test is not portable.
+    if test "x$host_alias" != "x" && test "x$host_cpu" != "x"; then
+        chk_arch_=$host_cpu
+    else
+        chk_arch_=`uname -m`
+    fi
+
+    case $chk_arch_ in
+    sun4u)		ARCH=ultrasparc;;
+    sparc64)		ARCH=sparc64;;
+    sun4v)		ARCH=ultrasparc;;
+    i86pc)		ARCH=x86;;
+    i386)		ARCH=x86;;
+    i486)		ARCH=x86;;
+    i586)		ARCH=x86;;
+    i686)		ARCH=x86;;
+    x86_64)		ARCH=amd64;;
+    amd64)		ARCH=amd64;;
+    macppc)		ARCH=ppc;;
+    powerpc)		ARCH=ppc;;
+    ppc)		ARCH=ppc;;
+    ppc64)		ARCH=ppc64;;
+    ppc64le)		ARCH=ppc64le;;
+    "Power Macintosh")	ARCH=ppc;;
+    armv5b)		ARCH=arm;;
+    armv5teb)		ARCH=arm;;
+    armv5tel)		ARCH=arm;;
+    armv5tejl)		ARCH=arm;;
+    armv6l)		ARCH=arm;;
+    armv6hl)		ARCH=arm;;
+    armv7l)		ARCH=arm;;
+    armv7hl)		ARCH=arm;;
+    tile)		ARCH=tile;;
+    e2k)		ARCH=e2k;;
+    *)			ARCH=noarch;;
+    esac
+    AC_MSG_RESULT([$ARCH])
+
+    dnl
+    dnl Convert between x86 and amd64 based on the compiler's mode.
+    dnl Ditto between ultrasparc and sparc64, and between ppc and ppc64.
+    dnl A 64-bit build on an arm CPU name is mapped to noarch.
+    dnl
+    AC_MSG_CHECKING([whether compilation mode forces ARCH adjustment])
+    case "$ARCH-$ac_cv_sizeof_void_p" in
+    x86-8)
+        AC_MSG_RESULT([yes: adjusting ARCH=x86 to ARCH=amd64])
+        ARCH=amd64
+        ;;
+    amd64-4)
+        AC_MSG_RESULT([yes: adjusting ARCH=amd64 to ARCH=x86])
+        ARCH=x86
+        ;;
+    ultrasparc-8)
+        AC_MSG_RESULT([yes: adjusting ARCH=ultrasparc to ARCH=sparc64])
+        ARCH=sparc64
+        ;;
+    sparc64-4)
+        AC_MSG_RESULT([yes: adjusting ARCH=sparc64 to ARCH=ultrasparc])
+        ARCH=ultrasparc
+        ;;
+    ppc64-4)
+        AC_MSG_RESULT([yes: adjusting ARCH=ppc64 to ARCH=ppc])
+        ARCH=ppc
+        ;;
+    ppc-8)
+        AC_MSG_RESULT([yes: adjusting ARCH=ppc to ARCH=ppc64])
+        ARCH=ppc64
+        ;;
+    arm-8)
+        AC_MSG_RESULT([yes: adjusting ARCH=arm to ARCH=noarch])
+        ARCH=noarch
+        ;;
+    *)
+        AC_MSG_RESULT([no: ARCH is $ARCH])
+        ;;
+    esac
+
+    AC_SUBST([ARCH])
+])
diff --git a/erts/configure.in b/erts/configure.in
index 820247b4b8..2d0d6c6444 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -658,83 +658,9 @@ case $chk_opsys_ in
*) OPSYS=noopsys
esac
-if test "x$host_alias" != "x" -a "x$host_cpu" != "x"; then
- chk_arch_=$host_cpu
-else
- chk_arch_=`uname -m`
-fi
-
-case $chk_arch_ in
- sun4u) ARCH=ultrasparc;;
- sparc64) ARCH=sparc64;;
- sun4v) ARCH=ultrasparc;;
- i86pc) ARCH=x86;;
- i386) ARCH=x86;;
- i486) ARCH=x86;;
- i586) ARCH=x86;;
- i686) ARCH=x86;;
- x86_64) ARCH=amd64;;
- amd64) ARCH=amd64;;
- macppc) ARCH=ppc;;
- powerpc) ARCH=ppc;;
- ppc) ARCH=ppc;;
- ppc64) ARCH=ppc64;;
- ppc64le) ARCH=ppc64le;;
- "Power Macintosh") ARCH=ppc;;
- armv5b) ARCH=arm;;
- armv5teb) ARCH=arm;;
- armv5tel) ARCH=arm;;
- armv5tejl) ARCH=arm;;
- armv6l) ARCH=arm;;
- armv6hl) ARCH=arm;;
- armv7l) ARCH=arm;;
- armv7hl) ARCH=arm;;
- tile) ARCH=tile;;
- e2k) ARCH=e2k;;
- *) ARCH=noarch;;
-esac
-
-dnl
-dnl Convert between x86 and amd64 based on the compiler's mode.
-dnl Ditto between ultrasparc and sparc64.
-dnl
-AC_MSG_CHECKING(whether compilation mode forces ARCH adjustment)
-case "$ARCH-$ac_cv_sizeof_void_p" in
-x86-8)
- AC_MSG_RESULT(yes: adjusting ARCH=x86 to ARCH=amd64)
- ARCH=amd64
- ;;
-amd64-4)
- AC_MSG_RESULT(yes: adjusting ARCH=amd64 to ARCH=x86)
- ARCH=x86
- ;;
-ultrasparc-8)
- AC_MSG_RESULT(yes: adjusting ARCH=ultrasparc to ARCH=sparc64)
- ARCH=sparc64
- ;;
-sparc64-4)
- AC_MSG_RESULT(yes: adjusting ARCH=sparc64 to ARCH=ultrasparc)
- ARCH=ultrasparc
- ;;
-ppc64-4)
- AC_MSG_RESULT(yes: adjusting ARCH=ppc64 to ARCH=ppc)
- ARCH=ppc
- ;;
-ppc-8)
- AC_MSG_RESULT(yes: adjusting ARCH=ppc to ARCH=ppc64)
- ARCH=ppc64
- ;;
-arm-8)
- AC_MSG_RESULT(yes: adjusting ARCH=arm to ARCH=noarch)
- ARCH=noarch
- ;;
-*)
- AC_MSG_RESULT(no)
- ;;
-esac
-
AC_SUBST(OPSYS)
-AC_SUBST(ARCH)
+
+LM_HARDWARE_ARCH
dnl Check consistency of os and darwin-switches
diff --git a/lib/erl_interface/configure.in b/lib/erl_interface/configure.in
index 0a8fbf513c..696ebf5ca0 100644
--- a/lib/erl_interface/configure.in
+++ b/lib/erl_interface/configure.in
@@ -106,6 +106,19 @@ if test $ac_cv_sizeof_long = 8; then
CFLAGS="$CFLAGS -DEI_64BIT"
fi
+LM_HARDWARE_ARCH
+
+dnl Only ARCH values x86 and amd64 are treated as supporting unaligned
+dnl word access; for every other ARCH the define is left unset.
+AC_MSG_CHECKING([for unaligned word access])
+if test "$ARCH" = "x86" || test "$ARCH" = "amd64"; then
+    AC_MSG_RESULT([yes: x86 or amd64])
+    AC_DEFINE([HAVE_UNALIGNED_WORD_ACCESS], [1], [Define if hw supports unaligned word access])
+else
+    AC_MSG_RESULT([no])
+fi
+
AC_CHECK_TOOL(AR, ar, false)
if test "$AR" = false; then
AC_MSG_ERROR([No 'ar' command found in PATH])
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index b3bba82434..87cd75b1be 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -92,6 +92,51 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
}
+
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+
+#if SIZEOF_VOID_P == SIZEOF_LONG
+typedef unsigned long AsciiWord;
+#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG
+typedef unsigned long long AsciiWord;
+#else
+# error "Uknown word type"
+#endif
+
+#if SIZEOF_VOID_P == 4
+# define ASCII_CHECK_MASK ((AsciiWord)0x80808080U)
+#elif SIZEOF_VOID_P == 8
+# define ASCII_CHECK_MASK ((AsciiWord)0x8080808080808080U)
+#endif
+
+/*
+ * Scan the leading part of src one machine word (AsciiWord) at a time for
+ * as long as every byte in the word is 7-bit ASCII, i.e. no bit of
+ * ASCII_CHECK_MASK is set. When dst is non-NULL each accepted word is also
+ * copied to dst.
+ *
+ * Only whole words are handled: at most slen / sizeof(AsciiWord) words are
+ * examined and any tail bytes are left for the caller.
+ *
+ * Returns the number of bytes consumed from src (a multiple of
+ * sizeof(AsciiWord)), or 0 without touching anything if destlen < slen.
+ */
+static int ascii_fast_track(char* dst, const char* src, int slen, int destlen)
+{
+    const AsciiWord* rd = (const AsciiWord*) src;
+    const AsciiWord* const rd_end = rd + (slen / sizeof(AsciiWord));
+    AsciiWord* wr = (AsciiWord*) dst;
+
+    if (destlen < slen)
+        return 0;
+
+    /* Stop at the end of the whole words or at the first word that has
+     * the high bit set in any of its bytes. */
+    for (; rd < rd_end && (*rd & ASCII_CHECK_MASK) == 0; rd++) {
+        if (dst)
+            *wr++ = *rd;
+    }
+    return (const char*)rd - src;
+}
+#endif /* HAVE_UNALIGNED_WORD_ACCESS */
+
int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
erlang_char_encoding* res_encp)
{
@@ -99,6 +144,15 @@ int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
const char* const dst_end = dst + destlen;
int found_non_ascii = 0;
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+ {
+ int aft = ascii_fast_track(dst, src, slen, destlen);
+ src += aft;
+ slen -= aft;
+ dst += aft;
+ }
+#endif
+
while (slen > 0) {
if (dst >= dst_end) return -1;
if ((src[0] & 0x80) == 0) {
@@ -136,6 +190,14 @@ int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
const char* const dst_end = dst + destlen;
int found_non_ascii = 0;
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+ {
+ int aft = ascii_fast_track(dst, src, slen, destlen);
+ dst += aft;
+ src += aft;
+ }
+#endif
+
while (src < src_end) {
if (dst >= dst_end) return -1;
if ((src[0] & 0x80) == 0) {
diff --git a/lib/ic/c_src/oe_ei_encode_atom.c b/lib/ic/c_src/oe_ei_encode_atom.c
index 758586d1d4..99a9fe26f0 100644
--- a/lib/ic/c_src/oe_ei_encode_atom.c
+++ b/lib/ic/c_src/oe_ei_encode_atom.c
@@ -20,28 +20,37 @@
*/
#include <ic.h>
+#include <string.h>
+
+
+#define DIRTY_ATOM_ENC_MAX(LATIN1_CHARS) ((LATIN1_CHARS)*2 + 3)
+
+/*
+ * Encode the NUL-terminated atom name p into the output buffer of ev at
+ * offset ev->_iout, enlarging the buffer in steps of ev->_memchunk when the
+ * encoded atom would not fit.
+ *
+ * Fast path: when the worst-case encoded size, DIRTY_ATOM_ENC_MAX(len),
+ * fits after the current write offset, the exact-size dry run is skipped.
+ *
+ * Returns the result of ei_encode_atom_len(), or -1 with a NO_MEMORY
+ * exception set in ev when realloc() fails (ev->_outbuf is left unchanged
+ * in that case).
+ */
int oe_ei_encode_atom(CORBA_Environment *ev, const char *p) {
int size = ev->_iout;
+  size_t len = strlen(p);
+
+  /* The atom is written at offset ev->_iout (held in size), so the
+   * worst-case length has to be measured from that offset and not from
+   * the start of the buffer. */
+  if (size + DIRTY_ATOM_ENC_MAX(len) >= ev->_outbufsz) {
+
+      /* Called with a null buffer; size is expected to be advanced by the
+       * exact encoded length, which is what the check below relies on. */
+      ei_encode_atom_len(0,&size,p,len);
+
+      if (size >= ev->_outbufsz) {
+          char *buf = ev->_outbuf;
+          int bufsz = ev->_outbufsz + ev->_memchunk;
+
+          while (size >= bufsz)
+              bufsz += ev->_memchunk;
+
+          if ((buf = realloc(buf, bufsz)) == NULL) {
+              CORBA_exc_set(ev, CORBA_SYSTEM_EXCEPTION, NO_MEMORY, "End of heap memory while encoding");
+              return -1; /* OUT OF MEMORY */
+          }
- ei_encode_atom(0,&size,p);
-
- if (size >= ev->_outbufsz) {
- char *buf = ev->_outbuf;
- int bufsz = ev->_outbufsz + ev->_memchunk;
-
- while (size >= bufsz)
- bufsz += ev->_memchunk;
-
- if ((buf = realloc(buf, bufsz)) == NULL) {
- CORBA_exc_set(ev, CORBA_SYSTEM_EXCEPTION, NO_MEMORY, "End of heap memory while encoding");
- return -1; /* OUT OF MEMORY */
- }
-
- ev->_outbuf = buf;
- ev->_outbufsz = bufsz;
+          ev->_outbuf = buf;
+          ev->_outbufsz = bufsz;
+      }
+  }
- return ei_encode_atom(ev->_outbuf,&ev->_iout,p);
+  return ei_encode_atom_len(ev->_outbuf,&ev->_iout,p,len);
}