From 1ab478a7ce281f7cdce01df2fe04953c0770fdbc Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 19 Mar 2018 19:13:29 +0100 Subject: erl_interface: Optimize latin1_to_utf8 and friend to do word wise check/copy for pure ASCII if ARCH allows it (x86 and amd64). --- lib/erl_interface/configure.in | 92 ++++++++++++++++++++++++++++++ lib/erl_interface/src/decode/decode_atom.c | 62 ++++++++++++++++++++ 2 files changed, 154 insertions(+) (limited to 'lib/erl_interface') diff --git a/lib/erl_interface/configure.in b/lib/erl_interface/configure.in index 0a8fbf513c..7cc1f1f89a 100644 --- a/lib/erl_interface/configure.in +++ b/lib/erl_interface/configure.in @@ -106,6 +106,98 @@ if test $ac_cv_sizeof_long = 8; then CFLAGS="$CFLAGS -DEI_64BIT" fi +dnl +dnl Determine target hardware in ARCH +dnl +AC_MSG_CHECKING([target hardware architecture]) +if test "x$host_alias" != "x" -a "x$host_cpu" != "x"; then + chk_arch_=$host_cpu +else + chk_arch_=`uname -m` +fi + +case $chk_arch_ in + sun4u) ARCH=ultrasparc;; + sparc64) ARCH=sparc64;; + sun4v) ARCH=ultrasparc;; + i86pc) ARCH=x86;; + i386) ARCH=x86;; + i486) ARCH=x86;; + i586) ARCH=x86;; + i686) ARCH=x86;; + x86_64) ARCH=amd64;; + amd64) ARCH=amd64;; + macppc) ARCH=ppc;; + powerpc) ARCH=ppc;; + ppc) ARCH=ppc;; + ppc64) ARCH=ppc64;; + ppc64le) ARCH=ppc64le;; + "Power Macintosh") ARCH=ppc;; + armv5b) ARCH=arm;; + armv5teb) ARCH=arm;; + armv5tel) ARCH=arm;; + armv5tejl) ARCH=arm;; + armv6l) ARCH=arm;; + armv6hl) ARCH=arm;; + armv7l) ARCH=arm;; + armv7hl) ARCH=arm;; + tile) ARCH=tile;; + *) ARCH=noarch;; +esac +AC_MSG_RESULT($ARCH) + +dnl +dnl Convert between x86 and amd64 based on the compiler's mode. +dnl Ditto between ultrasparc and sparc64. +dnl +AC_MSG_CHECKING(whether compilation mode forces ARCH adjustment) +case "$ARCH-$ac_cv_sizeof_void_p" in + x86-8) + AC_MSG_RESULT(yes: adjusting ARCH=x86 to ARCH=amd64) + ARCH=amd64 + ;; + amd64-4) + AC_MSG_RESULT(yes: adjusting ARCH=amd64 to ARCH=x86) + ARCH=x86 + ;; + ultrasparc-8) + AC_MSG_RESULT(yes: adjusting ARCH=ultrasparc to ARCH=sparc64) + ARCH=sparc64 + ;; + sparc64-4) + AC_MSG_RESULT(yes: adjusting ARCH=sparc64 to ARCH=ultrasparc) + ARCH=ultrasparc + ;; + ppc64-4) + AC_MSG_RESULT(yes: adjusting ARCH=ppc64 to ARCH=ppc) + ARCH=ppc + ;; + ppc-8) + AC_MSG_RESULT(yes: adjusting ARCH=ppc to ARCH=ppc64) + ARCH=ppc64 + ;; + arm-8) + AC_MSG_RESULT(yes: adjusting ARCH=arm to ARCH=noarch) + ARCH=noarch + ;; + *) + AC_MSG_RESULT(no: ARCH is $ARCH) + ;; +esac + +AC_SUBST(ARCH) + +AC_MSG_CHECKING(for unaligned word access) +case "$ARCH" in + x86|amd64) + AC_MSG_RESULT(yes: x86 or amd64) + AC_DEFINE(HAVE_UNALIGNED_WORD_ACCESS, 1, [Define if hw supports unaligned word access]) + ;; + *) + AC_MSG_RESULT(no) + ;; +esac + AC_CHECK_TOOL(AR, ar, false) if test "$AR" = false; then AC_MSG_ERROR([No 'ar' command found in PATH]) diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c index b3bba82434..87cd75b1be 100644 --- a/lib/erl_interface/src/decode/decode_atom.c +++ b/lib/erl_interface/src/decode/decode_atom.c @@ -92,6 +92,51 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen, } + +#ifdef HAVE_UNALIGNED_WORD_ACCESS + +#if SIZEOF_VOID_P == SIZEOF_LONG +typedef unsigned long AsciiWord; +#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG +typedef unsigned long long AsciiWord; +#else +# error "Uknown word type" +#endif + +#if SIZEOF_VOID_P == 4 +# define ASCII_CHECK_MASK ((AsciiWord)0x80808080U) +#elif SIZEOF_VOID_P == 8 +# define ASCII_CHECK_MASK ((AsciiWord)0x8080808080808080U) +#endif + +static int ascii_fast_track(char* dst, const char* src, int slen, int destlen) +{ + const AsciiWord* src_word = (AsciiWord*) src; + const AsciiWord* const src_word_end = src_word + (slen / sizeof(AsciiWord)); + + if (destlen < slen) + return 0; + + if (dst) { + AsciiWord* dst_word = (AsciiWord*)dst; + + while (src_word < src_word_end) { + if ((*src_word & ASCII_CHECK_MASK) != 0) + break; + *dst_word++ = *src_word++; + } + } + else { + while (src_word < src_word_end) { + if ((*src_word & ASCII_CHECK_MASK) != 0) + break; + src_word++; + } + } + return (char*)src_word - src; +} +#endif /* HAVE_UNALIGNED_WORD_ACCESS */ + int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, erlang_char_encoding* res_encp) { @@ -99,6 +144,15 @@ int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, const char* const dst_end = dst + destlen; int found_non_ascii = 0; +#ifdef HAVE_UNALIGNED_WORD_ACCESS + { + int aft = ascii_fast_track(dst, src, slen, destlen); + src += aft; + slen -= aft; + dst += aft; + } +#endif + while (slen > 0) { if (dst >= dst_end) return -1; if ((src[0] & 0x80) == 0) { @@ -136,6 +190,14 @@ int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, const char* const dst_end = dst + destlen; int found_non_ascii = 0; +#ifdef HAVE_UNALIGNED_WORD_ACCESS + { + int aft = ascii_fast_track(dst, src, slen, destlen); + dst += aft; + src += aft; + } +#endif + while (src < src_end) { if (dst >= dst_end) return -1; if ((src[0] & 0x80) == 0) { -- cgit v1.2.3