From c2a9915a451a3c6ad618f55a2871a43403333b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 17 Jan 2018 16:01:28 +0100 Subject: Optimize matching of an 'utf8' segment in the binary syntax Matching out an 8-bit integer is faster than matching out an utf8-encoded code point, even if the value of the code point is less than 128. The reason is that matching out an 8-bit integer is specially optimized to avoid a function call. Do a similar optimization for matching out an utf8 segment. --- erts/emulator/beam/bs_instrs.tab | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 9f03b19731..b11903a47b 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -919,9 +919,23 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { } i_bs_get_utf8(Ctx, Fail, Dst) { + Eterm result; ErlBinMatchBuffer* mb = ms_matchbuffer($Ctx); - Eterm result = erts_bs_get_utf8(mb); + if (mb->size - mb->offset < 8) { + $FAIL($Fail); + } + if (BIT_OFFSET(mb->offset) != 0) { + result = erts_bs_get_utf8(mb); + } else { + byte b = mb->base[BYTE_OFFSET(mb->offset)]; + if (b < 128) { + result = make_small(b); + mb->offset += 8; + } else { + result = erts_bs_get_utf8(mb); + } + } if (is_non_value(result)) { $FAIL($Fail); } -- cgit v1.2.3