/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 1998-2016. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

/*
 * NOTE(review): the first directive below has no header name. A
 * "<...>" system header was presumably stripped from it -- TODO restore.
 */
#include
#include "eidef.h"
#include "eiext.h"
#include "putget.h"

/*
 * Decode an atom from buf at offset *index into p.
 *
 * Thin wrapper around ei_decode_atom_as(): the destination size is
 * MAXATOMLEN, the requested encoding is ERLANG_LATIN1, and both encoding
 * out-parameters are passed as NULL. Returns whatever
 * ei_decode_atom_as() returns.
 */
int ei_decode_atom(const char *buf, int *index, char *p)
{
    return ei_decode_atom_as(buf, index, p, MAXATOMLEN, ERLANG_LATIN1, NULL, NULL);
}

/*
 * Decode an atom from buf at offset *index, requesting encoding want_enc.
 *
 * The visible part of this function reads a tag byte and, depending on
 * the tag, a one- or two-byte length, and records whether the encoded
 * atom is Latin-1 or UTF-8. An unrecognised tag yields -1.
 *
 * NOTE(review): only the beginning of this function is present here; see
 * the note further down. The handling of p, was_encp, res_encp and the
 * update of *index cannot be documented from what is visible.
 */
int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
                      erlang_char_encoding want_enc,
                      erlang_char_encoding* was_encp,
                      erlang_char_encoding* res_encp)
{
    const char *s = buf + *index;
    const char *s0 = s;
    int len;
    erlang_char_encoding got_enc;

    /*
     * get8/get16be are defined outside this view; presumably they read
     * from s and advance it -- TODO confirm.
     */
    switch (get8(s)) {
    case ERL_ATOM_EXT:              /* 16-bit length, Latin-1 */
        len = get16be(s);
        got_enc = ERLANG_LATIN1;
        break;
    case ERL_SMALL_ATOM_EXT:        /* 8-bit length, Latin-1 */
        len = get8(s);
        got_enc = ERLANG_LATIN1;
        break;
    case ERL_ATOM_UTF8_EXT:         /* 16-bit length, UTF-8 */
        len = get16be(s);
        got_enc = ERLANG_UTF8;
        break;
    case ERL_SMALL_ATOM_UTF8_EXT:   /* 8-bit length, UTF-8 */
        len = get8(s);
        got_enc = ERLANG_UTF8;
        break;
    default:
        return -1;
    }

    /*
     * Taken when the requested encoding shares a bit with the encoding
     * found, or when plain ASCII was requested.
     */
    if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) {
        int i, found_non_ascii = 0;

        /* len must be strictly smaller than destlen. */
        if (len >= destlen)
            return -1;

        /*
         * NOTE(review): the statement below is not valid C ("i 0" has no
         * operator), and the code that follows it uses dst, src, slen,
         * dst_end and dst_start, none of which are declared in this
         * function. It looks as if the text between a '<' and a later '>'
         * was removed, taking the rest of this function and the opening
         * of a following definition with it -- TODO restore from a
         * pristine copy before building.
         *
         * What remains below is a conversion loop: it consumes src/slen
         * and produces at most one byte per step at dst, storing only
         * when dst_start is non-NULL.
         */
        for (i=0; i 0) {
            /* No room left in the destination. */
            if (dst >= dst_end)
                return -1;

            if ((src[0] & 0x80) == 0) {
                /* High bit clear: the byte is passed through unchanged. */
                if (dst_start) {
                    *dst = *src;
                }
                ++dst;
                ++src;
                --slen;
            }
            else if (slen > 1 &&
                     (src[0] & 0xFE) == 0xC2 &&
                     (src[1] & 0xC0) == 0x80) {
                /*
                 * Two-byte sequence: first byte 0xC2 or 0xC3, second byte
                 * of the form 10xxxxxx. It is folded into a single byte.
                 */
                if (dst_start) {
                    *dst = (char) ((src[0] << 6) | (src[1] & 0x3F));
                }
                ++dst;
                src += 2;
                slen -= 2;
                found_non_ascii = 1;
            }
            else
                return -1;  /* any other byte pattern is rejected */
        }

        /*
         * Report the resulting encoding if the caller asked for it. The
         * conditional expression is completed on the following line of
         * the file.
         */
        if (res_encp) {
            *res_encp = found_non_ascii ?
ERLANG_LATIN1 : ERLANG_ASCII;
    }
    return dst - dst_start;
}   /* end of the definition whose body begins on the preceding line */

/*
 * Convert slen bytes of Latin-1 text at src to UTF-8.
 *
 *   dst      - output buffer, or NULL to only compute the encoded length
 *              (nothing is stored in that case).
 *   src      - input bytes.
 *   slen     - number of input bytes.
 *   destlen  - capacity of dst in bytes; enforced even when dst is NULL.
 *   res_encp - if non-NULL, receives ERLANG_UTF8 when the loop below met
 *              a byte with the high bit set, otherwise ERLANG_ASCII.
 *              Left untouched on failure.
 *
 * Returns the number of output bytes, or -1 if the output does not fit
 * in destlen bytes. No terminating NUL is written.
 */
int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
                   erlang_char_encoding* res_encp)
{
    const char* const src_end = src + slen;
    int out = 0;                /* output bytes produced (or counted) */
    int found_non_ascii = 0;

#ifdef HAVE_UNALIGNED_WORD_ACCESS
    {
        /*
         * ascii_fast_track is defined outside this view; its result is
         * used as the number of leading bytes already handled, one
         * output byte per input byte -- presumably a run of pure ASCII,
         * TODO confirm.
         */
        int aft = ascii_fast_track(dst, src, slen, destlen);
        out += aft;
        src += aft;
    }
#endif

    while (src < src_end) {
        const unsigned char ch = (unsigned char) *src;

        if ((ch & 0x80) == 0) {
            /* ASCII: one byte in, one byte out. */
            if (out >= destlen)
                return -1;
            if (dst) {
                dst[out] = (char) ch;
            }
            out += 1;
        }
        else {
            /*
             * Non-ASCII Latin-1 expands to two bytes, so BOTH must fit.
             * Checking for only one free byte here would let the second
             * store land one past the end of the buffer and make the
             * returned length exceed destlen.
             */
            if (out >= destlen || destlen - out < 2)
                return -1;
            if (dst) {
                dst[out]     = (char) (0xC0 | (ch >> 6));
                dst[out + 1] = (char) (0x80 | (ch & 0x3F));
            }
            out += 2;
            found_non_ascii = 1;
        }
        ++src;
    }

    if (res_encp) {
        *res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII;
    }
    return out;
}

/*
 * Decode an atom at *bufp into p as UTF-8 and step *bufp past it.
 *
 * Calls ei_decode_atom_as() with a fresh index of 0, a destination size
 * of MAXATOMLEN_UTF8 and ERLANG_UTF8 as the requested encoding; was_encp
 * is forwarded unchanged and the result-encoding out-parameter is NULL.
 *
 * Returns 0 on success, after advancing *bufp by the index value left by
 * the decoder. Returns -1 if decoding fails, leaving *bufp as it was.
 */
int ei_internal_get_atom(const char** bufp, char* p,
                         erlang_char_encoding* was_encp)
{
    int ix = 0;

    if (ei_decode_atom_as(*bufp, &ix, p, MAXATOMLEN_UTF8, ERLANG_UTF8,
                          was_encp, NULL) < 0)
        return -1;

    *bufp += ix;
    return 0;
}