aboutsummaryrefslogblamecommitdiffstats
path: root/lib/erl_interface/src/decode/decode_atom.c
blob: df1e5b9350ee008169485cc93d2bea17cfb27626 (plain) (tree)
1
2
3
4
5


                   
                                                        
   










                                                                           







                   
 

                                                        
                                                                                   

 
                                                                        


                                                     
 


                                 
                                 





















                                 
                                                           
















                                                                 

                                                          








                                
             

        
 
                                                                     
                                                  
 


                                              
 







                                      


                            






                                                                
                      
                                


                       






                                                                     
                                                  

































                                                                 
                                                        




                                                                                           
             
 
 
 
/*
 * %CopyrightBegin%
 * 
 * Copyright Ericsson AB 1998-2013. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * %CopyrightEnd%
 */
#include <string.h>
#include "eidef.h"
#include "eiext.h"
#include "putget.h"


/*
 * Decode an atom at buf + *index, requesting Latin-1 output.
 *
 * Convenience front end for ei_decode_atom_as(): the destination size is
 * fixed to MAXATOMLEN and neither the original nor the resulting encoding
 * is reported back. The return value is that of ei_decode_atom_as().
 */
int ei_decode_atom(const char *buf, int *index, char *p)
{
    const int status = ei_decode_atom_as(buf, index, p, MAXATOMLEN,
					 ERLANG_LATIN1, NULL, NULL);

    return status;
}

/*
 * Decode an atom in external format found at buf + *index.
 *
 * buf, index  Input buffer and offset of the atom tag. *index is advanced
 *             past the atom on success and left untouched on failure.
 * p           Destination for the NUL-terminated atom text, or NULL to
 *             decode without storing anything.
 * destlen     Size of the destination in bytes, terminator included.
 * want_enc    Encoding(s) the caller accepts; tested as a bit mask against
 *             the encoding found in the buffer.
 * was_encp    If non-NULL, receives the encoding found in the buffer.
 * res_encp    If non-NULL, receives the encoding of the produced text
 *             (ERLANG_ASCII when only 7-bit characters were seen).
 *
 * Returns 0 on success and -1 on an unknown tag, when the text (plus
 * terminator) does not fit in destlen bytes, or when the atom cannot be
 * represented in the requested encoding.
 */
int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
		      erlang_char_encoding want_enc,
		      erlang_char_encoding* was_encp,
		      erlang_char_encoding* res_encp)
{
    const char *s = buf + *index;
    const char *s0 = s;
    int len;
    erlang_char_encoding got_enc;

    /* The tag decides both the width of the length field and the encoding. */
    switch (get8(s)) {
    case ERL_ATOM_EXT:
	len = get16be(s);
	got_enc = ERLANG_LATIN1;
	break;
    case ERL_SMALL_ATOM_EXT:
	len = get8(s);
	got_enc = ERLANG_LATIN1;
	break;
    case ERL_ATOM_UTF8_EXT:
	len = get16be(s);
	got_enc = ERLANG_UTF8;
	break;
    case ERL_SMALL_ATOM_UTF8_EXT:
	len = get8(s);
	got_enc = ERLANG_UTF8;
	break;
    default:
	return -1;
    }

    if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) {
	/* No transcoding needed: plain copy while scanning for 8-bit bytes. */
	int i, found_non_ascii = 0;
	if (len >= destlen)
	    return -1;
	for (i=0; i<len; i++) {
	    if (s[i] & 0x80) found_non_ascii = 1;
	    if (p) p[i] = s[i];
	}
	if (p) p[len] = 0;
	if (want_enc == ERLANG_ASCII && found_non_ascii) {
	    return -1;
	}
	if (res_encp) {
	    *res_encp = found_non_ascii ? got_enc : ERLANG_ASCII;
	}
    }
    else {
	/* Transcode; one byte of the destination is kept for the NUL. */
	int plen = (got_enc == ERLANG_LATIN1) ?
	  latin1_to_utf8(p, s, len, destlen-1, res_encp) :
	  utf8_to_latin1(p, s, len, destlen-1, res_encp);
	/*
	 * Never place the terminator outside the destination: reject a
	 * result that leaves no room for it (this also covers destlen <= 0,
	 * matching the len >= destlen check of the copy branch).
	 */
	if (plen < 0 || plen >= destlen) return -1;
	if (p) p[plen] = 0;
    }
    if (was_encp) {
	*was_encp = got_enc;
    }

    s += len;
    *index += s-s0;
    return 0;
}


/*
 * Convert slen bytes of UTF-8 at src into Latin-1.
 *
 * dst       Destination buffer, or NULL to only validate and count.
 * destlen   Maximum number of bytes that may be produced.
 * res_encp  If non-NULL, set on success to ERLANG_LATIN1 when at least one
 *           non-ASCII character was converted, otherwise ERLANG_ASCII.
 *
 * Only one-byte sequences and two-byte sequences with lead byte 0xC2 or
 * 0xC3 followed by a continuation byte are accepted; anything else
 * cannot be expressed in Latin-1 and yields -1, as does running out of
 * destination space. On success the number of bytes produced is returned.
 * No terminator is written.
 */
int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
		   erlang_char_encoding* res_encp)
{
    /* Work on unsigned bytes so masks and shifts never see negative values. */
    const unsigned char* in = (const unsigned char*) src;
    int out = 0;
    int found_non_ascii = 0;

    while (slen > 0) {
	if (out >= destlen) return -1;
	if ((in[0] & 0x80) == 0) {
	    if (dst) {
		dst[out] = (char) in[0];
	    }
	    ++out;
	    ++in;
	    --slen;
	}
	else if (slen > 1 &&
		 (in[0] & 0xFE) == 0xC2 &&
		 (in[1] & 0xC0) == 0x80) {
	    if (dst) {
		/* Low two bits of the lead byte become the top two bits. */
		dst[out] = (char) (((in[0] & 0x03) << 6) | (in[1] & 0x3F));
	    }
	    ++out;
	    in += 2;
	    slen -= 2;
	    found_non_ascii = 1;
	}
	else return -1;
    }
    if (res_encp) {
	*res_encp = found_non_ascii ? ERLANG_LATIN1 : ERLANG_ASCII;
    }
    return out;
}

/*
 * Convert slen bytes of Latin-1 at src into UTF-8.
 *
 * dst       Destination buffer, or NULL to only count the output size.
 * destlen   Maximum number of bytes that may be produced.
 * res_encp  If non-NULL, set on success to ERLANG_UTF8 when at least one
 *           non-ASCII character was converted, otherwise ERLANG_ASCII.
 *
 * Returns the number of bytes produced, or -1 if the result does not fit
 * in destlen bytes. No terminator is written.
 */
int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
		   erlang_char_encoding* res_encp)
{
    const char* const src_end = src + slen;
    int out = 0;
    int found_non_ascii = 0;

    while (src < src_end) {
	const unsigned char ch = (unsigned char) *src;

	if (out >= destlen) return -1;
	if ((ch & 0x80) == 0) {
	    if (dst) {
		dst[out] = (char) ch;
	    }
	    ++out;
	}
	else {
	    /* A non-ASCII character needs two bytes; make sure both fit. */
	    if (destlen - out < 2) return -1;
	    if (dst) {
		dst[out]     = (char) (0xC0 | (ch >> 6));
		dst[out + 1] = (char) (0x80 | (ch & 0x3F));
	    }
	    out += 2;
	    found_non_ascii = 1;
	}
	++src;
    }
    if (res_encp) {
	*res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII;
    }
    return out;
}



/*
 * Decode an atom as UTF-8 from *bufp and step *bufp past it.
 *
 * p receives the text (decoded with a destination size of
 * MAXATOMLEN_UTF8) and was_encp, if non-NULL, the encoding found in the
 * buffer. Returns 0 on success; on failure -1 is returned and *bufp is
 * left unchanged.
 */
int ei_internal_get_atom(const char** bufp, char* p,
			 erlang_char_encoding* was_encp)
{
    int consumed = 0;
    const int rc = ei_decode_atom_as(*bufp, &consumed, p, MAXATOMLEN_UTF8,
				     ERLANG_UTF8, was_encp, NULL);

    if (rc >= 0) {
	*bufp += consumed;
	return 0;
    }
    return -1;
}