aboutsummaryrefslogblamecommitdiffstats
path: root/lib/erl_interface/src/encode/encode_atom.c
blob: 372122f3eb3a61a4eaf85078e289e664c7e4fff8 (plain) (tree)
1
2
3
4
5


                   
                                                        
   










                                                                           



                   
                   



                   
 

                                                        
                                                     
 

                                                        

                           


                                                                                   



                                                                     






                                                                                   

                                                    




                                                                             

                                                        
 

                         
           
 
                                                                       

                
 







                                                                       









                                                                    

                                                           
                    












                                                                     

                                                         










                                                                                       

                                                       
                

                                 

                                                      















                                               
 

                







                  

                                                          
                                                 






                                                                            

 
                                                       


                                            
              




               
                                                      







                                                                   
                      



                                                                                              
                         



                                                                                                                         
                         



                           
              




               









                                                    
/*
 * %CopyrightBegin%
 * 
 * Copyright Ericsson AB 1998-2013. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * %CopyrightEnd%
 */
#include <string.h>
#include <limits.h>
#include "eidef.h"
#include "eiext.h"
#include "putget.h"


static int verify_ascii_atom(const char* src, int slen);
static int verify_utf8_atom(const char* src, int slen);
static int is_latin1_as_utf8(const char *p, int len);

int ei_encode_atom(char *buf, int *index, const char *p)
{
    size_t len = strlen(p);

    if (len >= MAXATOMLEN)
	len = MAXATOMLEN - 1;
    return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}

int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
{
    /* This function is documented to truncate at MAXATOMLEN (256) */ 
    if (len >= MAXATOMLEN)
	len = MAXATOMLEN - 1;
    return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}

int ei_encode_atom_as(char *buf, int *index, const char *p,
		      erlang_char_encoding from_enc,
		      erlang_char_encoding to_enc)
{	
    return ei_encode_atom_len_as(buf, index, p, strlen(p), from_enc, to_enc);
}

int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
			  erlang_char_encoding from_enc,
			  erlang_char_encoding to_enc)
{
  char *s = buf + *index;
  char *s0 = s;
  int offs;

  if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
      return -1;
  }

  if (to_enc == (ERLANG_LATIN1 | ERLANG_UTF8)) {
    if (from_enc == ERLANG_UTF8) {
      to_enc = is_latin1_as_utf8(p, len) ? ERLANG_LATIN1 : ERLANG_UTF8;
    }
    else {
      to_enc = from_enc;
    }
  }
  switch(to_enc) {
  case ERLANG_LATIN1:
      if (buf) {
	  put8(s,ERL_ATOM_EXT);
	  switch (from_enc) {
	  case ERLANG_UTF8:
	      len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
	      if (len < 0) return -1;
	      break;
	  case ERLANG_ASCII:
	      if (verify_ascii_atom(p, len) < 0) return -1;
	      memcpy(s+2, p, len);
	      break;
	  case ERLANG_LATIN1:
	      memcpy(s+2, p, len);
	      break;
	  default:
	      return -1;
	  }
	  put16be(s,len);
      }
      else {
	  s += 3;
	  if (from_enc == ERLANG_UTF8) {
	      len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
	      if (len < 0) return -1;
	  } else if (from_enc == ERLANG_ASCII)
	    if (verify_ascii_atom(p, len) < 0) return -1;
      }
      break;
      
  case ERLANG_UTF8:
      offs =  1 + 1;
      switch (from_enc) {
      case ERLANG_LATIN1:
	  if (len >= 256/2) offs++;
	  len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
	  break;
      case ERLANG_ASCII:
	  if (verify_ascii_atom(p, len) < 0) return -1;
	  if (buf) memcpy(s+offs,p,len);
	  break;
      case ERLANG_UTF8:
	  if (len >= 256) offs++;
	  if (verify_utf8_atom(p, len) < 0) return -1;
	  if (buf) memcpy(s+offs,p,len);
	  break;
      default:
	  return -1;
      }
      if (buf) {
	  if (offs == 2) {
	      put8(s, ERL_SMALL_ATOM_UTF8_EXT);
	      put8(s, len);
	  }
	  else {
	      put8(s, ERL_ATOM_UTF8_EXT);
	      put16be(s, len);
	  }
      }
      else s+= offs;
      break;

  default:
      return -1;
  }
  s += len;

  *index += s-s0; 

  return 0; 
}

int
ei_internal_put_atom(char** bufp, const char* p, int slen,
		     erlang_char_encoding to_enc)
{
    int ix = 0;
    if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0)
	return -1;
    *bufp += ix;
    return 0;
}


static int verify_ascii_atom(const char* src, int slen)
{
    while (slen > 0) {
	if ((src[0] & 0x80) != 0) return -1;
	src++;
	slen--;
    }
    return 0;
}

static int verify_utf8_atom(const char* src, int slen)
{
    int num_chars = 0;

    while (slen > 0) {
	if (++num_chars >= MAXATOMLEN) return -1;
	if ((src[0] & 0x80) != 0) {
	    if ((src[0] & 0xE0) == 0xC0) {
		if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1;
		src++;
		slen--;
	    }
	    else if ((src[0] & 0xF0) == 0xE0) {
		if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1;
		src += 2;
		slen -= 2;
	    }
	    else if ((src[0] & 0xF8) == 0xF0) {
		if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1;
		src += 3;
		slen -= 3;
	    }
	    else return -1;
	}
	src++;
	slen--;
    }
    return 0;
}

/* Only latin1 code points in utf8 string?
 */
static int is_latin1_as_utf8(const char *p, int len)
{
  int i;
  for (i=0; i<len; i++) {
    if ((unsigned char)p[i] > 0xC3) return 0;
  }
  return 1;
}