aboutsummaryrefslogblamecommitdiffstats
path: root/lib/erl_interface/src/encode/encode_atom.c
blob: 26e485518dd25de3b343dd199e7d628c17adf4e4 (plain) (tree)
1
2
3
4


                   
                                                        














                                                                         
                   



                   
 

                                                        

 

                                                        

                           


                                                                                   



                                                                     
















                                                                                   

                         
           
 
                                                                       

                
 









                                                                    

                                                           
                    












                                                                     

                                                         










                                                                                       

                                                       
                

                                 

                                                      















                                               
 

                







                  









                                                                            

 
                                                


                                            
              




               
                                               







                                                                   
                      



                                                                                              
                         



                                                                                                                         
                         



                           
              




               
/*
 * %CopyrightBegin%
 * 
 * Copyright Ericsson AB 1998-2011. All Rights Reserved.
 * 
 * The contents of this file are subject to the Erlang Public License,
 * Version 1.1, (the "License"); you may not use this file except in
 * compliance with the License. You should have received a copy of the
 * Erlang Public License along with this software. If not, it can be
 * retrieved online at http://www.erlang.org/.
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 * 
 * %CopyrightEnd%
 */
#include <string.h>
#include <limits.h>
#include "eidef.h"
#include "eiext.h"
#include "putget.h"


static int verify_ascii_atom(const char* src, int slen);
static int verify_utf8_atom(const char* src, int slen);


int ei_encode_atom(char *buf, int *index, const char *p)
{
    size_t len = strlen(p);

    if (len >= MAXATOMLEN)
	len = MAXATOMLEN - 1;
    return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}

int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
{
    /* This function is documented to truncate at MAXATOMLEN (256) */ 
    if (len >= MAXATOMLEN)
	len = MAXATOMLEN - 1;
    return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}

int ei_encode_atom_as(char *buf, int *index, const char *p,
		      enum erlang_char_encoding from_enc,
		      enum erlang_char_encoding to_enc)
{	
    return ei_encode_atom_len_as(buf, index, p, strlen(p), from_enc, to_enc);
}

int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
			  enum erlang_char_encoding from_enc,
			  enum erlang_char_encoding to_enc)
{
  char *s = buf + *index;
  char *s0 = s;
  int offs;

  if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
      return -1;
  }

  switch(to_enc) {
  case ERLANG_LATIN1:
      if (buf) {
	  put8(s,ERL_ATOM_EXT);
	  switch (from_enc) {
	  case ERLANG_UTF8:
	      len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
	      if (len < 0) return -1;
	      break;
	  case ERLANG_ASCII:
	      if (verify_ascii_atom(p, len) < 0) return -1;
	      memcpy(s+2, p, len);
	      break;
	  case ERLANG_LATIN1:
	      memcpy(s+2, p, len);
	      break;
	  default:
	      return -1;
	  }
	  put16be(s,len);
      }
      else {
	  s += 3;
	  if (from_enc == ERLANG_UTF8) {
	      len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
	      if (len < 0) return -1;
	  } else if (from_enc == ERLANG_ASCII)
	    if (verify_ascii_atom(p, len) < 0) return -1;
      }
      break;
      
  case ERLANG_UTF8:
      offs =  1 + 1;
      switch (from_enc) {
      case ERLANG_LATIN1:
	  if (len >= 256/2) offs++;
	  len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
	  break;
      case ERLANG_ASCII:
	  if (verify_ascii_atom(p, len) < 0) return -1;
	  if (buf) memcpy(s+offs,p,len);
	  break;
      case ERLANG_UTF8:
	  if (len >= 256) offs++;
	  if (verify_utf8_atom(p, len) < 0) return -1;
	  if (buf) memcpy(s+offs,p,len);
	  break;
      default:
	  return -1;
      }
      if (buf) {
	  if (offs == 2) {
	      put8(s, ERL_SMALL_ATOM_UTF8_EXT);
	      put8(s, len);
	  }
	  else {
	      put8(s, ERL_ATOM_UTF8_EXT);
	      put16be(s, len);
	  }
      }
      else s+= offs;
      break;

  default:
      return -1;
  }
  s += len;

  *index += s-s0; 

  return 0; 
}

int
ei_internal_put_atom(char** bufp, const char* p, int slen,
		     enum erlang_char_encoding to_enc)
{
    int ix = 0;
    if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0)
	return -1;
    *bufp += ix;
    return 0;
}


int verify_ascii_atom(const char* src, int slen)
{
    while (slen > 0) {
	if ((src[0] & 0x80) != 0) return -1;
	src++;
	slen--;
    }
    return 0;
}

int verify_utf8_atom(const char* src, int slen)
{
    int num_chars = 0;

    while (slen > 0) {
	if (++num_chars >= MAXATOMLEN) return -1;
	if ((src[0] & 0x80) != 0) {
	    if ((src[0] & 0xE0) == 0xC0) {
		if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1;
		src++;
		slen--;
	    }
	    else if ((src[0] & 0xF0) == 0xE0) {
		if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1;
		src += 2;
		slen -= 2;
	    }
	    else if ((src[0] & 0xF8) == 0xF0) {
		if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1;
		src += 3;
		slen -= 3;
	    }
	    else return -1;
	}
	src++;
	slen--;
    }
    return 0;
}