From 60c2511da7fb72197a807e77e62fdf671676e718 Mon Sep 17 00:00:00 2001 From: David JULIEN Date: Wed, 13 Apr 2011 12:56:01 +0200 Subject: Add latin9 (iso-8859-15) support in xmerl_ucs --- lib/xmerl/src/xmerl_ucs.erl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_ucs.erl b/lib/xmerl/src/xmerl_ucs.erl index 7c45c838ab..75e0f5dccb 100644 --- a/lib/xmerl/src/xmerl_ucs.erl +++ b/lib/xmerl/src/xmerl_ucs.erl @@ -43,6 +43,7 @@ -export([to_utf16be/1, from_utf16be/1, from_utf16be/2]). -export([to_utf16le/1, from_utf16le/1, from_utf16le/2]). -export([to_utf8/1, from_utf8/1]). +-export([from_latin9/1]). %%% NB: Non-canonical UTF-8 encodings and incorrectly used %%% surrogate-pair codes are disallowed by this code. There are @@ -184,6 +185,20 @@ from_utf8(List) -> exit({ucs,{bad_utf8_character_code}}) end. +%%% Latin9 support +from_latin9(Bin) when is_binary(Bin) -> from_latin9(binary_to_list(Bin)); +from_latin9(List) -> + [ latin9_to_ucs4(Char) || Char <- List]. + +latin9_to_ucs4(16#A4) -> 16#20AC; +latin9_to_ucs4(16#A6) -> 16#160; +latin9_to_ucs4(16#A8) -> 16#161; +latin9_to_ucs4(16#B4) -> 16#17D; +latin9_to_ucs4(16#B8) -> 16#17E; +latin9_to_ucs4(16#BC) -> 16#152; +latin9_to_ucs4(16#BD) -> 16#153; +latin9_to_ucs4(16#BE) -> 16#178; +latin9_to_ucs4(Other) -> Other. 
@@ -476,6 +491,8 @@ to_unicode(Input,Cs) when Cs=='iso_8859-1:1987';Cs=='iso-ir-100'; Cs=='l1';Cs=='ibm819'; Cs=='cp819';Cs=='csisolatin1' -> Input; +to_unicode(Input,Cs) when Cs=='iso_8859-15';Cs=='iso-8859-15';Cs=='latin9' -> + from_latin9(Input); % to_unicode(Input,Cs) when Cs=='mnemonic';Cs=='"mnemonic+ascii+38'; % Cs=='mnem';Cs=='"mnemonic+ascii+8200' -> % from_mnemonic(Input); -- cgit v1.2.3 From 374b0299972709feb2dc2a6dbf8b02677b87d4fa Mon Sep 17 00:00:00 2001 From: Michal Ptaszek Date: Thu, 25 Aug 2011 13:48:43 +0200 Subject: Fixed xmerl_ucs UCS2 little endian en/decoding Corrected number of shift bytes in xmerl_ucs:char_to_ucs2le and recursive call from from_ucs2le to from_ucs4le. --- lib/xmerl/src/xmerl_ucs.erl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_ucs.erl b/lib/xmerl/src/xmerl_ucs.erl index 7c45c838ab..076595d873 100644 --- a/lib/xmerl/src/xmerl_ucs.erl +++ b/lib/xmerl/src/xmerl_ucs.erl @@ -1,19 +1,19 @@ %% %% %CopyrightBegin% -%% +%% %% Copyright Ericsson AB 2005-2009. All Rights Reserved. -%% +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% @@ -177,7 +177,7 @@ to_utf8(List) when is_list(List) -> lists:flatmap(fun to_utf8/1, List); to_utf8(Ch) -> char_to_utf8(Ch). 
from_utf8(Bin) when is_binary(Bin) -> from_utf8(binary_to_list(Bin)); -from_utf8(List) -> +from_utf8(List) -> case expand_utf8(List) of {Result,0} -> Result; {_Res,_NumBadChar} -> @@ -238,7 +238,7 @@ from_ucs4le(Bin,Acc,Tail) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% UCS-2 support -%%% FIXME! Don't know how to encode UCS-2!! +%%% FIXME! Don't know how to encode UCS-2!! %%% Currently I just encode as UCS-4, but strips the 16 higher bits. char_to_ucs2be(Ch) -> true = is_iso10646(Ch), @@ -259,15 +259,15 @@ from_ucs2be(Bin,Acc,Tail) -> char_to_ucs2le(Ch) -> true = is_iso10646(Ch), - [(Ch bsr 16) band 16#FF, - (Ch bsr 24)]. + [Ch band 16#FF, + (Ch bsr 8) band 16#FF]. from_ucs2le(<<Ch:16/little-signed-integer, Rest/binary>>,Acc,Tail) -> if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF -> exit({bad_character_code,Ch}); true -> - from_ucs4le(Rest,[Ch|Acc],Tail) + from_ucs2le(Rest,[Ch|Acc],Tail) end; from_ucs2le(<<>>,Acc,Tail) -> lists:reverse(Acc,Tail); -- cgit v1.2.3 From d15f145c4b459bb7bf3e31dd66f7cd703d4c4fb8 Mon Sep 17 00:00:00 2001 From: Tuncer Ayaz Date: Wed, 31 Aug 2011 15:15:47 +0200 Subject: Fix misspelling of erroneous in xmerl_xsd --- lib/xmerl/src/xmerl_xsd.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index f003cc74ba..50c0a79016 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -742,7 +742,7 @@ element_content({IDC,S},El,Env) {{IDC,IDConstr},S3}; Err -> S3 = acc_errs(S2,{error_path(El,El#xmlElement.name),?MODULE, - {erronous_content_in_identity_constraint,IDC,Err}}), + {erroneous_content_in_identity_constraint,IDC,Err}}), {{IDC,[]},S3} end; element_content({selector,S},Sel,_Env) -> @@ -5571,7 +5571,7 @@ format_error({incomplete_file,_FileName,_Other}) -> "Schema: The file containing a schema state must be produced by xmerl_xsd:state2file/[1,2]."; format_error({unexpected_content_in_any,A}) -> 
io_lib:format("Schema: The any type is considered to have no content besides annotation. ~p was found.",[A]); -format_error({erronous_content_in_identity_constraint,IDC,Err}) -> +format_error({erroneous_content_in_identity_constraint,IDC,Err}) -> io_lib:format("Schema: An ~p identity constraint must have one selector and one or more field in content. This case ~p",[IDC,Err]); format_error({missing_xpath_attribute,IDCContent}) -> io_lib:format("Schema: A ~p in a identity constraint must have a xpath attribute.",[IDCContent]); -- cgit v1.2.3