1 files changed, 44 insertions, 29 deletions
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index 4e5cc59d8f..25c6547497 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -1,19 +1,19 @@
 %%
 %% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
-%% 
+%%
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
+%%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
 %% compliance with the License. You should have received a copy of the
 %% Erlang Public License along with this software. If not, it can be
 %% retrieved online at http://www.erlang.org/.
-%% 
+%%
 %% Software distributed under the License is distributed on an "AS IS"
 %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 %% the License for the specific language governing rights and limitations
 %% under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 
@@ -34,7 +34,9 @@
 %% See also <a href="xmerl_examples.html">tutorial</a> on customization
 %% functions.
 %% </p>
+%% <p>
 %% Possible options are:
+%% </p>
 %% <dl>
 %%  <dt><code>{acc_fun, Fun}</code></dt>
 %%    <dd>Call back function to accumulate contents of entity.</dd>
@@ -100,6 +102,7 @@
 %%    declaration. </dd>
 %% </dl>
 
+
 -module(xmerl_scan).
 -vsn('0.20').
 -date('03-09-16').
@@ -2071,10 +2074,10 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
     {AttName, NamespaceInfo, T1, S1} = scan_name(T, S),
     {T2, S2} = scan_eq(T1, S1),
     {AttType,_DefaultDecl} = get_att_type(S2,AttName,Name),
-    {AttValue, T3, S3,IsNorm} = scan_att_value(T2, S2, AttType),
+    {AttValue, T3a, S3a,IsNorm} = scan_att_value(T2, S2, AttType),
 %%    check_default_value(S3,DefaultDecl,AttValue),
     NewNS = check_namespace(AttName, NamespaceInfo, AttValue, NS),
-    wfc_whitespace_betw_attrs(hd(T3),S3),
+    {T3,S3} = wfc_whitespace_betw_attrs(T3a,S3a),
     ?strip4,  
     AttrPos = case Attrs of
 		  [] ->
@@ -2273,7 +2276,7 @@ scan_att_chars([H|T], S0, H, Acc, TmpAcc,AttType,IsNorm) -> % End quote
 	    true -> 
 		normalize(Acc,S,IsNorm)
 	end,
-    {lists:reverse(Acc2), T, S2,IsNorm2};
+    {lists:flatten(lists:reverse(Acc2)), T, S2,IsNorm2};
 scan_att_chars("&" ++ T, S0, Delim, Acc, TmpAcc,AT,IsNorm) -> % Reference
     ?bump_col(1),
     {ExpRef, T1, S1} = scan_reference(T, S),
@@ -2281,8 +2284,16 @@ scan_att_chars("&" ++ T, S0, Delim, Acc, TmpAcc,AT,IsNorm) -> % Reference
 	true ->
 	    scan_att_chars(T1,S1,Delim,[ExpRef|Acc],[ExpRef|TmpAcc],AT,IsNorm);
 	_ ->
-	    scan_att_chars(string_to_char_set(S#xmerl_scanner.encoding,ExpRef)
-			   ++ T1, S1, Delim, Acc,TmpAcc, AT,IsNorm)
+            Ch = string_to_char_set(S#xmerl_scanner.encoding, ExpRef),
+            case T of
+                "#" ++ _ ->
+                    %% Character reference: normalization (sec 3.3.3) adds it
+                    %% straight to the value. Acc is kept reversed, so push Ch
+                    %% reversed too, or a multi-element Ch comes out backwards.
+                    scan_att_chars(T1,S1,Delim,lists:reverse(Ch,Acc),TmpAcc,AT,IsNorm);
+                _ ->
+                    scan_att_chars(Ch ++ T1, S1, Delim, Acc,TmpAcc, AT,IsNorm)
+            end
     end;
 scan_att_chars("<" ++ _T, S0, _Delim, _Acc,_, _,_) -> % Tags not allowed here
     ?fatal(unexpected_char, S0);
@@ -2602,8 +2613,7 @@ scan_reference("#x" ++ T, S0) ->
     %% [66] CharRef
     ?bump_col(1),
     if hd(T) /= $; ->
-	    {[Ch], T2, S2} = scan_char_ref_hex(T, S, 0),
-	    {to_char_set(S2#xmerl_scanner.encoding,Ch),T2,S2};
+	    scan_char_ref_hex(T, S, 0);
        true ->
 	    ?fatal(invalid_char_ref, S)
     end;
@@ -3274,12 +3284,17 @@ wfc_legal_char(Ch,S) ->
     end.
 
 
-wfc_whitespace_betw_attrs(WS,_S) when ?whitespace(WS) ->
-    ok;
-wfc_whitespace_betw_attrs($/,_S) ->
-    ok;
-wfc_whitespace_betw_attrs($>,_S) ->
-    ok;
+wfc_whitespace_betw_attrs([WS |_]=L,S) when ?whitespace(WS) -> % next char is whitespace
+    {L,S}; % accepted: input and scanner state are returned unchanged
+wfc_whitespace_betw_attrs([$/ |_]=L,S) -> % next char is "/": accepted without whitespace
+    {L,S};
+wfc_whitespace_betw_attrs([$> |_]=L,S) -> % next char is ">": accepted without whitespace
+    {L,S};
+wfc_whitespace_betw_attrs([],S=#xmerl_scanner{continuation_fun = F}) -> % no input left
+    ?dbg("cont()...~n", []),
+    F(fun(MoreBytes, S1) -> wfc_whitespace_betw_attrs(MoreBytes, S1) end, % redo the check on new input
+      fun(S1) -> ?fatal(unexpected_end, S1) end, % NOTE(review): presumably the no-more-input callback - confirm
+      S);
 wfc_whitespace_betw_attrs(_,S) ->
     ?fatal({whitespace_required_between_attributes},S).
 
@@ -3452,14 +3467,14 @@ scan_entity_value("%" ++ T, S0, Delim, Acc, PEName,Namespace,PENesting) ->
 			%% {system,URI} or {public,URI}
 			%% Included in literal.
 			{ExpRef,Sx}=fetch_not_parse(Tuple,S1),
-			{EntV,_,_S2} = 
-			    scan_entity_value(ExpRef, Sx, no_delim,[],
+			{EntV, _, S5} = 
+		 	    scan_entity_value(ExpRef, Sx, no_delim,[],
 					      PERefName,parameter,[]),
 			%% should do an update Write(parameter_entity)
 			%% so next expand_pe_reference is faster
-			{EntV,_S2};
+			{string_to_char_set(S5#xmerl_scanner.encoding, EntV), S5};
 		     ExpRef ->
-			{ExpRef,S1}
+			{string_to_char_set(S1#xmerl_scanner.encoding, ExpRef) ,S1}
 		end,
 	    %% single or duoble qoutes are not treated as delimeters
 	    %% in passages "included in literal"
@@ -3908,7 +3923,7 @@ schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) ->
     case lists:dropwhile(Pred,Atts) of
 	[#xmlAttribute{value=Paths}|_] ->
 	    
-	    case string:tokens(Paths," ") of
+	    case string:tokens(Paths," \n\t\r") of
 		L when length(L) > 0 ->
 		    case length(L) rem 2 of
 			0 ->
@@ -4020,12 +4035,12 @@ utf8_2_ucs([A|Rest]) when A < 16#80 ->
 utf8_2_ucs([A|Rest]) ->
     {{error,{bad_character,A}},Rest}.
 
-to_char_set("iso-10646-utf-1",Ch) ->
-    [Ch];
-to_char_set(UTF8,Ch) when UTF8 =:= "utf-8"; UTF8 =:= undefined ->
-    ucs_2_utf8(Ch);
-to_char_set(_,Ch) ->
-    [Ch].
+%% to_char_set("iso-10646-utf-1",Ch) ->
+%%     [Ch];
+%% to_char_set(UTF8,Ch) when UTF8 =:= "utf-8"; UTF8 =:= undefined ->
+%%     ucs_2_utf8(Ch);
+%% to_char_set(_,Ch) ->
+%%     [Ch].
 
 ucs_2_utf8(Ch) when Ch < 128 ->
     %% 0vvvvvvv