aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl')
-rw-r--r--lib/xmerl/doc/src/Makefile1
-rw-r--r--lib/xmerl/doc/src/notes.xml85
-rw-r--r--lib/xmerl/doc/src/xmerl_sax_parser.xml10
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl144
-rw-r--r--lib/xmerl/src/xmerl_scan.erl19
-rw-r--r--lib/xmerl/vsn.mk2
6 files changed, 175 insertions, 86 deletions
diff --git a/lib/xmerl/doc/src/Makefile b/lib/xmerl/doc/src/Makefile
index d87677d0a7..0def492246 100644
--- a/lib/xmerl/doc/src/Makefile
+++ b/lib/xmerl/doc/src/Makefile
@@ -140,6 +140,7 @@ debug opt:
clean clean_docs:
rm -rf $(HTMLDIR)/*
+ rm -rf $(XMLDIR)
rm -f $(MAN3DIR)/*
rm -f $(MAN6DIR)/*
rm -f $(XMERL_XML_FILES)
diff --git a/lib/xmerl/doc/src/notes.xml b/lib/xmerl/doc/src/notes.xml
index b7d1db7dfc..63070ea316 100644
--- a/lib/xmerl/doc/src/notes.xml
+++ b/lib/xmerl/doc/src/notes.xml
@@ -32,6 +32,75 @@
<p>This document describes the changes made to the Xmerl application.</p>
+<section><title>Xmerl 1.3.20.1</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ <c>xmerl_sax_parser</c> crashed during charset detection
+ when the xml declarations attribute values was missing
+ the closing quotation (&apos; or &quot;).</p>
+ <p>
+ Own Id: OTP-15826</p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
+<section><title>Xmerl 1.3.20</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Handling of character references in attributes are fixed.</p>
+ <p>
+ Own Id: OTP-15684 Aux Id: ERL-837 </p>
+ </item>
+ <item>
+ <p>
+ Normalization of whitespace characters in attributes are
+ fixed so it works when character references are used.</p>
+ <p>
+ Own Id: OTP-15685 Aux Id: ERL-475 </p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
+<section><title>Xmerl 1.3.19</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>The charset detection parsing crash in some cases when
+ the XML directive is not syntactic correct.</p>
+ <p>
+ Own Id: OTP-15492 Aux Id: ERIERL-283 </p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
+<section><title>Xmerl 1.3.18</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>
+ Improved documentation.</p>
+ <p>
+ Own Id: OTP-15190</p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
<section><title>Xmerl 1.3.17</title>
<section><title>Fixed Bugs and Malfunctions</title>
@@ -47,6 +116,21 @@
</section>
+<section><title>Xmerl 1.3.16.1</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p>The charset detection parsing crash in some cases when
+ the XML directive is not syntactic correct.</p>
+ <p>
+ Own Id: OTP-15492 Aux Id: ERIERL-283 </p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
<section><title>Xmerl 1.3.16</title>
<section><title>Fixed Bugs and Malfunctions</title>
@@ -1397,4 +1481,3 @@
</section>
</section>
</chapter>
-
diff --git a/lib/xmerl/doc/src/xmerl_sax_parser.xml b/lib/xmerl/doc/src/xmerl_sax_parser.xml
index 8ea197e209..2390779028 100644
--- a/lib/xmerl/doc/src/xmerl_sax_parser.xml
+++ b/lib/xmerl/doc/src/xmerl_sax_parser.xml
@@ -31,7 +31,7 @@
<rev></rev>
</header>
- <module>xmerl_sax_parser</module>
+ <module since="">xmerl_sax_parser</module>
<modulesummary>XML SAX parser API</modulesummary>
<description>
@@ -325,7 +325,7 @@
<funcs>
<func>
- <name>file(Filename, Options) -> Result</name>
+ <name since="">file(Filename, Options) -> Result</name>
<fsummary>Parse file containing an XML document.</fsummary>
<type>
<v>Filename = string()</v>
@@ -347,7 +347,7 @@
</func>
<func>
- <name>stream(Xml, Options) -> Result</name>
+ <name since="">stream(Xml, Options) -> Result</name>
<fsummary>Parse a stream containing an XML document.</fsummary>
<type>
<v>Xml = unicode_binary() | latin1_binary() | [unicode_char()]</v>
@@ -381,7 +381,7 @@
<funcs>
<func>
- <name>ContinuationFun(State) -> {NewBytes, NewState}</name>
+ <name since="">ContinuationFun(State) -> {NewBytes, NewState}</name>
<fsummary>Continuation call back function.</fsummary>
<type>
<v>State = NewState = term()</v>
@@ -402,7 +402,7 @@
</func>
<func>
- <name>EventFun(Event, Location, State) -> NewState</name>
+ <name since="">EventFun(Event, Location, State) -> NewState</name>
<fsummary>Event call back function.</fsummary>
<type>
<v>Event = event()</v>
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index e383c4c349..2767d02552 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -1,8 +1,8 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 2008-2018. All Rights Reserved.
+%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
@@ -14,13 +14,13 @@
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
-%%
+%%
%% %CopyrightEnd%
%%----------------------------------------------------------------------
%% File : xmerl_sax_parser.erl
%% Description : XML SAX parse API module.
%%
-%% Created : 4 Jun 2008
+%% Created : 4 Jun 2008
%%----------------------------------------------------------------------
-module(xmerl_sax_parser).
@@ -72,9 +72,9 @@ file(Name,Options) ->
CL = filename:absname(Dir),
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
- Res = stream(<<>>,
+ Res = stream(<<>>,
[{continuation_fun, ContinuationFun},
- {continuation_state, FD},
+ {continuation_state, FD},
{current_location, CL},
{entity, File}
|Options],
@@ -101,39 +101,39 @@ stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
case State#xmerl_sax_parser_state.file_type of
dtd ->
- xmerl_sax_parser_list:parse_dtd(Xml,
+ xmerl_sax_parser_list:parse_dtd(Xml,
State#xmerl_sax_parser_state{encoding = list,
input_type = InputType});
normal ->
- xmerl_sax_parser_list:parse(Xml,
+ xmerl_sax_parser_list:parse(Xml,
State#xmerl_sax_parser_state{encoding = list,
input_type = InputType})
end;
stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) ->
- case parse_options(Options, initial_state()) of
+ case parse_options(Options, initial_state()) of
{error, Reason} -> {error, Reason};
State ->
- ParseFunction =
+ ParseFunction =
case State#xmerl_sax_parser_state.file_type of
dtd ->
parse_dtd;
normal ->
parse
end,
- try
+ try
{Xml1, State1} = detect_charset(Xml, State),
parse_binary(Xml1,
State1#xmerl_sax_parser_state{input_type = InputType},
ParseFunction)
catch
throw:{fatal_error, {State2, Reason}} ->
- {fatal_error,
+ {fatal_error,
{
State2#xmerl_sax_parser_state.current_location,
- State2#xmerl_sax_parser_state.entity,
+ State2#xmerl_sax_parser_state.entity,
1
},
- Reason, [],
+ Reason, [],
State2#xmerl_sax_parser_state.event_state}
end
end.
@@ -157,7 +157,7 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
xmerl_sax_parser_utf16be:F(Xml, State);
parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
xmerl_sax_parser_latin1:F(Xml, State);
-parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) ->
+parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) ->
?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))).
%%----------------------------------------------------------------------
@@ -177,9 +177,9 @@ initial_state() ->
%%----------------------------------------------------------------------
%% Function: parse_options(Options, State)
%% Input: Options = [Option]
-%% Option = {event_state, term()} | {event_fun, fun()} |
+%% Option = {event_state, term()} | {event_fun, fun()} |
%% {continuation_state, term()} | {continuation_fun, fun()} |
-%% {encoding, Encoding} | {file_type, FT}
+%% {encoding, Encoding} | {file_type, FT}
%% FT = normal | dtd
%% Encoding = utf8 | utf16le | utf16be | list | iso8859
%% State = #xmerl_sax_parser_state{}
@@ -200,7 +200,7 @@ parse_options([{file_type, FT} |Options], State) when FT==normal; FT==dtd ->
parse_options(Options, State#xmerl_sax_parser_state{file_type = FT});
parse_options([{encoding, E} |Options], State) ->
case check_encoding_option(E) of
- {error, Reason} ->
+ {error, Reason} ->
{error, Reason};
Enc ->
parse_options(Options, State#xmerl_sax_parser_state{encoding = Enc})
@@ -231,7 +231,7 @@ check_encoding_option(E) ->
%% Description: Detects which character set is used in a binary stream.
%%----------------------------------------------------------------------
detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
- ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
detect_charset(<<>>, State) ->
cf(<<>>, State, fun detect_charset/2);
detect_charset(Bytes, State) ->
@@ -269,22 +269,14 @@ detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) ->
cf(Xml, State, fun detect_charset_1/2);
detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) ->
{Xml3, State1} = read_until_end_of_xml_directive(Xml2, State),
- case parse_xml_directive(Xml3) of
- {error, Reason} ->
- ?fatal_error(State, Reason);
- AttrList ->
- case lists:keysearch("encoding", 1, AttrList) of
- {value, {_, E}} ->
- case convert_encoding(E) of
- {error, Reason} ->
- ?fatal_error(State, Reason);
- Enc ->
- {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>,
- State1#xmerl_sax_parser_state{encoding=Enc}}
- end;
- _ ->
- {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1}
- end
+ AttrList = parse_xml_directive(Xml3, State),
+ case lists:keysearch("encoding", 1, AttrList) of
+ {value, {_, E}} ->
+ Enc = convert_encoding(E, State),
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>,
+ State1#xmerl_sax_parser_state{encoding=Enc}};
+ _ ->
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1}
end;
detect_charset_1(Xml, State) ->
{Xml, State}.
@@ -295,7 +287,7 @@ detect_charset_1(Xml, State) ->
%% Output: utf8 | iso8859
%% Description: Converting 7,8 bit and utf8 encoding strings to internal format.
%%----------------------------------------------------------------------
-convert_encoding(Enc) -> %% Just for 7,8 bit + utf8
+convert_encoding(Enc, State) -> %% Just for 7,8 bit + utf8
case string:to_lower(Enc) of
"utf-8" -> utf8;
"us-ascii" -> utf8;
@@ -309,19 +301,19 @@ convert_encoding(Enc) -> %% Just for 7,8 bit + utf8
"iso-8859-7" -> latin1;
"iso-8859-8" -> latin1;
"iso-8859-9" -> latin1;
- _ -> {error, "Unknown encoding: " ++ Enc}
+ _ -> ?fatal_error(State, "Unknown encoding: " ++ Enc)
end.
%%----------------------------------------------------------------------
%% Function: parse_xml_directive(Xml)
%% Input: Xml = binary()
%% Acc = list()
-%% Output:
+%% Output:
%% Description: Parsing the xml declaration from the input stream.
%%----------------------------------------------------------------------
-parse_xml_directive(<<C, Rest/binary>>) when ?is_whitespace(C) ->
- parse_xml_directive_1(Rest, []).
-
+parse_xml_directive(<<C, Rest/binary>>, State) when ?is_whitespace(C) ->
+ parse_xml_directive_1(Rest, [], State).
+
%%----------------------------------------------------------------------
%% Function: parse_xml_directive_1(Xml, Acc) -> [{Name, Value}]
%% Input: Xml = binary()
@@ -331,20 +323,20 @@ parse_xml_directive(<<C, Rest/binary>>) when ?is_whitespace(C) ->
%% Output: see above
%% Description: Parsing the xml declaration from the input stream.
%%----------------------------------------------------------------------
-parse_xml_directive_1(<<C, Rest/binary>>, Acc) when ?is_whitespace(C) ->
- parse_xml_directive_1(Rest, Acc);
-parse_xml_directive_1(<<"?>", _/binary>>, Acc) ->
+parse_xml_directive_1(<<C, Rest/binary>>, Acc, State) when ?is_whitespace(C) ->
+ parse_xml_directive_1(Rest, Acc, State);
+parse_xml_directive_1(<<"?>", _/binary>>, Acc, _State) ->
Acc;
-parse_xml_directive_1(<<C, Rest/binary>>, Acc) when 97 =< C, C =< 122 ->
+parse_xml_directive_1(<<C, Rest/binary>>, Acc, State) when 97 =< C, C =< 122 ->
{Name, Rest1} = parse_name(Rest, [C]),
- Rest2 = parse_eq(Rest1),
- {Value, Rest3} = parse_value(Rest2),
- parse_xml_directive_1(Rest3, [{Name, Value} |Acc]);
-parse_xml_directive_1(_, _) ->
- {error, "Unknown attribute in xml directive"}.
+ Rest2 = parse_eq(Rest1, State),
+ {Value, Rest3} = parse_value(Rest2, State),
+ parse_xml_directive_1(Rest3, [{Name, Value} |Acc], State);
+parse_xml_directive_1(_, _, State) ->
+ ?fatal_error(State, "Unknown attribute in xml directive").
%%----------------------------------------------------------------------
-%% Function: parse_xml_directive_1(Xml, Acc) -> Name
+%% Function: parse_name(Xml, Acc) -> Name
%% Input: Xml = binary()
%% Acc = string()
%% Output: Name = string()
@@ -361,10 +353,12 @@ parse_name(Rest, Acc) ->
%% Output: Rest = binary()
%% Description: Reads an '=' from the stream.
%%----------------------------------------------------------------------
-parse_eq(<<C, Rest/binary>>) when ?is_whitespace(C) ->
- parse_eq(Rest);
-parse_eq(<<"=", Rest/binary>>) ->
- Rest.
+parse_eq(<<C, Rest/binary>>, State) when ?is_whitespace(C) ->
+ parse_eq(Rest, State);
+parse_eq(<<"=", Rest/binary>>, _State) ->
+ Rest;
+parse_eq(_, State) ->
+ ?fatal_error(State, "expecting = or whitespace").
%%----------------------------------------------------------------------
%% Function: parse_value(Xml) -> {Value, Rest}
@@ -373,10 +367,12 @@ parse_eq(<<"=", Rest/binary>>) ->
%% Rest = binary()
%% Description: Parsing an attribute value from the stream.
%%----------------------------------------------------------------------
-parse_value(<<C, Rest/binary>>) when ?is_whitespace(C) ->
- parse_value(Rest);
-parse_value(<<C, Rest/binary>>) when C == $'; C == $" ->
- parse_value_1(Rest, C, []).
+parse_value(<<C, Rest/binary>>, State) when ?is_whitespace(C) ->
+ parse_value(Rest, State);
+parse_value(<<C, Rest/binary>>, State) when C == $'; C == $" ->
+ parse_value_1(Rest, C, [], State);
+parse_value(_, State) ->
+ ?fatal_error(State, "\', \" or whitespace expected").
%%----------------------------------------------------------------------
%% Function: parse_value_1(Xml, Stop, Acc) -> {Value, Rest}
@@ -387,10 +383,12 @@ parse_value(<<C, Rest/binary>>) when C == $'; C == $" ->
%% Rest = binary()
%% Description: Parsing an attribute value from the stream.
%%----------------------------------------------------------------------
-parse_value_1(<<Stop, Rest/binary>>, Stop, Acc) ->
+parse_value_1(<<Stop, Rest/binary>>, Stop, Acc, _State) ->
{lists:reverse(Acc), Rest};
-parse_value_1(<<C, Rest/binary>>, Stop, Acc) ->
- parse_value_1(Rest, Stop, [C |Acc]).
+parse_value_1(<<C, Rest/binary>>, Stop, Acc, State) ->
+ parse_value_1(Rest, Stop, [C |Acc], State);
+parse_value_1(_, _Stop, _Acc, State) ->
+ ?fatal_error(State, "end of input and no \' or \" found").
%%======================================================================
%% Default functions
@@ -431,7 +429,7 @@ read_until_end_of_xml_directive(Rest, State) ->
nomatch ->
case cf(Rest, State) of
{<<>>, _} ->
- ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
{NewBytes, NewState} ->
read_until_end_of_xml_directive(NewBytes, NewState)
end;
@@ -450,9 +448,9 @@ read_until_end_of_xml_directive(Rest, State) ->
%% input stream and calls the fun in NextCall.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
- ?fatal_error(State, "Continuation function undefined");
+ ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) ->
- Result =
+ Result =
try
CFun(CState)
catch
@@ -463,9 +461,9 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
end,
case Result of
{<<>>, _} ->
- ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
{NewBytes, NewContState} ->
- {<<Rest/binary, NewBytes/binary>>,
+ {<<Rest/binary, NewBytes/binary>>,
State#xmerl_sax_parser_state{continuation_state = NewContState}}
end.
@@ -479,10 +477,10 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
%% input stream and calls the fun in NextCall.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
- ?fatal_error(State, "Continuation function undefined");
-cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
NextCall) ->
- Result =
+ Result =
try
CFun(CState)
catch
@@ -493,8 +491,8 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
end,
case Result of
{<<>>, _} ->
- ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
{NewBytes, NewContState} ->
- NextCall(<<Rest/binary, NewBytes/binary>>,
+ NextCall(<<Rest/binary, NewBytes/binary>>,
State#xmerl_sax_parser_state{continuation_state = NewContState})
end.
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index a1f6ad4e2c..d76ed5c820 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2017. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -279,7 +279,7 @@ int_file_decl(F, Options,_ExtCharset) ->
%% @spec string(Text::list()) -> {xmlElement(),Rest}
%% Rest = list()
-%% @equiv string(Test, [])
+%% @equiv string(Text, [])
string(Str) ->
string(Str, []).
@@ -2410,15 +2410,22 @@ scan_att_chars("&" ++ T, S0, Delim, Acc, TmpAcc,AT,IsNorm) -> % Reference
true ->
scan_att_chars(T1,S1,Delim,[ExpRef|Acc],[ExpRef|TmpAcc],AT,IsNorm);
_ ->
- Ch = string_to_char_set(S#xmerl_scanner.encoding, ExpRef),
case T of
"#" ++ _ ->
%% normalization rules (sec 3.3.3) require that for
%% character references, the referenced character be
%% added directly to the normalized value
- scan_att_chars(T1, S1, Delim, Ch ++ Acc,TmpAcc, AT,IsNorm);
+ {T2,S2,IsNorm2} =
+ if
+ ?whitespace(hd(ExpRef)) ->
+ normalize(T1, S1, IsNorm);
+ true ->
+ {T1, S1, IsNorm}
+ end,
+ scan_att_chars(T2, S2, Delim, ExpRef ++ Acc, TmpAcc, AT, IsNorm2);
_ ->
- scan_att_chars(Ch ++ T1, S1, Delim, Acc,TmpAcc, AT,IsNorm)
+ Ch = string_to_char_set(S#xmerl_scanner.encoding, ExpRef),
+ scan_att_chars(Ch ++ T1, S1, Delim, Acc, TmpAcc, AT, IsNorm)
end
end;
scan_att_chars("<" ++ _T, S0, _Delim, _Acc,_, _,_) -> % Tags not allowed here
@@ -3964,7 +3971,7 @@ normalize(T,S,IsNorm) ->
{_,T,S} ->
{T,S,IsNorm};
{_,T1,S1} ->
- {T1,S1,true}
+ normalize(T1,S1,true)
end.
diff --git a/lib/xmerl/vsn.mk b/lib/xmerl/vsn.mk
index be11935f2f..3c97615e1e 100644
--- a/lib/xmerl/vsn.mk
+++ b/lib/xmerl/vsn.mk
@@ -1 +1 @@
-XMERL_VSN = 1.3.17
+XMERL_VSN = 1.3.20.1