From a26159db792be4fb0266f8eeba3f22f530034eb0 Mon Sep 17 00:00:00 2001 From: Siri Hansen Date: Thu, 17 Jan 2013 12:21:17 +0100 Subject: [test_server] Write link target with correct encoding in erl2html2 If the encoding of the source file was utf-8, then the link target could be faulty encoded in the resulting HTML file. This has been corrected. --- lib/test_server/src/erl2html2.erl | 68 +++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/test_server/src/erl2html2.erl b/lib/test_server/src/erl2html2.erl index 1729257809..9c0ca64173 100644 --- a/lib/test_server/src/erl2html2.erl +++ b/lib/test_server/src/erl2html2.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2012. All Rights Reserved. +%% Copyright Ericsson AB 1997-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -34,16 +34,17 @@ convert(File, Dest) -> %% %% FIXME: The colours should *really* be set with %% stylesheets... + %% + %% The html file is written with the same encoding as the input file. Encoding = encoding(File), Header = ["\n" - "\n" + "\n" "\n" "\n" "\n" - "", File, "\n" + "charset=",html_encoding(Encoding),"\"/>\n" + "", to_raw_list(File,Encoding), "\n" "\n\n" "\n"], @@ -61,7 +62,7 @@ convert(File, Dest, Header) -> case file:open(Dest,[write,raw]) of {ok,DFd} -> file:write(DFd,[Header,"
\n"]),
-			    _Lines = build_html(SFd,DFd,Functions),
+			    _Lines = build_html(SFd,DFd,encoding(File),Functions),
 			    file:write(DFd,["
\n",footer(), "\n\n"]), %% {_, Time2} = statistics(runtime), @@ -121,21 +122,21 @@ parse_file(Epp,File,InCorrectFile) -> %%%----------------------------------------------------------------- %%% Add a link target for each line and one for each function definition. -build_html(SFd,DFd,Functions) -> - build_html(SFd,DFd,file:read_line(SFd),1,Functions,false). +build_html(SFd,DFd,Encoding,Functions) -> + build_html(SFd,DFd,Encoding,file:read_line(SFd),1,Functions,false). -build_html(SFd,DFd,{ok,Str},L,[{F,A,L}|Functions],_IsFuncDef) -> +build_html(SFd,DFd,Encoding,{ok,Str},L,[{F,A,L}|Functions],_IsFuncDef) -> FALink = http_uri:encode(F++"-"++integer_to_list(A)), - file:write(DFd,[""]), - build_html(SFd,DFd,{ok,Str},L,Functions,true); -build_html(SFd,DFd,{ok,Str},L,[{clause,L}|Functions],_IsFuncDef) -> - build_html(SFd,DFd,{ok,Str},L,Functions,true); -build_html(SFd,DFd,{ok,Str},L,Functions,IsFuncDef) -> + file:write(DFd,[""]), + build_html(SFd,DFd,Encoding,{ok,Str},L,Functions,true); +build_html(SFd,DFd,Encoding,{ok,Str},L,[{clause,L}|Functions],_IsFuncDef) -> + build_html(SFd,DFd,Encoding,{ok,Str},L,Functions,true); +build_html(SFd,DFd,Encoding,{ok,Str},L,Functions,IsFuncDef) -> LStr = line_number(L), Str1 = line(Str,IsFuncDef), file:write(DFd,[LStr,Str1]), - build_html(SFd,DFd,file:read_line(SFd),L+1,Functions,false); -build_html(_SFd,_DFd,eof,L,_Functions,_IsFuncDef) -> + build_html(SFd,DFd,Encoding,file:read_line(SFd),L+1,Functions,false); +build_html(_SFd,_DFd,_Encoding,eof,L,_Functions,_IsFuncDef) -> L. line_number(L) -> @@ -190,16 +191,35 @@ footer() -> %%%----------------------------------------------------------------- %%% Read encoding from source file encoding(File) -> - Encoding = - case epp:read_encoding(File) of - none -> - epp:default_encoding(); - E -> - E - end, - html_encoding(Encoding). + case epp:read_encoding(File) of + none -> + epp:default_encoding(); + E -> + E + end. 
+%%%----------------------------------------------------------------- +%%% Convert encoding atom to string for use in HTML header html_encoding(latin1) -> "iso-8859-1"; html_encoding(utf8) -> "utf-8". + +%%%----------------------------------------------------------------- +%%% Convert a string to a list of raw printable characters in the +%%% given encoding. This is necessary since the files (source and +%%% destination) are both opened in raw mode (default encoding). Byte +%%% by byte is read from source and written to the destination. This +%%% conversion is needed when printing data that is not first read +%%% from the source. +%%% +%%% Example: if the encoding of the file is utf8, and we have a string +%%% containing "å" = [229], then we need to convert this to [195,165] +%%% before writing. Note that this conversion is only necessary +%%% because the destination file is not (necessarily) opened with utf8 +%%% encoding - it is opened with default encoding in order to allow +%%% raw file mode and byte by byte copying from source. +to_raw_list(X,latin1) when is_list(X) -> + X; +to_raw_list(X,utf8) when is_list(X) -> + binary_to_list(unicode:characters_to_binary(X)). -- cgit v1.2.3