1 files changed, 112 insertions, 78 deletions
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index 60edd8ade9..d02763f75c 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -5,7 +5,7 @@
   <header>
     <copyright>
       <year>1996</year>
-      <year>2009</year>
+      <year>2011</year>
       <holder>Ericsson AB, All Rights Reserved</holder>
     </copyright>
     <legalnotice>
@@ -38,50 +38,83 @@
   <p>It is recommended to only use external encodings for communication with external entities where this is required. When working inside the Erlang/OTP environment, it is recommended to keep binaries in UTF-8 when representing Unicode characters. Latin1 encoding is supported both for backward compatibility and for communication with external entities not supporting Unicode character sets.</p>
   </description>
 
-  <section>
-    <title>DATA TYPES</title>
-    <marker id="charlist_definition"></marker>
-    <code type="none">
-unicode_binary() = binary() with characters encoded in UTF-8 coding standard
-unicode_char() = integer() representing valid unicode codepoint
-
-chardata() = charlist() | unicode_binary()
-
-charlist() = [unicode_char() | unicode_binary() | charlist()]
-  a unicode_binary is allowed as the tail of the list</code>
-
-    <code type="none">
-external_unicode_binary() = binary() 
-  with characters coded in a user specified Unicode encoding other 
-  than UTF-8 (UTF-16 or UTF-32)
-
-external_chardata() = external_charlist() | external_unicode_binary()
-
-external_charlist() = [unicode_char() | external_unicode_binary() | external_charlist()]
-  an external_unicode_binary is allowed as the tail of the list</code>
-
-    <code type="none">
-latin1_binary() = binary() with characters coded in iso-latin-1
-latin1_char() = integer() representing valid latin1 character (0-255)
-
-latin1_chardata() = latin1_charlist() | latin1_binary()
+  <datatypes>
+    <datatype>
+      <name name="encoding"/>
+    </datatype>
+    <datatype>
+      <name name="endian"/>
+    </datatype>
+    <datatype>
+      <name name="unicode_binary"/>
+      <desc>
+        <p>A <c>binary()</c> with characters encoded in the UTF-8 coding standard.</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="unicode_char"/>
+      <desc>
+        <p>An <c>integer()</c> representing a valid Unicode code point.</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="chardata"/>
+    </datatype>
+    <datatype>
+      <name name="charlist"/>
+      <desc>
+        <p>A <c>unicode_binary()</c> is allowed as the tail of the list.</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="external_unicode_binary"/>
+      <desc>
+        <p>A <c>binary()</c> with characters coded in a user specified Unicode
+           encoding other than UTF-8 (UTF-16 or UTF-32).</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="external_chardata"/>
+    </datatype>
+    <datatype>
+      <name name="external_charlist"/>
+      <desc>
+        <p>An <c>external_unicode_binary()</c> is allowed as the tail
+           of the list.</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="latin1_binary"/>
+      <desc><p>A <c>binary()</c> with characters coded in iso-latin-1.</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="latin1_char"/>
+      <desc><p>An <c>integer()</c> representing a valid latin1
+         character (0-255).</p>
+      </desc>
+    </datatype>
+    <datatype>
+      <name name="latin1_chardata"/>
+    </datatype>
+    <datatype>
+      <name name="latin1_charlist"/>
+      <desc><p>A <c>latin1_binary()</c> is allowed as the tail of
+        the list.</p>
+      </desc>
+    </datatype>
+  </datatypes>
 
-latin1_charlist() = [latin1_char() | latin1_binary() | latin1_charlist()]
-  a latin1_binary is allowed as the tail of the list</code>
-  </section>
   <funcs>
     <func>
-      <name>bom_to_encoding(Bin) -> {Encoding,Length}</name>
+      <name name="bom_to_encoding" arity="1"/>
       <fsummary>Identify UTF byte order marks in a binary.</fsummary>
-      <type>
-        <v>Bin = binary() of byte_size 4 or more</v>
-        <v>Encoding = latin1 | utf8 | {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>
-	<v>Length = int()</v>
-      </type>
+      <type name="endian"/>
+      <type_desc variable="Bin">A <c>binary()</c> of byte_size 4 or more.</type_desc>
       <desc>
 
       <p>Check for a UTF byte order mark (BOM) in the beginning of a
-      binary.  If the supplied binary <c>Bin</c> begins with a valid
+      binary.  If the supplied binary <c><anno>Bin</anno></c> begins with a valid
       byte order mark for either UTF-8, UTF-16 or UTF-32, the function
       returns the encoding identified along with the length of the BOM
       in bytes.</p>
@@ -90,23 +123,24 @@ latin1_charlist() = [latin1_char() | latin1_binary() | latin1_charlist()]
       </desc>
     </func>
     <func>
-      <name>characters_to_list(Data) -> list() | {error, list(), RestData} | {incomplete, list(), binary()} </name>
+      <name name="characters_to_list" arity="1"/>
       <fsummary>Convert a collection of characters to list of Unicode characters</fsummary>
-      <type>
-        <v>Data = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>RestData = latin1_chardata() | chardata() | external_chardata()</v>
-      </type>
       <desc>
-      <p>Same as characters_to_list(Data,unicode).</p>
+      <p>Same as characters_to_list(<anno>Data</anno>,unicode).</p>
       </desc>
     </func>
     <func>
-      <name>characters_to_list(Data, InEncoding) -> list() | {error, list(), RestData} | {incomplete, list(), binary()} </name>
+      <name>characters_to_list(Data, InEncoding) -> Result</name>
       <fsummary>Convert a collection of characters to list of Unicode characters</fsummary>
       <type>
-        <v>Data = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>RestData = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>InEncoding = latin1 | unicode | utf8 | utf16 | utf32 | {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>
+        <v>Data = <seealso marker="#type-latin1_chardata">latin1_chardata()</seealso>
+                | <seealso marker="#type-chardata">chardata()</seealso>
+                | <seealso marker="#type-external_chardata">external_chardata()</seealso></v>
+        <v>Result = list() | {error, list(), RestData} | {incomplete, list(), binary()}</v>
+        <v>RestData = <seealso marker="#type-latin1_chardata">latin1_chardata()</seealso>
+                    | <seealso marker="#type-chardata">chardata()</seealso>
+                    | <seealso marker="#type-external_chardata">external_chardata()</seealso></v>
+        <v>InEncoding = <seealso marker="#type-encoding">encoding()</seealso></v>
       </type>
       <desc>
 
@@ -164,10 +198,16 @@ latin1_charlist() = [latin1_char() | latin1_binary() | latin1_charlist()]
            <item>Integers out of range - If <c>InEncoding</c> is
 	   <c>latin1</c>, an error occurs whenever an integer greater
 	   than 255 is found in the lists. If <c>InEncoding</c> is
-	   of a Unicode type, error occurs whenever an integer greater than
-	   <c>16#10FFFF</c> (the maximum unicode character) or in the
-	   range <c>16#D800</c> to <c>16#DFFF</c> (invalid unicode
-	   range) is found.</item>
+	   of a Unicode type, an error occurs whenever an integer
+	   <list type="bulleted">
+	     <item>greater than <c>16#10FFFF</c>
+	     (the maximum unicode character),</item>
+	     <item>in the range <c>16#D800</c> to <c>16#DFFF</c>
+	     (invalid unicode range),</item>
+	     <item>or equal to <c>16#FFFE</c> or <c>16#FFFF</c> (non-characters)</item>
+	   </list>
+	   is found.
+	   </item>
 
 	   <item>UTF encoding incorrect - If <c>InEncoding</c> is
 	   one of the UTF types, the bytes in any binaries have to be valid
@@ -228,44 +268,42 @@ latin1_charlist() = [latin1_char() | latin1_binary() | latin1_charlist()]
       </desc>
     </func>
     <func>
-     <name>characters_to_binary(Data) ->  binary() | {error, binary(), RestData} | {incomplete, binary(), binary()} </name>
-      <fsummary>Convert a collection of characters to an UTF-8 binary</fsummary>      <type>
-        <v>Data = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>RestData = latin1_chardata() | chardata() | external_chardata()</v>
-       </type>
+     <name name="characters_to_binary" arity="1"/>
+      <fsummary>Convert a collection of characters to an UTF-8 binary</fsummary>
       <desc>
       <p>Same as characters_to_binary(Data, unicode, unicode).</p>
       </desc>
     </func>
     <func>    
-     <name>characters_to_binary(Data,InEncoding) ->  binary() | {error, binary(), RestData} | {incomplete, binary(), binary()} </name>
-      <fsummary>Convert a collection of characters to an UTF-8 binary</fsummary>      <type>
-        <v>Data = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>RestData = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>InEncoding = latin1 | unicode | utf8 | utf16 | utf32 | {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>
+     <name>characters_to_binary(Data,InEncoding) -> Result</name>
+      <fsummary>Convert a collection of characters to an UTF-8 binary</fsummary>
+
+      <type>
+        <v>Data = <seealso marker="#type-latin1_chardata">latin1_chardata()</seealso>
+                | <seealso marker="#type-chardata">chardata()</seealso>
+                | <seealso marker="#type-external_chardata">external_chardata()</seealso></v>
+        <v>Result = binary() | {error, binary(), RestData} | {incomplete, binary(), binary()}</v>
+        <v>RestData = <seealso marker="#type-latin1_chardata">latin1_chardata()</seealso>
+                    | <seealso marker="#type-chardata">chardata()</seealso>
+                    | <seealso marker="#type-external_chardata">external_chardata()</seealso></v>
+        <v>InEncoding = <seealso marker="#type-encoding">encoding()</seealso></v>
       </type>
       <desc>
       <p>Same as characters_to_binary(Data, InEncoding, unicode).</p>
       </desc>
     </func>    
     <func>
-      <name>characters_to_binary(Data, InEncoding, OutEncoding) -> binary() | {error, binary(), RestData} | {incomplete, binary(), binary()} </name>
+      <name name="characters_to_binary" arity="3"/>
       <fsummary>Convert a collection of characters to an UTF-8 binary</fsummary>
-      <type>
-        <v>Data = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>RestData = latin1_chardata() | chardata() | external_chardata()</v>
-        <v>InEncoding = latin1 | unicode | utf8 | utf16 | utf32 | {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>
-	<v>OutEncoding = latin1 | unicode | utf8 | utf16 | utf32| {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>	
-      </type>
       <desc>
 
       <p>This function behaves as <seealso
       marker="#characters_to_list/2">
       characters_to_list/2</seealso>, but produces an binary
       instead of a unicode list. The
-      <c>InEncoding</c> defines how input is to be interpreted if
+      <c><anno>InEncoding</anno></c> defines how input is to be interpreted if
       binaries are present in the <c>Data</c>, while
-      <c>OutEncoding</c> defines in what format output is to be
+      <c><anno>OutEncoding</anno></c> defines in what format output is to be
       generated.</p>
 
       <p>The option <c>unicode</c> is an alias for <c>utf8</c>, as this is the
@@ -285,17 +323,13 @@ latin1_charlist() = [latin1_char() | latin1_binary() | latin1_charlist()]
       </desc>
     </func>
     <func>
-      <name>encoding_to_bom(InEncoding) -> Bin</name>
+      <name name="encoding_to_bom" arity="1"/>
       <fsummary>Create a binary UTF byte order mark from encoding.</fsummary>
-      <type>
-        <v>Bin = binary() of byte_size 4 or less</v>
-        <v>InEncoding = latin1 | unicode | utf8 | utf16 | utf32 | {utf16,little} | {utf16,big} | {utf32,little} | {utf32,big}</v>
-	<v>Length = int()</v>
-      </type>
+      <type_desc variable="Bin">A <c>binary()</c> of byte_size 4 or less.</type_desc>
       <desc>
 
       <p>Create an UTF byte order mark (BOM) as a binary from the
-      supplied <c>InEncoding</c>. The BOM is, if supported at all,
+      supplied <c><anno>InEncoding</anno></c>. The BOM is, if supported at all,
       expected to be placed first in UTF encoded files or
       messages.</p>