From 6be3e658d9999274d5c0c702bf799b90a110c745 Mon Sep 17 00:00:00 2001
From: Hans Bolinder
Date: Mon, 17 Mar 2014 12:05:18 +0100
Subject: Clarify the reference manual regarding source file encoding
---
system/doc/reference_manual/character_set.xml | 132 ++++++++++++++++++++++++++
system/doc/reference_manual/introduction.xml | 85 +----------------
system/doc/reference_manual/part.xml | 3 +-
3 files changed, 135 insertions(+), 85 deletions(-)
create mode 100644 system/doc/reference_manual/character_set.xml
(limited to 'system')
diff --git a/system/doc/reference_manual/character_set.xml b/system/doc/reference_manual/character_set.xml
new file mode 100644
index 0000000000..884898eb34
--- /dev/null
+++ b/system/doc/reference_manual/character_set.xml
@@ -0,0 +1,132 @@
+
+
+
+
+
+
+ 20142014
+ Ericsson AB. All Rights Reserved.
+
+
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+
+
+ Character Set and Source File Encoding
+
+
+
+
+ character_set.xml
+
+
+
+ Character Set
+ In Erlang 4.8/OTP R5A the syntax of Erlang tokens was extended to
+ allow the use of the full ISO-8859-1 (Latin-1) character set. This
+ is noticeable in the following ways:
+
+ -
+
All the Latin-1 printable characters can be used and are
+ shown without the escape backslash convention.
+
+ -
+
Atoms and variables can use all Latin-1 letters.
+
+
+
+
+ Octal |
+ Decimal |
+ |
+ Class |
+
+
+ 200 - 237 |
+ 128 - 159 |
+ |
+ Control characters |
+
+
+ 240 - 277 |
+ 160 - 191 |
+ - ¿ |
+ Punctuation characters |
+
+
+ 300 - 326 |
+ 192 - 214 |
+ À - Ö |
+ Uppercase letters |
+
+
+ 327 |
+ 215 |
+ × |
+ Punctuation character |
+
+
+ 330 - 336 |
+ 216 - 222 |
+ Ø - Þ |
+ Uppercase letters |
+
+
+ 337 - 366 |
+ 223 - 246 |
+ ß - ö |
+ Lowercase letters |
+
+
+ 367 |
+ 247 |
+ ÷ |
+ Punctuation character |
+
+
+ 370 - 377 |
+ 248 - 255 |
+ ø - ÿ |
+ Lowercase letters |
+
+ Character Classes.
+
+ In Erlang/OTP R16B the syntax of Erlang tokens was extended to
+ handle Unicode. To begin with, the support is limited to
+ strings, but Erlang/OTP 18 is expected to handle Unicode atoms
+ as well. More about the usage of Unicode in Erlang source files
+ can be found in STDLIB's User's
+ Guide.
+
+
+ Source File Encoding
+ The Erlang source file encoding is selected by a
+ comment in one of the first two lines of the source file. The
+ first string that matches the regular expression
+ coding\s*[:=]\s*([-a-zA-Z0-9])+ selects the encoding. If
+ the matching string is not a valid encoding it is ignored. The
+ valid encodings are Latin-1 and UTF-8, where the
+ case of the characters can be chosen freely.
+ The following example selects UTF-8 as default encoding:
+
+%% coding: utf-8
+ Two more examples, both selecting Latin-1 as default encoding:
+
+%% For this file we have chosen encoding = Latin-1
+
+%% -*- coding: latin-1 -*-
+ The default encoding for Erlang source files was changed from
+ Latin-1 to UTF-8 in Erlang/OTP 17.0.
+
+
diff --git a/system/doc/reference_manual/introduction.xml b/system/doc/reference_manual/introduction.xml
index aa42967625..36bec17825 100644
--- a/system/doc/reference_manual/introduction.xml
+++ b/system/doc/reference_manual/introduction.xml
@@ -4,7 +4,7 @@
- 20032013
+ 20032014
Ericsson AB. All Rights Reserved.
@@ -79,88 +79,5 @@
when xor
-
- Character Set
- In Erlang 4.8/OTP R5A the syntax of Erlang tokens was extended to
- allow the use of the full ISO-8859-1 (Latin-1) character set. This
- is noticeable in the following ways:
-
- -
-
All the Latin-1 printable characters can be used and are
- shown without the escape backslash convention.
-
- -
-
Atoms and variables can use all Latin-1 letters.
-
-
-
-
- Octal |
- Decimal |
- |
- Class |
-
-
- 200 - 237 |
- 128 - 159 |
- |
- Control characters |
-
-
- 240 - 277 |
- 160 - 191 |
- - ¿ |
- Punctuation characters |
-
-
- 300 - 326 |
- 192 - 214 |
- À - Ö |
- Uppercase letters |
-
-
- 327 |
- 215 |
- × |
- Punctuation character |
-
-
- 330 - 336 |
- 216 - 222 |
- Ø - Þ |
- Uppercase letters |
-
-
- 337 - 366 |
- 223 - 246 |
- ß - ö |
- Lowercase letters |
-
-
- 367 |
- 247 |
- ÷ |
- Punctuation character |
-
-
- 370 - 377 |
- 248 - 255 |
- ø - ÿ |
- Lowercase letters |
-
- Character Classes.
-
- In Erlang/OTP R16 the syntax of Erlang tokens was extended to
- handle Unicode. To begin with the support is limited to strings,
- but Erlang/OTP R18 is expected to handle Unicode atoms as well.
- More about the usage of Unicode in Erlang source files can be
- found in STDLIB's User'S
- Guide. The default encoding for Erlang source files
- is still Latin-1, but in Erlang/OTP R17 the default encoding
- will be UTF-8. The details on how to state the encoding of an
- Erlang source file can be found in epp(3).
-
diff --git a/system/doc/reference_manual/part.xml b/system/doc/reference_manual/part.xml
index b4f114c268..ee8f3dd7eb 100644
--- a/system/doc/reference_manual/part.xml
+++ b/system/doc/reference_manual/part.xml
@@ -4,7 +4,7 @@
- 20032013
+ 20032014
Ericsson AB. All Rights Reserved.
@@ -28,6 +28,7 @@
+
--
cgit v1.2.3