From a4645470a77989cc1d4943331dc11a0d1a4db532 Mon Sep 17 00:00:00 2001
From: Lukas Larsson
Date: Tue, 30 Apr 2019 15:45:48 +0200
Subject: erts: Add documentation for distribution fragments
---
erts/doc/src/erl_dist_protocol.xml | 13 +-
erts/doc/src/erl_ext_dist.xml | 492 ++++++++++++++++++++++++++-----------
erts/emulator/beam/dist.c | 4 +-
3 files changed, 359 insertions(+), 150 deletions(-)
(limited to 'erts')
diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml
index 185c75fe84..f924c8a70b 100644
--- a/erts/doc/src/erl_dist_protocol.xml
+++ b/erts/doc/src/erl_dist_protocol.xml
@@ -850,10 +850,15 @@ DiB == gen_digest(ChA, ICA)?
-define(DFLAG_EXIT_PAYLOAD, 16#400000).
-
Use the PAYLOAD_EXIT, PAYLOAD_EXIT_TT,
- PAYLOAD_EXIT2, PAYLOAD_EXIT2_TT
- and PAYLOAD_MONITOR_P_EXIT
- control messages
- instead of the non-PAYLOAD variants.
+ PAYLOAD_EXIT2, PAYLOAD_EXIT2_TT
+ and PAYLOAD_MONITOR_P_EXIT
+ control messages
+ instead of the non-PAYLOAD variants.
+
+ -define(DFLAG_FRAGMENTS, 16#800000).
+ -
+
Use fragmented
+ distribution messages to send large messages.
diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml
index a6bc44b8c8..3730f0e8ac 100644
--- a/erts/doc/src/erl_ext_dist.xml
+++ b/erts/doc/src/erl_ext_dist.xml
@@ -140,162 +140,366 @@
Distribution Header
- The distribution header only contains an atom cache
- reference section, but can in the future contain more
- information. The distribution header precedes one or more Erlang
- terms on the external format. For more information, see the
- documentation of the
+ The distribution header is sent by the Erlang distribution to
+ carry metadata about the coming
+ control message
+ and potential payload. It is primarily used to handle the atom cache
+ in the Erlang distribution. Since OTP-22 it is also used to fragment
+ large distribution messages into multiple smaller fragments.
+ For more information about how the distribution uses the distribution header,
+ see the documentation of the
protocol between
connected nodes in the
distribution protocol
documentation.
- ATOM_CACHE_REF
+ Any ATOM_CACHE_REF
entries with corresponding AtomCacheReferenceIndex in terms
encoded on the external format following a distribution header refer
to the atom cache references made in the distribution header. The range
is 0 <= AtomCacheReferenceIndex < 255, that is, at most 255
different atom cache references from the following terms can be made.
-
- The distribution header format is as follows:
-
-
-
- 1 |
- 1 |
- 1 |
- NumberOfAtomCacheRefs/2+1 | 0 |
- N | 0 |
-
-
- 131 |
- 68 |
- NumberOfAtomCacheRefs |
- Flags |
- AtomCacheRefs |
-
- Distribution Header Format
-
- Flags consist of NumberOfAtomCacheRefs/2+1 bytes,
- unless NumberOfAtomCacheRefs is 0. If
- NumberOfAtomCacheRefs is 0, Flags and
- AtomCacheRefs are omitted. Each atom cache reference has
- a half byte flag field. Flags corresponding to a specific
- AtomCacheReferenceIndex are located in flag byte number
- AtomCacheReferenceIndex/2. Flag byte 0 is the first byte
- after the NumberOfAtomCacheRefs byte. Flags for an even
- AtomCacheReferenceIndex are located in the least significant
- half byte and flags for an odd AtomCacheReferenceIndex are
- located in the most significant half byte.
-
-
- The flag field of an atom cache reference has the following
- format:
-
-
-
- 1 bit |
- 3 bits |
-
-
- NewCacheEntryFlag |
- SegmentIndex |
-
-
-
- The most significant bit is the NewCacheEntryFlag. If set,
- the corresponding cache reference is new. The three least
- significant bits are the SegmentIndex of the corresponding
- atom cache entry. An atom cache consists of 8 segments, each of size
- 256, that is, an atom cache can contain 2048 entries.
-
-
- After flag fields for atom cache references, another half byte flag
- field is located with the following format:
-
-
-
- 3 bits |
- 1 bit |
-
-
- CurrentlyUnused |
- LongAtoms |
-
-
-
- The least significant bit in that half byte is flag LongAtoms.
- If it is set, 2 bytes are used for atom lengths instead of
- 1 byte in the distribution header.
-
-
- After the Flags field follow the AtomCacheRefs. The
- first AtomCacheRef is the one corresponding to
- AtomCacheReferenceIndex 0. Higher indices follow
- in sequence up to index NumberOfAtomCacheRefs - 1.
-
-
- If the NewCacheEntryFlag for the next AtomCacheRef has
- been set, a NewAtomCacheRef on the following format follows:
-
-
-
- 1 |
- 1 | 2 |
- Length |
-
-
- InternalSegmentIndex |
- Length |
- AtomText |
-
-
-
- InternalSegmentIndex together with the SegmentIndex
- completely identify the location of an atom cache entry in the
- atom cache. Length is the number of bytes that AtomText
- consists of. Length is a 2 byte big-endian integer
- if flag LongAtoms has been set, otherwise a 1 byte
- integer. When distribution flag
-
- DFLAG_UTF8_ATOMS
- has been exchanged between both nodes in the
-
- distribution handshake,
- characters in AtomText are encoded in UTF-8, otherwise
- in Latin-1. The following CachedAtomRefs with the same
- SegmentIndex and InternalSegmentIndex as this
- NewAtomCacheRef refer to this atom until a new
- NewAtomCacheRef with the same SegmentIndex
- and InternalSegmentIndex appear.
-
-
- For more information on encoding of atoms, see the
- note on UTF-8 encoded atoms
- in the beginning of this section.
-
-
- If the NewCacheEntryFlag for the next AtomCacheRef
- has not been set, a CachedAtomRef on the following format
- follows:
-
-
-
- 1 |
-
-
- InternalSegmentIndex |
-
-
-
- InternalSegmentIndex together with the SegmentIndex
- identify the location of the atom cache entry in the atom cache.
- The atom corresponding to this CachedAtomRef is the
- latest NewAtomCacheRef preceding this CachedAtomRef
- in another previously passed distribution header.
-
+
+ Normal Distribution Header
+
+ The non-fragmented distribution header format is as follows:
+
+
+
+ 1 |
+ 1 |
+ 1 |
+ NumberOfAtomCacheRefs/2+1 | 0 |
+ N | 0 |
+
+
+ 131 |
+ 68 |
+ NumberOfAtomCacheRefs |
+ Flags |
+ AtomCacheRefs |
+
+ Normal Distribution Header Format
+
+ Flags consist of NumberOfAtomCacheRefs/2+1 bytes,
+ unless NumberOfAtomCacheRefs is 0. If
+ NumberOfAtomCacheRefs is 0, Flags and
+ AtomCacheRefs are omitted. Each atom cache reference has
+ a half byte flag field. Flags corresponding to a specific
+ AtomCacheReferenceIndex are located in flag byte number
+ AtomCacheReferenceIndex/2. Flag byte 0 is the first byte
+ after the NumberOfAtomCacheRefs byte. Flags for an even
+ AtomCacheReferenceIndex are located in the least significant
+ half byte and flags for an odd AtomCacheReferenceIndex are
+ located in the most significant half byte.
+
+
+ The flag field of an atom cache reference has the following
+ format:
+
+
+
+ 1 bit |
+ 3 bits |
+
+
+ NewCacheEntryFlag |
+ SegmentIndex |
+
+
+
+ The most significant bit is the NewCacheEntryFlag. If set,
+ the corresponding cache reference is new. The three least
+ significant bits are the SegmentIndex of the corresponding
+ atom cache entry. An atom cache consists of 8 segments, each of size
+ 256, that is, an atom cache can contain 2048 entries.
+
+
+ After flag fields for atom cache references, another half byte flag
+ field is located with the following format:
+
+
+
+ 3 bits |
+ 1 bit |
+
+
+ CurrentlyUnused |
+ LongAtoms |
+
+
+
+ The least significant bit in that half byte is flag LongAtoms.
+ If it is set, 2 bytes are used for atom lengths instead of
+ 1 byte in the distribution header.
+
+
+ After the Flags field follow the AtomCacheRefs. The
+ first AtomCacheRef is the one corresponding to
+ AtomCacheReferenceIndex 0. Higher indices follow
+ in sequence up to index NumberOfAtomCacheRefs - 1.
+
+
+ If the NewCacheEntryFlag for the next AtomCacheRef has
+ been set, a NewAtomCacheRef on the following format follows:
+
+
+
+ 1 |
+ 1 | 2 |
+ Length |
+
+
+ InternalSegmentIndex |
+ Length |
+ AtomText |
+
+
+
+ InternalSegmentIndex together with the SegmentIndex
+ completely identify the location of an atom cache entry in the
+ atom cache. Length is the number of bytes that AtomText
+ consists of. Length is a 2 byte big-endian integer
+ if flag LongAtoms has been set, otherwise a 1 byte
+ integer. When distribution flag
+
+ DFLAG_UTF8_ATOMS
+ has been exchanged between both nodes in the
+
+ distribution handshake,
+ characters in AtomText are encoded in UTF-8, otherwise
+ in Latin-1. The following CachedAtomRefs with the same
+ SegmentIndex and InternalSegmentIndex as this
+ NewAtomCacheRef refer to this atom until a new
+ NewAtomCacheRef with the same SegmentIndex
+ and InternalSegmentIndex appear.
+
+
+ For more information on encoding of atoms, see the
+ note on UTF-8 encoded atoms
+ in the beginning of this section.
+
+
+ If the NewCacheEntryFlag for the next AtomCacheRef
+ has not been set, a CachedAtomRef on the following format
+ follows:
+
+
+
+ 1 |
+
+
+ InternalSegmentIndex |
+
+
+
+ InternalSegmentIndex together with the SegmentIndex
+ identify the location of the atom cache entry in the atom cache.
+ The atom corresponding to this CachedAtomRef is the
+ latest NewAtomCacheRef preceding this CachedAtomRef
+ in another previously passed distribution header.
+
+
+
+
+ Distribution Header for fragmented messages
+ Messages sent between Erlang nodes can sometimes be
+ quite large. Since OTP-22 it is possible to split large messages
+ into smaller fragments in order to allow smaller messages to be interleaved
+ between large messages. It is only the message part of each
+ distributed message
+ that may be split using fragmentation. Therefore it is recommended to use the
+
+ PAYLOAD control messages introduced in OTP-22.
+
+ Fragmented distribution messages are only used if the receiving node
+ signals that it supports them via the
+ DFLAG_FRAGMENTS distribution
+ flag.
+ A process must complete the sending of a fragmented message before it
+ can start sending any other message on the same distribution channel.
+
+ The start of a sequence of fragmented messages looks like this:
+
+
+ 1 |
+ 1 |
+ 8 |
+ 8 |
+ 1 |
+ NumberOfAtomCacheRefs/2+1 | 0 |
+ N | 0 |
+
+
+ 131 |
+ 69 |
+ SequenceId |
+ FragmentId |
+ NumberOfAtomCacheRefs |
+ Flags |
+ AtomCacheRefs |
+
+ Starting Fragmented Distribution Header Format
+
+
+ The continuation of a sequence of fragmented messages looks like this:
+
+
+ 1 |
+ 1 |
+ 8 |
+ 8 |
+
+
+ 131 |
+ 70 |
+ SequenceId |
+ FragmentId |
+
+ Continuing Fragmented Distribution Header Format
+
+
+
+ The starting distribution header is very similar to a non-fragmented distribution
+ header. The atom cache works the same as for a normal distribution header and
+ is the same for the entire sequence. The additional fields added are the
+ sequence id and fragment id.
+
+
+
+ Sequence ID
+ -
+
+ The sequence id is used to uniquely identify a fragmented message sent
+ from one process to another on the same distributed connection. This is used
+ to identify which sequence a fragment is a part of as the same process can
+ be in the process of receiving multiple sequences at the same time.
+
+
+ As one process can only be sending one fragmented message at once,
+ it can be convenient to use the local PID as the sequence id.
+
+
+ Fragment ID
+ -
+
+ The Fragment ID is used to number the fragments in a sequence.
+ The id starts at the total number of fragments and then decrements to 1
+ (which is the final fragment). So if a sequence consists of 3 fragments
+ the fragment id in the starting header will be 3, and then fragments 2 and 1
+ are sent.
+
+
+ The fragments must be delivered in the correct order, so if an unordered
+ distribution carrier is used, they must be ordered before being delivered to the
+ Erlang run-time.
+
+
+
+
+
+ Example:
+
+ As an example, let us say that we want to send
+ {call, <0.245.2>, {set_get_state, <<0:1024>>}} to
+ registered process reg using a fragment size of 128. To send
+ this message we need a distribution header, atom cache updates,
+ the control message (which would be {6, <0.245.2>, [], reg} in this case)
+ and finally the actual message. This would all be encoded into:
+
+
+
+131,69,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,2, %% Header with seq and frag id
+5,4,137,9,10,5,236,3,114,101,103,9,4,99,97,108,108, %% Atom cache updates
+238,13,115,101,116,95,103,101,116,95,115,116,97,116,101,
+104,4,97,6,103,82,0,0,0,0,85,0,0,0,0,2,82,1,82,2, %% Control message
+104,3,82,3,103,82,0,0,0,0,245,0,0,0,2,2, %% Actual message using cached atoms
+104,2,82,4,109,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+
+131,70,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,1, %% Cont Header with seq and frag id
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, %% Rest of payload
+0,0,0,0
+
+
+ Let us break that apart into its components. First we have the
+ distribution header tags together with the sequence id and
+ a fragment id of 2.
+
+
+131,69, %% Start fragment header
+0,0,2,168,0,0,5,83, %% The sequence ID
+0,0,0,0,0,0,0,2, %% The fragment ID
+
+ Then we have the updates to the atom cache:
+
+5,4,137,9, %% 5 atoms and their flags
+10,5, %% The already cached atom ids
+236,3,114,101,103, %% The atom 'reg'
+9,4,99,97,108,108, %% The atom 'call'
+238,13,115,101,116,95,103,101,116,95,115,116,97,116,101, %% The atom 'set_get_state'
+
+
+ The first byte says that we have 5 atoms that are part
+ of the cache. Then follow three bytes that are the
+ atom cache ref flags. Each of the flags uses 4 bits so
+ they are a bit hard to read in decimal byte form. In
+ binary half-byte form they look like this:
+
+ 0000, 0100, 1000, 1001, 0000, 1001
+
+ As the high bits of the first two atoms in the
+ cache are not set we know that they are already in the cache,
+ so they do not have to be sent again (these are the node names of the
+ receiving and sending nodes). Then follow the atoms that have to be sent,
+ together with their segment ids.
+
+
+ Then the listing of the atoms comes, starting with 10 and 5
+ which are the atom refs of the already cached atoms. Then the
+ new atoms are sent.
+
+
+ When the atom cache is set up correctly the control message is sent.
+
+ 104,4,97,6,103,82,0,0,0,0,85,0,0,0,0,2,82,1,82,2,
+
+ Note that up until here it is not allowed to fragment the message.
+ The entire atom cache and control message have to be part of the
+ starting fragment. After the control message the payload of the message
+ is sent using 128 bytes:
+
+
+104,3,82,3,103,82,0,0,0,0,245,0,0,0,2,2,
+104,2,82,4,109,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+
+
+ Since the payload is larger than 128 bytes it is split into two
+ fragments. The second fragment does not have any atom cache update
+ instructions so it is a lot simpler:
+
+
+131,70,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,1, %% Continuation dist header 70 with seq and frag id
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, %% remaining payload
+0,0,0,0
+
+
+
+ The fragment size of 128 is only used as an example.
+ Any fragment size may be used when sending fragmented messages.
+
+
+
+
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index ec55a6913c..ff19ef018e 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -55,7 +55,6 @@
*/
#if 0
#define ERTS_DIST_MSG_DBG
-FILE *dbg_file;
#endif
#if 0
/* Enable this to print the dist debug messages to a file instead */
@@ -67,6 +66,7 @@ FILE *dbg_file;
#endif
#if defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG)
+FILE *dbg_file;
static void bw(byte *buf, ErlDrvSizeT sz)
{
bin_write(ERTS_PRINT_FILE, dbg_file, buf, sz);
@@ -743,7 +743,7 @@ void init_dist(void)
sprintf(buff, ERTS_DIST_MSG_DBG_FILE, getpid());
dbg_file = fopen(buff,"w+");
}
-#elif defined (ERTS_DIST_MSG_DBG)
+#elif defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG)
dbg_file = stderr;
#endif
--
cgit v1.2.3