From a4645470a77989cc1d4943331dc11a0d1a4db532 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Tue, 30 Apr 2019 15:45:48 +0200 Subject: erts: Add documentation for distribution fragments --- erts/doc/src/erl_dist_protocol.xml | 13 +- erts/doc/src/erl_ext_dist.xml | 492 ++++++++++++++++++++++++++----------- erts/emulator/beam/dist.c | 4 +- 3 files changed, 359 insertions(+), 150 deletions(-) diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml index 185c75fe84..f924c8a70b 100644 --- a/erts/doc/src/erl_dist_protocol.xml +++ b/erts/doc/src/erl_dist_protocol.xml @@ -850,10 +850,15 @@ DiB == gen_digest(ChA, ICA)? -define(DFLAG_EXIT_PAYLOAD, 16#400000).

Use the PAYLOAD_EXIT, PAYLOAD_EXIT_TT, - PAYLOAD_EXIT2, PAYLOAD_EXIT2_TT - and PAYLOAD_MONITOR_P_EXIT - control messages - instead of the non-PAYLOAD variants.

+ PAYLOAD_EXIT2, PAYLOAD_EXIT2_TT + and PAYLOAD_MONITOR_P_EXIT + control messages + instead of the non-PAYLOAD variants.

+
+ -define(DFLAG_FRAGMENTS, 16#800000). + +

Use fragmented + distribution messages to send large messages.

diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml index a6bc44b8c8..3730f0e8ac 100644 --- a/erts/doc/src/erl_ext_dist.xml +++ b/erts/doc/src/erl_ext_dist.xml @@ -140,162 +140,366 @@ Distribution Header

- The distribution header only contains an atom cache - reference section, but can in the future contain more - information. The distribution header precedes one or more Erlang - terms on the external format. For more information, see the - documentation of the + The distribution header is sent by the Erlang distribution to + carry metadata about the coming + control message + and potential payload. It is primarily used to handle the atom cache + in the Erlang distribution. Since OTP-22 it is also used to fragment + large distribution messages into multiple smaller fragments. + For more information about how the distribution uses the distribution header, + see the documentation of the protocol between connected nodes in the distribution protocol documentation.

- ATOM_CACHE_REF + Any ATOM_CACHE_REF entries with corresponding AtomCacheReferenceIndex in terms encoded on the external format following a distribution header refer to the atom cache references made in the distribution header. The range is 0 <= AtomCacheReferenceIndex < 255, that is, at most 255 different atom cache references from the following terms can be made.

-

- The distribution header format is as follows: -

- - - 1 - 1 - 1 - NumberOfAtomCacheRefs/2+1 | 0 - N | 0 - - - 131 - 68 - NumberOfAtomCacheRefs - Flags - AtomCacheRefs - - Distribution Header Format
-

- Flags consist of NumberOfAtomCacheRefs/2+1 bytes, - unless NumberOfAtomCacheRefs is 0. If - NumberOfAtomCacheRefs is 0, Flags and - AtomCacheRefs are omitted. Each atom cache reference has - a half byte flag field. Flags corresponding to a specific - AtomCacheReferenceIndex are located in flag byte number - AtomCacheReferenceIndex/2. Flag byte 0 is the first byte - after the NumberOfAtomCacheRefs byte. Flags for an even - AtomCacheReferenceIndex are located in the least significant - half byte and flags for an odd AtomCacheReferenceIndex are - located in the most significant half byte. -

-

- The flag field of an atom cache reference has the following - format: -

- - - 1 bit - 3 bits - - - NewCacheEntryFlag - SegmentIndex - -
-

- The most significant bit is the NewCacheEntryFlag. If set, - the corresponding cache reference is new. The three least - significant bits are the SegmentIndex of the corresponding - atom cache entry. An atom cache consists of 8 segments, each of size - 256, that is, an atom cache can contain 2048 entries. -

-

- After flag fields for atom cache references, another half byte flag - field is located with the following format: -

- - - 3 bits - 1 bit - - - CurrentlyUnused - LongAtoms - -
-

- The least significant bit in that half byte is flag LongAtoms. - If it is set, 2 bytes are used for atom lengths instead of - 1 byte in the distribution header. -

-

- After the Flags field follow the AtomCacheRefs. The - first AtomCacheRef is the one corresponding to - AtomCacheReferenceIndex 0. Higher indices follow - in sequence up to index NumberOfAtomCacheRefs - 1. -

-

- If the NewCacheEntryFlag for the next AtomCacheRef has - been set, a NewAtomCacheRef on the following format follows: -

- - - 1 - 1 | 2 - Length - - - InternalSegmentIndex - Length - AtomText - -
-

- InternalSegmentIndex together with the SegmentIndex - completely identify the location of an atom cache entry in the - atom cache. Length is the number of bytes that AtomText - consists of. Length is a 2 byte big-endian integer - if flag LongAtoms has been set, otherwise a 1 byte - integer. When distribution flag - - DFLAG_UTF8_ATOMS - has been exchanged between both nodes in the - - distribution handshake, - characters in AtomText are encoded in UTF-8, otherwise - in Latin-1. The following CachedAtomRefs with the same - SegmentIndex and InternalSegmentIndex as this - NewAtomCacheRef refer to this atom until a new - NewAtomCacheRef with the same SegmentIndex - and InternalSegmentIndex appear. -

-

- For more information on encoding of atoms, see the - note on UTF-8 encoded atoms - in the beginning of this section. -

-

- If the NewCacheEntryFlag for the next AtomCacheRef - has not been set, a CachedAtomRef on the following format - follows: -

- - - 1 - - - InternalSegmentIndex - -
-

- InternalSegmentIndex together with the SegmentIndex - identify the location of the atom cache entry in the atom cache. - The atom corresponding to this CachedAtomRef is the - latest NewAtomCacheRef preceding this CachedAtomRef - in another previously passed distribution header. -

+
+ Normal Distribution Header +

+ The non-fragmented distribution header format is as follows: +

+ + + 1 + 1 + 1 + NumberOfAtomCacheRefs/2+1 | 0 + N | 0 + + + 131 + 68 + NumberOfAtomCacheRefs + Flags + AtomCacheRefs + + Normal Distribution Header Format
+

+ Flags consist of NumberOfAtomCacheRefs/2+1 bytes, + unless NumberOfAtomCacheRefs is 0. If + NumberOfAtomCacheRefs is 0, Flags and + AtomCacheRefs are omitted. Each atom cache reference has + a half byte flag field. Flags corresponding to a specific + AtomCacheReferenceIndex are located in flag byte number + AtomCacheReferenceIndex/2. Flag byte 0 is the first byte + after the NumberOfAtomCacheRefs byte. Flags for an even + AtomCacheReferenceIndex are located in the least significant + half byte and flags for an odd AtomCacheReferenceIndex are + located in the most significant half byte. +

+

+ The flag field of an atom cache reference has the following + format: +

+ + + 1 bit + 3 bits + + + NewCacheEntryFlag + SegmentIndex + +
+

+ The most significant bit is the NewCacheEntryFlag. If set, + the corresponding cache reference is new. The three least + significant bits are the SegmentIndex of the corresponding + atom cache entry. An atom cache consists of 8 segments, each of size + 256, that is, an atom cache can contain 2048 entries. +

+

+ After flag fields for atom cache references, another half byte flag + field is located with the following format: +

+ + + 3 bits + 1 bit + + + CurrentlyUnused + LongAtoms + +
+

+ The least significant bit in that half byte is flag LongAtoms. + If it is set, 2 bytes are used for atom lengths instead of + 1 byte in the distribution header. +

+

+ After the Flags field follow the AtomCacheRefs. The + first AtomCacheRef is the one corresponding to + AtomCacheReferenceIndex 0. Higher indices follow + in sequence up to index NumberOfAtomCacheRefs - 1. +

+

+ If the NewCacheEntryFlag for the next AtomCacheRef has + been set, a NewAtomCacheRef on the following format follows: +

+ + + 1 + 1 | 2 + Length + + + InternalSegmentIndex + Length + AtomText + +
+

+ InternalSegmentIndex together with the SegmentIndex + completely identify the location of an atom cache entry in the + atom cache. Length is the number of bytes that AtomText + consists of. Length is a 2 byte big-endian integer + if flag LongAtoms has been set, otherwise a 1 byte + integer. When distribution flag + + DFLAG_UTF8_ATOMS + has been exchanged between both nodes in the + + distribution handshake, + characters in AtomText are encoded in UTF-8, otherwise + in Latin-1. The following CachedAtomRefs with the same + SegmentIndex and InternalSegmentIndex as this + NewAtomCacheRef refer to this atom until a new + NewAtomCacheRef with the same SegmentIndex + and InternalSegmentIndex appear. +

+

+ For more information on encoding of atoms, see the + note on UTF-8 encoded atoms + in the beginning of this section. +

+

+ If the NewCacheEntryFlag for the next AtomCacheRef + has not been set, a CachedAtomRef on the following format + follows: +

+ + + 1 + + + InternalSegmentIndex + +
+

+ InternalSegmentIndex together with the SegmentIndex + identify the location of the atom cache entry in the atom cache. + The atom corresponding to this CachedAtomRef is the + latest NewAtomCacheRef preceding this CachedAtomRef + in another previously passed distribution header. +

+
+
+ + Distribution Header for fragmented messages +

Messages sent between Erlang nodes can sometimes be + quite large. Since OTP-22 it is possible to split large messages + into smaller fragments in order to allow smaller messages to be interleaved + between large messages. It is only the message part of each + distributed message + that may be split using fragmentation. Therefore it is recommended to use the + + PAYLOAD control messages introduced in OTP-22. +

+

Fragmented distribution messages are only used if the receiving node + signals that it supports them via the + DFLAG_FRAGMENTS distribution + flag.

+

A process must complete the sending of a fragmented message before it + can start sending any other message on the same distribution channel.

+ +

The start of a sequence of fragmented messages looks like this:

+ + + 1 + 1 + 8 + 8 + 1 + NumberOfAtomCacheRefs/2+1 | 0 + N | 0 + + + 131 + 69 + SequenceId + FragmentId + NumberOfAtomCacheRefs + Flags + AtomCacheRefs + + Starting Fragmented Distribution Header Format +
+ +

The continuation of a sequence of fragmented messages looks like this:

+ + + 1 + 1 + 8 + 8 + + + 131 + 70 + SequenceId + FragmentId + + Continuing Fragmented Distribution Header Format +
+ +

+ The starting distribution header is very similar to a non-fragmented distribution + header. The atom cache works the same as for a normal distribution header and + is the same for the entire sequence. The additional fields added are the + sequence id and fragment id. +

+ + + Sequence ID + +

+ The sequence id is used to uniquely identify a fragmented message sent + from one process to another on the same distributed connection. This is used + to identify which sequence a fragment is a part of as the same process can + be in the process of receiving multiple sequences at the same time. +

+

+ As one process can only be sending one fragmented message at once, + it can be convenient to use the local PID as the sequence id. +

+
+ Fragment ID + +

+ The Fragment ID is used to number the fragments in a sequence. + The id starts at the total number of fragments and then decrements to 1 + (which is the final fragment). So if a sequence consists of 3 fragments + the fragment id in the starting header will be 3, and then fragments 2 and 1 + are sent. +

+

+ The fragments must be delivered in the correct order, so if an unordered + distribution carrier is used, they must be ordered before being delivered to the + Erlang run-time. +

+
+
+ +
+ Example: +

+ As an example, let us say that we want to send + {call, <0.245.2>, {set_get_state, <<0:1024>>}} to + registered process reg using a fragment size of 128. To send + this message we need a distribution header, atom cache updates, + the control message (which would be {6, <0.245.2>, [], reg} in this case) + and finally the actual message. This would all be encoded into: +

+ + +131,69,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,2, %% Header with seq and frag id +5,4,137,9,10,5,236,3,114,101,103,9,4,99,97,108,108, %% Atom cache updates +238,13,115,101,116,95,103,101,116,95,115,116,97,116,101, +104,4,97,6,103,82,0,0,0,0,85,0,0,0,0,2,82,1,82,2, %% Control message +104,3,82,3,103,82,0,0,0,0,245,0,0,0,2,2, %% Actual message using cached atoms +104,2,82,4,109,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + +131,70,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,1, %% Cont Header with seq and frag id +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, %% Rest of payload +0,0,0,0 + +

+ Let us break that apart into its components. First we have the + distribution header tags together with the sequence id and + a fragment id of 2. +

+ +131,69, %% Start fragment header +0,0,2,168,0,0,5,83, %% The sequence ID +0,0,0,0,0,0,0,2, %% The fragment ID + +

Then we have the updates to the atom cache:

+ +5,4,137,9, %% 5 atoms and their flags +10,5, %% The already cached atom ids +236,3,114,101,103, %% The atom 'reg' +9,4,99,97,108,108, %% The atom 'call' +238,13,115,101,116,95,103,101,116,95,115,116,97,116,101, %% The atom 'set_get_state' + +

+ The first byte says that we have 5 atoms that are part + of the cache. Then follow three bytes that are the + atom cache ref flags. Each of the flags uses 4 bits so + they are a bit hard to read in decimal byte form. In + binary half-byte form they look like this: +

+ 0000, 0100, 1000, 1001, 1001 +

+ As the high bits of the first two atoms in the + cache are not set we know that they are already in the cache, + so they do not have to be sent again (these are the node names of the + receiving and sending nodes). Then follow the atoms that have to be sent, + together with their segment ids. +

+

+ Then the listing of the atoms comes, starting with 10 and 5 + which are the atom refs of the already cached atoms. Then the + new atoms are sent. +

+

+ When the atom cache is set up correctly the control message is sent. +

+ 104,4,97,6,103,82,0,0,0,0,85,0,0,0,0,2,82,1,82,2, +

+ Note that up until here it is not allowed to fragment the message. + The entire atom cache and control message have to be part of the + starting fragment. After the control message the payload of the message + is sent using 128 bytes: +

+ +104,3,82,3,103,82,0,0,0,0,245,0,0,0,2,2, +104,2,82,4,109,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + +

+ Since the payload is larger than 128 bytes it is split into two + fragments. The second fragment does not have any atom cache update + instructions so it is a lot simpler: +

+ +131,70,0,0,2,168,0,0,5,83,0,0,0,0,0,0,0,1, %% Continuation dist header 70 with seq and frag id +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, %% remaining payload +0,0,0,0 + + +

+ The fragment size of 128 is only used as an example. + Any fragment size may be used when sending fragmented messages. +

+
+
+
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index ec55a6913c..ff19ef018e 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -55,7 +55,6 @@ */ #if 0 #define ERTS_DIST_MSG_DBG -FILE *dbg_file; #endif #if 0 /* Enable this to print the dist debug messages to a file instead */ @@ -67,6 +66,7 @@ FILE *dbg_file; #endif #if defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG) +FILE *dbg_file; static void bw(byte *buf, ErlDrvSizeT sz) { bin_write(ERTS_PRINT_FILE, dbg_file, buf, sz); @@ -743,7 +743,7 @@ void init_dist(void) sprintf(buff, ERTS_DIST_MSG_DBG_FILE, getpid()); dbg_file = fopen(buff,"w+"); } -#elif defined (ERTS_DIST_MSG_DBG) +#elif defined(ERTS_DIST_MSG_DBG) || defined(ERTS_RAW_DIST_MSG_DBG) dbg_file = stderr; #endif -- cgit v1.2.3