diff options
-rw-r--r-- | erts/doc/src/erl_nif.xml | 16 | ||||
-rw-r--r-- | erts/emulator/beam/atom.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/index.c | 2 | ||||
-rw-r--r-- | lib/dialyzer/src/dialyzer_typesig.erl | 12 | ||||
-rw-r--r-- | lib/dialyzer/test/map_SUITE_data/results/map_anon_fun | 2 | ||||
-rw-r--r-- | lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl | 9 | ||||
-rw-r--r-- | lib/dialyzer/test/plt_SUITE.erl | 10 | ||||
-rw-r--r-- | lib/hipe/cerl/erl_types.erl | 4 | ||||
-rw-r--r-- | lib/mnesia/src/mnesia.erl | 3 | ||||
-rw-r--r-- | lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem | 14 | ||||
-rw-r--r-- | lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf | 8 | ||||
-rw-r--r-- | lib/ssl/src/dtls_connection.erl | 84 | ||||
-rw-r--r-- | lib/ssl/src/ssl_connection.erl | 14 | ||||
-rw-r--r-- | lib/ssl/src/tls_connection.erl | 63 | ||||
-rw-r--r-- | lib/stdlib/doc/src/unicode.xml | 9 | ||||
-rw-r--r-- | lib/stdlib/src/string.erl | 748 | ||||
-rw-r--r-- | lib/stdlib/test/string_SUITE.erl | 121 | ||||
-rw-r--r-- | lib/stdlib/test/unicode_util_SUITE.erl | 17 | ||||
-rwxr-xr-x | lib/stdlib/uc_spec/gen_unicode_mod.escript | 31 |
19 files changed, 803 insertions, 366 deletions
diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml index 419e41693e..ef3cdb89e9 100644 --- a/erts/doc/src/erl_nif.xml +++ b/erts/doc/src/erl_nif.xml @@ -952,6 +952,8 @@ typedef struct { <desc> <p>Allocates memory of <c>size</c> bytes.</p> <p>Returns <c>NULL</c> if the allocation fails.</p> + <p>The returned pointer is suitably aligned for any built-in type that + fit in the allocated memory.</p> </desc> </func> @@ -2760,6 +2762,20 @@ enif_map_iterator_destroy(env, &iter);</code> </func> <func> + <name><ret>void *</ret> + <nametext>enif_realloc(void* ptr, size_t size)</nametext></name> + <fsummary>Reallocate dynamic memory.</fsummary> + <desc> + <p>Reallocates memory allocated by + <seealso marker="#enif_alloc"><c>enif_alloc</c></seealso> to + <c>size</c> bytes.</p> + <p>Returns <c>NULL</c> if the reallocation fails.</p> + <p>The returned pointer is suitably aligned for any built-in type that + fit in the allocated memory.</p> + </desc> + </func> + + <func> <name><ret>int</ret> <nametext>enif_realloc_binary(ErlNifBinary* bin, size_t size)</nametext> </name> diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index be998a46bd..385120a8d9 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -36,7 +36,7 @@ /* Internal atom cache needs MAX_ATOM_TABLE_SIZE to be less than an unsigned 32 bit integer. See external.c(erts_encode_ext_dist_header_setup) for more details. */ -#define MAX_ATOM_TABLE_SIZE ((MAX_ATOM_INDEX + 1 < (UWORD_CONSTANT(1) << 32)) ? MAX_ATOM_INDEX + 1 : (UWORD_CONSTANT(1) << 32)) +#define MAX_ATOM_TABLE_SIZE ((MAX_ATOM_INDEX + 1 < (UWORD_CONSTANT(1) << 32)) ? MAX_ATOM_INDEX + 1 : ((UWORD_CONSTANT(1) << 31) - 1)) /* Here we use maximum signed interger value to avoid integer overflow */ #else #define MAX_ATOM_TABLE_SIZE (MAX_ATOM_INDEX + 1) #endif diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c index a1f6f54543..7bf1a032c1 100644 --- a/erts/emulator/beam/index.c +++ b/erts/emulator/beam/index.c @@ -58,7 +58,7 @@ IndexTable* erts_index_init(ErtsAlcType_t type, IndexTable* t, char* name, int size, int limit, HashFunctions fun) { - Uint base_size = ((limit+INDEX_PAGE_SIZE-1)/INDEX_PAGE_SIZE)*sizeof(IndexSlot*); + Uint base_size = (((Uint)limit+INDEX_PAGE_SIZE-1)/INDEX_PAGE_SIZE)*sizeof(IndexSlot*); hash_init(type, &t->htable, name, 3*size/4, fun); t->size = 0; diff --git a/lib/dialyzer/src/dialyzer_typesig.erl b/lib/dialyzer/src/dialyzer_typesig.erl index c4d8f45447..d03326ec97 100644 --- a/lib/dialyzer/src/dialyzer_typesig.erl +++ b/lib/dialyzer/src/dialyzer_typesig.erl @@ -41,7 +41,7 @@ t_is_float/1, t_is_fun/1, t_is_integer/1, t_non_neg_integer/0, t_is_list/1, t_is_nil/1, t_is_none/1, t_is_number/1, - t_is_singleton/1, + t_is_singleton/1, t_is_none_or_unit/1, t_limit/2, t_list/0, t_list/1, t_list_elements/1, t_nonempty_list/1, t_maybe_improper_list/0, @@ -528,13 +528,14 @@ traverse(Tree, DefinedVars, State) -> false -> t_any(); true -> MT = t_inf(lookup_type(MapVar, Map), t_map()), - case t_is_none(MT) of + case t_is_none_or_unit(MT) of true -> t_none(); false -> DisjointFromKeyType = fun(ShadowKey) -> - t_is_none(t_inf(lookup_type(ShadowKey, Map), - KeyType)) + ST = t_inf(lookup_type(ShadowKey, Map), + KeyType), + t_is_none_or_unit(ST) end, case lists:all(DisjointFromKeyType, ShadowKeys) of true -> t_map_get(KeyType, MT); @@ -567,7 +568,8 @@ traverse(Tree, DefinedVars, State) -> case cerl:is_literal(OpTree) andalso cerl:concrete(OpTree) =:= exact of true -> - case t_is_none(t_inf(ShadowedKeys, KeyType)) of + ST = t_inf(ShadowedKeys, KeyType), + case t_is_none_or_unit(ST) of true -> t_map_put({KeyType, t_any()}, AccType); false -> diff --git a/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun b/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun new file mode 100644 index 0000000000..cfca5b1407 --- /dev/null +++ b/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun @@ -0,0 +1,2 @@ + +map_anon_fun.erl:4: Function g/1 will never be called diff --git a/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl b/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl new file mode 100644 index 0000000000..e77016d68a --- /dev/null +++ b/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl @@ -0,0 +1,9 @@ +-module(map_anon_fun). + +%% Not exported. +g(A) -> + maps:map(fun F(K, {V, _C}) -> + F(K, V); + F(_K, _V) -> + #{ system => {A} } + end, #{}). diff --git a/lib/dialyzer/test/plt_SUITE.erl b/lib/dialyzer/test/plt_SUITE.erl index a8a9f176fc..680f5b5088 100644 --- a/lib/dialyzer/test/plt_SUITE.erl +++ b/lib/dialyzer/test/plt_SUITE.erl @@ -283,8 +283,8 @@ bad_dialyzer_attr(Config) -> {dialyzer_error, "Analysis failed with error:\n" ++ Str1} = (catch dialyzer:run(Opts)), - P1 = string:str(Str1, "dial.erl:2: function undef/0 undefined"), - true = P1 > 0, + S1 = string:find(Str1, "dial.erl:2: function undef/0 undefined"), + true = is_list(S1), Prog2 = <<"-module(dial). -dialyzer({no_return, [{undef,1,2}]}).">>, @@ -292,9 +292,9 @@ bad_dialyzer_attr(Config) -> {dialyzer_error, "Analysis failed with error:\n" ++ Str2} = (catch dialyzer:run(Opts)), - P2 = string:str(Str2, "dial.erl:2: badly formed dialyzer " - "attribute: {no_return,{undef,1,2}}"), - true = P2 > 0, + S2 = string:find(Str2, "dial.erl:2: badly formed dialyzer " + "attribute: {no_return,{undef,1,2}}"), + true = is_list(S2), ok. diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index abb6c259f6..4e0f93212d 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -1877,6 +1877,7 @@ t_map_put(KV, Map, Opaques) -> %% Key and Value are *not* unopaqued, but the map is map_put(_, ?none, _) -> ?none; +map_put(_, ?unit, _) -> ?none; map_put({Key, Value}, ?map(Pairs,DefK,DefV), Opaques) -> case t_is_none_or_unit(Key) orelse t_is_none_or_unit(Value) of true -> ?none; @@ -1902,6 +1903,7 @@ t_map_update(KV, Map) -> -spec t_map_update({erl_type(), erl_type()}, erl_type(), opaques()) -> erl_type(). t_map_update(_, ?none, _) -> ?none; +t_map_update(_, ?unit, _) -> ?none; t_map_update(KV={Key, _}, M, Opaques) -> case t_is_subtype(t_atom('true'), t_map_is_key(Key, M, Opaques)) of false -> ?none; @@ -1922,6 +1924,7 @@ t_map_get(Key, Map, Opaques) -> end). map_get(_, ?none) -> ?none; +map_get(_, ?unit) -> ?none; map_get(Key, ?map(Pairs, DefK, DefV)) -> DefRes = case t_do_overlap(DefK, Key) of @@ -1957,6 +1960,7 @@ t_map_is_key(Key, Map, Opaques) -> end). map_is_key(_, ?none) -> ?none; +map_is_key(_, ?unit) -> ?none; map_is_key(Key, ?map(Pairs, DefK, _DefV)) -> case is_singleton_type(Key) of true -> diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl index b68b2de028..1842769778 100644 --- a/lib/mnesia/src/mnesia.erl +++ b/lib/mnesia/src/mnesia.erl @@ -151,7 +151,8 @@ {'snmp', SnmpStruct::term()} | {'storage_properties', [{Backend::module(), [BackendProp::_]}]} | {'type', 'set' | 'ordered_set' | 'bag'} | - {'local_content', boolean()}. + {'local_content', boolean()} | + {'user_properties', proplists:proplist()}. -type t_result(Res) :: {'atomic', Res} | {'aborted', Reason::term()}. -type activity() :: 'ets' | 'async_dirty' | 'sync_dirty' | 'transaction' | 'sync_transaction' | diff --git a/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem b/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem index dc20285f30..97d12cdadf 100644 --- a/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem +++ b/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem @@ -1,5 +1,4 @@ -----BEGIN CERTIFICATE----- -<<<<<<< HEAD MIICBzCCAXCgAwIBAgIJAJgbo5FL73LuMA0GCSqGSIb3DQEBCwUAMCMxCzAJBgNV BAYTAlNFMRQwEgYDVQQDEwtleGFtcGxlLmNvbTAeFw0xNzEwMTExMDM0NDJaFw0x NzExMTAxMDM0NDJaMCMxCzAJBgNVBAYTAlNFMRQwEgYDVQQDEwtleGFtcGxlLmNv @@ -11,17 +10,4 @@ S4cQq80A7wAAAAAAAAAAAAAAAYYTaHR0cHM6Ly8xMC4xMS4xMi4xMzANBgkqhkiG 9w0BAQsFAAOBgQDMn8aqs/5FkkWhspvN2n+D2l87M+33a5My54ZVZhayZ/KRmhCN Gix/BiVYJ3UlmWmGcnQXb3MLt/LQHaD3S2whDaLN3xJ8BbnX7A4ZTybitdyeFhDw K3iDVUM3bSsBJ4EcBPWIMnow3ALP5HlGRMlH/87Qt+uVPXuwNh9pmyIhRQ== -======= -MIIB/zCCAWigAwIBAgIJAMoSejmTjwAGMA0GCSqGSIb3DQEBCwUAMB8xCzAJBgNV -BAYTAlNFMRAwDgYDVQQDEwc1LjYuNy44MB4XDTE3MDkyODE0MDAxNVoXDTE3MTAy -ODE0MDAxNVowHzELMAkGA1UEBhMCU0UxEDAOBgNVBAMTBzUuNi43LjgwgZ8wDQYJ -KoZIhvcNAQEBBQADgY0AMIGJAoGBAMUPU89KwVbTCDkyxQSz3wprMbZTLe35K6jm -Q7oY1rJyVXjsFHwZrFqqNMScEyX40rJhczQ2Z9etEX6qYLbdb/DZeFcKo14fR583 -QMFZC+qqpLWHdvjaQN0KwD99VFeZIGpRgywG8SR+BXZjDHUkGsMrikAEJtf0Tgih -IPyiFtiJAgMBAAGjQzBBMD8GA1UdEQQ4MDaCBzEuMi4zLjSHBAUGBwiHEKvNAO8A -AAAAAAAAAAAAAAGGE2h0dHBzOi8vMTAuMTEuMTIuMTMwDQYJKoZIhvcNAQELBQAD -gYEAtWVeQaRFZ0kH/pzSWMSsOCUrjbwlWRwDNbagNKoM6nCRv0QQ59fG6XrVZwR3 -c0s5arlMh3U2+bjKE+Iq9+b/lN1lGzf8iaAqBNa7KptwTSUEY3TiNG5X0zlSXKTI -3z7AaUEtghL9ImCPj5V3tVksqWd7U0zLmeeLZnM+wGAL9Hc= ->>>>>>> maint-20 -----END CERTIFICATE----- diff --git a/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf b/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf index f27dac07ec..798592e4f6 100644 --- a/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf +++ b/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf @@ -5,21 +5,13 @@ distinguished_name = DN [DN] C=SE CN=example.com -<<<<<<< HEAD -======= -CN=5.6.7.8 ->>>>>>> maint-20 [SAN] subjectAltName = @alt_names [alt_names] DNS = 1.2.3.4 -<<<<<<< HEAD IP.1 = 10.67.16.75 -======= -IP.1 = 5.6.7.8 ->>>>>>> maint-20 IP.2 = abcd:ef::1 URI = https://10.11.12.13 diff --git a/lib/ssl/src/dtls_connection.erl b/lib/ssl/src/dtls_connection.erl index 15eb39e716..073cb4009b 100644 --- a/lib/ssl/src/dtls_connection.erl +++ b/lib/ssl/src/dtls_connection.erl @@ -232,8 +232,6 @@ next_event(StateName, Record, #alert{} = Alert -> {next_state, StateName, State0, [{next_event, internal, Alert} | Actions]} end. -handle_call(Event, From, StateName, State) -> - ssl_connection:handle_call(Event, From, StateName, State, ?MODULE). handle_common_event(internal, #alert{} = Alert, StateName, #state{negotiated_version = Version} = State) -> @@ -446,21 +444,21 @@ init({call, From}, {start, Timeout}, {Record, State} = next_record(State3), next_event(hello, Record, State, Actions); init({call, _} = Type, Event, #state{role = server, transport_cb = gen_udp} = State) -> - Result = ssl_connection:?FUNCTION_NAME(Type, Event, - State#state{flight_state = {retransmit, ?INITIAL_RETRANSMIT_TIMEOUT}, - protocol_specific = #{current_cookie_secret => dtls_v1:cookie_secret(), - previous_cookie_secret => <<>>, - ignored_alerts => 0, - max_ignored_alerts => 10}}, - ?MODULE), + Result = gen_handshake(?FUNCTION_NAME, Type, Event, + State#state{flight_state = {retransmit, ?INITIAL_RETRANSMIT_TIMEOUT}, + protocol_specific = #{current_cookie_secret => dtls_v1:cookie_secret(), + previous_cookie_secret => <<>>, + ignored_alerts => 0, + max_ignored_alerts => 10}}), erlang:send_after(dtls_v1:cookie_timeout(), self(), new_cookie_secret), Result; init({call, _} = Type, Event, #state{role = server} = State) -> %% I.E. DTLS over sctp - ssl_connection:?FUNCTION_NAME(Type, Event, State#state{flight_state = reliable}, ?MODULE); + gen_handshake(?FUNCTION_NAME, Type, Event, State#state{flight_state = reliable}); init(Type, Event, State) -> - ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE). + gen_handshake(?FUNCTION_NAME, Type, Event, State). + %%-------------------------------------------------------------------- -spec error(gen_statem:event_type(), {start, timeout()} | term(), #state{}) -> @@ -470,8 +468,8 @@ error(enter, _, State) -> {keep_state, State}; error({call, From}, {start, _Timeout}, {Error, State}) -> {stop_and_reply, normal, {reply, From, {error, Error}}, State}; -error({call, From}, Msg, State) -> - handle_call(Msg, From, ?FUNCTION_NAME, State); +error({call, _} = Call, Msg, State) -> + gen_handshake(?FUNCTION_NAME, Call, Msg, State); error(_, _, _) -> {keep_state_and_data, [postpone]}. @@ -566,11 +564,11 @@ hello(internal, {handshake, {#hello_verify_request{} = Handshake, _}}, State) -> %% hello_verify should not be in handshake history {next_state, ?FUNCTION_NAME, State, [{next_event, internal, Handshake}]}; hello(info, Event, State) -> - handle_info(Event, ?FUNCTION_NAME, State); + gen_info(Event, ?FUNCTION_NAME, State); hello(state_timeout, Event, State) -> handle_state_timeout(Event, ?FUNCTION_NAME, State); hello(Type, Event, State) -> - ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec abbreviated(gen_statem:event_type(), term(), #state{}) -> @@ -580,22 +578,21 @@ abbreviated(enter, _, State0) -> {State, Actions} = handle_flight_timer(State0), {keep_state, State, Actions}; abbreviated(info, Event, State) -> - handle_info(Event, ?FUNCTION_NAME, State); + gen_info(Event, ?FUNCTION_NAME, State); abbreviated(internal = Type, #change_cipher_spec{type = <<1>>} = Event, #state{connection_states = ConnectionStates0} = State) -> ConnectionStates1 = dtls_record:save_current_connection_state(ConnectionStates0, read), ConnectionStates = dtls_record:next_epoch(ConnectionStates1, read), - ssl_connection:?FUNCTION_NAME(Type, Event, State#state{connection_states = ConnectionStates}, ?MODULE); + gen_handshake(?FUNCTION_NAME, Type, Event, State#state{connection_states = ConnectionStates}); abbreviated(internal = Type, #finished{} = Event, #state{connection_states = ConnectionStates} = State) -> - ssl_connection:?FUNCTION_NAME(Type, Event, - prepare_flight(State#state{connection_states = ConnectionStates, - flight_state = connection}), ?MODULE); + gen_handshake(?FUNCTION_NAME, Type, Event, + prepare_flight(State#state{connection_states = ConnectionStates, + flight_state = connection})); abbreviated(state_timeout, Event, State) -> handle_state_timeout(Event, ?FUNCTION_NAME, State); abbreviated(Type, Event, State) -> - ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE). - + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec certify(gen_statem:event_type(), term(), #state{}) -> gen_statem:state_function_result(). @@ -604,13 +601,13 @@ certify(enter, _, State0) -> {State, Actions} = handle_flight_timer(State0), {keep_state, State, Actions}; certify(info, Event, State) -> - handle_info(Event, ?FUNCTION_NAME, State); + gen_info(Event, ?FUNCTION_NAME, State); certify(internal = Type, #server_hello_done{} = Event, State) -> ssl_connection:certify(Type, Event, prepare_flight(State), ?MODULE); certify(state_timeout, Event, State) -> handle_state_timeout(Event, ?FUNCTION_NAME, State); certify(Type, Event, State) -> - ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec cipher(gen_statem:event_type(), term(), #state{}) -> @@ -620,7 +617,7 @@ cipher(enter, _, State0) -> {State, Actions} = handle_flight_timer(State0), {keep_state, State, Actions}; cipher(info, Event, State) -> - handle_info(Event, ?FUNCTION_NAME, State); + gen_info(Event, ?FUNCTION_NAME, State); cipher(internal = Type, #change_cipher_spec{type = <<1>>} = Event, #state{connection_states = ConnectionStates0} = State) -> ConnectionStates1 = dtls_record:save_current_connection_state(ConnectionStates0, read), @@ -644,7 +641,7 @@ cipher(Type, Event, State) -> connection(enter, _, State) -> {keep_state, State}; connection(info, Event, State) -> - handle_info(Event, ?FUNCTION_NAME, State); + gen_info(Event, ?FUNCTION_NAME, State); connection(internal, #hello_request{}, #state{host = Host, port = Port, session = #session{own_certificate = Cert} = Session0, session_cache = Cache, session_cache_cb = CacheCb, @@ -864,7 +861,7 @@ handle_info(new_cookie_secret, StateName, CookieInfo#{current_cookie_secret => dtls_v1:cookie_secret(), previous_cookie_secret => Secret}}}; handle_info(Msg, StateName, State) -> - ssl_connection:handle_info(Msg, StateName, State). + ssl_connection:StateName(info, Msg, State, ?MODULE). handle_state_timeout(flight_retransmission_timeout, StateName, #state{flight_state = {retransmit, NextTimeout}} = State0) -> @@ -907,6 +904,39 @@ encode_change_cipher(#change_cipher_spec{}, Version, Epoch, ConnectionStates) -> decode_alerts(Bin) -> ssl_alert:decode(Bin). +gen_handshake(StateName, Type, Event, + #state{negotiated_version = Version} = State) -> + try ssl_connection:StateName(Type, Event, State, ?MODULE) of + Result -> + Result + catch + _:_ -> + ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE, + malformed_handshake_data), + Version, StateName, State) + end. + +gen_info(Event, connection = StateName, #state{negotiated_version = Version} = State) -> + try handle_info(Event, StateName, State) of + Result -> + Result + catch + _:_ -> + ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?INTERNAL_ERROR, + malformed_data), + Version, StateName, State) + end; + +gen_info(Event, StateName, #state{negotiated_version = Version} = State) -> + try handle_info(Event, StateName, State) of + Result -> + Result + catch + _:_ -> + ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE, + malformed_handshake_data), + Version, StateName, State) + end. unprocessed_events(Events) -> %% The first handshake event will be processed immediately %% as it is entered first in the event queue and diff --git a/lib/ssl/src/ssl_connection.erl b/lib/ssl/src/ssl_connection.erl index 452bbb6816..3531cdda11 100644 --- a/lib/ssl/src/ssl_connection.erl +++ b/lib/ssl/src/ssl_connection.erl @@ -61,14 +61,11 @@ %% General gen_statem state functions with extra callback argument %% to determine if it is an SSL/TLS or DTLS gen_statem machine --export([init/4, hello/4, abbreviated/4, certify/4, cipher/4, connection/4, downgrade/4]). +-export([init/4, error/4, hello/4, abbreviated/4, certify/4, cipher/4, connection/4, downgrade/4]). %% gen_statem callbacks -export([terminate/3, format_status/2]). -%% TODO: do not export, call state function instead --export([handle_info/3, handle_call/5, handle_common_event/5]). - %%==================================================================== %% Setup %%==================================================================== @@ -539,6 +536,15 @@ init(_Type, _Event, _State, _Connection) -> {keep_state_and_data, [postpone]}. %%-------------------------------------------------------------------- +-spec error(gen_statem:event_type(), + {start, timeout()} | term(), #state{}, + tls_connection | dtls_connection) -> + gen_statem:state_function_result(). +%%-------------------------------------------------------------------- +error({call, From}, Msg, State, Connection) -> + handle_call(Msg, From, ?FUNCTION_NAME, State, Connection). + +%%-------------------------------------------------------------------- -spec hello(gen_statem:event_type(), #hello_request{} | #server_hello{} | term(), #state{}, tls_connection | dtls_connection) -> diff --git a/lib/ssl/src/tls_connection.erl b/lib/ssl/src/tls_connection.erl index 96243db4ae..b033eea261 100644 --- a/lib/ssl/src/tls_connection.erl +++ b/lib/ssl/src/tls_connection.erl @@ -141,12 +141,14 @@ next_record(#state{protocol_buffers = end; next_record(#state{protocol_buffers = #protocol_buffers{tls_packets = [], tls_cipher_texts = []}, socket = Socket, + close_tag = CloseTag, transport_cb = Transport} = State) -> case tls_socket:setopts(Transport, Socket, [{active,once}]) of ok -> {no_record, State}; _ -> - {socket_closed, State} + self() ! {CloseTag, Socket}, + {no_record, State} end; next_record(State) -> {no_record, State}. @@ -154,15 +156,10 @@ next_record(State) -> next_event(StateName, Record, State) -> next_event(StateName, Record, State, []). -next_event(StateName, socket_closed, State, _) -> - ssl_connection:handle_normal_shutdown(?ALERT_REC(?FATAL, ?CLOSE_NOTIFY), StateName, State), - {stop, {shutdown, transport_closed}, State}; next_event(connection = StateName, no_record, State0, Actions) -> case next_record_if_active(State0) of {no_record, State} -> ssl_connection:hibernate_after(StateName, State, Actions); - {socket_closed, State} -> - next_event(StateName, socket_closed, State, Actions); {#ssl_tls{} = Record, State} -> {next_state, StateName, State, [{next_event, internal, {protocol_record, Record}} | Actions]}; {#alert{} = Alert, State} -> @@ -431,7 +428,7 @@ init({call, From}, {start, Timeout}, {Record, State} = next_record(State1), next_event(hello, Record, State); init(Type, Event, State) -> - gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec error(gen_statem:event_type(), @@ -441,8 +438,8 @@ init(Type, Event, State) -> error({call, From}, {start, _Timeout}, {Error, State}) -> {stop_and_reply, normal, {reply, From, {error, Error}}, State}; -error({call, From}, Msg, State) -> - handle_call(Msg, From, ?FUNCTION_NAME, State); +error({call, _} = Call, Msg, State) -> + gen_handshake(?FUNCTION_NAME, Call, Msg, State); error(_, _, _) -> {keep_state_and_data, [postpone]}. @@ -472,14 +469,13 @@ hello(internal, #client_hello{client_version = ClientVersion} = Hello, undefined -> CurrentProtocol; _ -> Protocol0 end, - - gen_handshake(ssl_connection, hello, internal, {common_client_hello, Type, ServerHelloExt}, - State#state{connection_states = ConnectionStates, - negotiated_version = Version, - client_hello_version = ClientVersion, - hashsign_algorithm = HashSign, - session = Session, - negotiated_protocol = Protocol}) + gen_handshake(?FUNCTION_NAME, internal, {common_client_hello, Type, ServerHelloExt}, + State#state{connection_states = ConnectionStates, + negotiated_version = Version, + hashsign_algorithm = HashSign, + client_hello_version = ClientVersion, + session = Session, + negotiated_protocol = Protocol}) end; hello(internal, #server_hello{} = Hello, #state{connection_states = ConnectionStates0, @@ -497,7 +493,7 @@ hello(internal, #server_hello{} = Hello, hello(info, Event, State) -> gen_info(Event, ?FUNCTION_NAME, State); hello(Type, Event, State) -> - gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec abbreviated(gen_statem:event_type(), term(), #state{}) -> @@ -506,7 +502,7 @@ hello(Type, Event, State) -> abbreviated(info, Event, State) -> gen_info(Event, ?FUNCTION_NAME, State); abbreviated(Type, Event, State) -> - gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec certify(gen_statem:event_type(), term(), #state{}) -> @@ -515,7 +511,7 @@ abbreviated(Type, Event, State) -> certify(info, Event, State) -> gen_info(Event, ?FUNCTION_NAME, State); certify(Type, Event, State) -> - gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec cipher(gen_statem:event_type(), term(), #state{}) -> @@ -524,7 +520,7 @@ certify(Type, Event, State) -> cipher(info, Event, State) -> gen_info(Event, ?FUNCTION_NAME, State); cipher(Type, Event, State) -> - gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State). + gen_handshake(?FUNCTION_NAME, Type, Event, State). %%-------------------------------------------------------------------- -spec connection(gen_statem:event_type(), @@ -587,9 +583,6 @@ terminate(Reason, StateName, State) -> format_status(Type, Data) -> ssl_connection:format_status(Type, Data). -code_change(_OldVsn, StateName, State0, {Direction, From, To}) -> - State = convert_state(State0, Direction, From, To), - {ok, StateName, State}; code_change(_OldVsn, StateName, State, _) -> {ok, StateName, State}. @@ -651,10 +644,7 @@ tls_handshake_events(Packets) -> {next_event, internal, {handshake, Packet}} end, Packets). -handle_call(Event, From, StateName, State) -> - ssl_connection:handle_call(Event, From, StateName, State, ?MODULE). - -%% raw data from socket, unpack records +%% raw data from socket, upack records handle_info({Protocol, _, Data}, StateName, #state{data_tag = Protocol} = State0) -> case next_tls_record(Data, State0) of @@ -697,7 +687,7 @@ handle_info({CloseTag, Socket}, StateName, next_event(StateName, no_record, State) end; handle_info(Msg, StateName, State) -> - ssl_connection:handle_info(Msg, StateName, State). + ssl_connection:StateName(info, Msg, State, ?MODULE). handle_alerts([], Result) -> Result; @@ -721,9 +711,9 @@ encode_change_cipher(#change_cipher_spec{}, Version, ConnectionStates) -> decode_alerts(Bin) -> ssl_alert:decode(Bin). -gen_handshake(GenConnection, StateName, Type, Event, +gen_handshake(StateName, Type, Event, #state{negotiated_version = Version} = State) -> - try GenConnection:StateName(Type, Event, State, ?MODULE) of + try ssl_connection:StateName(Type, Event, State, ?MODULE) of Result -> Result catch @@ -784,14 +774,3 @@ assert_buffer_sanity(Bin, _) -> throw(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE, malformed_handshake_data)) end. - -convert_state(#state{ssl_options = Options} = State, up, "5.3.5", "5.3.6") -> - State#state{ssl_options = convert_options_partial_chain(Options, up)}; -convert_state(#state{ssl_options = Options} = State, down, "5.3.6", "5.3.5") -> - State#state{ssl_options = convert_options_partial_chain(Options, down)}. - -convert_options_partial_chain(Options, up) -> - {Head, Tail} = lists:split(5, tuple_to_list(Options)), - list_to_tuple(Head ++ [{partial_chain, fun(_) -> unknown_ca end}] ++ Tail); -convert_options_partial_chain(Options, down) -> - list_to_tuple(proplists:delete(partial_chain, tuple_to_list(Options))). diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml index e86f45431f..d822aca89c 100644 --- a/lib/stdlib/doc/src/unicode.xml +++ b/lib/stdlib/doc/src/unicode.xml @@ -239,8 +239,13 @@ <c><anno>InEncoding</anno></c>.</p> </item> </list> - <p>Only when <c><anno>InEncoding</anno></c> is one of the UTF - encodings, integers in the list are allowed to be > 255.</p> + <p> + Note that integers in the list always represent code points + regardless of <c><anno>InEncoding</anno></c> passed. If + <c><anno>InEncoding</anno> latin1</c> is passed, only code + points < 256 are allowed; otherwise, all valid unicode code + points are allowed. + </p> <p>If <c><anno>InEncoding</anno></c> is <c>latin1</c>, parameter <c><anno>Data</anno></c> corresponds to the <c>iodata()</c> type, but for <c>unicode</c>, parameter <c><anno>Data</anno></c> can diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index 4972da297d..ab041ff53c 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -74,19 +74,21 @@ -export([to_upper/1, to_lower/1]). %% -import(lists,[member/2]). - -compile({no_auto_import,[length/1]}). +-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}). +-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r). -export_type([grapheme_cluster/0]). -type grapheme_cluster() :: char() | [char()]. -type direction() :: 'leading' | 'trailing'. --dialyzer({no_improper_lists, stack/2}). +-dialyzer({no_improper_lists, [stack/2, length_b/3]}). %%% BIFs internal (not documented) should not to be used outside of this module %%% May be removed -export([list_to_float/1, list_to_integer/1]). + %% Uses bifs: string:list_to_float/1 and string:list_to_integer/1 -spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when String :: string(), @@ -117,8 +119,10 @@ is_empty(_) -> false. %% Count the number of grapheme clusters in chardata -spec length(String::unicode:chardata()) -> non_neg_integer(). +length(<<CP1/utf8, Bin/binary>>) -> + length_b(Bin, CP1, 0); length(CD) -> - length_1(unicode_util:gc(CD), 0). + length_1(CD, 0). %% Convert a string to a list of grapheme clusters -spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()]. @@ -166,6 +170,8 @@ equal(A, B, true, Norm) -> %% Reverse grapheme clusters -spec reverse(String::unicode:chardata()) -> [grapheme_cluster()]. +reverse(<<CP1/utf8, Rest/binary>>) -> + reverse_b(Rest, CP1, []); reverse(CD) -> reverse_1(CD, []). @@ -176,7 +182,10 @@ reverse(CD) -> Start :: non_neg_integer(), Slice :: unicode:chardata(). slice(CD, N) when is_integer(N), N >= 0 -> - slice_l(CD, N, is_binary(CD)). + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + Res -> Res + end. -spec slice(String, Start, Length) -> Slice when String::unicode:chardata(), @@ -185,9 +194,15 @@ slice(CD, N) when is_integer(N), N >= 0 -> Slice :: unicode:chardata(). slice(CD, N, Length) when is_integer(N), N >= 0, is_integer(Length), Length > 0 -> - slice_trail(slice_l(CD, N, is_binary(CD)), Length); + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + L -> slice_trail(L, Length) + end; slice(CD, N, infinity) -> - slice_l(CD, N, is_binary(CD)); + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + Res -> Res + end; slice(CD, _, 0) -> case is_binary(CD) of true -> <<>>; @@ -246,18 +261,22 @@ trim(Str, Dir) -> Dir :: direction() | 'both', Characters :: [grapheme_cluster()]. trim(Str, _, []) -> Str; +trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 -> + trim_ls(Str, Sep); trim(Str, leading, Sep) when is_list(Sep) -> - trim_l(Str, search_pattern(Sep)); -trim(Str, trailing, Sep) when is_list(Sep) -> - trim_t(Str, 0, search_pattern(Sep)); -trim(Str, both, Sep0) when is_list(Sep0) -> - Sep = search_pattern(Sep0), - trim_t(trim_l(Str,Sep), 0, Sep). + trim_l(Str, Sep); +trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 -> + trim_ts(Str, Sep); +trim(Str, trailing, Seps0) when is_list(Seps0) -> + Seps = search_pattern(Seps0), + trim_t(Str, 0, Seps); +trim(Str, both, Sep) when is_list(Sep) -> + trim(trim(Str,leading,Sep), trailing, Sep). %% Delete trailing newlines or \r\n -spec chomp(String::unicode:chardata()) -> unicode:chardata(). chomp(Str) -> - trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}). + trim(Str, trailing, [[$\r,$\n],$\n]). %% Split String into two parts where the leading part consists of Characters -spec take(String, Characters) -> {Leading, Trailing} when @@ -290,8 +309,7 @@ take(Str, [], Complement, Dir) -> {true, leading} -> {Str, Empty}; {true, trailing} -> {Empty, Str} end; -take(Str, Sep0, false, leading) -> - Sep = search_pattern(Sep0), +take(Str, Sep, false, leading) -> take_l(Str, Sep, []); take(Str, Sep0, true, leading) -> Sep = search_pattern(Sep0), @@ -451,6 +469,7 @@ replace(String, SearchPattern, Replacement, Where) -> SeparatorList::[grapheme_cluster()]) -> [unicode:chardata()]. lexemes([], _) -> []; +lexemes(Str, []) -> [Str]; lexemes(Str, Seps0) when is_list(Seps0) -> Seps = search_pattern(Seps0), lexemes_m(Str, Seps, []). @@ -484,13 +503,13 @@ find(String, SearchPattern, leading) -> find(String, SearchPattern, trailing) -> find_r(String, unicode:characters_to_list(SearchPattern), nomatch). -%% Fetch first codepoint and return rest in tail +%% Fetch first grapheme cluster and return rest in tail -spec next_grapheme(String::unicode:chardata()) -> maybe_improper_list(grapheme_cluster(),unicode:chardata()) | {error,unicode:chardata()}. next_grapheme(CD) -> unicode_util:gc(CD). -%% Fetch first grapheme cluster and return rest in tail +%% Fetch first codepoint and return rest in tail -spec next_codepoint(String::unicode:chardata()) -> maybe_improper_list(char(),unicode:chardata()) | {error,unicode:chardata()}. @@ -498,10 +517,23 @@ next_codepoint(CD) -> unicode_util:cp(CD). %% Internals -length_1([_|Rest], N) -> - length_1(unicode_util:gc(Rest), N+1); -length_1([], N) -> - N. +length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) -> + length_1(Cont, N+1); +length_1(Str, N) -> + case unicode_util:gc(Str) of + [] -> N; + [_|Rest] -> length_1(Rest, N+1) + end. + +length_b(<<CP2/utf8, Rest/binary>>, CP1, N) + when ?ASCII_LIST(CP1,CP2) -> + length_b(Rest, CP2, N+1); +length_b(Bin0, CP1, N) -> + [_|Bin1] = unicode_util:gc([CP1|Bin0]), + case unicode_util:cp(Bin1) of + [] -> N+1; + [CP3|Bin] -> length_b(Bin, CP3, N+1) + end. equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) -> A =:= B andalso equal_1(AR, BR); @@ -540,29 +572,66 @@ equal_norm_nocase(A0, B0, Norm) -> {L1,L2} when is_list(L1), is_list(L2) -> false end. +reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) -> + reverse_1(Cont, [CP1|Acc]); reverse_1(CD, Acc) -> case unicode_util:gc(CD) of [GC|Rest] -> reverse_1(Rest, [GC|Acc]); [] -> Acc end. -slice_l(CD, N, Binary) when N > 0 -> +reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc) + when ?ASCII_LIST(CP1,CP2) -> + reverse_b(Rest, CP2, [CP1|Acc]); +reverse_b(Bin0, CP1, Acc) -> + [GC|Bin1] = unicode_util:gc([CP1|Bin0]), + case unicode_util:cp(Bin1) of + [] -> [GC|Acc]; + [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc]) + end. + +slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 -> + slice_lb(Bin, CP1, N); +slice_l0(L, N) -> + slice_l(L, N). + +slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> + slice_l(Cont, N-1); +slice_l(CD, N) when N > 0 -> case unicode_util:gc(CD) of - [_|Cont] -> slice_l(Cont, N-1, Binary); - [] when Binary -> <<>>; + [_|Cont] -> slice_l(Cont, N-1); [] -> [] end; -slice_l(Cont, 0, Binary) -> - case is_empty(Cont) of - true when Binary -> <<>>; - _ -> Cont +slice_l(Cont, 0) -> + Cont. + +slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 -> + slice_lb(Bin, CP2, N-1); +slice_lb(Bin, CP1, N) -> + [_|Rest] = unicode_util:gc([CP1|Bin]), + if N > 1 -> + case unicode_util:cp(Rest) of + [CP2|Cont] -> slice_lb(Cont, CP2, N-1); + [] -> <<>> + end; + N =:= 1 -> + Rest end. +slice_trail(Orig, N) when is_binary(Orig) -> + case Orig of + <<CP1/utf8, Bin/binary>> when N > 0 -> + Length = slice_bin(Bin, CP1, N), + Sz = byte_size(Orig) - Length, + <<Keep:Sz/binary, _/binary>> = Orig, + Keep; + _ -> <<>> + end; slice_trail(CD, N) when is_list(CD) -> - slice_list(CD, N); -slice_trail(CD, N) when is_binary(CD) -> - slice_bin(CD, N, CD). + slice_list(CD, N). +slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> + [CP1|slice_list(Cont, N-1)]; slice_list(CD, N) when N > 0 -> case unicode_util:gc(CD) of [GC|Cont] -> append(GC, slice_list(Cont, N-1)); @@ -571,17 +640,16 @@ slice_list(CD, N) when N > 0 -> slice_list(_, 0) -> []. -slice_bin(CD, N, Orig) when N > 0 -> - case unicode_util:gc(CD) of - [_|Cont] -> slice_bin(Cont, N-1, Orig); - [] -> Orig +slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 -> + slice_bin(Bin, CP2, N-1); +slice_bin(CD, CP1, N) when N > 0 -> + [_|Bin] = unicode_util:gc([CP1|CD]), + case unicode_util:cp(Bin) of + [CP2|Cont] -> slice_bin(Cont, CP2, N-1); + [] -> 0 end; -slice_bin([], 0, Orig) -> - Orig; -slice_bin(CD, 0, Orig) -> - Sz = byte_size(Orig) - byte_size(CD), - <<Keep:Sz/binary, _/binary>> = Orig, - Keep. +slice_bin(CD, CP1, 0) -> + byte_size(CD)+byte_size(<<CP1/utf8>>). uppercase_list(CPs0) -> case unicode_util:uppercase(CPs0) of @@ -631,16 +699,31 @@ casefold_bin(CPs0, Acc) -> [] -> Acc end. - +%% Fast path for ascii searching for one character in lists +trim_ls([CP1|[CP2|_]=Cont]=Str, Sep) + when ?ASCII_LIST(CP1,CP2) -> + case Sep of + CP1 -> trim_ls(Cont, Sep); + _ -> Str + end; +trim_ls(Str, Sep) -> + trim_l(Str, [Sep]). + +trim_l([CP1|[CP2|_]=Cont]=Str, Sep) + when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, Sep) of + true -> trim_l(Cont, Sep); + false -> Str + end; trim_l([Bin|Cont0], Sep) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, Sep) of {nomatch, Cont} -> trim_l(Cont, Sep); Keep -> Keep end; -trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) -> +trim_l(Str, Sep) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> - case lists:member(C, GCs) of + case lists:member(C, Sep) of true -> trim_l(Cs, Sep); false -> Str end; @@ -652,15 +735,51 @@ trim_l(Bin, Sep) when is_binary(Bin) -> [Keep] -> Keep end. -trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> +%% Fast path for ascii searching for one character in lists +trim_ts([Sep|Cs1]=Str, Sep) -> + case Cs1 of + [] -> []; + [CP2|_] when ?ASCII_LIST(Sep,CP2) -> + Tail = trim_ts(Cs1, Sep), + case is_empty(Tail) of + true -> []; + false -> [Sep|Tail] + end; + _ -> + trim_t(Str, 0, search_pattern([Sep])) + end; +trim_ts([CP|Cont],Sep) when is_integer(CP) -> + [CP|trim_ts(Cont, Sep)]; +trim_ts(Str, Sep) -> + trim_t(Str, 0, search_pattern([Sep])). + +trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Cs1] = unicode_util:gc(Cs0), + case lists:member(GC, GCs) of + true -> + Tail = trim_t(Cs1, 0, Seps), + case is_empty(Tail) of + true -> []; + false -> append(GC,Tail) + end; + false -> + append(GC,trim_t(Cs1, 0, Seps)) + end; + false -> + [CP1|trim_t(Cont, 0, Seps)] + end; +trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Cont0, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, Cont0, Seps) of {nomatch,_} -> - stack(Bin, trim_t(Cont0, 0, Sep)); + stack(Bin, trim_t(Cont0, 0, Seps)); [SepStart|Cont1] -> - case bin_search_inv(SepStart, Cont1, Sep) of + case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> - Tail = trim_t(Cont, 0, Sep), + Tail = trim_t(Cont, 0, Seps), case is_empty(Tail) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), @@ -672,67 +791,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - trim_t([Bin|Cont], KeepSz, Sep) + trim_t([Bin|Cont], KeepSz, Seps) end end; -trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of +trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - true -> - Tail = trim_t(Cs1, 0, Sep), - case is_empty(Tail) of - true -> []; - false -> append(GC,Tail) - end; - false -> - append(GC,trim_t(Cs1, 0, Sep)) + Tail = trim_t(Cs1, 0, Seps), + case is_empty(Tail) of + true -> []; + false -> append(GC,Tail) end; false -> - append(CP,trim_t(Cs, 0, Sep)) + append(GC,trim_t(Cs1, 0, Seps)) end; [] -> [] end; -trim_t(Bin, N, Sep) when is_binary(Bin) -> +trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, [], Seps) of {nomatch,_} -> Bin; [SepStart] -> - case bin_search_inv(SepStart, [], Sep) of + case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, _/binary>> = Bin, Keep; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - trim_t(Bin, KeepSz, Sep) + trim_t(Bin, KeepSz, Seps) end end. -take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Sep) of + +take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc) + when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, Seps) of + true -> take_l(Cont, Seps, [CP1|Acc]); + false -> {rev(Acc), Str} + end; +take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), - take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]); + take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <<Keep:First/binary, _/binary>> = Bin, {btoken(Keep,Acc), After} end; -take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) -> +take_l(Str, Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> - case lists:member(C, GCs) of - true -> take_l(Cs, Sep, append(rev(C),Acc)); + case lists:member(C, Seps) of + true -> take_l(Cs, Seps, append(rev(C),Acc)); false -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; -take_l(Bin, Sep, Acc) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Sep) of +take_l(Bin, Seps, Acc) when is_binary(Bin) -> + case bin_search_inv(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> @@ -741,27 +862,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) -> {btoken(Keep, Acc), After} end. -take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) -> - case bin_search(Bin, Cont0, Sep) of + +take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Str] = unicode_util:gc(Str0), + case lists:member(GC, GCs) of + false -> take_lc(Str, Seps, append(rev(GC),Acc)); + true -> {rev(Acc), Str0} + end; + false -> + take_lc(Cont, Seps, append(CP1,Acc)) + end; +take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), - take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]); + take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <<Keep:First/binary, _/binary>> = Bin, {btoken(Keep,Acc), After} end; -take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) -> +take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> case lists:member(C, GCs) of - false -> take_lc(Cs, Sep, append(rev(C),Acc)); + false -> take_lc(Cs, Seps, append(rev(C),Acc)); true -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; -take_lc(Bin, Sep, Acc) when is_binary(Bin) -> - case bin_search(Bin, [], Sep) of +take_lc(Bin, Seps0, Acc) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> @@ -770,148 +905,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) -> {btoken(Keep, Acc), After} end. -take_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> + +take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Str] = unicode_util:gc(Str0), + case lists:member(GC, GCs) of + true -> + {Head, Tail} = take_t(Str, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} + end; + false -> + {Head, Tail} = take_t(Str, 0, Seps), + {append(GC,Head), Tail} + end; + false -> + {Head, Tail} = take_t(Cont, 0, Seps), + {[CP1|Head], Tail} + end; +take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Cont0, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, Cont0, Seps) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), - {Head, Tail} = take_t(Cont, 0, Sep), + {Head, Tail} = take_t(Cont, 0, Seps), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> - case bin_search_inv(SepStart, Cont1, Sep) of + case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> - {Head, Tail} = take_t(Cont, 0, Sep), + {Head, Tail} = take_t(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), - case equal(Tail, Cont) of + case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, End/binary>> = Bin, - {stack(Keep,Head), stack(stack(End,Used),Tail)}; + {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_t([Bin|Cont], KeepSz, Sep) + take_t([Bin|Cont], KeepSz, Seps) end end; -take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of +take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - true -> - {Head, Tail} = take_t(Cs1, 0, Sep), - case equal(Tail, Cs1) of - true -> {Head, append(GC,Tail)}; - false -> {append(GC,Head), Tail} - end; - false -> - {Head, Tail} = take_t(Cs, 0, Sep), - {append(CP,Head), Tail} + {Head, Tail} = take_t(Cs1, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} end; false -> - {Head, Tail} = take_t(Cs, 0, Sep), - {append(CP,Head), Tail} + {Head, Tail} = take_t(Cs1, 0, Seps), + {append(GC,Head), Tail} end; [] -> {[],[]} end; -take_t(Bin, N, Sep) when is_binary(Bin) -> +take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, [], Seps) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> - case bin_search_inv(SepStart, [], Sep) of + case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Before:KeepSz/binary, End/binary>> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_t(Bin, KeepSz, Sep) + take_t(Bin, KeepSz, Seps) end end. -take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) -> +take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, GCs) of + false -> + {Head, Tail} = take_tc(Cont, 0, Seps), + case is_empty(Head) of + true -> {Head, append(CP1,Tail)}; + false -> {append(CP1,Head), Tail} + end; + true -> + {Head, Tail} = take_tc(Cont, 0, Seps), + {append(CP1,Head), Tail} + end; +take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search_inv(Rest, Cont0, Sep) of + case bin_search_inv(Rest, Cont0, GCs) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), - {Head, Tail} = take_tc(Cont, 0, Sep), + {Head, Tail} = take_tc(Cont, 0, Seps0), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> - case bin_search(SepStart, Cont1, Sep) of + Seps = search_compile(Seps0), + case bin_search(SepStart, Cont1, Seps) of {nomatch, Cont} -> - {Head, Tail} = take_tc(Cont, 0, Sep), + {Head, Tail} = take_tc(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), - case equal(Tail, Cont) of + case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, End/binary>> = Bin, - {stack(Keep,Head), stack(stack(End,Used),Tail)}; + {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_tc([Bin|Cont], KeepSz, Sep) + take_tc([Bin|Cont], KeepSz, Seps) end end; -take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of - true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - false -> - {Head, Tail} = take_tc(Cs1, 0, Sep), - case equal(Tail, Cs1) of - true -> {Head, append(GC,Tail)}; - false -> {append(GC,Head), Tail} - end; - true -> - {Head, Tail} = take_tc(Cs1, 0, Sep), - {append(GC,Head), Tail} - end; +take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of false -> - {Head, Tail} = take_tc(Cs, 0, Sep), - case equal(Tail, Cs) of - true -> {Head, append(CP,Tail)}; - false -> {append(CP,Head), Tail} - end + {Head, Tail} = take_tc(Cs1, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} + end; + true -> + {Head, Tail} = take_tc(Cs1, 0, Seps), + {append(GC,Head), Tail} end; [] -> {[],[]} end; -take_tc(Bin, N, Sep) when is_binary(Bin) -> +take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search_inv(Rest, [], Sep) of + case bin_search_inv(Rest, [], GCs) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> - case bin_search(SepStart, [], Sep) of + Seps = search_compile(Seps0), + case bin_search(SepStart, [], Seps) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Before:KeepSz/binary, End/binary>> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_tc(Bin, KeepSz, Sep) + take_tc(Bin, KeepSz, Seps) end end. -prefix_1(Cs, []) -> Cs; -prefix_1(Cs, [_]=Pre) -> - prefix_2(unicode_util:gc(Cs), Pre); -prefix_1(Cs, Pre) -> - prefix_2(unicode_util:cp(Cs), Pre). - -prefix_2([C|Cs], [C|Pre]) -> - prefix_1(Cs, Pre); -prefix_2(_, _) -> - nomatch. +prefix_1(Cs0, [GC]) -> + case unicode_util:gc(Cs0) of + [GC|Cs] -> Cs; + _ -> nomatch + end; +prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) -> + case CP =:= Pre of + true -> prefix_1(Cs,PreR); + false -> nomatch + end; +prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) -> + case CP =:= Pre of + true -> prefix_1(Cs,PreR); + false -> nomatch + end; +prefix_1(Cs0, [Pre|PreR]) -> + case unicode_util:cp(Cs0) of + [Pre|Cs] -> prefix_1(Cs,PreR); + _ -> nomatch + end. +split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) -> + case CP1=:=C of + true -> + case prefix_1(Cs0, Needle) of + nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc); + Rest when Where =:= leading -> + [rev(Curr), Rest]; + Rest when Where =:= trailing -> + split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]); + Rest when Where =:= all -> + split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc]) + end; + false -> + split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc) + end; split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc) when is_binary(Bin) -> case bin_search_str(Bin, Start, Cont0, Needle) of @@ -971,32 +1150,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) -> end end. -lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Seps) of +lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) -> + case lists:member(CP, CPs) of + true -> + [GC|Cs2] = unicode_util:gc(Cs0), + case lists:member(GC, GCs) of + true -> + lexemes_m(Cs2, Seps, Ts); + false -> + {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), + lexemes_m(Rest, Seps, [Lexeme|Ts]) + end; + false -> + {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), + lexemes_m(Rest, Seps, [Lexeme|Ts]) + end; +lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> - lexemes_m(Cont, Seps, Ts); + lexemes_m(Cont, Seps0, Ts); Cs -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; -lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) -> +lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> - lexemes_m(Cs, Seps, Ts); + lexemes_m(Cs, Seps0, Ts); false -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; [] -> lists:reverse(Ts) end; -lexemes_m(Bin, Seps, Ts) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Seps) of +lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> + case bin_search_inv(Bin, [], GCs) of {nomatch,_} -> lists:reverse(Ts); [Cs] -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts)) end. @@ -1027,7 +1224,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> {rev(Tkn), Cs0}; + true -> {rev(Tkn), Cs2}; false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn)) end; false -> @@ -1037,7 +1234,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) -> {rev(Tkn), []} end; lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) -> - case bin_search(Bin, Seps) of + case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin,Tkn), []}; [Left] -> @@ -1046,35 +1243,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) -> {btoken(Lexeme, Tkn), Left} end. -nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Seps) of +nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> - nth_lexeme_m(Cont, Seps, N); + nth_lexeme_m(Cont, Seps0, N); Cs when N > 1 -> - Rest = lexeme_skip(Cs, Seps), - nth_lexeme_m(Rest, Seps, N-1); + Rest = lexeme_skip(Cs, Seps0), + nth_lexeme_m(Rest, Seps0, N-1); Cs -> + Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs, Seps, []), Lexeme end; -nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) -> +nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> - nth_lexeme_m(Cs, Seps, N); + nth_lexeme_m(Cs, Seps0, N); false when N > 1 -> - Cs1 = lexeme_skip(Cs, Seps), - nth_lexeme_m(Cs1, Seps, N-1); + Cs1 = lexeme_skip(Cs, Seps0), + nth_lexeme_m(Cs1, Seps0, N-1); false -> + Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs0, Seps, []), Lexeme end; [] -> [] end; -nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Seps) of +nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search_inv(Bin, [], GCs) of [Cs] when N > 1 -> Cs1 = lexeme_skip(Cs, Seps), nth_lexeme_m(Cs1, Seps, N-1); @@ -1090,16 +1290,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> Cs0; + true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> lexeme_skip(Cs1, Seps) end; -lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) -> +lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) -> + Seps = search_compile(Seps0), case bin_search(Bin, Cont0, Seps) of {nomatch,_} -> lexeme_skip(Cont0, Seps); - Cs -> Cs + Cs -> tl(unicode_util:gc(Cs)) end; lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> case unicode_util:cp(Cs0) of @@ -1108,7 +1309,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> Cs0; + true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> @@ -1117,12 +1318,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> [] -> [] end; -lexeme_skip(Bin, Seps) when is_binary(Bin) -> - case bin_search(Bin, Seps) of +lexeme_skip(Bin, Seps0) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, [], Seps) of {nomatch,_} -> <<>>; - [Left] -> Left + [Left] -> tl(unicode_util:gc(Left)) end. +find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) -> + case C1 of + C -> + case prefix_1(Cs0, Needle) of + nomatch -> find_l(Cs, Needle); + _ -> Cs0 + end; + _ -> + find_l(Cs, Needle) + end; find_l([Bin|Cont0], Needle) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch, _, Cont} -> @@ -1147,6 +1359,16 @@ find_l(Bin, Needle) -> {_Before, [Cs], _After} -> Cs end. +find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) -> + case Cp of + C -> + case prefix_1(Cs0, Needle) of + nomatch -> find_r(Cs, Needle, Res); + _ -> find_r(Cs, Needle, Cs0) + end; + _ -> + find_r(Cs, Needle, Res) + end; find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch,_,Cont} -> @@ -1217,11 +1439,6 @@ cp_prefix_1(Orig, Until, Cont) -> %% Binary special -bin_search(Bin, Seps) -> - bin_search(Bin, [], Seps). - -bin_search(_Bin, Cont, {[],_,_}) -> - {nomatch, Cont}; bin_search(Bin, Cont, {Seps,_,BP}) -> bin_search_loop(Bin, 0, BP, Cont, Seps). @@ -1229,10 +1446,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) -> %% i.e. å in nfd form $a "COMBINING RING ABOVE" %% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>> %% combined with other characters are currently ignored. +search_pattern({_,_,_}=P) -> P; search_pattern(Seps) -> CPs = search_cp(Seps), - Bin = bin_pattern(CPs), - {Seps, CPs, Bin}. + {Seps, CPs, undefined}. + +search_compile({Sep, CPs, undefined}) -> + {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))}; +search_compile({_,_,_}=Compiled) -> Compiled. search_cp([CP|Seps]) when is_integer(CP) -> [CP|search_cp(Seps)]; @@ -1253,9 +1474,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) -> case binary:match(Bin, BinSeps) of nomatch -> {nomatch,Cont}; + {Where, _CL} when Cont =:= [] -> + <<_:Where/binary, Cont1/binary>> = Bin, + [GC|Cont2] = unicode_util:gc(Cont1), + case lists:member(GC, Seps) of + false when Cont2 =:= [] -> + {nomatch, []}; + false -> + Next = byte_size(Bin0) - byte_size(Cont2), + bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); + true -> + [Cont1] + end; {Where, _CL} -> <<_:Where/binary, Cont0/binary>> = Bin, - Cont1 = stack(Cont0, Cont), + Cont1 = [Cont0|Cont], [GC|Cont2] = unicode_util:gc(Cont1), case lists:member(GC, Seps) of false -> @@ -1263,55 +1496,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) -> [BinR|Cont] when is_binary(BinR) -> Next = byte_size(Bin0) - byte_size(BinR), bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); - BinR when is_binary(BinR), Cont =:= [] -> - Next = byte_size(Bin0) - byte_size(BinR), - bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); _ -> {nomatch, Cont2} end; - true when is_list(Cont1) -> - Cont1; true -> - [Cont1] + Cont1 end end. -bin_search_inv(Bin, Cont, {[], _, _}) -> - [Bin|Cont]; -bin_search_inv(Bin, Cont, {[Sep], _, _}) -> - bin_search_inv_1([Bin|Cont], Sep); -bin_search_inv(Bin, Cont, {Seps, _, _}) -> - bin_search_inv_n([Bin|Cont], Seps). - -bin_search_inv_1([<<>>|CPs], _) -> - {nomatch, CPs}; -bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) -> - case unicode_util:gc(CPs) of - [Sep|Bin] when is_binary(Bin), Cont =:= [] -> - bin_search_inv_1([Bin], Sep); - [Sep|[Bin|Cont]=Cs] when is_binary(Bin) -> - bin_search_inv_1(Cs, Sep); - [Sep|Cs] -> - {nomatch, Cs}; - _ -> CPs - end. +bin_search_inv(<<>>, Cont, _) -> + {nomatch, Cont}; +bin_search_inv(Bin, Cont, [Sep]) -> + bin_search_inv_1(Bin, Cont, Sep); +bin_search_inv(Bin, Cont, Seps) -> + bin_search_inv_n(Bin, Cont, Seps). + +bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) -> + case BinRest of + <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) -> + case CP1 of + Sep -> bin_search_inv_1(BinRest, Cont, Sep); + _ -> [Bin0|Cont] + end; + _ when Cont =:= [] -> + case unicode_util:gc(Bin0) of + [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep); + _ -> [Bin0|Cont] + end; + _ -> + case unicode_util:gc([Bin0|Cont]) of + [Sep|[Bin|Cont]] when is_binary(Bin) -> + bin_search_inv_1(Bin, Cont, Sep); + [Sep|Cs] -> + {nomatch, Cs}; + _ -> [Bin0|Cont] + end + end; +bin_search_inv_1(<<>>, Cont, _Sep) -> + {nomatch, Cont}; +bin_search_inv_1([], Cont, _Sep) -> + {nomatch, Cont}. -bin_search_inv_n([<<>>|CPs], _) -> - {nomatch, CPs}; -bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) -> - [C|Cs0] = unicode_util:gc(CPs), - case {lists:member(C, Seps), Cs0} of - {true, Cs} when is_binary(Cs), Cont =:= [] -> - bin_search_inv_n([Cs], Seps); - {true, [Bin|Cont]=Cs} when is_binary(Bin) -> - bin_search_inv_n(Cs, Seps); - {true, Cs} -> {nomatch, Cs}; - {false, _} -> CPs - end. +bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) -> + case BinRest of + <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) -> + case lists:member(CP1,Seps) of + true -> bin_search_inv_n(BinRest, Cont, Seps); + false -> [Bin0|Cont] + end; + _ when Cont =:= [] -> + [GC|Bin] = unicode_util:gc(Bin0), + case lists:member(GC, Seps) of + true -> bin_search_inv_n(Bin, Cont, Seps); + false -> [Bin0|Cont] + end; + _ -> + [GC|Cs0] = unicode_util:gc([Bin0|Cont]), + case lists:member(GC, Seps) of + false -> [Bin0|Cont]; + true -> + case Cs0 of + [Bin|Cont] when is_binary(Bin) -> + bin_search_inv_n(Bin, Cont, Seps); + _ -> + {nomatch, Cs0} + end + end + end; +bin_search_inv_n(<<>>, Cont, _Sep) -> + {nomatch, Cont}; +bin_search_inv_n([], Cont, _Sep) -> + {nomatch, Cont}. + +bin_search_str(Bin0, Start, [], SearchCPs) -> + Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)), + bin_search_str_1(Bin0, Start, Compiled, SearchCPs); bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> + First = binary:compile_pattern(<<CP/utf8>>), + bin_search_str_2(Bin0, Start, Cont, First, SearchCPs). + +bin_search_str_1(Bin0, Start, First, SearchCPs) -> + <<_:Start/binary, Bin/binary>> = Bin0, + case binary:match(Bin, First) of + nomatch -> {nomatch, byte_size(Bin0), []}; + {Where0, _} -> + Where = Start+Where0, + <<Keep:Where/binary, Cs0/binary>> = Bin0, + case prefix_1(Cs0, SearchCPs) of + nomatch -> + <<_/utf8, Cs/binary>> = Cs0, + KeepSz = byte_size(Bin0) - byte_size(Cs), + bin_search_str_1(Bin0, KeepSz, First, SearchCPs); + [] -> + {Keep, [Cs0], <<>>}; + Rest -> + {Keep, [Cs0], Rest} + end + end. + +bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) -> <<_:Start/binary, Bin/binary>> = Bin0, - case binary:match(Bin, <<CP/utf8>>) of + case binary:match(Bin, First) of nomatch -> {nomatch, byte_size(Bin0), Cont}; {Where0, _} -> Where = Start+Where0, @@ -1320,7 +1606,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> case prefix_1(stack(Cs0,Cont), SearchCPs) of nomatch when is_binary(Cs) -> KeepSz = byte_size(Bin0) - byte_size(Cs), - bin_search_str(Bin0, KeepSz, Cont, SearchCPs); + bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs); nomatch -> {nomatch, Where, stack([GC|Cs],Cont)}; [] -> diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 90f980c0e5..f43bfb4482 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -92,14 +92,11 @@ end_per_testcase(_Case, _Config) -> ok. debug() -> - Config = [{data_dir, ?MODULE_STRING++"_data"}], + Config = [{data_dir, "./" ++ ?MODULE_STRING++"_data"}], [io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) || {_,Tests} <- groups(), Test <- Tests]. -define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)). --define(TEST_EQ(B,C,D), - test(?LINE,?FUNCTION_NAME,B,C,D, true), - test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)). -define(TEST_NN(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, false), @@ -294,6 +291,7 @@ trim(_) -> ?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"), ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"), %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST([1013,101,778,101,101], [trailing, [101]], [1013,101,778]), ?TEST("aaåaa", [both, "a"], "å"), ?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"), ?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"), @@ -353,6 +351,7 @@ take(_) -> ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}), ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}), %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST([101,778], [[[101, 779]], true], {[101,778], []}), ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}), ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}), ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}), @@ -713,29 +712,123 @@ nth_lexeme(_) -> meas(Config) -> + Parent = self(), + Exec = fun() -> + DataDir0 = proplists:get_value(data_dir, Config), + DataDir = filename:join(lists:droplast(filename:split(DataDir0))), + case proplists:get_value(profile, Config, false) of + false -> + do_measure(DataDir); + eprof -> + eprof:profile(fun() -> do_measure(DataDir) end, [set_on_spawn]), + eprof:stop_profiling(), + eprof:analyze(), + eprof:stop() + end, + Parent ! {test_done, self()}, + normal + end, + ct:timetrap({minutes,2}), case ct:get_timetrap_info() of {_,{_,Scale}} when Scale > 1 -> {skip,{will_not_run_in_debug,Scale}}; - _ -> % No scaling - DataDir = proplists:get_value(data_dir, Config), - TestDir = filename:dirname(string:trim(DataDir, trailing, "/")), - do_measure(TestDir) + _ -> % No scaling, run at most 1.5 min + Tester = spawn(Exec), + receive {test_done, Tester} -> ok + after 90000 -> + io:format("Timelimit reached stopping~n",[]), + exit(Tester, die) + end, + ok end. -do_measure(TestDir) -> - File = filename:join(TestDir, ?MODULE_STRING ++ ".erl"), +do_measure(DataDir) -> + File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]), io:format("File ~s ",[File]), {ok, Bin} = file:read_file(File), io:format("~p~n",[byte_size(Bin)]), Do = fun(Name, Func, Mode) -> {N, Mean, Stddev, _} = time_func(Func, Mode, Bin), - io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n", + io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n", [Name, Mode, Mean/1000, Stddev/1000, N]) end, + Do2 = fun(Name, Func, Mode) -> + {N, Mean, Stddev, _} = time_func(Func, binary, <<>>), + io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n", + [Name, Mode, Mean/1000, Stddev/1000, N]) + end, io:format("----------------------~n"), - Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list), + + Do(old_tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list), Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end}, [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]], + + S0 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....", + S0B = <<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....">>, + Do2(old_strip_l, repeat(fun() -> string:strip(S0, left, $x) end), list), + Do2(trim_l, repeat(fun() -> string:trim(S0, leading, [$x]) end), list), + Do2(trim_l, repeat(fun() -> string:trim(S0B, leading, [$x]) end), binary), + Do2(old_strip_r, repeat(fun() -> string:strip(S0, right, $.) end), list), + Do2(trim_t, repeat(fun() -> string:trim(S0, trailing, [$.]) end), list), + Do2(trim_t, repeat(fun() -> string:trim(S0B, trailing, [$.]) end), binary), + + Do2(old_chr_sub, repeat(fun() -> string:sub_string(S0, string:chr(S0, $.)) end), list), + Do2(old_str_sub, repeat(fun() -> string:sub_string(S0, string:str(S0, [$.])) end), list), + Do2(find, repeat(fun() -> string:find(S0, [$.]) end), list), + Do2(find, repeat(fun() -> string:find(S0B, [$.]) end), binary), + Do2(old_str_sub2, repeat(fun() -> N = string:str(S0, "xy.."), + {string:sub_string(S0,1,N), string:sub_string(S0,N+4)} end), list), + Do2(split, repeat(fun() -> string:split(S0, "xy..") end), list), + Do2(split, repeat(fun() -> string:split(S0B, "xy..") end), binary), + + Do2(old_rstr_sub, repeat(fun() -> string:sub_string(S0, string:rstr(S0, [$y])) end), list), + Do2(find_t, repeat(fun() -> string:find(S0, [$y], trailing) end), list), + Do2(find_t, repeat(fun() -> string:find(S0B, [$y], trailing) end), binary), + Do2(old_rstr_sub2, repeat(fun() -> N = string:rstr(S0, "y.."), + {string:sub_string(S0,1,N), string:sub_string(S0,N+3)} end), list), + Do2(split_t, repeat(fun() -> string:split(S0, "y..", trailing) end), list), + Do2(split_t, repeat(fun() -> string:split(S0B, "y..", trailing) end), binary), + + Do2(old_span, repeat(fun() -> N=string:span(S0, [$x, $y]), + {string:sub_string(S0,1,N),string:sub_string(S0,N+1)} + end), list), + Do2(take, repeat(fun() -> string:take(S0, [$x, $y]) end), list), + Do2(take, repeat(fun() -> string:take(S0B, [$x, $y]) end), binary), + + Do2(old_cspan, repeat(fun() -> N=string:cspan(S0, [$.,$y]), + {string:sub_string(S0,1,N),string:sub_string(S0,N+1)} + end), list), + Do2(take_c, repeat(fun() -> string:take(S0, [$.,$y], true) end), list), + Do2(take_c, repeat(fun() -> string:take(S0B, [$.,$y], true) end), binary), + + Do2(old_substr, repeat(fun() -> string:substr(S0, 21, 15) end), list), + Do2(slice, repeat(fun() -> string:slice(S0, 20, 15) end), list), + Do2(slice, repeat(fun() -> string:slice(S0B, 20, 15) end), binary), + + io:format("--~n",[]), + NthTokens = {nth_lexemes, fun(Str) -> string:nth_lexeme(Str, 18000, [$\n,$\r]) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- [list, binary]], + Do2(take_t, repeat(fun() -> string:take(S0, [$.,$y], false, trailing) end), list), + Do2(take_t, repeat(fun() -> string:take(S0B, [$.,$y], false, trailing) end), binary), + Do2(take_tc, repeat(fun() -> string:take(S0, [$x], true, trailing) end), list), + Do2(take_tc, repeat(fun() -> string:take(S0B, [$x], true, trailing) end), binary), + + Length = {length, fun(Str) -> string:length(Str) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- [list, binary]], + + Reverse = {reverse, fun(Str) -> string:reverse(Str) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- [list, binary]], + + ok. + +repeat(F) -> + fun(_) -> repeat_1(F,20000) end. + +repeat_1(F, N) when N > 0 -> + F(), + repeat_1(F, N-1); +repeat_1(_, _) -> + erlang:garbage_collect(), ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -865,8 +958,6 @@ check_types_1({list, _},{list, undefined}) -> ok; check_types_1({list, _},{list, codepoints}) -> ok; -check_types_1({list, _},{list, {list, codepoints}}) -> - ok; check_types_1({list, {list, _}},{list, {list, codepoints}}) -> ok; check_types_1(mixed,_) -> @@ -947,7 +1038,7 @@ time_func(Fun, Mode, Bin) -> end), receive {Pid,Msg} -> Msg end. -time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 -> +time_func(N,Sum,SumSq, Fun, Str, _) when N < 20 -> {Time, Res} = timer:tc(fun() -> Fun(Str) end), time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); time_func(N,Sum,SumSq, _, _, Res) -> diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index 03c24c7027..a89627eba5 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -310,12 +310,23 @@ get(_) -> add_get_tests. count(Config) -> + Parent = self(), + Exec = fun() -> + do_measure(Config), + Parent ! {test_done, self()} + end, ct:timetrap({minutes,5}), case ct:get_timetrap_info() of - {_,{_,Scale}} -> + {_,{_,Scale}} when Scale > 1 -> {skip,{measurments_skipped_debug,Scale}}; - _ -> % No scaling - do_measure(Config) + _ -> % No scaling, run at most 2 min + Tester = spawn(Exec), + receive {test_done, Tester} -> ok + after 120000 -> + io:format("Timelimit reached stopping~n",[]), + exit(Tester, die) + end, + ok end. do_measure(Config) -> diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript index fefd7d3b70..73c351e1af 100755 --- a/lib/stdlib/uc_spec/gen_unicode_mod.escript +++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript @@ -170,7 +170,7 @@ gen_header(Fd) -> io:put_chars(Fd, "-export([spec_version/0, lookup/1, get_case/1]).\n"), io:put_chars(Fd, "-inline([class/1]).\n"), io:put_chars(Fd, "-compile(nowarn_unused_vars).\n"), - io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc_prepend/2, gc_e_cont/2]}).\n"), + io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc/1, gc_prepend/2, gc_e_cont/2]}).\n"), io:put_chars(Fd, "-type gc() :: char()|[char()].\n\n\n"), ok. @@ -240,7 +240,7 @@ gen_norm(Fd) -> "-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfd(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [decompose(GC)|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -250,7 +250,7 @@ gen_norm(Fd) -> "-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkd(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [decompose_compat(GC)|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -260,7 +260,7 @@ gen_norm(Fd) -> "-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfc(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 255 -> [GC|R];\n" + " [GC|R] when GC < 256 -> [GC|R];\n" " [GC|Str] -> [compose(decompose(GC))|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -270,7 +270,7 @@ gen_norm(Fd) -> "-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkc(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [compose_compat_0(decompose_compat(GC))|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -476,13 +476,30 @@ gen_gc(Fd, GBP) -> "-spec gc(String::unicode:chardata()) ->" " maybe_improper_list() | {error, unicode:chardata()}.\n"), io:put_chars(Fd, + "gc([CP1, CP2|_]=T)\n" + " when CP1 < 256, CP2 < 256, CP1 =/= $\r -> %% Ascii Fast path\n" + " T;\n" + "gc(<<CP1/utf8, Rest/binary>>) ->\n" + " if CP1 < 256, CP1 =/= $\r ->\n" + " case Rest of\n" + " <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n" + " [CP1|Rest];\n" + " _ -> gc_1([CP1|Rest])\n" + " end;\n" + " true -> gc_1([CP1|Rest])\n" + " end;\n" "gc(Str) ->\n" " gc_1(cp(Str)).\n\n" "gc_1([$\\r|R0] = R) ->\n" " case cp(R0) of % Don't break CRLF\n" " [$\\n|R1] -> [[$\\r,$\\n]|R1];\n" " _ -> R\n" - " end;\n"), + " end;\n" + %% "gc_1([CP1, CP2|_]=T) when CP1 < 256, CP2 < 256 ->\n" + %% " T; %% Fast path\n" + %% "gc_1([CP1|<<CP2/utf8, _/binary>>]=T) when CP1 < 256, CP2 < 256 ->\n" + %% " T; %% Fast path\n" + ), io:put_chars(Fd, "%% Handle control\n"), GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end, @@ -490,7 +507,7 @@ gen_gc(Fd, GBP) -> [R1,R2,R3|Crs] = CRs0, [GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}], %%GenControl(R1),GenControl(R2),GenControl(R3), - io:format(Fd, "gc_1([CP|R]) when CP < 255 -> gc_extend(R,CP);\n", []), + io:format(Fd, "gc_1([CP|R]) when CP < 256 -> gc_extend(R,CP);\n", []), [GenControl(CP) || CP <- Crs], %% One clause per CP %% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)), |