about summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/dialyzer/src/dialyzer_typesig.erl 12
-rw-r--r-- lib/dialyzer/test/map_SUITE_data/results/map_anon_fun 2
-rw-r--r-- lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl 9
-rw-r--r-- lib/dialyzer/test/plt_SUITE.erl 10
-rw-r--r-- lib/hipe/cerl/erl_types.erl 4
-rw-r--r-- lib/mnesia/src/mnesia.erl 3
-rw-r--r-- lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem 14
-rw-r--r-- lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf 8
-rw-r--r-- lib/ssl/src/dtls_connection.erl 84
-rw-r--r-- lib/ssl/src/ssl_connection.erl 14
-rw-r--r-- lib/ssl/src/tls_connection.erl 63
-rw-r--r-- lib/stdlib/doc/src/unicode.xml 9
-rw-r--r-- lib/stdlib/src/string.erl 748
-rw-r--r-- lib/stdlib/test/string_SUITE.erl 121
-rw-r--r-- lib/stdlib/test/unicode_util_SUITE.erl 17
-rwxr-xr-x lib/stdlib/uc_spec/gen_unicode_mod.escript 31
16 files changed, 785 insertions, 364 deletions
diff --git a/lib/dialyzer/src/dialyzer_typesig.erl b/lib/dialyzer/src/dialyzer_typesig.erl
index c4d8f45447..d03326ec97 100644
--- a/lib/dialyzer/src/dialyzer_typesig.erl
+++ b/lib/dialyzer/src/dialyzer_typesig.erl
@@ -41,7 +41,7 @@
t_is_float/1, t_is_fun/1,
t_is_integer/1, t_non_neg_integer/0,
t_is_list/1, t_is_nil/1, t_is_none/1, t_is_number/1,
- t_is_singleton/1,
+ t_is_singleton/1, t_is_none_or_unit/1,
t_limit/2, t_list/0, t_list/1,
t_list_elements/1, t_nonempty_list/1, t_maybe_improper_list/0,
@@ -528,13 +528,14 @@ traverse(Tree, DefinedVars, State) ->
false -> t_any();
true ->
MT = t_inf(lookup_type(MapVar, Map), t_map()),
- case t_is_none(MT) of
+ case t_is_none_or_unit(MT) of
true -> t_none();
false ->
DisjointFromKeyType =
fun(ShadowKey) ->
- t_is_none(t_inf(lookup_type(ShadowKey, Map),
- KeyType))
+ ST = t_inf(lookup_type(ShadowKey, Map),
+ KeyType),
+ t_is_none_or_unit(ST)
end,
case lists:all(DisjointFromKeyType, ShadowKeys) of
true -> t_map_get(KeyType, MT);
@@ -567,7 +568,8 @@ traverse(Tree, DefinedVars, State) ->
case cerl:is_literal(OpTree) andalso
cerl:concrete(OpTree) =:= exact of
true ->
- case t_is_none(t_inf(ShadowedKeys, KeyType)) of
+ ST = t_inf(ShadowedKeys, KeyType),
+ case t_is_none_or_unit(ST) of
true ->
t_map_put({KeyType, t_any()}, AccType);
false ->
diff --git a/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun b/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun
new file mode 100644
index 0000000000..cfca5b1407
--- /dev/null
+++ b/lib/dialyzer/test/map_SUITE_data/results/map_anon_fun
@@ -0,0 +1,2 @@
+
+map_anon_fun.erl:4: Function g/1 will never be called
diff --git a/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl b/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl
new file mode 100644
index 0000000000..e77016d68a
--- /dev/null
+++ b/lib/dialyzer/test/map_SUITE_data/src/map_anon_fun.erl
@@ -0,0 +1,9 @@
+-module(map_anon_fun).
+
+%% Not exported.
+g(A) ->
+ maps:map(fun F(K, {V, _C}) ->
+ F(K, V);
+ F(_K, _V) ->
+ #{ system => {A} }
+ end, #{}).
diff --git a/lib/dialyzer/test/plt_SUITE.erl b/lib/dialyzer/test/plt_SUITE.erl
index a8a9f176fc..680f5b5088 100644
--- a/lib/dialyzer/test/plt_SUITE.erl
+++ b/lib/dialyzer/test/plt_SUITE.erl
@@ -283,8 +283,8 @@ bad_dialyzer_attr(Config) ->
{dialyzer_error,
"Analysis failed with error:\n" ++ Str1} =
(catch dialyzer:run(Opts)),
- P1 = string:str(Str1, "dial.erl:2: function undef/0 undefined"),
- true = P1 > 0,
+ S1 = string:find(Str1, "dial.erl:2: function undef/0 undefined"),
+ true = is_list(S1),
Prog2 = <<"-module(dial).
-dialyzer({no_return, [{undef,1,2}]}).">>,
@@ -292,9 +292,9 @@ bad_dialyzer_attr(Config) ->
{dialyzer_error,
"Analysis failed with error:\n" ++ Str2} =
(catch dialyzer:run(Opts)),
- P2 = string:str(Str2, "dial.erl:2: badly formed dialyzer "
- "attribute: {no_return,{undef,1,2}}"),
- true = P2 > 0,
+ S2 = string:find(Str2, "dial.erl:2: badly formed dialyzer "
+ "attribute: {no_return,{undef,1,2}}"),
+ true = is_list(S2),
ok.
diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl
index abb6c259f6..4e0f93212d 100644
--- a/lib/hipe/cerl/erl_types.erl
+++ b/lib/hipe/cerl/erl_types.erl
@@ -1877,6 +1877,7 @@ t_map_put(KV, Map, Opaques) ->
%% Key and Value are *not* unopaqued, but the map is
map_put(_, ?none, _) -> ?none;
+map_put(_, ?unit, _) -> ?none;
map_put({Key, Value}, ?map(Pairs,DefK,DefV), Opaques) ->
case t_is_none_or_unit(Key) orelse t_is_none_or_unit(Value) of
true -> ?none;
@@ -1902,6 +1903,7 @@ t_map_update(KV, Map) ->
-spec t_map_update({erl_type(), erl_type()}, erl_type(), opaques()) -> erl_type().
t_map_update(_, ?none, _) -> ?none;
+t_map_update(_, ?unit, _) -> ?none;
t_map_update(KV={Key, _}, M, Opaques) ->
case t_is_subtype(t_atom('true'), t_map_is_key(Key, M, Opaques)) of
false -> ?none;
@@ -1922,6 +1924,7 @@ t_map_get(Key, Map, Opaques) ->
end).
map_get(_, ?none) -> ?none;
+map_get(_, ?unit) -> ?none;
map_get(Key, ?map(Pairs, DefK, DefV)) ->
DefRes =
case t_do_overlap(DefK, Key) of
@@ -1957,6 +1960,7 @@ t_map_is_key(Key, Map, Opaques) ->
end).
map_is_key(_, ?none) -> ?none;
+map_is_key(_, ?unit) -> ?none;
map_is_key(Key, ?map(Pairs, DefK, _DefV)) ->
case is_singleton_type(Key) of
true ->
diff --git a/lib/mnesia/src/mnesia.erl b/lib/mnesia/src/mnesia.erl
index b68b2de028..1842769778 100644
--- a/lib/mnesia/src/mnesia.erl
+++ b/lib/mnesia/src/mnesia.erl
@@ -151,7 +151,8 @@
{'snmp', SnmpStruct::term()} |
{'storage_properties', [{Backend::module(), [BackendProp::_]}]} |
{'type', 'set' | 'ordered_set' | 'bag'} |
- {'local_content', boolean()}.
+ {'local_content', boolean()} |
+ {'user_properties', proplists:proplist()}.
-type t_result(Res) :: {'atomic', Res} | {'aborted', Reason::term()}.
-type activity() :: 'ets' | 'async_dirty' | 'sync_dirty' | 'transaction' | 'sync_transaction' |
diff --git a/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem b/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem
index dc20285f30..97d12cdadf 100644
--- a/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem
+++ b/lib/public_key/test/public_key_SUITE_data/pkix_verify_hostname_subjAltName_IP.pem
@@ -1,5 +1,4 @@
-----BEGIN CERTIFICATE-----
-<<<<<<< HEAD
MIICBzCCAXCgAwIBAgIJAJgbo5FL73LuMA0GCSqGSIb3DQEBCwUAMCMxCzAJBgNV
BAYTAlNFMRQwEgYDVQQDEwtleGFtcGxlLmNvbTAeFw0xNzEwMTExMDM0NDJaFw0x
NzExMTAxMDM0NDJaMCMxCzAJBgNVBAYTAlNFMRQwEgYDVQQDEwtleGFtcGxlLmNv
@@ -11,17 +10,4 @@ S4cQq80A7wAAAAAAAAAAAAAAAYYTaHR0cHM6Ly8xMC4xMS4xMi4xMzANBgkqhkiG
9w0BAQsFAAOBgQDMn8aqs/5FkkWhspvN2n+D2l87M+33a5My54ZVZhayZ/KRmhCN
Gix/BiVYJ3UlmWmGcnQXb3MLt/LQHaD3S2whDaLN3xJ8BbnX7A4ZTybitdyeFhDw
K3iDVUM3bSsBJ4EcBPWIMnow3ALP5HlGRMlH/87Qt+uVPXuwNh9pmyIhRQ==
-=======
-MIIB/zCCAWigAwIBAgIJAMoSejmTjwAGMA0GCSqGSIb3DQEBCwUAMB8xCzAJBgNV
-BAYTAlNFMRAwDgYDVQQDEwc1LjYuNy44MB4XDTE3MDkyODE0MDAxNVoXDTE3MTAy
-ODE0MDAxNVowHzELMAkGA1UEBhMCU0UxEDAOBgNVBAMTBzUuNi43LjgwgZ8wDQYJ
-KoZIhvcNAQEBBQADgY0AMIGJAoGBAMUPU89KwVbTCDkyxQSz3wprMbZTLe35K6jm
-Q7oY1rJyVXjsFHwZrFqqNMScEyX40rJhczQ2Z9etEX6qYLbdb/DZeFcKo14fR583
-QMFZC+qqpLWHdvjaQN0KwD99VFeZIGpRgywG8SR+BXZjDHUkGsMrikAEJtf0Tgih
-IPyiFtiJAgMBAAGjQzBBMD8GA1UdEQQ4MDaCBzEuMi4zLjSHBAUGBwiHEKvNAO8A
-AAAAAAAAAAAAAAGGE2h0dHBzOi8vMTAuMTEuMTIuMTMwDQYJKoZIhvcNAQELBQAD
-gYEAtWVeQaRFZ0kH/pzSWMSsOCUrjbwlWRwDNbagNKoM6nCRv0QQ59fG6XrVZwR3
-c0s5arlMh3U2+bjKE+Iq9+b/lN1lGzf8iaAqBNa7KptwTSUEY3TiNG5X0zlSXKTI
-3z7AaUEtghL9ImCPj5V3tVksqWd7U0zLmeeLZnM+wGAL9Hc=
->>>>>>> maint-20
-----END CERTIFICATE-----
diff --git a/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf b/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf
index f27dac07ec..798592e4f6 100644
--- a/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf
+++ b/lib/public_key/test/public_key_SUITE_data/verify_hostname_ip.conf
@@ -5,21 +5,13 @@ distinguished_name = DN
[DN]
C=SE
CN=example.com
-<<<<<<< HEAD
-=======
-CN=5.6.7.8
->>>>>>> maint-20
[SAN]
subjectAltName = @alt_names
[alt_names]
DNS = 1.2.3.4
-<<<<<<< HEAD
IP.1 = 10.67.16.75
-=======
-IP.1 = 5.6.7.8
->>>>>>> maint-20
IP.2 = abcd:ef::1
URI = https://10.11.12.13
diff --git a/lib/ssl/src/dtls_connection.erl b/lib/ssl/src/dtls_connection.erl
index 15eb39e716..073cb4009b 100644
--- a/lib/ssl/src/dtls_connection.erl
+++ b/lib/ssl/src/dtls_connection.erl
@@ -232,8 +232,6 @@ next_event(StateName, Record,
#alert{} = Alert ->
{next_state, StateName, State0, [{next_event, internal, Alert} | Actions]}
end.
-handle_call(Event, From, StateName, State) ->
- ssl_connection:handle_call(Event, From, StateName, State, ?MODULE).
handle_common_event(internal, #alert{} = Alert, StateName,
#state{negotiated_version = Version} = State) ->
@@ -446,21 +444,21 @@ init({call, From}, {start, Timeout},
{Record, State} = next_record(State3),
next_event(hello, Record, State, Actions);
init({call, _} = Type, Event, #state{role = server, transport_cb = gen_udp} = State) ->
- Result = ssl_connection:?FUNCTION_NAME(Type, Event,
- State#state{flight_state = {retransmit, ?INITIAL_RETRANSMIT_TIMEOUT},
- protocol_specific = #{current_cookie_secret => dtls_v1:cookie_secret(),
- previous_cookie_secret => <<>>,
- ignored_alerts => 0,
- max_ignored_alerts => 10}},
- ?MODULE),
+ Result = gen_handshake(?FUNCTION_NAME, Type, Event,
+ State#state{flight_state = {retransmit, ?INITIAL_RETRANSMIT_TIMEOUT},
+ protocol_specific = #{current_cookie_secret => dtls_v1:cookie_secret(),
+ previous_cookie_secret => <<>>,
+ ignored_alerts => 0,
+ max_ignored_alerts => 10}}),
erlang:send_after(dtls_v1:cookie_timeout(), self(), new_cookie_secret),
Result;
init({call, _} = Type, Event, #state{role = server} = State) ->
%% I.E. DTLS over sctp
- ssl_connection:?FUNCTION_NAME(Type, Event, State#state{flight_state = reliable}, ?MODULE);
+ gen_handshake(?FUNCTION_NAME, Type, Event, State#state{flight_state = reliable});
init(Type, Event, State) ->
- ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
+
%%--------------------------------------------------------------------
-spec error(gen_statem:event_type(),
{start, timeout()} | term(), #state{}) ->
@@ -470,8 +468,8 @@ error(enter, _, State) ->
{keep_state, State};
error({call, From}, {start, _Timeout}, {Error, State}) ->
{stop_and_reply, normal, {reply, From, {error, Error}}, State};
-error({call, From}, Msg, State) ->
- handle_call(Msg, From, ?FUNCTION_NAME, State);
+error({call, _} = Call, Msg, State) ->
+ gen_handshake(?FUNCTION_NAME, Call, Msg, State);
error(_, _, _) ->
{keep_state_and_data, [postpone]}.
@@ -566,11 +564,11 @@ hello(internal, {handshake, {#hello_verify_request{} = Handshake, _}}, State) ->
%% hello_verify should not be in handshake history
{next_state, ?FUNCTION_NAME, State, [{next_event, internal, Handshake}]};
hello(info, Event, State) ->
- handle_info(Event, ?FUNCTION_NAME, State);
+ gen_info(Event, ?FUNCTION_NAME, State);
hello(state_timeout, Event, State) ->
handle_state_timeout(Event, ?FUNCTION_NAME, State);
hello(Type, Event, State) ->
- ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec abbreviated(gen_statem:event_type(), term(), #state{}) ->
@@ -580,22 +578,21 @@ abbreviated(enter, _, State0) ->
{State, Actions} = handle_flight_timer(State0),
{keep_state, State, Actions};
abbreviated(info, Event, State) ->
- handle_info(Event, ?FUNCTION_NAME, State);
+ gen_info(Event, ?FUNCTION_NAME, State);
abbreviated(internal = Type,
#change_cipher_spec{type = <<1>>} = Event,
#state{connection_states = ConnectionStates0} = State) ->
ConnectionStates1 = dtls_record:save_current_connection_state(ConnectionStates0, read),
ConnectionStates = dtls_record:next_epoch(ConnectionStates1, read),
- ssl_connection:?FUNCTION_NAME(Type, Event, State#state{connection_states = ConnectionStates}, ?MODULE);
+ gen_handshake(?FUNCTION_NAME, Type, Event, State#state{connection_states = ConnectionStates});
abbreviated(internal = Type, #finished{} = Event, #state{connection_states = ConnectionStates} = State) ->
- ssl_connection:?FUNCTION_NAME(Type, Event,
- prepare_flight(State#state{connection_states = ConnectionStates,
- flight_state = connection}), ?MODULE);
+ gen_handshake(?FUNCTION_NAME, Type, Event,
+ prepare_flight(State#state{connection_states = ConnectionStates,
+ flight_state = connection}));
abbreviated(state_timeout, Event, State) ->
handle_state_timeout(Event, ?FUNCTION_NAME, State);
abbreviated(Type, Event, State) ->
- ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE).
-
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec certify(gen_statem:event_type(), term(), #state{}) ->
gen_statem:state_function_result().
@@ -604,13 +601,13 @@ certify(enter, _, State0) ->
{State, Actions} = handle_flight_timer(State0),
{keep_state, State, Actions};
certify(info, Event, State) ->
- handle_info(Event, ?FUNCTION_NAME, State);
+ gen_info(Event, ?FUNCTION_NAME, State);
certify(internal = Type, #server_hello_done{} = Event, State) ->
ssl_connection:certify(Type, Event, prepare_flight(State), ?MODULE);
certify(state_timeout, Event, State) ->
handle_state_timeout(Event, ?FUNCTION_NAME, State);
certify(Type, Event, State) ->
- ssl_connection:?FUNCTION_NAME(Type, Event, State, ?MODULE).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec cipher(gen_statem:event_type(), term(), #state{}) ->
@@ -620,7 +617,7 @@ cipher(enter, _, State0) ->
{State, Actions} = handle_flight_timer(State0),
{keep_state, State, Actions};
cipher(info, Event, State) ->
- handle_info(Event, ?FUNCTION_NAME, State);
+ gen_info(Event, ?FUNCTION_NAME, State);
cipher(internal = Type, #change_cipher_spec{type = <<1>>} = Event,
#state{connection_states = ConnectionStates0} = State) ->
ConnectionStates1 = dtls_record:save_current_connection_state(ConnectionStates0, read),
@@ -644,7 +641,7 @@ cipher(Type, Event, State) ->
connection(enter, _, State) ->
{keep_state, State};
connection(info, Event, State) ->
- handle_info(Event, ?FUNCTION_NAME, State);
+ gen_info(Event, ?FUNCTION_NAME, State);
connection(internal, #hello_request{}, #state{host = Host, port = Port,
session = #session{own_certificate = Cert} = Session0,
session_cache = Cache, session_cache_cb = CacheCb,
@@ -864,7 +861,7 @@ handle_info(new_cookie_secret, StateName,
CookieInfo#{current_cookie_secret => dtls_v1:cookie_secret(),
previous_cookie_secret => Secret}}};
handle_info(Msg, StateName, State) ->
- ssl_connection:handle_info(Msg, StateName, State).
+ ssl_connection:StateName(info, Msg, State, ?MODULE).
handle_state_timeout(flight_retransmission_timeout, StateName,
#state{flight_state = {retransmit, NextTimeout}} = State0) ->
@@ -907,6 +904,39 @@ encode_change_cipher(#change_cipher_spec{}, Version, Epoch, ConnectionStates) ->
decode_alerts(Bin) ->
ssl_alert:decode(Bin).
+gen_handshake(StateName, Type, Event,
+ #state{negotiated_version = Version} = State) ->
+ try ssl_connection:StateName(Type, Event, State, ?MODULE) of
+ Result ->
+ Result
+ catch
+ _:_ ->
+ ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE,
+ malformed_handshake_data),
+ Version, StateName, State)
+ end.
+
+gen_info(Event, connection = StateName, #state{negotiated_version = Version} = State) ->
+ try handle_info(Event, StateName, State) of
+ Result ->
+ Result
+ catch
+ _:_ ->
+ ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?INTERNAL_ERROR,
+ malformed_data),
+ Version, StateName, State)
+ end;
+
+gen_info(Event, StateName, #state{negotiated_version = Version} = State) ->
+ try handle_info(Event, StateName, State) of
+ Result ->
+ Result
+ catch
+ _:_ ->
+ ssl_connection:handle_own_alert(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE,
+ malformed_handshake_data),
+ Version, StateName, State)
+ end.
unprocessed_events(Events) ->
%% The first handshake event will be processed immediately
%% as it is entered first in the event queue and
diff --git a/lib/ssl/src/ssl_connection.erl b/lib/ssl/src/ssl_connection.erl
index 452bbb6816..3531cdda11 100644
--- a/lib/ssl/src/ssl_connection.erl
+++ b/lib/ssl/src/ssl_connection.erl
@@ -61,14 +61,11 @@
%% General gen_statem state functions with extra callback argument
%% to determine if it is an SSL/TLS or DTLS gen_statem machine
--export([init/4, hello/4, abbreviated/4, certify/4, cipher/4, connection/4, downgrade/4]).
+-export([init/4, error/4, hello/4, abbreviated/4, certify/4, cipher/4, connection/4, downgrade/4]).
%% gen_statem callbacks
-export([terminate/3, format_status/2]).
-%% TODO: do not export, call state function instead
--export([handle_info/3, handle_call/5, handle_common_event/5]).
-
%%====================================================================
%% Setup
%%====================================================================
@@ -539,6 +536,15 @@ init(_Type, _Event, _State, _Connection) ->
{keep_state_and_data, [postpone]}.
%%--------------------------------------------------------------------
+-spec error(gen_statem:event_type(),
+ {start, timeout()} | term(), #state{},
+ tls_connection | dtls_connection) ->
+ gen_statem:state_function_result().
+%%--------------------------------------------------------------------
+error({call, From}, Msg, State, Connection) ->
+ handle_call(Msg, From, ?FUNCTION_NAME, State, Connection).
+
+%%--------------------------------------------------------------------
-spec hello(gen_statem:event_type(),
#hello_request{} | #server_hello{} | term(),
#state{}, tls_connection | dtls_connection) ->
diff --git a/lib/ssl/src/tls_connection.erl b/lib/ssl/src/tls_connection.erl
index 96243db4ae..b033eea261 100644
--- a/lib/ssl/src/tls_connection.erl
+++ b/lib/ssl/src/tls_connection.erl
@@ -141,12 +141,14 @@ next_record(#state{protocol_buffers =
end;
next_record(#state{protocol_buffers = #protocol_buffers{tls_packets = [], tls_cipher_texts = []},
socket = Socket,
+ close_tag = CloseTag,
transport_cb = Transport} = State) ->
case tls_socket:setopts(Transport, Socket, [{active,once}]) of
ok ->
{no_record, State};
_ ->
- {socket_closed, State}
+ self() ! {CloseTag, Socket},
+ {no_record, State}
end;
next_record(State) ->
{no_record, State}.
@@ -154,15 +156,10 @@ next_record(State) ->
next_event(StateName, Record, State) ->
next_event(StateName, Record, State, []).
-next_event(StateName, socket_closed, State, _) ->
- ssl_connection:handle_normal_shutdown(?ALERT_REC(?FATAL, ?CLOSE_NOTIFY), StateName, State),
- {stop, {shutdown, transport_closed}, State};
next_event(connection = StateName, no_record, State0, Actions) ->
case next_record_if_active(State0) of
{no_record, State} ->
ssl_connection:hibernate_after(StateName, State, Actions);
- {socket_closed, State} ->
- next_event(StateName, socket_closed, State, Actions);
{#ssl_tls{} = Record, State} ->
{next_state, StateName, State, [{next_event, internal, {protocol_record, Record}} | Actions]};
{#alert{} = Alert, State} ->
@@ -431,7 +428,7 @@ init({call, From}, {start, Timeout},
{Record, State} = next_record(State1),
next_event(hello, Record, State);
init(Type, Event, State) ->
- gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec error(gen_statem:event_type(),
@@ -441,8 +438,8 @@ init(Type, Event, State) ->
error({call, From}, {start, _Timeout}, {Error, State}) ->
{stop_and_reply, normal, {reply, From, {error, Error}}, State};
-error({call, From}, Msg, State) ->
- handle_call(Msg, From, ?FUNCTION_NAME, State);
+error({call, _} = Call, Msg, State) ->
+ gen_handshake(?FUNCTION_NAME, Call, Msg, State);
error(_, _, _) ->
{keep_state_and_data, [postpone]}.
@@ -472,14 +469,13 @@ hello(internal, #client_hello{client_version = ClientVersion} = Hello,
undefined -> CurrentProtocol;
_ -> Protocol0
end,
-
- gen_handshake(ssl_connection, hello, internal, {common_client_hello, Type, ServerHelloExt},
- State#state{connection_states = ConnectionStates,
- negotiated_version = Version,
- client_hello_version = ClientVersion,
- hashsign_algorithm = HashSign,
- session = Session,
- negotiated_protocol = Protocol})
+ gen_handshake(?FUNCTION_NAME, internal, {common_client_hello, Type, ServerHelloExt},
+ State#state{connection_states = ConnectionStates,
+ negotiated_version = Version,
+ hashsign_algorithm = HashSign,
+ client_hello_version = ClientVersion,
+ session = Session,
+ negotiated_protocol = Protocol})
end;
hello(internal, #server_hello{} = Hello,
#state{connection_states = ConnectionStates0,
@@ -497,7 +493,7 @@ hello(internal, #server_hello{} = Hello,
hello(info, Event, State) ->
gen_info(Event, ?FUNCTION_NAME, State);
hello(Type, Event, State) ->
- gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec abbreviated(gen_statem:event_type(), term(), #state{}) ->
@@ -506,7 +502,7 @@ hello(Type, Event, State) ->
abbreviated(info, Event, State) ->
gen_info(Event, ?FUNCTION_NAME, State);
abbreviated(Type, Event, State) ->
- gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec certify(gen_statem:event_type(), term(), #state{}) ->
@@ -515,7 +511,7 @@ abbreviated(Type, Event, State) ->
certify(info, Event, State) ->
gen_info(Event, ?FUNCTION_NAME, State);
certify(Type, Event, State) ->
- gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec cipher(gen_statem:event_type(), term(), #state{}) ->
@@ -524,7 +520,7 @@ certify(Type, Event, State) ->
cipher(info, Event, State) ->
gen_info(Event, ?FUNCTION_NAME, State);
cipher(Type, Event, State) ->
- gen_handshake(ssl_connection, ?FUNCTION_NAME, Type, Event, State).
+ gen_handshake(?FUNCTION_NAME, Type, Event, State).
%%--------------------------------------------------------------------
-spec connection(gen_statem:event_type(),
@@ -587,9 +583,6 @@ terminate(Reason, StateName, State) ->
format_status(Type, Data) ->
ssl_connection:format_status(Type, Data).
-code_change(_OldVsn, StateName, State0, {Direction, From, To}) ->
- State = convert_state(State0, Direction, From, To),
- {ok, StateName, State};
code_change(_OldVsn, StateName, State, _) ->
{ok, StateName, State}.
@@ -651,10 +644,7 @@ tls_handshake_events(Packets) ->
{next_event, internal, {handshake, Packet}}
end, Packets).
-handle_call(Event, From, StateName, State) ->
- ssl_connection:handle_call(Event, From, StateName, State, ?MODULE).
-
-%% raw data from socket, unpack records
+%% raw data from socket, unpack records
handle_info({Protocol, _, Data}, StateName,
#state{data_tag = Protocol} = State0) ->
case next_tls_record(Data, State0) of
@@ -697,7 +687,7 @@ handle_info({CloseTag, Socket}, StateName,
next_event(StateName, no_record, State)
end;
handle_info(Msg, StateName, State) ->
- ssl_connection:handle_info(Msg, StateName, State).
+ ssl_connection:StateName(info, Msg, State, ?MODULE).
handle_alerts([], Result) ->
Result;
@@ -721,9 +711,9 @@ encode_change_cipher(#change_cipher_spec{}, Version, ConnectionStates) ->
decode_alerts(Bin) ->
ssl_alert:decode(Bin).
-gen_handshake(GenConnection, StateName, Type, Event,
+gen_handshake(StateName, Type, Event,
#state{negotiated_version = Version} = State) ->
- try GenConnection:StateName(Type, Event, State, ?MODULE) of
+ try ssl_connection:StateName(Type, Event, State, ?MODULE) of
Result ->
Result
catch
@@ -784,14 +774,3 @@ assert_buffer_sanity(Bin, _) ->
throw(?ALERT_REC(?FATAL, ?HANDSHAKE_FAILURE,
malformed_handshake_data))
end.
-
-convert_state(#state{ssl_options = Options} = State, up, "5.3.5", "5.3.6") ->
- State#state{ssl_options = convert_options_partial_chain(Options, up)};
-convert_state(#state{ssl_options = Options} = State, down, "5.3.6", "5.3.5") ->
- State#state{ssl_options = convert_options_partial_chain(Options, down)}.
-
-convert_options_partial_chain(Options, up) ->
- {Head, Tail} = lists:split(5, tuple_to_list(Options)),
- list_to_tuple(Head ++ [{partial_chain, fun(_) -> unknown_ca end}] ++ Tail);
-convert_options_partial_chain(Options, down) ->
- list_to_tuple(proplists:delete(partial_chain, tuple_to_list(Options))).
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index e86f45431f..d822aca89c 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -239,8 +239,13 @@
<c><anno>InEncoding</anno></c>.</p>
</item>
</list>
- <p>Only when <c><anno>InEncoding</anno></c> is one of the UTF
- encodings, integers in the list are allowed to be &gt; 255.</p>
+ <p>
+ Note that integers in the list always represent code points
+ regardless of the <c><anno>InEncoding</anno></c> passed. If
+ <c><anno>InEncoding</anno></c> is <c>latin1</c>, only code
+ points &lt; 256 are allowed; otherwise, all valid Unicode code
+ points are allowed.
+ </p>
<p>If <c><anno>InEncoding</anno></c> is <c>latin1</c>, parameter
<c><anno>Data</anno></c> corresponds to the <c>iodata()</c> type,
but for <c>unicode</c>, parameter <c><anno>Data</anno></c> can
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 4972da297d..ab041ff53c 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -74,19 +74,21 @@
-export([to_upper/1, to_lower/1]).
%%
-import(lists,[member/2]).
-
-compile({no_auto_import,[length/1]}).
+-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}).
+-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r).
-export_type([grapheme_cluster/0]).
-type grapheme_cluster() :: char() | [char()].
-type direction() :: 'leading' | 'trailing'.
--dialyzer({no_improper_lists, stack/2}).
+-dialyzer({no_improper_lists, [stack/2, length_b/3]}).
%%% BIFs internal (not documented) should not to be used outside of this module
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
+
%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: string(),
@@ -117,8 +119,10 @@ is_empty(_) -> false.
%% Count the number of grapheme clusters in chardata
-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(<<CP1/utf8, Bin/binary>>) ->
+ length_b(Bin, CP1, 0);
length(CD) ->
- length_1(unicode_util:gc(CD), 0).
+ length_1(CD, 0).
%% Convert a string to a list of grapheme clusters
-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
@@ -166,6 +170,8 @@ equal(A, B, true, Norm) ->
%% Reverse grapheme clusters
-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(<<CP1/utf8, Rest/binary>>) ->
+ reverse_b(Rest, CP1, []);
reverse(CD) ->
reverse_1(CD, []).
@@ -176,7 +182,10 @@ reverse(CD) ->
Start :: non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N) when is_integer(N), N >= 0 ->
- slice_l(CD, N, is_binary(CD)).
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end.
-spec slice(String, Start, Length) -> Slice when
String::unicode:chardata(),
@@ -185,9 +194,15 @@ slice(CD, N) when is_integer(N), N >= 0 ->
Slice :: unicode:chardata().
slice(CD, N, Length)
when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
- slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ L -> slice_trail(L, Length)
+ end;
slice(CD, N, infinity) ->
- slice_l(CD, N, is_binary(CD));
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end;
slice(CD, _, 0) ->
case is_binary(CD) of
true -> <<>>;
@@ -246,18 +261,22 @@ trim(Str, Dir) ->
Dir :: direction() | 'both',
Characters :: [grapheme_cluster()].
trim(Str, _, []) -> Str;
+trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ls(Str, Sep);
trim(Str, leading, Sep) when is_list(Sep) ->
- trim_l(Str, search_pattern(Sep));
-trim(Str, trailing, Sep) when is_list(Sep) ->
- trim_t(Str, 0, search_pattern(Sep));
-trim(Str, both, Sep0) when is_list(Sep0) ->
- Sep = search_pattern(Sep0),
- trim_t(trim_l(Str,Sep), 0, Sep).
+ trim_l(Str, Sep);
+trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ts(Str, Sep);
+trim(Str, trailing, Seps0) when is_list(Seps0) ->
+ Seps = search_pattern(Seps0),
+ trim_t(Str, 0, Seps);
+trim(Str, both, Sep) when is_list(Sep) ->
+ trim(trim(Str,leading,Sep), trailing, Sep).
%% Delete trailing newlines or \r\n
-spec chomp(String::unicode:chardata()) -> unicode:chardata().
chomp(Str) ->
- trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+ trim(Str, trailing, [[$\r,$\n],$\n]).
%% Split String into two parts where the leading part consists of Characters
-spec take(String, Characters) -> {Leading, Trailing} when
@@ -290,8 +309,7 @@ take(Str, [], Complement, Dir) ->
{true, leading} -> {Str, Empty};
{true, trailing} -> {Empty, Str}
end;
-take(Str, Sep0, false, leading) ->
- Sep = search_pattern(Sep0),
+take(Str, Sep, false, leading) ->
take_l(Str, Sep, []);
take(Str, Sep0, true, leading) ->
Sep = search_pattern(Sep0),
@@ -451,6 +469,7 @@ replace(String, SearchPattern, Replacement, Where) ->
SeparatorList::[grapheme_cluster()]) ->
[unicode:chardata()].
lexemes([], _) -> [];
+lexemes(Str, []) -> [Str];
lexemes(Str, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
lexemes_m(Str, Seps, []).
@@ -484,13 +503,13 @@ find(String, SearchPattern, leading) ->
find(String, SearchPattern, trailing) ->
find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
-%% Fetch first codepoint and return rest in tail
+%% Fetch first grapheme cluster and return rest in tail
-spec next_grapheme(String::unicode:chardata()) ->
maybe_improper_list(grapheme_cluster(),unicode:chardata()) |
{error,unicode:chardata()}.
next_grapheme(CD) -> unicode_util:gc(CD).
-%% Fetch first grapheme cluster and return rest in tail
+%% Fetch first codepoint and return rest in tail
-spec next_codepoint(String::unicode:chardata()) ->
maybe_improper_list(char(),unicode:chardata()) |
{error,unicode:chardata()}.
@@ -498,10 +517,23 @@ next_codepoint(CD) -> unicode_util:cp(CD).
%% Internals
-length_1([_|Rest], N) ->
- length_1(unicode_util:gc(Rest), N+1);
-length_1([], N) ->
- N.
+length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
+ length_1(Cont, N+1);
+length_1(Str, N) ->
+ case unicode_util:gc(Str) of
+ [] -> N;
+ [_|Rest] -> length_1(Rest, N+1)
+ end.
+
+length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
+ when ?ASCII_LIST(CP1,CP2) ->
+ length_b(Rest, CP2, N+1);
+length_b(Bin0, CP1, N) ->
+ [_|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> N+1;
+ [CP3|Bin] -> length_b(Bin, CP3, N+1)
+ end.
equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
A =:= B andalso equal_1(AR, BR);
@@ -540,29 +572,66 @@ equal_norm_nocase(A0, B0, Norm) ->
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
+reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
+ reverse_1(Cont, [CP1|Acc]);
reverse_1(CD, Acc) ->
case unicode_util:gc(CD) of
[GC|Rest] -> reverse_1(Rest, [GC|Acc]);
[] -> Acc
end.
-slice_l(CD, N, Binary) when N > 0 ->
+reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ reverse_b(Rest, CP2, [CP1|Acc]);
+reverse_b(Bin0, CP1, Acc) ->
+ [GC|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> [GC|Acc];
+ [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
+ end.
+
+slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
+ slice_lb(Bin, CP1, N);
+slice_l0(L, N) ->
+ slice_l(L, N).
+
+slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ slice_l(Cont, N-1);
+slice_l(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
- [_|Cont] -> slice_l(Cont, N-1, Binary);
- [] when Binary -> <<>>;
+ [_|Cont] -> slice_l(Cont, N-1);
[] -> []
end;
-slice_l(Cont, 0, Binary) ->
- case is_empty(Cont) of
- true when Binary -> <<>>;
- _ -> Cont
+slice_l(Cont, 0) ->
+ Cont.
+
+slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 ->
+ slice_lb(Bin, CP2, N-1);
+slice_lb(Bin, CP1, N) ->
+ [_|Rest] = unicode_util:gc([CP1|Bin]),
+ if N > 1 ->
+ case unicode_util:cp(Rest) of
+ [CP2|Cont] -> slice_lb(Cont, CP2, N-1);
+ [] -> <<>>
+ end;
+ N =:= 1 ->
+ Rest
end.
+slice_trail(Orig, N) when is_binary(Orig) ->
+ case Orig of
+ <<CP1/utf8, Bin/binary>> when N > 0 ->
+ Length = slice_bin(Bin, CP1, N),
+ Sz = byte_size(Orig) - Length,
+ <<Keep:Sz/binary, _/binary>> = Orig,
+ Keep;
+ _ -> <<>>
+ end;
slice_trail(CD, N) when is_list(CD) ->
- slice_list(CD, N);
-slice_trail(CD, N) when is_binary(CD) ->
- slice_bin(CD, N, CD).
+ slice_list(CD, N).
+slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ [CP1|slice_list(Cont, N-1)];
slice_list(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[GC|Cont] -> append(GC, slice_list(Cont, N-1));
@@ -571,17 +640,16 @@ slice_list(CD, N) when N > 0 ->
slice_list(_, 0) ->
[].
-slice_bin(CD, N, Orig) when N > 0 ->
- case unicode_util:gc(CD) of
- [_|Cont] -> slice_bin(Cont, N-1, Orig);
- [] -> Orig
+slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 ->
+ slice_bin(Bin, CP2, N-1);
+slice_bin(CD, CP1, N) when N > 0 ->
+ [_|Bin] = unicode_util:gc([CP1|CD]),
+ case unicode_util:cp(Bin) of
+ [CP2|Cont] -> slice_bin(Cont, CP2, N-1);
+ [] -> 0
end;
-slice_bin([], 0, Orig) ->
- Orig;
-slice_bin(CD, 0, Orig) ->
- Sz = byte_size(Orig) - byte_size(CD),
- <<Keep:Sz/binary, _/binary>> = Orig,
- Keep.
+slice_bin(CD, CP1, 0) ->
+ byte_size(CD)+byte_size(<<CP1/utf8>>).
uppercase_list(CPs0) ->
case unicode_util:uppercase(CPs0) of
@@ -631,16 +699,31 @@ casefold_bin(CPs0, Acc) ->
[] -> Acc
end.
-
+%% Fast path for ascii searching for one character in lists
+trim_ls([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case Sep of
+ CP1 -> trim_ls(Cont, Sep);
+ _ -> Str
+ end;
+trim_ls(Str, Sep) ->
+ trim_l(Str, [Sep]).
+
+trim_l([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Sep) of
+ true -> trim_l(Cont, Sep);
+ false -> Str
+ end;
trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Sep) of
{nomatch, Cont} -> trim_l(Cont, Sep);
Keep -> Keep
end;
-trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+trim_l(Str, Sep) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
+ case lists:member(C, Sep) of
true -> trim_l(Cs, Sep);
false -> Str
end;
@@ -652,15 +735,51 @@ trim_l(Bin, Sep) when is_binary(Bin) ->
[Keep] -> Keep
end.
-trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+%% Fast path for ascii searching for one character in lists
+trim_ts([Sep|Cs1]=Str, Sep) ->
+ case Cs1 of
+ [] -> [];
+ [CP2|_] when ?ASCII_LIST(Sep,CP2) ->
+ Tail = trim_ts(Cs1, Sep),
+ case is_empty(Tail) of
+ true -> [];
+ false -> [Sep|Tail]
+ end;
+ _ ->
+ trim_t(Str, 0, search_pattern([Sep]))
+ end;
+trim_ts([CP|Cont],Sep) when is_integer(CP) ->
+ [CP|trim_ts(Cont, Sep)];
+trim_ts(Str, Sep) ->
+ trim_t(Str, 0, search_pattern([Sep])).
+
+trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Cs1] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
+ end;
+ false ->
+ append(GC,trim_t(Cs1, 0, Seps))
+ end;
+ false ->
+ [CP1|trim_t(Cont, 0, Seps)]
+ end;
+trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,_} ->
- stack(Bin, trim_t(Cont0, 0, Sep));
+ stack(Bin, trim_t(Cont0, 0, Seps));
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- Tail = trim_t(Cont, 0, Sep),
+ Tail = trim_t(Cont, 0, Seps),
case is_empty(Tail) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
@@ -672,67 +791,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t([Bin|Cont], KeepSz, Sep)
+ trim_t([Bin|Cont], KeepSz, Seps)
end
end;
-trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- Tail = trim_t(Cs1, 0, Sep),
- case is_empty(Tail) of
- true -> [];
- false -> append(GC,Tail)
- end;
- false ->
- append(GC,trim_t(Cs1, 0, Sep))
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
end;
false ->
- append(CP,trim_t(Cs, 0, Sep))
+ append(GC,trim_t(Cs1, 0, Seps))
end;
[] -> []
end;
-trim_t(Bin, N, Sep) when is_binary(Bin) ->
+trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> Bin;
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t(Bin, KeepSz, Sep)
+ trim_t(Bin, KeepSz, Seps)
end
end.
-take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Sep) of
+
+take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Seps) of
+ true -> take_l(Cont, Seps, [CP1|Acc]);
+ false -> {rev(Acc), Str}
+ end;
+take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_l(Str, Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
- true -> take_l(Cs, Sep, append(rev(C),Acc));
+ case lists:member(C, Seps) of
+ true -> take_l(Cs, Seps, append(rev(C),Acc));
false -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_l(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Sep) of
+take_l(Bin, Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -741,27 +862,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, Cont0, Sep) of
+
+take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ false -> take_lc(Str, Seps, append(rev(GC),Acc));
+ true -> {rev(Acc), Str0}
+ end;
+ false ->
+ take_lc(Cont, Seps, append(CP1,Acc))
+ end;
+take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, GCs) of
- false -> take_lc(Cs, Sep, append(rev(C),Acc));
+ false -> take_lc(Cs, Seps, append(rev(C),Acc));
true -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, [], Sep) of
+take_lc(Bin, Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -770,148 +905,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+
+take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ true ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Cont, 0, Seps),
+ {[CP1|Head], Tail}
+ end;
+take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t([Bin|Cont], KeepSz, Sep)
+ take_t([Bin|Cont], KeepSz, Seps)
end
end;
-take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- {Head, Tail} = take_t(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
end;
false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_t(Bin, N, Sep) when is_binary(Bin) ->
+take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t(Bin, KeepSz, Sep)
+ take_t(Bin, KeepSz, Seps)
end
end.
-take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, GCs) of
+ false ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(CP1,Tail)};
+ false -> {append(CP1,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ {append(CP1,Head), Tail}
+ end;
+take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, Cont0, Sep) of
+ case bin_search_inv(Rest, Cont0, GCs) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps0),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search(SepStart, Cont1, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, Cont1, Seps) of
{nomatch, Cont} ->
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc([Bin|Cont], KeepSz, Sep)
+ take_tc([Bin|Cont], KeepSz, Seps)
end
end;
-take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
- true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- false ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- true ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- {append(GC,Head), Tail}
- end;
+take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
false ->
- {Head, Tail} = take_tc(Cs, 0, Sep),
- case equal(Tail, Cs) of
- true -> {Head, append(CP,Tail)};
- false -> {append(CP,Head), Tail}
- end
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_tc(Bin, N, Sep) when is_binary(Bin) ->
+take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, [], Sep) of
+ case bin_search_inv(Rest, [], GCs) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search(SepStart, [], Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, [], Seps) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc(Bin, KeepSz, Sep)
+ take_tc(Bin, KeepSz, Seps)
end
end.
-prefix_1(Cs, []) -> Cs;
-prefix_1(Cs, [_]=Pre) ->
- prefix_2(unicode_util:gc(Cs), Pre);
-prefix_1(Cs, Pre) ->
- prefix_2(unicode_util:cp(Cs), Pre).
-
-prefix_2([C|Cs], [C|Pre]) ->
- prefix_1(Cs, Pre);
-prefix_2(_, _) ->
- nomatch.
+prefix_1(Cs0, [GC]) ->
+ case unicode_util:gc(Cs0) of
+ [GC|Cs] -> Cs;
+ _ -> nomatch
+ end;
+prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(Cs0, [Pre|PreR]) ->
+ case unicode_util:cp(Cs0) of
+ [Pre|Cs] -> prefix_1(Cs,PreR);
+ _ -> nomatch
+ end.
+split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) ->
+ case CP1=:=C of
+ true ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+ Rest when Where =:= leading ->
+ [rev(Curr), Rest];
+ Rest when Where =:= trailing ->
+ split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+ Rest when Where =:= all ->
+ split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+ end;
+ false ->
+ split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc)
+ end;
split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
when is_binary(Bin) ->
case bin_search_str(Bin, Start, Cont0, Needle) of
@@ -971,32 +1150,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
end
end.
-lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) ->
+ case lists:member(CP, CPs) of
+ true ->
+ [GC|Cs2] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ lexemes_m(Cs2, Seps, Ts);
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- lexemes_m(Cont, Seps, Ts);
+ lexemes_m(Cont, Seps0, Ts);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
-lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- lexemes_m(Cs, Seps, Ts);
+ lexemes_m(Cs, Seps0, Ts);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
[] ->
lists:reverse(Ts)
end;
-lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], GCs) of
{nomatch,_} ->
lists:reverse(Ts);
[Cs] ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
end.
@@ -1027,7 +1224,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> {rev(Tkn), Cs0};
+ true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false ->
@@ -1037,7 +1234,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
{rev(Tkn), []}
end;
lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin,Tkn), []};
[Left] ->
@@ -1046,35 +1243,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
{btoken(Lexeme, Tkn), Left}
end.
-nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- nth_lexeme_m(Cont, Seps, N);
+ nth_lexeme_m(Cont, Seps0, N);
Cs when N > 1 ->
- Rest = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Rest, Seps, N-1);
+ Rest = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Rest, Seps0, N-1);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme
end;
-nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- nth_lexeme_m(Cs, Seps, N);
+ nth_lexeme_m(Cs, Seps0, N);
false when N > 1 ->
- Cs1 = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Cs1, Seps, N-1);
+ Cs1 = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Cs1, Seps0, N-1);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs0, Seps, []),
Lexeme
end;
[] ->
[]
end;
-nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search_inv(Bin, [], GCs) of
[Cs] when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps),
nth_lexeme_m(Cs1, Seps, N-1);
@@ -1090,16 +1290,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs1, Seps)
end;
-lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} -> lexeme_skip(Cont0, Seps);
- Cs -> Cs
+ Cs -> tl(unicode_util:gc(Cs))
end;
lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
@@ -1108,7 +1309,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
@@ -1117,12 +1318,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
[] ->
[]
end;
-lexeme_skip(Bin, Seps) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+lexeme_skip(Bin, Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} -> <<>>;
- [Left] -> Left
+ [Left] -> tl(unicode_util:gc(Left))
end.
+find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) ->
+ case C1 of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_l(Cs, Needle);
+ _ -> Cs0
+ end;
+ _ ->
+ find_l(Cs, Needle)
+ end;
find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch, _, Cont} ->
@@ -1147,6 +1359,16 @@ find_l(Bin, Needle) ->
{_Before, [Cs], _After} -> Cs
end.
+find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) ->
+ case Cp of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_r(Cs, Needle, Res);
+ _ -> find_r(Cs, Needle, Cs0)
+ end;
+ _ ->
+ find_r(Cs, Needle, Res)
+ end;
find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch,_,Cont} ->
@@ -1217,11 +1439,6 @@ cp_prefix_1(Orig, Until, Cont) ->
%% Binary special
-bin_search(Bin, Seps) ->
- bin_search(Bin, [], Seps).
-
-bin_search(_Bin, Cont, {[],_,_}) ->
- {nomatch, Cont};
bin_search(Bin, Cont, {Seps,_,BP}) ->
bin_search_loop(Bin, 0, BP, Cont, Seps).
@@ -1229,10 +1446,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) ->
%% i.e. å in nfd form $a "COMBINING RING ABOVE"
%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
%% combined with other characters are currently ignored.
+search_pattern({_,_,_}=P) -> P;
search_pattern(Seps) ->
CPs = search_cp(Seps),
- Bin = bin_pattern(CPs),
- {Seps, CPs, Bin}.
+ {Seps, CPs, undefined}.
+
+search_compile({Sep, CPs, undefined}) ->
+ {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))};
+search_compile({_,_,_}=Compiled) -> Compiled.
search_cp([CP|Seps]) when is_integer(CP) ->
[CP|search_cp(Seps)];
@@ -1253,9 +1474,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
case binary:match(Bin, BinSeps) of
nomatch ->
{nomatch,Cont};
+ {Where, _CL} when Cont =:= [] ->
+ <<_:Where/binary, Cont1/binary>> = Bin,
+ [GC|Cont2] = unicode_util:gc(Cont1),
+ case lists:member(GC, Seps) of
+ false when Cont2 =:= [] ->
+ {nomatch, []};
+ false ->
+ Next = byte_size(Bin0) - byte_size(Cont2),
+ bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+ true ->
+ [Cont1]
+ end;
{Where, _CL} ->
<<_:Where/binary, Cont0/binary>> = Bin,
- Cont1 = stack(Cont0, Cont),
+ Cont1 = [Cont0|Cont],
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false ->
@@ -1263,55 +1496,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
[BinR|Cont] when is_binary(BinR) ->
Next = byte_size(Bin0) - byte_size(BinR),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
- BinR when is_binary(BinR), Cont =:= [] ->
- Next = byte_size(Bin0) - byte_size(BinR),
- bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
_ ->
{nomatch, Cont2}
end;
- true when is_list(Cont1) ->
- Cont1;
true ->
- [Cont1]
+ Cont1
end
end.
-bin_search_inv(Bin, Cont, {[], _, _}) ->
- [Bin|Cont];
-bin_search_inv(Bin, Cont, {[Sep], _, _}) ->
- bin_search_inv_1([Bin|Cont], Sep);
-bin_search_inv(Bin, Cont, {Seps, _, _}) ->
- bin_search_inv_n([Bin|Cont], Seps).
-
-bin_search_inv_1([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) ->
- case unicode_util:gc(CPs) of
- [Sep|Bin] when is_binary(Bin), Cont =:= [] ->
- bin_search_inv_1([Bin], Sep);
- [Sep|[Bin|Cont]=Cs] when is_binary(Bin) ->
- bin_search_inv_1(Cs, Sep);
- [Sep|Cs] ->
- {nomatch, Cs};
- _ -> CPs
- end.
+bin_search_inv(<<>>, Cont, _) ->
+ {nomatch, Cont};
+bin_search_inv(Bin, Cont, [Sep]) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+bin_search_inv(Bin, Cont, Seps) ->
+ bin_search_inv_n(Bin, Cont, Seps).
+
+bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case CP1 of
+ Sep -> bin_search_inv_1(BinRest, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ case unicode_util:gc(Bin0) of
+ [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ ->
+ case unicode_util:gc([Bin0|Cont]) of
+ [Sep|[Bin|Cont]] when is_binary(Bin) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+ [Sep|Cs] ->
+ {nomatch, Cs};
+ _ -> [Bin0|Cont]
+ end
+ end;
+bin_search_inv_1(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_1([], Cont, _Sep) ->
+ {nomatch, Cont}.
-bin_search_inv_n([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) ->
- [C|Cs0] = unicode_util:gc(CPs),
- case {lists:member(C, Seps), Cs0} of
- {true, Cs} when is_binary(Cs), Cont =:= [] ->
- bin_search_inv_n([Cs], Seps);
- {true, [Bin|Cont]=Cs} when is_binary(Bin) ->
- bin_search_inv_n(Cs, Seps);
- {true, Cs} -> {nomatch, Cs};
- {false, _} -> CPs
- end.
+bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case lists:member(CP1,Seps) of
+ true -> bin_search_inv_n(BinRest, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ [GC|Bin] = unicode_util:gc(Bin0),
+ case lists:member(GC, Seps) of
+ true -> bin_search_inv_n(Bin, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ ->
+ [GC|Cs0] = unicode_util:gc([Bin0|Cont]),
+ case lists:member(GC, Seps) of
+ false -> [Bin0|Cont];
+ true ->
+ case Cs0 of
+ [Bin|Cont] when is_binary(Bin) ->
+ bin_search_inv_n(Bin, Cont, Seps);
+ _ ->
+ {nomatch, Cs0}
+ end
+ end
+ end;
+bin_search_inv_n(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_n([], Cont, _Sep) ->
+ {nomatch, Cont}.
+
+bin_search_str(Bin0, Start, [], SearchCPs) ->
+ Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
+ bin_search_str_1(Bin0, Start, Compiled, SearchCPs);
bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
+ First = binary:compile_pattern(<<CP/utf8>>),
+ bin_search_str_2(Bin0, Start, Cont, First, SearchCPs).
+
+bin_search_str_1(Bin0, Start, First, SearchCPs) ->
+ <<_:Start/binary, Bin/binary>> = Bin0,
+ case binary:match(Bin, First) of
+ nomatch -> {nomatch, byte_size(Bin0), []};
+ {Where0, _} ->
+ Where = Start+Where0,
+ <<Keep:Where/binary, Cs0/binary>> = Bin0,
+ case prefix_1(Cs0, SearchCPs) of
+ nomatch ->
+ <<_/utf8, Cs/binary>> = Cs0,
+ KeepSz = byte_size(Bin0) - byte_size(Cs),
+ bin_search_str_1(Bin0, KeepSz, First, SearchCPs);
+ [] ->
+ {Keep, [Cs0], <<>>};
+ Rest ->
+ {Keep, [Cs0], Rest}
+ end
+ end.
+
+bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
- case binary:match(Bin, <<CP/utf8>>) of
+ case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), Cont};
{Where0, _} ->
Where = Start+Where0,
@@ -1320,7 +1606,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
case prefix_1(stack(Cs0,Cont), SearchCPs) of
nomatch when is_binary(Cs) ->
KeepSz = byte_size(Bin0) - byte_size(Cs),
- bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+ bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs);
nomatch ->
{nomatch, Where, stack([GC|Cs],Cont)};
[] ->
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 90f980c0e5..f43bfb4482 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -92,14 +92,11 @@ end_per_testcase(_Case, _Config) ->
ok.
debug() ->
- Config = [{data_dir, ?MODULE_STRING++"_data"}],
+ Config = [{data_dir, "./" ++ ?MODULE_STRING++"_data"}],
[io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) ||
{_,Tests} <- groups(), Test <- Tests].
-define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)).
--define(TEST_EQ(B,C,D),
- test(?LINE,?FUNCTION_NAME,B,C,D, true),
- test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)).
-define(TEST_NN(B,C,D),
test(?LINE,?FUNCTION_NAME,B,C,D, false),
@@ -294,6 +291,7 @@ trim(_) ->
?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"),
?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"),
%% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+ ?TEST([1013,101,778,101,101], [trailing, [101]], [1013,101,778]),
?TEST("aaåaa", [both, "a"], "å"),
?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"),
?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"),
@@ -353,6 +351,7 @@ take(_) ->
?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}),
?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}),
%% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+ ?TEST([101,778], [[[101, 779]], true], {[101,778], []}),
?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}),
?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}),
?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}),
@@ -713,29 +712,123 @@ nth_lexeme(_) ->
+%% Benchmark test case. Runs do_measure/1 in a separate process so the run
+%% can be abandoned after 90 seconds, before the 2 minute timetrap fires.
+%% Config keys read: data_dir (its parent directory is passed on to
+%% do_measure/1) and the optional profile (false | eprof).
+%% Returns ok, or {skip, Reason} when the timetrap scale factor is above 1.
+%% The worker is monitored: if it dies before reporting test_done the case
+%% fails at once with the exit reason, instead of idling until the time
+%% limit and then passing.
meas(Config) ->
+    Parent = self(),
+    Exec = fun() ->
+                   DataDir0 = proplists:get_value(data_dir, Config),
+                   %% Step up one level from this suite's own data directory.
+                   DataDir = filename:join(lists:droplast(filename:split(DataDir0))),
+                   case proplists:get_value(profile, Config, false) of
+                       false ->
+                           do_measure(DataDir);
+                       eprof ->
+                           eprof:profile(fun() -> do_measure(DataDir) end, [set_on_spawn]),
+                           eprof:stop_profiling(),
+                           eprof:analyze(),
+                           eprof:stop()
+                   end,
+                   Parent ! {test_done, self()},
+                   normal
+           end,
+    ct:timetrap({minutes,2}),
case ct:get_timetrap_info() of
{_,{_,Scale}} when Scale > 1 ->
{skip,{will_not_run_in_debug,Scale}};
- _ -> % No scaling
- DataDir = proplists:get_value(data_dir, Config),
- TestDir = filename:dirname(string:trim(DataDir, trailing, "/")),
- do_measure(TestDir)
+        _ -> % No scaling, run at most 1.5 min
+            {Tester, MRef} = spawn_monitor(Exec),
+            receive
+                {test_done, Tester} ->
+                    %% Drop the monitor and any 'DOWN' already queued.
+                    erlang:demonitor(MRef, [flush]),
+                    ok;
+                {'DOWN', MRef, process, Tester, Reason} ->
+                    %% test_done is sent before the worker exits, so getting
+                    %% 'DOWN' first means the measurement itself crashed.
+                    ct:fail({measure_process_died, Reason})
+            after 90000 ->
+                    io:format("Timelimit reached stopping~n",[]),
+                    erlang:demonitor(MRef, [flush]),
+                    exit(Tester, die)
+            end,
+            ok
end.
-do_measure(TestDir) ->
- File = filename:join(TestDir, ?MODULE_STRING ++ ".erl"),
+%% Reads NormalizationTest.txt from the unicode_util_SUITE_data directory
+%% below DataDir and prints one timing line per measured string function.
+%% Rows named old_* call the older string functions (tokens, strip, chr,
+%% str, rstr, span, cspan, substr) next to the newer functions measured
+%% alongside them. Returns ok; a failed file read crashes with badmatch.
+do_measure(DataDir) ->
+ File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]),
io:format("File ~s ",[File]),
{ok, Bin} = file:read_file(File),
io:format("~p~n",[byte_size(Bin)]),
+ %% Do: times Func via time_func/3, which is handed Mode and the file
+ %% contents. Mean and Stddev are divided by 1000 and printed as ms.
Do = fun(Name, Func, Mode) ->
{N, Mean, Stddev, _} = time_func(Func, Mode, Bin),
- io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n",
+ io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
[Name, Mode, Mean/1000, Stddev/1000, N])
end,
+ %% Do2: for closures built with repeat/1, which ignore their argument;
+ %% time_func/3 therefore gets a dummy input (binary, <<>>) and Mode is
+ %% only used as the label in the printed row.
+ Do2 = fun(Name, Func, Mode) ->
+ {N, Mean, Stddev, _} = time_func(Func, binary, <<>>),
+ io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
+ [Name, Mode, Mean/1000, Stddev/1000, N])
+ end,
io:format("----------------------~n"),
- Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
+
+ Do(old_tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end},
[Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]],
+
+ %% S0 and S0B hold the same text, as a list and as a binary.
+ S0 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....",
+ S0B = <<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....">>,
+ Do2(old_strip_l, repeat(fun() -> string:strip(S0, left, $x) end), list),
+ Do2(trim_l, repeat(fun() -> string:trim(S0, leading, [$x]) end), list),
+ Do2(trim_l, repeat(fun() -> string:trim(S0B, leading, [$x]) end), binary),
+ Do2(old_strip_r, repeat(fun() -> string:strip(S0, right, $.) end), list),
+ Do2(trim_t, repeat(fun() -> string:trim(S0, trailing, [$.]) end), list),
+ Do2(trim_t, repeat(fun() -> string:trim(S0B, trailing, [$.]) end), binary),
+
+ Do2(old_chr_sub, repeat(fun() -> string:sub_string(S0, string:chr(S0, $.)) end), list),
+ Do2(old_str_sub, repeat(fun() -> string:sub_string(S0, string:str(S0, [$.])) end), list),
+ Do2(find, repeat(fun() -> string:find(S0, [$.]) end), list),
+ Do2(find, repeat(fun() -> string:find(S0B, [$.]) end), binary),
+ Do2(old_str_sub2, repeat(fun() -> N = string:str(S0, "xy.."),
+ {string:sub_string(S0,1,N), string:sub_string(S0,N+4)} end), list),
+ Do2(split, repeat(fun() -> string:split(S0, "xy..") end), list),
+ Do2(split, repeat(fun() -> string:split(S0B, "xy..") end), binary),
+
+ Do2(old_rstr_sub, repeat(fun() -> string:sub_string(S0, string:rstr(S0, [$y])) end), list),
+ Do2(find_t, repeat(fun() -> string:find(S0, [$y], trailing) end), list),
+ Do2(find_t, repeat(fun() -> string:find(S0B, [$y], trailing) end), binary),
+ Do2(old_rstr_sub2, repeat(fun() -> N = string:rstr(S0, "y.."),
+ {string:sub_string(S0,1,N), string:sub_string(S0,N+3)} end), list),
+ Do2(split_t, repeat(fun() -> string:split(S0, "y..", trailing) end), list),
+ Do2(split_t, repeat(fun() -> string:split(S0B, "y..", trailing) end), binary),
+
+ Do2(old_span, repeat(fun() -> N=string:span(S0, [$x, $y]),
+ {string:sub_string(S0,1,N),string:sub_string(S0,N+1)}
+ end), list),
+ Do2(take, repeat(fun() -> string:take(S0, [$x, $y]) end), list),
+ Do2(take, repeat(fun() -> string:take(S0B, [$x, $y]) end), binary),
+
+ Do2(old_cspan, repeat(fun() -> N=string:cspan(S0, [$.,$y]),
+ {string:sub_string(S0,1,N),string:sub_string(S0,N+1)}
+ end), list),
+ Do2(take_c, repeat(fun() -> string:take(S0, [$.,$y], true) end), list),
+ Do2(take_c, repeat(fun() -> string:take(S0B, [$.,$y], true) end), binary),
+
+ Do2(old_substr, repeat(fun() -> string:substr(S0, 21, 15) end), list),
+ Do2(slice, repeat(fun() -> string:slice(S0, 20, 15) end), list),
+ Do2(slice, repeat(fun() -> string:slice(S0B, 20, 15) end), binary),
+
+ %% The rows below have no old_* row next to them.
+ io:format("--~n",[]),
+ NthTokens = {nth_lexemes, fun(Str) -> string:nth_lexeme(Str, 18000, [$\n,$\r]) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- [list, binary]],
+ Do2(take_t, repeat(fun() -> string:take(S0, [$.,$y], false, trailing) end), list),
+ Do2(take_t, repeat(fun() -> string:take(S0B, [$.,$y], false, trailing) end), binary),
+ Do2(take_tc, repeat(fun() -> string:take(S0, [$x], true, trailing) end), list),
+ Do2(take_tc, repeat(fun() -> string:take(S0B, [$x], true, trailing) end), binary),
+
+ Length = {length, fun(Str) -> string:length(Str) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- [list, binary]],
+
+ Reverse = {reverse, fun(Str) -> string:reverse(Str) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- [list, binary]],
+
+ ok.
+
+%% Wraps the zero-argument fun F in a one-argument fun that ignores its
+%% argument and calls F 20000 times via repeat_1/2. time_func calls the
+%% measured fun with one argument, which is why the wrapper takes one.
+repeat(F) ->
+ fun(_) -> repeat_1(F,20000) end.
+
+%% Calls F() N times, discarding each result. Once the counter is no longer
+%% positive it forces a garbage collection of the calling process and
+%% returns ok, so a timing taken around this call includes that collection.
+repeat_1(F, N) when N > 0 ->
+ F(),
+ repeat_1(F, N-1);
+repeat_1(_, _) ->
+ erlang:garbage_collect(),
ok.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -865,8 +958,6 @@ check_types_1({list, _},{list, undefined}) ->
ok;
check_types_1({list, _},{list, codepoints}) ->
ok;
-check_types_1({list, _},{list, {list, codepoints}}) ->
- ok;
check_types_1({list, {list, _}},{list, {list, codepoints}}) ->
ok;
check_types_1(mixed,_) ->
@@ -947,7 +1038,7 @@ time_func(Fun, Mode, Bin) ->
end),
receive {Pid,Msg} -> Msg end.
-time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 ->
+time_func(N,Sum,SumSq, Fun, Str, _) when N < 20 ->
{Time, Res} = timer:tc(fun() -> Fun(Str) end),
time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res);
time_func(N,Sum,SumSq, _, _, Res) ->
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl
index 03c24c7027..a89627eba5 100644
--- a/lib/stdlib/test/unicode_util_SUITE.erl
+++ b/lib/stdlib/test/unicode_util_SUITE.erl
@@ -310,12 +310,23 @@ get(_) ->
add_get_tests.
+%% Measurement test case. Runs do_measure/1 in a separate process so the
+%% run can be abandoned after 2 minutes, before the 5 minute timetrap fires.
+%% Returns ok, or {skip, Reason} when the timetrap scale factor is above 1.
+%% The worker is monitored: if it dies before reporting test_done the case
+%% fails at once with the exit reason, instead of idling until the time
+%% limit and then passing.
count(Config) ->
+    Parent = self(),
+    Exec = fun() ->
+                   do_measure(Config),
+                   Parent ! {test_done, self()}
+           end,
ct:timetrap({minutes,5}),
case ct:get_timetrap_info() of
- {_,{_,Scale}} ->
+        {_,{_,Scale}} when Scale > 1 ->
{skip,{measurments_skipped_debug,Scale}};
- _ -> % No scaling
- do_measure(Config)
+        _ -> % No scaling, run at most 2 min
+            {Tester, MRef} = spawn_monitor(Exec),
+            receive
+                {test_done, Tester} ->
+                    %% Drop the monitor and any 'DOWN' already queued.
+                    erlang:demonitor(MRef, [flush]),
+                    ok;
+                {'DOWN', MRef, process, Tester, Reason} ->
+                    %% test_done is sent before the worker exits, so getting
+                    %% 'DOWN' first means the measurement itself crashed.
+                    ct:fail({measure_process_died, Reason})
+            after 120000 ->
+                    io:format("Timelimit reached stopping~n",[]),
+                    erlang:demonitor(MRef, [flush]),
+                    exit(Tester, die)
+            end,
+            ok
end.
do_measure(Config) ->
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index fefd7d3b70..73c351e1af 100755
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -170,7 +170,7 @@ gen_header(Fd) ->
io:put_chars(Fd, "-export([spec_version/0, lookup/1, get_case/1]).\n"),
io:put_chars(Fd, "-inline([class/1]).\n"),
io:put_chars(Fd, "-compile(nowarn_unused_vars).\n"),
- io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc_prepend/2, gc_e_cont/2]}).\n"),
+ io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc/1, gc_prepend/2, gc_e_cont/2]}).\n"),
io:put_chars(Fd, "-type gc() :: char()|[char()].\n\n\n"),
ok.
@@ -240,7 +240,7 @@ gen_norm(Fd) ->
"-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfd(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [decompose(GC)|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -250,7 +250,7 @@ gen_norm(Fd) ->
"-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfkd(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [decompose_compat(GC)|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -260,7 +260,7 @@ gen_norm(Fd) ->
"-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfc(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 255 -> [GC|R];\n"
+ " [GC|R] when GC < 256 -> [GC|R];\n"
" [GC|Str] -> [compose(decompose(GC))|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -270,7 +270,7 @@ gen_norm(Fd) ->
"-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfkc(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [compose_compat_0(decompose_compat(GC))|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -476,13 +476,30 @@ gen_gc(Fd, GBP) ->
"-spec gc(String::unicode:chardata()) ->"
" maybe_improper_list() | {error, unicode:chardata()}.\n"),
io:put_chars(Fd,
+ "gc([CP1, CP2|_]=T)\n"
+ " when CP1 < 256, CP2 < 256, CP1 =/= $\r -> %% Ascii Fast path\n"
+ " T;\n"
+ "gc(<<CP1/utf8, Rest/binary>>) ->\n"
+ " if CP1 < 256, CP1 =/= $\r ->\n"
+ " case Rest of\n"
+ " <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n"
+ " [CP1|Rest];\n"
+ " _ -> gc_1([CP1|Rest])\n"
+ " end;\n"
+ " true -> gc_1([CP1|Rest])\n"
+ " end;\n"
"gc(Str) ->\n"
" gc_1(cp(Str)).\n\n"
"gc_1([$\\r|R0] = R) ->\n"
" case cp(R0) of % Don't break CRLF\n"
" [$\\n|R1] -> [[$\\r,$\\n]|R1];\n"
" _ -> R\n"
- " end;\n"),
+ " end;\n"
+ %% "gc_1([CP1, CP2|_]=T) when CP1 < 256, CP2 < 256 ->\n"
+ %% " T; %% Fast path\n"
+ %% "gc_1([CP1|<<CP2/utf8, _/binary>>]=T) when CP1 < 256, CP2 < 256 ->\n"
+ %% " T; %% Fast path\n"
+ ),
io:put_chars(Fd, "%% Handle control\n"),
GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end,
@@ -490,7 +507,7 @@ gen_gc(Fd, GBP) ->
[R1,R2,R3|Crs] = CRs0,
[GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}],
%%GenControl(R1),GenControl(R2),GenControl(R3),
- io:format(Fd, "gc_1([CP|R]) when CP < 255 -> gc_extend(R,CP);\n", []),
+ io:format(Fd, "gc_1([CP|R]) when CP < 256 -> gc_extend(R,CP);\n", []),
[GenControl(CP) || CP <- Crs],
%% One clause per CP
%% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)),