diff options
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/Makefile | 17 | ||||
-rw-r--r-- | lib/stdlib/doc/src/ets.xml | 4 | ||||
-rw-r--r-- | lib/stdlib/doc/src/fascicules.xml | 18 | ||||
-rw-r--r-- | lib/stdlib/doc/src/part_notes.xml | 39 | ||||
-rw-r--r-- | lib/stdlib/doc/src/part_notes_history.xml | 39 | ||||
-rw-r--r-- | lib/stdlib/doc/src/unicode.xml | 9 | ||||
-rw-r--r-- | lib/stdlib/doc/src/user_guide.gif | bin | 1581 -> 0 bytes | |||
-rw-r--r-- | lib/stdlib/src/ets.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/src/filename.erl | 17 | ||||
-rw-r--r-- | lib/stdlib/src/string.erl | 748 | ||||
-rw-r--r-- | lib/stdlib/test/filename_SUITE.erl | 120 | ||||
-rw-r--r-- | lib/stdlib/test/qlc_SUITE.erl | 16 | ||||
-rw-r--r-- | lib/stdlib/test/string_SUITE.erl | 121 | ||||
-rw-r--r-- | lib/stdlib/test/unicode_util_SUITE.erl | 17 | ||||
-rwxr-xr-x | lib/stdlib/uc_spec/gen_unicode_mod.escript | 31 |
15 files changed, 828 insertions, 370 deletions
diff --git a/lib/stdlib/doc/src/Makefile b/lib/stdlib/doc/src/Makefile index 26602764a6..e7ea38c5c3 100644 --- a/lib/stdlib/doc/src/Makefile +++ b/lib/stdlib/doc/src/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 1997-2016. All Rights Reserved. +# Copyright Ericsson AB 1997-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -103,8 +103,9 @@ XML_REF3_FILES = \ XML_REF6_FILES = stdlib_app.xml -XML_PART_FILES = part.xml part_notes.xml part_notes_history.xml -XML_CHAPTER_FILES = io_protocol.xml unicode_usage.xml notes.xml notes_history.xml assert_hrl.xml +XML_PART_FILES = part.xml +XML_CHAPTER_FILES = introduction.xml io_protocol.xml unicode_usage.xml \ + notes.xml assert_hrl.xml BOOK_FILES = book.xml @@ -131,9 +132,9 @@ SPECS_FILES = $(XML_REF3_FILES:%.xml=$(SPECDIR)/specs_%.xml) TOP_SPECS_FILE = specs.xml # ---------------------------------------------------- -# FLAGS +# FLAGS # ---------------------------------------------------- -XML_FLAGS += +XML_FLAGS += SPECS_FLAGS = -I../../include -I../../../kernel/include @@ -150,7 +151,7 @@ html: $(HTML_REF_MAN_FILE) man: $(MAN3_FILES) $(MAN6_FILES) -debug opt: +debug opt: clean clean_docs: rm -rf $(HTMLDIR)/* @@ -158,7 +159,7 @@ clean clean_docs: rm -f $(MAN6DIR)/* rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) rm -f $(SPECDIR)/* - rm -f errs core *~ + rm -f errs core *~ $(SPECDIR)/specs_erl_id_trans.xml: escript $(SPECS_EXTRACTOR) $(SPECS_FLAGS) \ @@ -166,7 +167,7 @@ $(SPECDIR)/specs_erl_id_trans.xml: # ---------------------------------------------------- # Release Target -# ---------------------------------------------------- +# ---------------------------------------------------- include $(ERL_TOP)/make/otp_release_targets.mk release_docs_spec: docs diff --git a/lib/stdlib/doc/src/ets.xml b/lib/stdlib/doc/src/ets.xml index 95af2b77a5..576959b1c8 100644 --- a/lib/stdlib/doc/src/ets.xml +++ b/lib/stdlib/doc/src/ets.xml @@ -325,7 +325,7 @@ <p><c><anno>Acc0</anno></c> is returned if the table is empty. This function is similar to <seealso marker="lists#foldl/3"><c>lists:foldl/3</c></seealso>. - The table elements are traversed is unspecified order, except for + The table elements are traversed in an unspecified order, except for <c>ordered_set</c> tables, where they are traversed first to last.</p> <p>If <c><anno>Function</anno></c> inserts objects into the table, or another @@ -341,7 +341,7 @@ <p><c><anno>Acc0</anno></c> is returned if the table is empty. This function is similar to <seealso marker="lists#foldr/3"><c>lists:foldr/3</c></seealso>. - The table elements are traversed is unspecified order, except for + The table elements are traversed in an unspecified order, except for <c>ordered_set</c> tables, where they are traversed last to first.</p> <p>If <c><anno>Function</anno></c> inserts objects into the table, or another diff --git a/lib/stdlib/doc/src/fascicules.xml b/lib/stdlib/doc/src/fascicules.xml deleted file mode 100644 index 0ded9007e0..0000000000 --- a/lib/stdlib/doc/src/fascicules.xml +++ /dev/null @@ -1,18 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE fascicules SYSTEM "fascicules.dtd"> - -<fascicules> - <fascicule file="part" href="part_frame.html" entry="no"> - STDLIB User's Guide - </fascicule> - <fascicule file="ref_man" href="ref_man_frame.html" entry="yes"> - Reference Manual - </fascicule> - <fascicule file="part_notes" href="part_notes_frame.html" entry="no"> - Release Notes - </fascicule> - <fascicule file="" href="../../../../doc/print.html" entry="no"> - Off-Print - </fascicule> -</fascicules> - diff --git a/lib/stdlib/doc/src/part_notes.xml b/lib/stdlib/doc/src/part_notes.xml deleted file mode 100644 index 461de749dd..0000000000 --- a/lib/stdlib/doc/src/part_notes.xml +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE part SYSTEM "part.dtd"> - -<part xmlns:xi="http://www.w3.org/2001/XInclude"> - <header> - <copyright> - <year>2004</year><year>2016</year> - <holder>Ericsson AB. All Rights Reserved.</holder> - </copyright> - <legalnotice> - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - </legalnotice> - - <title>STDLIB Release Notes</title> - <prepared></prepared> - <docno></docno> - <date></date> - <rev></rev> - </header> - <description> - <p>The Standard Erlang Libraries application, <em>STDLIB</em>, - contains modules for manipulating lists, strings and files etc.</p> - <p>For information about older versions, see - <url href="part_notes_history_frame.html">Release Notes History</url>.</p> - </description> - <xi:include href="notes.xml"/> -</part> - diff --git a/lib/stdlib/doc/src/part_notes_history.xml b/lib/stdlib/doc/src/part_notes_history.xml deleted file mode 100644 index 8fd048a41e..0000000000 --- a/lib/stdlib/doc/src/part_notes_history.xml +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE part SYSTEM "part.dtd"> - -<part> - <header> - <copyright> - <year>2006</year> - <year>2016</year> - <holder>Ericsson AB, All Rights Reserved</holder> - </copyright> - <legalnotice> - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - The Initial Developer of the Original Code is Ericsson AB. - </legalnotice> - - <title>STDLIB Release Notes History</title> - <prepared></prepared> - <docno></docno> - <date></date> - <rev></rev> - </header> - <description> - <p>The Standard Erlang Libraries application, <em>STDLIB</em>, - contains modules for manipulating lists, strings and files etc.</p> - </description> - <include file="notes_history"></include> -</part> - diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml index e86f45431f..d822aca89c 100644 --- a/lib/stdlib/doc/src/unicode.xml +++ b/lib/stdlib/doc/src/unicode.xml @@ -239,8 +239,13 @@ <c><anno>InEncoding</anno></c>.</p> </item> </list> - <p>Only when <c><anno>InEncoding</anno></c> is one of the UTF - encodings, integers in the list are allowed to be > 255.</p> + <p> + Note that integers in the list always represent code points + regardless of <c><anno>InEncoding</anno></c> passed. If + <c><anno>InEncoding</anno> latin1</c> is passed, only code + points < 256 are allowed; otherwise, all valid unicode code + points are allowed. + </p> <p>If <c><anno>InEncoding</anno></c> is <c>latin1</c>, parameter <c><anno>Data</anno></c> corresponds to the <c>iodata()</c> type, but for <c>unicode</c>, parameter <c><anno>Data</anno></c> can diff --git a/lib/stdlib/doc/src/user_guide.gif b/lib/stdlib/doc/src/user_guide.gif Binary files differdeleted file mode 100644 index e6275a803d..0000000000 --- a/lib/stdlib/doc/src/user_guide.gif +++ /dev/null diff --git a/lib/stdlib/src/ets.erl b/lib/stdlib/src/ets.erl index b5d3cd3c8d..1db004c91e 100644 --- a/lib/stdlib/src/ets.erl +++ b/lib/stdlib/src/ets.erl @@ -1700,6 +1700,8 @@ choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) -> io:format("~ts\n", [ErrorString]), choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end; + eof -> + ok; _ -> choice(Height, Width, P, Mode, Tab, Key, Turn, Opos) end. diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 9bf4290916..ee807dfd09 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -439,6 +439,10 @@ join(Name1, Name2) when is_atom(Name2) -> join1([UcLetter, $:|Rest], RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1([$\\,$\\|Rest], RelativeName, [], win32) -> + join1([$/,$/|Rest], RelativeName, [], win32); +join1([$/,$/|Rest], RelativeName, [], win32) -> + join1(Rest, RelativeName, [$/,$/], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> @@ -467,6 +471,10 @@ join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1b(<<UcLetter, $:, Rest/binary>>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,$\\,Rest/binary>>, RelativeName, [], win32) -> + join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32); +join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32) -> + join1b(Rest, RelativeName, [$/,$/], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> @@ -477,6 +485,8 @@ join1b(<<>>, <<>>, Result, OsType) -> list_to_binary(maybe_remove_dirsep(Result, OsType)); join1b(<<>>, RelativeName, [$:|Rest], win32) -> join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/,$/|Result], win32) -> + join1b(RelativeName, <<>>, [$/,$/|Result], win32); join1b(<<>>, RelativeName, [$/|Result], OsType) -> join1b(RelativeName, <<>>, [$/|Result], OsType); join1b(<<>>, RelativeName, [$., $/|Result], OsType) -> @@ -490,6 +500,8 @@ maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> [$/]; +maybe_remove_dirsep([$/,$/], win32) -> + [$/,$/]; maybe_remove_dirsep([$/|Name], _) -> lists:reverse(Name); maybe_remove_dirsep(Name, _) -> @@ -679,6 +691,9 @@ win32_splitb(<<Letter0,$:,Rest/binary>>) when ?IS_DRIVELETTER(Letter0) -> Letter = fix_driveletter(Letter0), L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<Letter,$:>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<<Slash,Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<"//">> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(<<Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<$/>> | [ X || X <- L, X =/= <<>> ]]; @@ -690,6 +705,8 @@ win32_splitb(Name) -> unix_split(Name) -> split(Name, [], unix). +win32_split([Slash,Slash|Rest]) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + split(Rest, [[$/,$/]], win32); win32_split([$\\|Rest]) -> win32_split([$/|Rest]); win32_split([X, $\\|Rest]) when is_integer(X) -> diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index 4972da297d..ab041ff53c 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -74,19 +74,21 @@ -export([to_upper/1, to_lower/1]). %% -import(lists,[member/2]). - -compile({no_auto_import,[length/1]}). +-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}). +-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r). -export_type([grapheme_cluster/0]). -type grapheme_cluster() :: char() | [char()]. -type direction() :: 'leading' | 'trailing'. --dialyzer({no_improper_lists, stack/2}). +-dialyzer({no_improper_lists, [stack/2, length_b/3]}). %%% BIFs internal (not documented) should not to be used outside of this module %%% May be removed -export([list_to_float/1, list_to_integer/1]). + %% Uses bifs: string:list_to_float/1 and string:list_to_integer/1 -spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when String :: string(), @@ -117,8 +119,10 @@ is_empty(_) -> false. %% Count the number of grapheme clusters in chardata -spec length(String::unicode:chardata()) -> non_neg_integer(). +length(<<CP1/utf8, Bin/binary>>) -> + length_b(Bin, CP1, 0); length(CD) -> - length_1(unicode_util:gc(CD), 0). + length_1(CD, 0). %% Convert a string to a list of grapheme clusters -spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()]. @@ -166,6 +170,8 @@ equal(A, B, true, Norm) -> %% Reverse grapheme clusters -spec reverse(String::unicode:chardata()) -> [grapheme_cluster()]. +reverse(<<CP1/utf8, Rest/binary>>) -> + reverse_b(Rest, CP1, []); reverse(CD) -> reverse_1(CD, []). @@ -176,7 +182,10 @@ reverse(CD) -> Start :: non_neg_integer(), Slice :: unicode:chardata(). slice(CD, N) when is_integer(N), N >= 0 -> - slice_l(CD, N, is_binary(CD)). + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + Res -> Res + end. -spec slice(String, Start, Length) -> Slice when String::unicode:chardata(), @@ -185,9 +194,15 @@ slice(CD, N) when is_integer(N), N >= 0 -> Slice :: unicode:chardata(). slice(CD, N, Length) when is_integer(N), N >= 0, is_integer(Length), Length > 0 -> - slice_trail(slice_l(CD, N, is_binary(CD)), Length); + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + L -> slice_trail(L, Length) + end; slice(CD, N, infinity) -> - slice_l(CD, N, is_binary(CD)); + case slice_l0(CD, N) of + [] when is_binary(CD) -> <<>>; + Res -> Res + end; slice(CD, _, 0) -> case is_binary(CD) of true -> <<>>; @@ -246,18 +261,22 @@ trim(Str, Dir) -> Dir :: direction() | 'both', Characters :: [grapheme_cluster()]. trim(Str, _, []) -> Str; +trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 -> + trim_ls(Str, Sep); trim(Str, leading, Sep) when is_list(Sep) -> - trim_l(Str, search_pattern(Sep)); -trim(Str, trailing, Sep) when is_list(Sep) -> - trim_t(Str, 0, search_pattern(Sep)); -trim(Str, both, Sep0) when is_list(Sep0) -> - Sep = search_pattern(Sep0), - trim_t(trim_l(Str,Sep), 0, Sep). + trim_l(Str, Sep); +trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 -> + trim_ts(Str, Sep); +trim(Str, trailing, Seps0) when is_list(Seps0) -> + Seps = search_pattern(Seps0), + trim_t(Str, 0, Seps); +trim(Str, both, Sep) when is_list(Sep) -> + trim(trim(Str,leading,Sep), trailing, Sep). %% Delete trailing newlines or \r\n -spec chomp(String::unicode:chardata()) -> unicode:chardata(). chomp(Str) -> - trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}). + trim(Str, trailing, [[$\r,$\n],$\n]). %% Split String into two parts where the leading part consists of Characters -spec take(String, Characters) -> {Leading, Trailing} when @@ -290,8 +309,7 @@ take(Str, [], Complement, Dir) -> {true, leading} -> {Str, Empty}; {true, trailing} -> {Empty, Str} end; -take(Str, Sep0, false, leading) -> - Sep = search_pattern(Sep0), +take(Str, Sep, false, leading) -> take_l(Str, Sep, []); take(Str, Sep0, true, leading) -> Sep = search_pattern(Sep0), @@ -451,6 +469,7 @@ replace(String, SearchPattern, Replacement, Where) -> SeparatorList::[grapheme_cluster()]) -> [unicode:chardata()]. lexemes([], _) -> []; +lexemes(Str, []) -> [Str]; lexemes(Str, Seps0) when is_list(Seps0) -> Seps = search_pattern(Seps0), lexemes_m(Str, Seps, []). @@ -484,13 +503,13 @@ find(String, SearchPattern, leading) -> find(String, SearchPattern, trailing) -> find_r(String, unicode:characters_to_list(SearchPattern), nomatch). -%% Fetch first codepoint and return rest in tail +%% Fetch first grapheme cluster and return rest in tail -spec next_grapheme(String::unicode:chardata()) -> maybe_improper_list(grapheme_cluster(),unicode:chardata()) | {error,unicode:chardata()}. next_grapheme(CD) -> unicode_util:gc(CD). -%% Fetch first grapheme cluster and return rest in tail +%% Fetch first codepoint and return rest in tail -spec next_codepoint(String::unicode:chardata()) -> maybe_improper_list(char(),unicode:chardata()) | {error,unicode:chardata()}. @@ -498,10 +517,23 @@ next_codepoint(CD) -> unicode_util:cp(CD). %% Internals -length_1([_|Rest], N) -> - length_1(unicode_util:gc(Rest), N+1); -length_1([], N) -> - N. +length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) -> + length_1(Cont, N+1); +length_1(Str, N) -> + case unicode_util:gc(Str) of + [] -> N; + [_|Rest] -> length_1(Rest, N+1) + end. + +length_b(<<CP2/utf8, Rest/binary>>, CP1, N) + when ?ASCII_LIST(CP1,CP2) -> + length_b(Rest, CP2, N+1); +length_b(Bin0, CP1, N) -> + [_|Bin1] = unicode_util:gc([CP1|Bin0]), + case unicode_util:cp(Bin1) of + [] -> N+1; + [CP3|Bin] -> length_b(Bin, CP3, N+1) + end. equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) -> A =:= B andalso equal_1(AR, BR); @@ -540,29 +572,66 @@ equal_norm_nocase(A0, B0, Norm) -> {L1,L2} when is_list(L1), is_list(L2) -> false end. +reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) -> + reverse_1(Cont, [CP1|Acc]); reverse_1(CD, Acc) -> case unicode_util:gc(CD) of [GC|Rest] -> reverse_1(Rest, [GC|Acc]); [] -> Acc end. -slice_l(CD, N, Binary) when N > 0 -> +reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc) + when ?ASCII_LIST(CP1,CP2) -> + reverse_b(Rest, CP2, [CP1|Acc]); +reverse_b(Bin0, CP1, Acc) -> + [GC|Bin1] = unicode_util:gc([CP1|Bin0]), + case unicode_util:cp(Bin1) of + [] -> [GC|Acc]; + [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc]) + end. + +slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 -> + slice_lb(Bin, CP1, N); +slice_l0(L, N) -> + slice_l(L, N). + +slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> + slice_l(Cont, N-1); +slice_l(CD, N) when N > 0 -> case unicode_util:gc(CD) of - [_|Cont] -> slice_l(Cont, N-1, Binary); - [] when Binary -> <<>>; + [_|Cont] -> slice_l(Cont, N-1); [] -> [] end; -slice_l(Cont, 0, Binary) -> - case is_empty(Cont) of - true when Binary -> <<>>; - _ -> Cont +slice_l(Cont, 0) -> + Cont. + +slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 -> + slice_lb(Bin, CP2, N-1); +slice_lb(Bin, CP1, N) -> + [_|Rest] = unicode_util:gc([CP1|Bin]), + if N > 1 -> + case unicode_util:cp(Rest) of + [CP2|Cont] -> slice_lb(Cont, CP2, N-1); + [] -> <<>> + end; + N =:= 1 -> + Rest end. +slice_trail(Orig, N) when is_binary(Orig) -> + case Orig of + <<CP1/utf8, Bin/binary>> when N > 0 -> + Length = slice_bin(Bin, CP1, N), + Sz = byte_size(Orig) - Length, + <<Keep:Sz/binary, _/binary>> = Orig, + Keep; + _ -> <<>> + end; slice_trail(CD, N) when is_list(CD) -> - slice_list(CD, N); -slice_trail(CD, N) when is_binary(CD) -> - slice_bin(CD, N, CD). + slice_list(CD, N). +slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> + [CP1|slice_list(Cont, N-1)]; slice_list(CD, N) when N > 0 -> case unicode_util:gc(CD) of [GC|Cont] -> append(GC, slice_list(Cont, N-1)); @@ -571,17 +640,16 @@ slice_list(CD, N) when N > 0 -> slice_list(_, 0) -> []. -slice_bin(CD, N, Orig) when N > 0 -> - case unicode_util:gc(CD) of - [_|Cont] -> slice_bin(Cont, N-1, Orig); - [] -> Orig +slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 -> + slice_bin(Bin, CP2, N-1); +slice_bin(CD, CP1, N) when N > 0 -> + [_|Bin] = unicode_util:gc([CP1|CD]), + case unicode_util:cp(Bin) of + [CP2|Cont] -> slice_bin(Cont, CP2, N-1); + [] -> 0 end; -slice_bin([], 0, Orig) -> - Orig; -slice_bin(CD, 0, Orig) -> - Sz = byte_size(Orig) - byte_size(CD), - <<Keep:Sz/binary, _/binary>> = Orig, - Keep. +slice_bin(CD, CP1, 0) -> + byte_size(CD)+byte_size(<<CP1/utf8>>). uppercase_list(CPs0) -> case unicode_util:uppercase(CPs0) of @@ -631,16 +699,31 @@ casefold_bin(CPs0, Acc) -> [] -> Acc end. - +%% Fast path for ascii searching for one character in lists +trim_ls([CP1|[CP2|_]=Cont]=Str, Sep) + when ?ASCII_LIST(CP1,CP2) -> + case Sep of + CP1 -> trim_ls(Cont, Sep); + _ -> Str + end; +trim_ls(Str, Sep) -> + trim_l(Str, [Sep]). + +trim_l([CP1|[CP2|_]=Cont]=Str, Sep) + when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, Sep) of + true -> trim_l(Cont, Sep); + false -> Str + end; trim_l([Bin|Cont0], Sep) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, Sep) of {nomatch, Cont} -> trim_l(Cont, Sep); Keep -> Keep end; -trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) -> +trim_l(Str, Sep) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> - case lists:member(C, GCs) of + case lists:member(C, Sep) of true -> trim_l(Cs, Sep); false -> Str end; @@ -652,15 +735,51 @@ trim_l(Bin, Sep) when is_binary(Bin) -> [Keep] -> Keep end. -trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> +%% Fast path for ascii searching for one character in lists +trim_ts([Sep|Cs1]=Str, Sep) -> + case Cs1 of + [] -> []; + [CP2|_] when ?ASCII_LIST(Sep,CP2) -> + Tail = trim_ts(Cs1, Sep), + case is_empty(Tail) of + true -> []; + false -> [Sep|Tail] + end; + _ -> + trim_t(Str, 0, search_pattern([Sep])) + end; +trim_ts([CP|Cont],Sep) when is_integer(CP) -> + [CP|trim_ts(Cont, Sep)]; +trim_ts(Str, Sep) -> + trim_t(Str, 0, search_pattern([Sep])). + +trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Cs1] = unicode_util:gc(Cs0), + case lists:member(GC, GCs) of + true -> + Tail = trim_t(Cs1, 0, Seps), + case is_empty(Tail) of + true -> []; + false -> append(GC,Tail) + end; + false -> + append(GC,trim_t(Cs1, 0, Seps)) + end; + false -> + [CP1|trim_t(Cont, 0, Seps)] + end; +trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Cont0, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, Cont0, Seps) of {nomatch,_} -> - stack(Bin, trim_t(Cont0, 0, Sep)); + stack(Bin, trim_t(Cont0, 0, Seps)); [SepStart|Cont1] -> - case bin_search_inv(SepStart, Cont1, Sep) of + case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> - Tail = trim_t(Cont, 0, Sep), + Tail = trim_t(Cont, 0, Seps), case is_empty(Tail) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), @@ -672,67 +791,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - trim_t([Bin|Cont], KeepSz, Sep) + trim_t([Bin|Cont], KeepSz, Seps) end end; -trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of +trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - true -> - Tail = trim_t(Cs1, 0, Sep), - case is_empty(Tail) of - true -> []; - false -> append(GC,Tail) - end; - false -> - append(GC,trim_t(Cs1, 0, Sep)) + Tail = trim_t(Cs1, 0, Seps), + case is_empty(Tail) of + true -> []; + false -> append(GC,Tail) end; false -> - append(CP,trim_t(Cs, 0, Sep)) + append(GC,trim_t(Cs1, 0, Seps)) end; [] -> [] end; -trim_t(Bin, N, Sep) when is_binary(Bin) -> +trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, [], Seps) of {nomatch,_} -> Bin; [SepStart] -> - case bin_search_inv(SepStart, [], Sep) of + case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, _/binary>> = Bin, Keep; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - trim_t(Bin, KeepSz, Sep) + trim_t(Bin, KeepSz, Seps) end end. -take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Sep) of + +take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc) + when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, Seps) of + true -> take_l(Cont, Seps, [CP1|Acc]); + false -> {rev(Acc), Str} + end; +take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), - take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]); + take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <<Keep:First/binary, _/binary>> = Bin, {btoken(Keep,Acc), After} end; -take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) -> +take_l(Str, Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> - case lists:member(C, GCs) of - true -> take_l(Cs, Sep, append(rev(C),Acc)); + case lists:member(C, Seps) of + true -> take_l(Cs, Seps, append(rev(C),Acc)); false -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; -take_l(Bin, Sep, Acc) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Sep) of +take_l(Bin, Seps, Acc) when is_binary(Bin) -> + case bin_search_inv(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> @@ -741,27 +862,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) -> {btoken(Keep, Acc), After} end. -take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) -> - case bin_search(Bin, Cont0, Sep) of + +take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Str] = unicode_util:gc(Str0), + case lists:member(GC, GCs) of + false -> take_lc(Str, Seps, append(rev(GC),Acc)); + true -> {rev(Acc), Str0} + end; + false -> + take_lc(Cont, Seps, append(CP1,Acc)) + end; +take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), - take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]); + take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <<Keep:First/binary, _/binary>> = Bin, {btoken(Keep,Acc), After} end; -take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) -> +take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> case lists:member(C, GCs) of - false -> take_lc(Cs, Sep, append(rev(C),Acc)); + false -> take_lc(Cs, Seps, append(rev(C),Acc)); true -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; -take_lc(Bin, Sep, Acc) when is_binary(Bin) -> - case bin_search(Bin, [], Sep) of +take_lc(Bin, Seps0, Acc) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> @@ -770,148 +905,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) -> {btoken(Keep, Acc), After} end. -take_t([Bin|Cont0], N, Sep) when is_binary(Bin) -> + +take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> + case lists:member(CP1, CPs) of + true -> + [GC|Str] = unicode_util:gc(Str0), + case lists:member(GC, GCs) of + true -> + {Head, Tail} = take_t(Str, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} + end; + false -> + {Head, Tail} = take_t(Str, 0, Seps), + {append(GC,Head), Tail} + end; + false -> + {Head, Tail} = take_t(Cont, 0, Seps), + {[CP1|Head], Tail} + end; +take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Cont0, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, Cont0, Seps) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), - {Head, Tail} = take_t(Cont, 0, Sep), + {Head, Tail} = take_t(Cont, 0, Seps), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> - case bin_search_inv(SepStart, Cont1, Sep) of + case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> - {Head, Tail} = take_t(Cont, 0, Sep), + {Head, Tail} = take_t(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), - case equal(Tail, Cont) of + case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, End/binary>> = Bin, - {stack(Keep,Head), stack(stack(End,Used),Tail)}; + {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_t([Bin|Cont], KeepSz, Sep) + take_t([Bin|Cont], KeepSz, Seps) end end; -take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of +take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - true -> - {Head, Tail} = take_t(Cs1, 0, Sep), - case equal(Tail, Cs1) of - true -> {Head, append(GC,Tail)}; - false -> {append(GC,Head), Tail} - end; - false -> - {Head, Tail} = take_t(Cs, 0, Sep), - {append(CP,Head), Tail} + {Head, Tail} = take_t(Cs1, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} end; false -> - {Head, Tail} = take_t(Cs, 0, Sep), - {append(CP,Head), Tail} + {Head, Tail} = take_t(Cs1, 0, Seps), + {append(GC,Head), Tail} end; [] -> {[],[]} end; -take_t(Bin, N, Sep) when is_binary(Bin) -> +take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search(Rest, Sep) of + Seps = search_compile(Seps0), + case bin_search(Rest, [], Seps) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> - case bin_search_inv(SepStart, [], Sep) of + case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Before:KeepSz/binary, End/binary>> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_t(Bin, KeepSz, Sep) + take_t(Bin, KeepSz, Seps) end end. -take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) -> +take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) -> + case lists:member(CP1, GCs) of + false -> + {Head, Tail} = take_tc(Cont, 0, Seps), + case is_empty(Head) of + true -> {Head, append(CP1,Tail)}; + false -> {append(CP1,Head), Tail} + end; + true -> + {Head, Tail} = take_tc(Cont, 0, Seps), + {append(CP1,Head), Tail} + end; +take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search_inv(Rest, Cont0, Sep) of + case bin_search_inv(Rest, Cont0, GCs) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), - {Head, Tail} = take_tc(Cont, 0, Sep), + {Head, Tail} = take_tc(Cont, 0, Seps0), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> - case bin_search(SepStart, Cont1, Sep) of + Seps = search_compile(Seps0), + case bin_search(SepStart, Cont1, Seps) of {nomatch, Cont} -> - {Head, Tail} = take_tc(Cont, 0, Sep), + {Head, Tail} = take_tc(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), - case equal(Tail, Cont) of + case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Keep:KeepSz/binary, End/binary>> = Bin, - {stack(Keep,Head), stack(stack(End,Used),Tail)}; + {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_tc([Bin|Cont], KeepSz, Sep) + take_tc([Bin|Cont], KeepSz, Seps) end end; -take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) -> - case unicode_util:cp(Str) of - [CP|Cs] -> - case lists:member(CP, CPs) of - true -> - [GC|Cs1] = unicode_util:gc(Str), - case lists:member(GC, GCs) of - false -> - {Head, Tail} = take_tc(Cs1, 0, Sep), - case equal(Tail, Cs1) of - true -> {Head, append(GC,Tail)}; - false -> {append(GC,Head), Tail} - end; - true -> - {Head, Tail} = take_tc(Cs1, 0, Sep), - {append(GC,Head), Tail} - end; +take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> + case unicode_util:gc(Str) of + [GC|Cs1] -> + case lists:member(GC, GCs) of false -> - {Head, Tail} = take_tc(Cs, 0, Sep), - case equal(Tail, Cs) of - true -> {Head, append(CP,Tail)}; - false -> {append(CP,Head), Tail} - end + {Head, Tail} = take_tc(Cs1, 0, Seps), + case is_empty(Head) of + true -> {Head, append(GC,Tail)}; + false -> {append(GC,Head), Tail} + end; + true -> + {Head, Tail} = take_tc(Cs1, 0, Seps), + {append(GC,Head), Tail} end; [] -> {[],[]} end; -take_tc(Bin, N, Sep) when is_binary(Bin) -> +take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, - case bin_search_inv(Rest, [], Sep) of + case bin_search_inv(Rest, [], GCs) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> - case bin_search(SepStart, [], Sep) of + Seps = search_compile(Seps0), + case bin_search(SepStart, [], Seps) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <<Before:KeepSz/binary, End/binary>> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), - take_tc(Bin, KeepSz, Sep) + take_tc(Bin, KeepSz, Seps) end end. -prefix_1(Cs, []) -> Cs; -prefix_1(Cs, [_]=Pre) -> - prefix_2(unicode_util:gc(Cs), Pre); -prefix_1(Cs, Pre) -> - prefix_2(unicode_util:cp(Cs), Pre). - -prefix_2([C|Cs], [C|Pre]) -> - prefix_1(Cs, Pre); -prefix_2(_, _) -> - nomatch. +prefix_1(Cs0, [GC]) -> + case unicode_util:gc(Cs0) of + [GC|Cs] -> Cs; + _ -> nomatch + end; +prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) -> + case CP =:= Pre of + true -> prefix_1(Cs,PreR); + false -> nomatch + end; +prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) -> + case CP =:= Pre of + true -> prefix_1(Cs,PreR); + false -> nomatch + end; +prefix_1(Cs0, [Pre|PreR]) -> + case unicode_util:cp(Cs0) of + [Pre|Cs] -> prefix_1(Cs,PreR); + _ -> nomatch + end. +split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) -> + case CP1=:=C of + true -> + case prefix_1(Cs0, Needle) of + nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc); + Rest when Where =:= leading -> + [rev(Curr), Rest]; + Rest when Where =:= trailing -> + split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]); + Rest when Where =:= all -> + split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc]) + end; + false -> + split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc) + end; split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc) when is_binary(Bin) -> case bin_search_str(Bin, Start, Cont0, Needle) of @@ -971,32 +1150,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) -> end end. -lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Seps) of +lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) -> + case lists:member(CP, CPs) of + true -> + [GC|Cs2] = unicode_util:gc(Cs0), + case lists:member(GC, GCs) of + true -> + lexemes_m(Cs2, Seps, Ts); + false -> + {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), + lexemes_m(Rest, Seps, [Lexeme|Ts]) + end; + false -> + {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), + lexemes_m(Rest, Seps, [Lexeme|Ts]) + end; +lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> - lexemes_m(Cont, Seps, Ts); + lexemes_m(Cont, Seps0, Ts); Cs -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; -lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) -> +lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> - lexemes_m(Cs, Seps, Ts); + lexemes_m(Cs, Seps0, Ts); false -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; [] -> lists:reverse(Ts) end; -lexemes_m(Bin, Seps, Ts) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Seps) of +lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> + case bin_search_inv(Bin, [], GCs) of {nomatch,_} -> lists:reverse(Ts); [Cs] -> + Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts)) end. @@ -1027,7 +1224,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> {rev(Tkn), Cs0}; + true -> {rev(Tkn), Cs2}; false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn)) end; false -> @@ -1037,7 +1234,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) -> {rev(Tkn), []} end; lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) -> - case bin_search(Bin, Seps) of + case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin,Tkn), []}; [Left] -> @@ -1046,35 +1243,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) -> {btoken(Lexeme, Tkn), Left} end. -nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) -> - case bin_search_inv(Bin, Cont0, Seps) of +nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) -> + case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> - nth_lexeme_m(Cont, Seps, N); + nth_lexeme_m(Cont, Seps0, N); Cs when N > 1 -> - Rest = lexeme_skip(Cs, Seps), - nth_lexeme_m(Rest, Seps, N-1); + Rest = lexeme_skip(Cs, Seps0), + nth_lexeme_m(Rest, Seps0, N-1); Cs -> + Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs, Seps, []), Lexeme end; -nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) -> +nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> - nth_lexeme_m(Cs, Seps, N); + nth_lexeme_m(Cs, Seps0, N); false when N > 1 -> - Cs1 = lexeme_skip(Cs, Seps), - nth_lexeme_m(Cs1, Seps, N-1); + Cs1 = lexeme_skip(Cs, Seps0), + nth_lexeme_m(Cs1, Seps0, N-1); false -> + Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs0, Seps, []), Lexeme end; [] -> [] end; -nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) -> - case bin_search_inv(Bin, [], Seps) of +nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search_inv(Bin, [], GCs) of [Cs] when N > 1 -> Cs1 = lexeme_skip(Cs, Seps), nth_lexeme_m(Cs1, Seps, N-1); @@ -1090,16 +1290,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> Cs0; + true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> lexeme_skip(Cs1, Seps) end; -lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) -> +lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) -> + Seps = search_compile(Seps0), case bin_search(Bin, Cont0, Seps) of {nomatch,_} -> lexeme_skip(Cont0, Seps); - Cs -> Cs + Cs -> tl(unicode_util:gc(Cs)) end; lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> case unicode_util:cp(Cs0) of @@ -1108,7 +1309,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of - true -> Cs0; + true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> @@ -1117,12 +1318,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> [] -> [] end; -lexeme_skip(Bin, Seps) when is_binary(Bin) -> - case bin_search(Bin, Seps) of +lexeme_skip(Bin, Seps0) when is_binary(Bin) -> + Seps = search_compile(Seps0), + case bin_search(Bin, [], Seps) of {nomatch,_} -> <<>>; - [Left] -> Left + [Left] -> tl(unicode_util:gc(Left)) end. +find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) -> + case C1 of + C -> + case prefix_1(Cs0, Needle) of + nomatch -> find_l(Cs, Needle); + _ -> Cs0 + end; + _ -> + find_l(Cs, Needle) + end; find_l([Bin|Cont0], Needle) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch, _, Cont} -> @@ -1147,6 +1359,16 @@ find_l(Bin, Needle) -> {_Before, [Cs], _After} -> Cs end. +find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) -> + case Cp of + C -> + case prefix_1(Cs0, Needle) of + nomatch -> find_r(Cs, Needle, Res); + _ -> find_r(Cs, Needle, Cs0) + end; + _ -> + find_r(Cs, Needle, Res) + end; find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch,_,Cont} -> @@ -1217,11 +1439,6 @@ cp_prefix_1(Orig, Until, Cont) -> %% Binary special -bin_search(Bin, Seps) -> - bin_search(Bin, [], Seps). - -bin_search(_Bin, Cont, {[],_,_}) -> - {nomatch, Cont}; bin_search(Bin, Cont, {Seps,_,BP}) -> bin_search_loop(Bin, 0, BP, Cont, Seps). @@ -1229,10 +1446,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) -> %% i.e. å in nfd form $a "COMBINING RING ABOVE" %% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>> %% combined with other characters are currently ignored. +search_pattern({_,_,_}=P) -> P; search_pattern(Seps) -> CPs = search_cp(Seps), - Bin = bin_pattern(CPs), - {Seps, CPs, Bin}. + {Seps, CPs, undefined}. + +search_compile({Sep, CPs, undefined}) -> + {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))}; +search_compile({_,_,_}=Compiled) -> Compiled. search_cp([CP|Seps]) when is_integer(CP) -> [CP|search_cp(Seps)]; @@ -1253,9 +1474,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) -> case binary:match(Bin, BinSeps) of nomatch -> {nomatch,Cont}; + {Where, _CL} when Cont =:= [] -> + <<_:Where/binary, Cont1/binary>> = Bin, + [GC|Cont2] = unicode_util:gc(Cont1), + case lists:member(GC, Seps) of + false when Cont2 =:= [] -> + {nomatch, []}; + false -> + Next = byte_size(Bin0) - byte_size(Cont2), + bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); + true -> + [Cont1] + end; {Where, _CL} -> <<_:Where/binary, Cont0/binary>> = Bin, - Cont1 = stack(Cont0, Cont), + Cont1 = [Cont0|Cont], [GC|Cont2] = unicode_util:gc(Cont1), case lists:member(GC, Seps) of false -> @@ -1263,55 +1496,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) -> [BinR|Cont] when is_binary(BinR) -> Next = byte_size(Bin0) - byte_size(BinR), bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); - BinR when is_binary(BinR), Cont =:= [] -> - Next = byte_size(Bin0) - byte_size(BinR), - bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); _ -> {nomatch, Cont2} end; - true when is_list(Cont1) -> - Cont1; true -> - [Cont1] + Cont1 end end. -bin_search_inv(Bin, Cont, {[], _, _}) -> - [Bin|Cont]; -bin_search_inv(Bin, Cont, {[Sep], _, _}) -> - bin_search_inv_1([Bin|Cont], Sep); -bin_search_inv(Bin, Cont, {Seps, _, _}) -> - bin_search_inv_n([Bin|Cont], Seps). - -bin_search_inv_1([<<>>|CPs], _) -> - {nomatch, CPs}; -bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) -> - case unicode_util:gc(CPs) of - [Sep|Bin] when is_binary(Bin), Cont =:= [] -> - bin_search_inv_1([Bin], Sep); - [Sep|[Bin|Cont]=Cs] when is_binary(Bin) -> - bin_search_inv_1(Cs, Sep); - [Sep|Cs] -> - {nomatch, Cs}; - _ -> CPs - end. +bin_search_inv(<<>>, Cont, _) -> + {nomatch, Cont}; +bin_search_inv(Bin, Cont, [Sep]) -> + bin_search_inv_1(Bin, Cont, Sep); +bin_search_inv(Bin, Cont, Seps) -> + bin_search_inv_n(Bin, Cont, Seps). + +bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) -> + case BinRest of + <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) -> + case CP1 of + Sep -> bin_search_inv_1(BinRest, Cont, Sep); + _ -> [Bin0|Cont] + end; + _ when Cont =:= [] -> + case unicode_util:gc(Bin0) of + [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep); + _ -> [Bin0|Cont] + end; + _ -> + case unicode_util:gc([Bin0|Cont]) of + [Sep|[Bin|Cont]] when is_binary(Bin) -> + bin_search_inv_1(Bin, Cont, Sep); + [Sep|Cs] -> + {nomatch, Cs}; + _ -> [Bin0|Cont] + end + end; +bin_search_inv_1(<<>>, Cont, _Sep) -> + {nomatch, Cont}; +bin_search_inv_1([], Cont, _Sep) -> + {nomatch, Cont}. -bin_search_inv_n([<<>>|CPs], _) -> - {nomatch, CPs}; -bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) -> - [C|Cs0] = unicode_util:gc(CPs), - case {lists:member(C, Seps), Cs0} of - {true, Cs} when is_binary(Cs), Cont =:= [] -> - bin_search_inv_n([Cs], Seps); - {true, [Bin|Cont]=Cs} when is_binary(Bin) -> - bin_search_inv_n(Cs, Seps); - {true, Cs} -> {nomatch, Cs}; - {false, _} -> CPs - end. +bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) -> + case BinRest of + <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) -> + case lists:member(CP1,Seps) of + true -> bin_search_inv_n(BinRest, Cont, Seps); + false -> [Bin0|Cont] + end; + _ when Cont =:= [] -> + [GC|Bin] = unicode_util:gc(Bin0), + case lists:member(GC, Seps) of + true -> bin_search_inv_n(Bin, Cont, Seps); + false -> [Bin0|Cont] + end; + _ -> + [GC|Cs0] = unicode_util:gc([Bin0|Cont]), + case lists:member(GC, Seps) of + false -> [Bin0|Cont]; + true -> + case Cs0 of + [Bin|Cont] when is_binary(Bin) -> + bin_search_inv_n(Bin, Cont, Seps); + _ -> + {nomatch, Cs0} + end + end + end; +bin_search_inv_n(<<>>, Cont, _Sep) -> + {nomatch, Cont}; +bin_search_inv_n([], Cont, _Sep) -> + {nomatch, Cont}. + +bin_search_str(Bin0, Start, [], SearchCPs) -> + Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)), + bin_search_str_1(Bin0, Start, Compiled, SearchCPs); bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> + First = binary:compile_pattern(<<CP/utf8>>), + bin_search_str_2(Bin0, Start, Cont, First, SearchCPs). + +bin_search_str_1(Bin0, Start, First, SearchCPs) -> + <<_:Start/binary, Bin/binary>> = Bin0, + case binary:match(Bin, First) of + nomatch -> {nomatch, byte_size(Bin0), []}; + {Where0, _} -> + Where = Start+Where0, + <<Keep:Where/binary, Cs0/binary>> = Bin0, + case prefix_1(Cs0, SearchCPs) of + nomatch -> + <<_/utf8, Cs/binary>> = Cs0, + KeepSz = byte_size(Bin0) - byte_size(Cs), + bin_search_str_1(Bin0, KeepSz, First, SearchCPs); + [] -> + {Keep, [Cs0], <<>>}; + Rest -> + {Keep, [Cs0], Rest} + end + end. + +bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) -> <<_:Start/binary, Bin/binary>> = Bin0, - case binary:match(Bin, <<CP/utf8>>) of + case binary:match(Bin, First) of nomatch -> {nomatch, byte_size(Bin0), Cont}; {Where0, _} -> Where = Start+Where0, @@ -1320,7 +1606,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> case prefix_1(stack(Cs0,Cont), SearchCPs) of nomatch when is_binary(Cs) -> KeepSz = byte_size(Bin0) - byte_size(Cs), - bin_search_str(Bin0, KeepSz, Cont, SearchCPs); + bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs); nomatch -> {nomatch, Where, stack([GC|Cs],Cont)}; [] -> diff --git a/lib/stdlib/test/filename_SUITE.erl b/lib/stdlib/test/filename_SUITE.erl index fc77593bb8..f284eb1ed6 100644 --- a/lib/stdlib/test/filename_SUITE.erl +++ b/lib/stdlib/test/filename_SUITE.erl @@ -107,6 +107,17 @@ absname(Config) when is_list(Config) -> [Drive|":/erlang/src"] = filename:absname([Drive|":erlang/src"]), "a:/erlang" = filename:absname("a:erlang"), + "//foo" = filename:absname("//foo"), + "//foo/bar" = filename:absname("//foo/bar"), + "//foo/\bar" = filename:absname("//foo/\bar"), + "//foo/bar/baz" = filename:absname("//foo/bar\\baz"), + "//foo/bar/baz" = filename:absname("//foo\\bar/baz"), + "//foo" = filename:absname("\\\\foo"), + "//foo/bar" = filename:absname("\\\\foo/bar"), + "//foo/\bar" = filename:absname("\\\\foo/\bar"), + "//foo/bar/baz" = filename:absname("\\\\foo/bar\\baz"), + "//foo/bar/baz" = filename:absname("\\\\foo\\bar/baz"), + file:set_cwd(Cwd), ok; {unix, _} -> @@ -167,6 +178,23 @@ absname_2(Config) when is_list(Config) -> [Drive|":/"]), "a:/erlang" = filename:absname("a:erlang", [Drive|":/"]), + "//foo" = filename:absname("foo","//"), + "//foo/bar" = filename:absname("foo/bar", "//"), + "//foo/bar" = filename:absname("bar", "//foo"), + "//bar" = filename:absname("/bar", "//foo"), + "//foo/bar/baz" = filename:absname("bar/baz", "//foo"), + "//bar/baz" = filename:absname("//bar/baz", "//foo"), + "//\bar" = filename:absname("/\bar", "//foo"), + "//foo" = filename:absname("foo","\\\\"), + "//foo/bar" = filename:absname("foo/bar", "\\\\"), + "//foo/bar" = filename:absname("bar", "\\\\foo"), + "//bar" = filename:absname("/bar", "\\\\foo"), + "//foo/bar/baz" = filename:absname("bar/baz", "\\\\foo"), + "//bar/baz" = filename:absname("\\\\bar/baz", "\\\\foo"), + "//\bar" = filename:absname("/\bar", "\\\\foo"), + "//bar/baz" = filename:absname("\\\\bar/baz", "//foo"), + "//bar/baz" = filename:absname("//bar/baz", "\\\\foo"), + ok; _ -> "/usr/foo" = filename:absname(foo, "/usr"), @@ -244,6 +272,18 @@ dirname(Config) when is_list(Config) -> "A:usr" = filename:dirname("A:usr/foo.erl"), "/usr" = filename:dirname("\\usr\\foo.erl"), "/" = filename:dirname("\\usr"), + "//foo/bar" = filename:dirname("//foo/bar/baz.erl"), + "//foo/\bar" = filename:dirname("//foo/\bar/baz.erl"), + "//foo/bar" = filename:dirname("//foo\\bar/baz.erl"), + "//foo/bar" = filename:dirname("\\\\foo/bar/baz.erl"), + "//foo/\bar" = filename:dirname("\\\\foo/\bar/baz.erl"), + "//foo/bar" = filename:dirname("\\\\foo\\bar/baz.erl"), + "//foo" = filename:dirname("//foo/baz.erl"), + "//foo" = filename:dirname("//foo/\baz.erl"), + "//foo" = filename:dirname("//foo\\baz.erl"), + "//foo" = filename:dirname("\\\\foo/baz.erl"), + "//foo" = filename:dirname("\\\\foo/\baz.erl"), + "//foo" = filename:dirname("\\\\foo\\baz.erl"), "A:" = filename:dirname("A:"); _ -> true end, @@ -289,7 +329,6 @@ join(Config) when is_list(Config) -> %% join/1 and join/2 (OTP-12158) by using help function %% filename_join/2. "/" = filename:join(["/"]), - "/" = filename:join(["//"]), "usr/foo.erl" = filename_join("usr","foo.erl"), "/src/foo.erl" = filename_join(usr, "/src/foo.erl"), "/src/foo.erl" = filename_join("/src/",'foo.erl'), @@ -301,7 +340,6 @@ join(Config) when is_list(Config) -> "a/b/c/d/e/f/g" = filename_join("a//b/c/", "d//e/f/g"), "a/b/c/d/e/f/g" = filename_join("a//b/c", "d//e/f/g"), "/d/e/f/g" = filename_join("a//b/c", "/d//e/f/g"), - "/d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), "foo/bar" = filename_join([$f,$o,$o,$/,[]], "bar"), @@ -332,6 +370,7 @@ join(Config) when is_list(Config) -> case os:type() of {win32, _} -> + "//" = filename:join(["//"]), "d:/" = filename:join(["D:/"]), "d:/" = filename:join(["D:\\"]), "d:/abc" = filename_join("D:/", "abc"), @@ -345,8 +384,35 @@ join(Config) when is_list(Config) -> "c:/usr/foo.erl" = filename:join(["A:","C:/usr","foo.erl"]), "c:usr/foo.erl" = filename:join(["A:","C:usr","foo.erl"]), "d:/foo" = filename:join([$D, $:, $/, []], "foo"), + "//" = filename:join("\\\\", ""), + "//foo" = filename:join("\\\\", "foo"), + "//foo/bar" = filename:join("\\\\", "foo\\\\bar"), + "//foo/bar/baz" = filename:join("\\\\foo", "bar\\\\baz"), + "//foo/bar/baz" = filename:join("\\\\foo", "bar\\baz"), + "//foo/bar/baz" = filename:join("\\\\foo\\bar", baz), + "//foo/\bar/baz" = filename:join("\\\\foo/\bar", baz), + "//foo/bar/baz" = filename:join("\\\\foo/bar", baz), + "//bar/baz" = filename:join("\\\\foo", "\\\\bar\\baz"), + "//bar/baz" = filename:join("\\\\foo", "//bar\\baz"), + "//bar/baz" = filename:join("\\\\foo", "//bar/baz"), + "//bar/baz" = filename:join("\\\\foo", "\\\\bar/baz"), + "//d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), + "//" = filename:join("//", ""), + "//foo" = filename:join("//", "foo"), + "//foo/bar" = filename:join("//", "foo\\\\bar"), + "//foo/bar/baz" = filename:join("//foo", "bar\\\\baz"), + "//foo/bar/baz" = filename:join("//foo", "bar\\baz"), + "//foo/bar/baz" = filename:join("//foo\\bar", baz), + "//foo/\bar/baz" = filename:join("//foo/\bar", baz), + "//foo/bar/baz" = filename:join("//foo/bar", baz), + "//bar/baz" = filename:join("//foo", "\\\\bar\\baz"), + "//bar/baz" = filename:join("//foo", "//bar\\baz"), + "//bar/baz" = filename:join("//foo", "//bar/baz"), + "//bar/baz" = filename:join("//foo", "\\\\bar/baz"), ok; _ -> + "/" = filename:join(["//"]), + "/d/e/f/g" = filename:join("a//b/c", "//d//e/f/g"), ok end. @@ -402,6 +468,16 @@ split(Config) when is_list(Config) -> filename:split("a:\\msdev\\include"), ["a:","msdev","include"] = filename:split("a:msdev\\include"), + ["//","foo"] = + filename:split("\\\\foo"), + ["//","foo"] = + filename:split("//foo"), + ["//","foo","bar"] = + filename:split("\\\\foo\\\\bar"), + ["//","foo","baz"] = + filename:split("\\\\foo\\baz"), + ["//","foo","baz"] = + filename:split("//foo\\baz"), ok; _ -> ok @@ -630,7 +706,6 @@ extension_bin(Config) when is_list(Config) -> join_bin(Config) when is_list(Config) -> <<"/">> = filename:join([<<"/">>]), - <<"/">> = filename:join([<<"//">>]), <<"usr/foo.erl">> = filename:join(<<"usr">>,<<"foo.erl">>), <<"/src/foo.erl">> = filename:join(usr, <<"/src/foo.erl">>), <<"/src/foo.erl">> = filename:join([<<"/src/">>,'foo.erl']), @@ -642,7 +717,6 @@ join_bin(Config) when is_list(Config) -> <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/">>, <<"d//e/f/g">>]), <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"d//e/f/g">>]), <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"/d//e/f/g">>]), - <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), <<"foo/bar">> = filename:join([$f,$o,$o,$/,[]], <<"bar">>), @@ -695,6 +769,7 @@ join_bin(Config) when is_list(Config) -> case os:type() of {win32, _} -> + <<"//">> = filename:join([<<"//">>]), <<"d:/">> = filename:join([<<"D:/">>]), <<"d:/">> = filename:join([<<"D:\\">>]), <<"d:/abc">> = filename:join([<<"D:/">>, <<"abc">>]), @@ -708,8 +783,35 @@ join_bin(Config) when is_list(Config) -> <<"c:/usr/foo.erl">> = filename:join([<<"A:">>,<<"C:/usr">>,<<"foo.erl">>]), <<"c:usr/foo.erl">> = filename:join([<<"A:">>,<<"C:usr">>,<<"foo.erl">>]), <<"d:/foo">> = filename:join([$D, $:, $/, []], <<"foo">>), + <<"//">> = filename:join(<<"\\\\">>, <<"">>), + <<"//foo">> = filename:join(<<"\\\\">>, <<"foo">>), + <<"//foo/bar">> = filename:join(<<"\\\\">>, <<"foo\\\\bar">>), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo">>, <<"bar\\\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar\\baz">>), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo\\bar">>, baz), + <<"//foo/\bar/baz">> = filename:join(<<"\\\\foo/\bar">>, baz), + <<"//foo/bar/baz">> = filename:join(<<"\\\\foo/bar">>, baz), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"//bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"//bar/baz">>), + <<"//bar/baz">> = filename:join(<<"\\\\foo">>, <<"\\\\bar/baz">>), + <<"//d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), + <<"//">> = filename:join(<<"//">>, <<"">>), + <<"//foo">> = filename:join(<<"//">>, <<"foo">>), + <<"//foo/bar">> = filename:join(<<"//">>, <<"foo\\\\bar">>), + <<"//foo/bar/baz">> = filename:join(<<"//foo">>, <<"bar\\\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar\\baz">>), + <<"//foo/bar/baz">> = filename:join(<<"//foo\\bar">>, baz), + <<"//foo/\bar/baz">> = filename:join(<<"//foo/\bar">>, baz), + <<"//foo/bar/baz">> = filename:join(<<"//foo/bar">>, baz), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"//bar\\baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"//bar/baz">>), + <<"//bar/baz">> = filename:join(<<"//foo">>, <<"\\\\bar/baz">>), ok; _ -> + <<"/">> = filename:join([<<"//">>]), + <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), ok end. @@ -756,6 +858,16 @@ split_bin(Config) when is_list(Config) -> filename:split(<<"a:\\msdev\\include">>), [<<"a:">>,<<"msdev">>,<<"include">>] = filename:split(<<"a:msdev\\include">>), + [<<"//">>,<<"foo">>] = + filename:split(<<"\\\\foo">>), + [<<"//">>,<<"foo">>] = + filename:split(<<"//foo">>), + [<<"//">>,<<"foo">>,<<"bar">>] = + filename:split(<<"\\\\foo\\\\bar">>), + [<<"//">>,<<"foo">>,<<"baz">>] = + filename:split(<<"\\\\foo\\baz">>), + [<<"//">>,<<"foo">>,<<"baz">>] = + filename:split(<<"//foo\\baz">>), ok; _ -> ok diff --git a/lib/stdlib/test/qlc_SUITE.erl b/lib/stdlib/test/qlc_SUITE.erl index 5e9e03e410..949142ec77 100644 --- a/lib/stdlib/test/qlc_SUITE.erl +++ b/lib/stdlib/test/qlc_SUITE.erl @@ -7871,7 +7871,7 @@ run_test(Config, Extra, {cres, Body, Opts, ExpectedCompileReturn}) -> {module, _} = code:load_abs(AbsFile, Mod), Ms0 = erlang:process_info(self(),messages), - Before = {{get(), lists:sort(ets:all()), Ms0}, pps()}, + Before = {{lget(), lists:sort(ets:all()), Ms0}, pps()}, %% Prepare the check that the qlc module does not call qlc_pt. _ = [unload_pt() || {file, Name} <- [code:is_loaded(qlc_pt)], @@ -7903,7 +7903,7 @@ run_test(Config, Extra, Body) -> wait_for_expected(R, {Strict0,PPS0}=Before, SourceFile, Wait) -> Ms = erlang:process_info(self(),messages), - After = {_,PPS1} = {{get(), lists:sort(ets:all()), Ms}, pps()}, + After = {_,PPS1} = {{lget(), lists:sort(ets:all()), Ms}, pps()}, case {R, After} of {ok, Before} -> ok; @@ -7931,6 +7931,18 @@ wait_for_expected(R, {Strict0,PPS0}=Before, SourceFile, Wait) -> expected({ok,Before}, {R,After}, SourceFile) end. +%% The qlc modules uses the process dictionary for storing names of files. +lget() -> + lists:sort([T || {K, _} = T <- get(), is_qlc_key(K)]). + +%% Copied from the qlc module. +-define(LCACHE_FILE(Ref), {Ref, '$_qlc_cache_tmpfiles_'}). +-define(MERGE_JOIN_FILE, '$_qlc_merge_join_tmpfiles_'). + +is_qlc_key(?LCACHE_FILE(_)) -> true; +is_qlc_key(?MERGE_JOIN_FILE) -> true; +is_qlc_key(_) -> false. + unload_pt() -> erlang:garbage_collect(), % get rid of references to qlc_pt... _ = code:purge(qlc_pt), diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 90f980c0e5..f43bfb4482 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -92,14 +92,11 @@ end_per_testcase(_Case, _Config) -> ok. debug() -> - Config = [{data_dir, ?MODULE_STRING++"_data"}], + Config = [{data_dir, "./" ++ ?MODULE_STRING++"_data"}], [io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) || {_,Tests} <- groups(), Test <- Tests]. -define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)). --define(TEST_EQ(B,C,D), - test(?LINE,?FUNCTION_NAME,B,C,D, true), - test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)). -define(TEST_NN(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, false), @@ -294,6 +291,7 @@ trim(_) -> ?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"), ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"), %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST([1013,101,778,101,101], [trailing, [101]], [1013,101,778]), ?TEST("aaåaa", [both, "a"], "å"), ?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"), ?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"), @@ -353,6 +351,7 @@ take(_) -> ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}), ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}), %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST([101,778], [[[101, 779]], true], {[101,778], []}), ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}), ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}), ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}), @@ -713,29 +712,123 @@ nth_lexeme(_) -> meas(Config) -> + Parent = self(), + Exec = fun() -> + DataDir0 = proplists:get_value(data_dir, Config), + DataDir = filename:join(lists:droplast(filename:split(DataDir0))), + case proplists:get_value(profile, Config, false) of + false -> + do_measure(DataDir); + eprof -> + eprof:profile(fun() -> do_measure(DataDir) end, [set_on_spawn]), + eprof:stop_profiling(), + eprof:analyze(), + eprof:stop() + end, + Parent ! {test_done, self()}, + normal + end, + ct:timetrap({minutes,2}), case ct:get_timetrap_info() of {_,{_,Scale}} when Scale > 1 -> {skip,{will_not_run_in_debug,Scale}}; - _ -> % No scaling - DataDir = proplists:get_value(data_dir, Config), - TestDir = filename:dirname(string:trim(DataDir, trailing, "/")), - do_measure(TestDir) + _ -> % No scaling, run at most 1.5 min + Tester = spawn(Exec), + receive {test_done, Tester} -> ok + after 90000 -> + io:format("Timelimit reached stopping~n",[]), + exit(Tester, die) + end, + ok end. -do_measure(TestDir) -> - File = filename:join(TestDir, ?MODULE_STRING ++ ".erl"), +do_measure(DataDir) -> + File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]), io:format("File ~s ",[File]), {ok, Bin} = file:read_file(File), io:format("~p~n",[byte_size(Bin)]), Do = fun(Name, Func, Mode) -> {N, Mean, Stddev, _} = time_func(Func, Mode, Bin), - io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n", + io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n", [Name, Mode, Mean/1000, Stddev/1000, N]) end, + Do2 = fun(Name, Func, Mode) -> + {N, Mean, Stddev, _} = time_func(Func, binary, <<>>), + io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n", + [Name, Mode, Mean/1000, Stddev/1000, N]) + end, io:format("----------------------~n"), - Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list), + + Do(old_tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list), Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end}, [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]], + + S0 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....", + S0B = <<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....">>, + Do2(old_strip_l, repeat(fun() -> string:strip(S0, left, $x) end), list), + Do2(trim_l, repeat(fun() -> string:trim(S0, leading, [$x]) end), list), + Do2(trim_l, repeat(fun() -> string:trim(S0B, leading, [$x]) end), binary), + Do2(old_strip_r, repeat(fun() -> string:strip(S0, right, $.) end), list), + Do2(trim_t, repeat(fun() -> string:trim(S0, trailing, [$.]) end), list), + Do2(trim_t, repeat(fun() -> string:trim(S0B, trailing, [$.]) end), binary), + + Do2(old_chr_sub, repeat(fun() -> string:sub_string(S0, string:chr(S0, $.)) end), list), + Do2(old_str_sub, repeat(fun() -> string:sub_string(S0, string:str(S0, [$.])) end), list), + Do2(find, repeat(fun() -> string:find(S0, [$.]) end), list), + Do2(find, repeat(fun() -> string:find(S0B, [$.]) end), binary), + Do2(old_str_sub2, repeat(fun() -> N = string:str(S0, "xy.."), + {string:sub_string(S0,1,N), string:sub_string(S0,N+4)} end), list), + Do2(split, repeat(fun() -> string:split(S0, "xy..") end), list), + Do2(split, repeat(fun() -> string:split(S0B, "xy..") end), binary), + + Do2(old_rstr_sub, repeat(fun() -> string:sub_string(S0, string:rstr(S0, [$y])) end), list), + Do2(find_t, repeat(fun() -> string:find(S0, [$y], trailing) end), list), + Do2(find_t, repeat(fun() -> string:find(S0B, [$y], trailing) end), binary), + Do2(old_rstr_sub2, repeat(fun() -> N = string:rstr(S0, "y.."), + {string:sub_string(S0,1,N), string:sub_string(S0,N+3)} end), list), + Do2(split_t, repeat(fun() -> string:split(S0, "y..", trailing) end), list), + Do2(split_t, repeat(fun() -> string:split(S0B, "y..", trailing) end), binary), + + Do2(old_span, repeat(fun() -> N=string:span(S0, [$x, $y]), + {string:sub_string(S0,1,N),string:sub_string(S0,N+1)} + end), list), + Do2(take, repeat(fun() -> string:take(S0, [$x, $y]) end), list), + Do2(take, repeat(fun() -> string:take(S0B, [$x, $y]) end), binary), + + Do2(old_cspan, repeat(fun() -> N=string:cspan(S0, [$.,$y]), + {string:sub_string(S0,1,N),string:sub_string(S0,N+1)} + end), list), + Do2(take_c, repeat(fun() -> string:take(S0, [$.,$y], true) end), list), + Do2(take_c, repeat(fun() -> string:take(S0B, [$.,$y], true) end), binary), + + Do2(old_substr, repeat(fun() -> string:substr(S0, 21, 15) end), list), + Do2(slice, repeat(fun() -> string:slice(S0, 20, 15) end), list), + Do2(slice, repeat(fun() -> string:slice(S0B, 20, 15) end), binary), + + io:format("--~n",[]), + NthTokens = {nth_lexemes, fun(Str) -> string:nth_lexeme(Str, 18000, [$\n,$\r]) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- [list, binary]], + Do2(take_t, repeat(fun() -> string:take(S0, [$.,$y], false, trailing) end), list), + Do2(take_t, repeat(fun() -> string:take(S0B, [$.,$y], false, trailing) end), binary), + Do2(take_tc, repeat(fun() -> string:take(S0, [$x], true, trailing) end), list), + Do2(take_tc, repeat(fun() -> string:take(S0B, [$x], true, trailing) end), binary), + + Length = {length, fun(Str) -> string:length(Str) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- [list, binary]], + + Reverse = {reverse, fun(Str) -> string:reverse(Str) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- [list, binary]], + + ok. + +repeat(F) -> + fun(_) -> repeat_1(F,20000) end. + +repeat_1(F, N) when N > 0 -> + F(), + repeat_1(F, N-1); +repeat_1(_, _) -> + erlang:garbage_collect(), ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -865,8 +958,6 @@ check_types_1({list, _},{list, undefined}) -> ok; check_types_1({list, _},{list, codepoints}) -> ok; -check_types_1({list, _},{list, {list, codepoints}}) -> - ok; check_types_1({list, {list, _}},{list, {list, codepoints}}) -> ok; check_types_1(mixed,_) -> @@ -947,7 +1038,7 @@ time_func(Fun, Mode, Bin) -> end), receive {Pid,Msg} -> Msg end. -time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 -> +time_func(N,Sum,SumSq, Fun, Str, _) when N < 20 -> {Time, Res} = timer:tc(fun() -> Fun(Str) end), time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); time_func(N,Sum,SumSq, _, _, Res) -> diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index 03c24c7027..a89627eba5 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -310,12 +310,23 @@ get(_) -> add_get_tests. count(Config) -> + Parent = self(), + Exec = fun() -> + do_measure(Config), + Parent ! {test_done, self()} + end, ct:timetrap({minutes,5}), case ct:get_timetrap_info() of - {_,{_,Scale}} -> + {_,{_,Scale}} when Scale > 1 -> {skip,{measurments_skipped_debug,Scale}}; - _ -> % No scaling - do_measure(Config) + _ -> % No scaling, run at most 2 min + Tester = spawn(Exec), + receive {test_done, Tester} -> ok + after 120000 -> + io:format("Timelimit reached stopping~n",[]), + exit(Tester, die) + end, + ok end. do_measure(Config) -> diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript index fefd7d3b70..73c351e1af 100755 --- a/lib/stdlib/uc_spec/gen_unicode_mod.escript +++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript @@ -170,7 +170,7 @@ gen_header(Fd) -> io:put_chars(Fd, "-export([spec_version/0, lookup/1, get_case/1]).\n"), io:put_chars(Fd, "-inline([class/1]).\n"), io:put_chars(Fd, "-compile(nowarn_unused_vars).\n"), - io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc_prepend/2, gc_e_cont/2]}).\n"), + io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc/1, gc_prepend/2, gc_e_cont/2]}).\n"), io:put_chars(Fd, "-type gc() :: char()|[char()].\n\n\n"), ok. @@ -240,7 +240,7 @@ gen_norm(Fd) -> "-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfd(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [decompose(GC)|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -250,7 +250,7 @@ gen_norm(Fd) -> "-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkd(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [decompose_compat(GC)|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -260,7 +260,7 @@ gen_norm(Fd) -> "-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfc(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 255 -> [GC|R];\n" + " [GC|R] when GC < 256 -> [GC|R];\n" " [GC|Str] -> [compose(decompose(GC))|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -270,7 +270,7 @@ gen_norm(Fd) -> "-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkc(Str0) ->\n" " case gc(Str0) of\n" - " [GC|R] when GC < 127 -> [GC|R];\n" + " [GC|R] when GC < 128 -> [GC|R];\n" " [GC|Str] -> [compose_compat_0(decompose_compat(GC))|Str];\n" " [] -> [];\n" " {error,_}=Error -> Error\n end.\n\n" @@ -476,13 +476,30 @@ gen_gc(Fd, GBP) -> "-spec gc(String::unicode:chardata()) ->" " maybe_improper_list() | {error, unicode:chardata()}.\n"), io:put_chars(Fd, + "gc([CP1, CP2|_]=T)\n" + " when CP1 < 256, CP2 < 256, CP1 =/= $\r -> %% Ascii Fast path\n" + " T;\n" + "gc(<<CP1/utf8, Rest/binary>>) ->\n" + " if CP1 < 256, CP1 =/= $\r ->\n" + " case Rest of\n" + " <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n" + " [CP1|Rest];\n" + " _ -> gc_1([CP1|Rest])\n" + " end;\n" + " true -> gc_1([CP1|Rest])\n" + " end;\n" "gc(Str) ->\n" " gc_1(cp(Str)).\n\n" "gc_1([$\\r|R0] = R) ->\n" " case cp(R0) of % Don't break CRLF\n" " [$\\n|R1] -> [[$\\r,$\\n]|R1];\n" " _ -> R\n" - " end;\n"), + " end;\n" + %% "gc_1([CP1, CP2|_]=T) when CP1 < 256, CP2 < 256 ->\n" + %% " T; %% Fast path\n" + %% "gc_1([CP1|<<CP2/utf8, _/binary>>]=T) when CP1 < 256, CP2 < 256 ->\n" + %% " T; %% Fast path\n" + ), io:put_chars(Fd, "%% Handle control\n"), GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end, @@ -490,7 +507,7 @@ gen_gc(Fd, GBP) -> [R1,R2,R3|Crs] = CRs0, [GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}], %%GenControl(R1),GenControl(R2),GenControl(R3), - io:format(Fd, "gc_1([CP|R]) when CP < 255 -> gc_extend(R,CP);\n", []), + io:format(Fd, "gc_1([CP|R]) when CP < 256 -> gc_extend(R,CP);\n", []), [GenControl(CP) || CP <- Crs], %% One clause per CP %% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)), |